import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# used below — consider a narrower filter (specific category/module).
warnings.filterwarnings("ignore")

import pandas as pd
# Load the kyphosis CSV into a DataFrame.
# NOTE(review): absolute, machine-specific path — this only runs where that
# exact directory exists; consider a path relative to the project.
df=pd.read_csv('/Users/mekki/Python_Projects_Datasets/Practical Data Wrangling with Pandas/00-kyphosis.csv')

# (rows, columns) of the loaded dataset.
df.shape

(81, 4)

# Print the column dtypes and non-null counts. `info` is a method: without the
# call parentheses the cell only displays the bound-method repr instead of the
# summary.
df.info()

<bound method DataFrame.info of    Kyphosis  Age  Number  Start
0    absent   71       3      5
1    absent  158       3     14
2   present  128       4      5
3    absent    2       5      1
4    absent    1       4     15
..      ...  ...     ...    ...
76  present  157       3     13
77   absent   26       7     13
78   absent  120       2     13
79  present   42       7      6
80   absent   36       4     13

[81 rows x 4 columns]>

# Total number of missing values across the whole DataFrame
# (per-column counts, then summed).
df.isna().sum().sum()

0

# Summary statistics, rounded to 2 decimals.
df.describe().round(2)

# "Age" is recorded in months, so divide the summary statistics by 12
# to express them in years.
mmm = df['Age'].describe().loc[['mean', 'max', 'min']].round(2)
mmm_years = (mmm / 12).round(2)
mmm_years

mean     6.97
max     17.17
min      0.08
Name: Age, dtype: float64

import matplotlib.pyplot as plt
import seaborn as sns

# Correlation heatmap restricted to the numeric columns; each cell is
# annotated with its correlation value.
df2 = df.select_dtypes(include='number')
sns.heatmap(data=df2.corr(), annot=True)
plt.show()

# Cast "Age" to float64, then confirm the resulting dtypes.
df['Age'] = df['Age'].astype("float64")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 81 entries, 0 to 80
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Kyphosis  81 non-null     object 
 1   Age       81 non-null     float64
 2   Number    81 non-null     int64  
 3   Start     81 non-null     int64  
dtypes: float64(1), int64(2), object(1)
memory usage: 2.7+ KB

def months_to_years(age):
    """Convert an age expressed in months to years.

    Performs a plain true division by 12, so it accepts any operand that
    supports ``/`` with an integer and returns the result unrounded.
    """
    years = age / 12
    return years

# Add the age in years (2 decimals) next to the month-based "Age" column.
# The helper is a plain division, so it is called once on the whole Series
# (vectorized) instead of once per row through `apply`; the values are the same.
df['Age in Years'] = months_to_years(df['Age']).round(2)
df

# Row(s) holding the largest "Age" value.
df.loc[df['Age'].eq(df['Age'].max())]

# Row(s) holding the smallest "Age" value.
df.loc[df['Age'].eq(df['Age'].min())]

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

# Min-max scale "Age" in place (the original month values are overwritten).
# The column is reshaped to (n_rows, 1) because the scaler works on 2-D input.
scaler = MinMaxScaler()
df['Age'] = scaler.fit_transform(df['Age'].to_numpy().reshape(-1, 1))

# Summary statistics after the scaling step above, rounded to 2 decimals.
df.describe().round(2)

# Standardise "Age" in place. Note that the column already holds the min-max
# scaled values at this point, and `scaler` is rebound to the new scaler.
scaler = StandardScaler()
df['Age'] = scaler.fit_transform(df['Age'].to_numpy().reshape(-1, 1))

# Summary statistics after standardising "Age", rounded to 2 decimals.
df.describe().round(2)

	Age	Number	Start
count	81.00	81.00	81.00
mean	83.65	4.05	11.49
std	58.10	1.62	4.88
min	1.00	2.00	1.00
25%	26.00	3.00	9.00
50%	87.00	4.00	13.00
75%	130.00	5.00	16.00
max	206.00	10.00	18.00

	Age	Number	Start	Age in Years
count	81.00	81.00	81.00	81.00
mean	0.40	4.05	11.49	6.97
std	0.28	1.62	4.88	4.84
min	0.00	2.00	1.00	0.08
25%	0.12	3.00	9.00	2.17
50%	0.42	4.00	13.00	7.25
75%	0.63	5.00	16.00	10.83
max	1.00	10.00	18.00	17.17

	Age	Number	Start	Age in Years
count	81.00	81.00	81.00	81.00
mean	0.00	4.05	11.49	6.97
std	1.01	1.62	4.88	4.84
min	-1.43	2.00	1.00	0.08
25%	-1.00	3.00	9.00	2.17
50%	0.06	4.00	13.00	7.25
75%	0.80	5.00	16.00	10.83
max	2.12	10.00	18.00	17.17

	Kyphosis	Age	Number	Start	Age in Years
0	absent	71.0	3	5	5.92
1	absent	158.0	3	14	13.17
2	present	128.0	4	5	10.67
3	absent	2.0	5	1	0.17
4	absent	1.0	4	15	0.08
...	...	...	...	...	...
76	present	157.0	3	13	13.08
77	absent	26.0	7	13	2.17
78	absent	120.0	2	13	10.00
79	present	42.0	7	6	3.50
80	absent	36.0	4	13	3.00