Data Wrangling with Pandas - Life Expectancy

In [1]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
In [2]:
df=pd.read_csv('/Users/mekki/Python_Projects_Datasets/Practical Data Wrangling with Pandas/00-Life_Expectancy_Data.csv')
df.head()
Out[2]:
Year Status Life expectancy Adult Mortality infant deaths Alcohol percentage expenditure Hepatitis B Measles BMI ... Polio Total expenditure Diphtheria HIV/AIDS GDP Population thinness 1-19 years thinness 5-9 years Income composition of resources Schooling
0 2015 Developing 65.0 263.0 62 0.01 71.279624 65.0 1154 19.1 ... 6.0 8.16 65.0 0.1 584.259210 33736494.0 17.2 17.3 0.479 10.1
1 2014 Developing 59.9 271.0 64 0.01 73.523582 62.0 492 18.6 ... 58.0 8.18 62.0 0.1 612.696514 327582.0 17.5 17.5 0.476 10.0
2 2013 Developing 59.9 268.0 66 0.01 73.219243 64.0 430 18.1 ... 62.0 8.13 64.0 0.1 631.744976 31731688.0 17.7 17.7 0.470 9.9
3 2012 Developing 59.5 272.0 69 0.01 78.184215 67.0 2787 17.6 ... 67.0 8.52 67.0 0.1 669.959000 3696958.0 17.9 18.0 0.463 9.8
4 2011 Developing 59.2 275.0 71 0.01 7.097109 68.0 3013 17.2 ... 68.0 7.87 68.0 0.1 63.537231 2978599.0 18.2 18.2 0.454 9.5

5 rows × 21 columns

In [3]:
df.shape
Out[3]:
(2938, 21)
In [4]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2938 entries, 0 to 2937
Data columns (total 21 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Year                             2938 non-null   int64  
 1   Status                           2938 non-null   object 
 2   Life expectancy                  2928 non-null   float64
 3   Adult Mortality                  2928 non-null   float64
 4   infant deaths                    2938 non-null   int64  
 5   Alcohol                          2744 non-null   float64
 6   percentage expenditure           2938 non-null   float64
 7   Hepatitis B                      2385 non-null   float64
 8   Measles                          2938 non-null   int64  
 9    BMI                             2904 non-null   float64
 10  under-five deaths                2938 non-null   int64  
 11  Polio                            2919 non-null   float64
 12  Total expenditure                2712 non-null   float64
 13  Diphtheria                       2919 non-null   float64
 14   HIV/AIDS                        2938 non-null   float64
 15  GDP                              2490 non-null   float64
 16  Population                       2286 non-null   float64
 17   thinness  1-19 years            2904 non-null   float64
 18   thinness 5-9 years              2904 non-null   float64
 19  Income composition of resources  2771 non-null   float64
 20  Schooling                        2775 non-null   float64
dtypes: float64(16), int64(4), object(1)
memory usage: 482.1+ KB
In [5]:
df.isnull().sum().sum()
Out[5]:
2563
In [6]:
df.isnull().sum()
Out[6]:
Year                                 0
Status                               0
Life expectancy                     10
Adult Mortality                     10
infant deaths                        0
Alcohol                            194
percentage expenditure               0
Hepatitis B                        553
Measles                              0
 BMI                                34
under-five deaths                    0
Polio                               19
Total expenditure                  226
Diphtheria                          19
 HIV/AIDS                            0
GDP                                448
Population                         652
 thinness  1-19 years               34
 thinness 5-9 years                 34
Income composition of resources    167
Schooling                          163
dtype: int64
In [7]:
df.index
Out[7]:
RangeIndex(start=0, stop=2938, step=1)
In [8]:
df.dtypes.index
Out[8]:
Index(['Year', 'Status', 'Life expectancy ', 'Adult Mortality',
       'infant deaths', 'Alcohol', 'percentage expenditure', 'Hepatitis B',
       'Measles ', ' BMI ', 'under-five deaths ', 'Polio', 'Total expenditure',
       'Diphtheria ', ' HIV/AIDS', 'GDP', 'Population',
       ' thinness  1-19 years', ' thinness 5-9 years',
       'Income composition of resources', 'Schooling'],
      dtype='object')
In [9]:
type(df)
Out[9]:
pandas.core.frame.DataFrame
In [10]:
round(df.describe(),2)
Out[10]:
Year Life expectancy Adult Mortality infant deaths Alcohol percentage expenditure Hepatitis B Measles BMI under-five deaths Polio Total expenditure Diphtheria HIV/AIDS GDP Population thinness 1-19 years thinness 5-9 years Income composition of resources Schooling
count 2938.00 2928.00 2928.00 2938.00 2744.00 2938.00 2385.00 2938.00 2904.00 2938.00 2919.00 2712.00 2919.00 2938.00 2490.00 2.286000e+03 2904.00 2904.00 2771.00 2775.00
mean 2007.52 69.22 164.80 30.30 4.60 738.25 80.94 2419.59 38.32 42.04 82.55 5.94 82.32 1.74 7483.16 1.275338e+07 4.84 4.87 0.63 11.99
std 4.61 9.52 124.29 117.93 4.05 1987.91 25.07 11467.27 20.04 160.45 23.43 2.50 23.72 5.08 14270.17 6.101210e+07 4.42 4.51 0.21 3.36
min 2000.00 36.30 1.00 0.00 0.01 0.00 1.00 0.00 1.00 0.00 3.00 0.37 2.00 0.10 1.68 3.400000e+01 0.10 0.10 0.00 0.00
25% 2004.00 63.10 74.00 0.00 0.88 4.69 77.00 0.00 19.30 0.00 78.00 4.26 78.00 0.10 463.94 1.957932e+05 1.60 1.50 0.49 10.10
50% 2008.00 72.10 144.00 3.00 3.76 64.91 92.00 17.00 43.50 4.00 93.00 5.76 93.00 0.10 1766.95 1.386542e+06 3.30 3.30 0.68 12.30
75% 2012.00 75.70 228.00 22.00 7.70 441.53 97.00 360.25 56.20 28.00 97.00 7.49 97.00 0.80 5910.81 7.420359e+06 7.20 7.20 0.78 14.30
max 2015.00 89.00 723.00 1800.00 17.87 19479.91 99.00 212183.00 87.30 2500.00 99.00 17.60 99.00 50.60 119172.74 1.293859e+09 27.70 28.60 0.95 20.70
In [11]:
nc = df.select_dtypes(include=['number'])
round(nc.mean())
Out[11]:
Year                                   2008.0
Life expectancy                          69.0
Adult Mortality                         165.0
infant deaths                            30.0
Alcohol                                   5.0
percentage expenditure                  738.0
Hepatitis B                              81.0
Measles                                2420.0
 BMI                                     38.0
under-five deaths                        42.0
Polio                                    83.0
Total expenditure                         6.0
Diphtheria                               82.0
 HIV/AIDS                                 2.0
GDP                                    7483.0
Population                         12753375.0
 thinness  1-19 years                     5.0
 thinness 5-9 years                       5.0
Income composition of resources           1.0
Schooling                                12.0
dtype: float64
In [12]:
round(df['GDP'].describe(),2)
Out[12]:
count      2490.00
mean       7483.16
std       14270.17
min           1.68
25%         463.94
50%        1766.95
75%        5910.81
max      119172.74
Name: GDP, dtype: float64
In [13]:
round(df['GDP'].describe().loc[['mean','max','min']],2)
Out[13]:
mean      7483.16
max     119172.74
min          1.68
Name: GDP, dtype: float64

Dealing with Missing Data

In [14]:
import seaborn as sns
In [15]:
df
Out[15]:
Year Status Life expectancy Adult Mortality infant deaths Alcohol percentage expenditure Hepatitis B Measles BMI ... Polio Total expenditure Diphtheria HIV/AIDS GDP Population thinness 1-19 years thinness 5-9 years Income composition of resources Schooling
0 2015 Developing 65.0 263.0 62 0.01 71.279624 65.0 1154 19.1 ... 6.0 8.16 65.0 0.1 584.259210 33736494.0 17.2 17.3 0.479 10.1
1 2014 Developing 59.9 271.0 64 0.01 73.523582 62.0 492 18.6 ... 58.0 8.18 62.0 0.1 612.696514 327582.0 17.5 17.5 0.476 10.0
2 2013 Developing 59.9 268.0 66 0.01 73.219243 64.0 430 18.1 ... 62.0 8.13 64.0 0.1 631.744976 31731688.0 17.7 17.7 0.470 9.9
3 2012 Developing 59.5 272.0 69 0.01 78.184215 67.0 2787 17.6 ... 67.0 8.52 67.0 0.1 669.959000 3696958.0 17.9 18.0 0.463 9.8
4 2011 Developing 59.2 275.0 71 0.01 7.097109 68.0 3013 17.2 ... 68.0 7.87 68.0 0.1 63.537231 2978599.0 18.2 18.2 0.454 9.5
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2933 2004 Developing 44.3 723.0 27 4.36 0.000000 68.0 31 27.1 ... 67.0 7.13 65.0 33.6 454.366654 12777511.0 9.4 9.4 0.407 9.2
2934 2003 Developing 44.5 715.0 26 4.06 0.000000 7.0 998 26.7 ... 7.0 6.52 68.0 36.7 453.351155 12633897.0 9.8 9.9 0.418 9.5
2935 2002 Developing 44.8 73.0 25 4.43 0.000000 73.0 304 26.3 ... 73.0 6.53 71.0 39.8 57.348340 125525.0 1.2 1.3 0.427 10.0
2936 2001 Developing 45.3 686.0 25 1.72 0.000000 76.0 529 25.9 ... 76.0 6.16 75.0 42.1 548.587312 12366165.0 1.6 1.7 0.427 9.8
2937 2000 Developing 46.0 665.0 24 1.68 0.000000 79.0 1483 25.5 ... 78.0 7.10 78.0 43.5 547.358879 12222251.0 11.0 11.2 0.434 9.8

2938 rows × 21 columns

In [16]:
df.isnull()
Out[16]:
Year Status Life expectancy Adult Mortality infant deaths Alcohol percentage expenditure Hepatitis B Measles BMI ... Polio Total expenditure Diphtheria HIV/AIDS GDP Population thinness 1-19 years thinness 5-9 years Income composition of resources Schooling
0 False False False False False False False False False False ... False False False False False False False False False False
1 False False False False False False False False False False ... False False False False False False False False False False
2 False False False False False False False False False False ... False False False False False False False False False False
3 False False False False False False False False False False ... False False False False False False False False False False
4 False False False False False False False False False False ... False False False False False False False False False False
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2933 False False False False False False False False False False ... False False False False False False False False False False
2934 False False False False False False False False False False ... False False False False False False False False False False
2935 False False False False False False False False False False ... False False False False False False False False False False
2936 False False False False False False False False False False ... False False False False False False False False False False
2937 False False False False False False False False False False ... False False False False False False False False False False

2938 rows × 21 columns

In [17]:
df.isnull().sum()
Out[17]:
Year                                 0
Status                               0
Life expectancy                     10
Adult Mortality                     10
infant deaths                        0
Alcohol                            194
percentage expenditure               0
Hepatitis B                        553
Measles                              0
 BMI                                34
under-five deaths                    0
Polio                               19
Total expenditure                  226
Diphtheria                          19
 HIV/AIDS                            0
GDP                                448
Population                         652
 thinness  1-19 years               34
 thinness 5-9 years                 34
Income composition of resources    167
Schooling                          163
dtype: int64
In [18]:
df.isnull().sum().sum()
Out[18]:
2563
In [19]:
from matplotlib.colors import ListedColormap
custom_colors = ['#675A6E', '#9A87A5', '#CDB4DB', '#E6DAED', '#F3EDF6']
custom_cmap = ListedColormap(custom_colors)
sns.heatmap(df.isnull(),yticklabels=False, cbar=True,cmap=custom_cmap)
Out[19]:
<Axes: >
No description has been provided for this image

First Strategy

In [20]:
#drop any row that conatins a Null Values
df.dropna(how='any',inplace=True)
df
Out[20]:
Year Status Life expectancy Adult Mortality infant deaths Alcohol percentage expenditure Hepatitis B Measles BMI ... Polio Total expenditure Diphtheria HIV/AIDS GDP Population thinness 1-19 years thinness 5-9 years Income composition of resources Schooling
0 2015 Developing 65.0 263.0 62 0.01 71.279624 65.0 1154 19.1 ... 6.0 8.16 65.0 0.1 584.259210 33736494.0 17.2 17.3 0.479 10.1
1 2014 Developing 59.9 271.0 64 0.01 73.523582 62.0 492 18.6 ... 58.0 8.18 62.0 0.1 612.696514 327582.0 17.5 17.5 0.476 10.0
2 2013 Developing 59.9 268.0 66 0.01 73.219243 64.0 430 18.1 ... 62.0 8.13 64.0 0.1 631.744976 31731688.0 17.7 17.7 0.470 9.9
3 2012 Developing 59.5 272.0 69 0.01 78.184215 67.0 2787 17.6 ... 67.0 8.52 67.0 0.1 669.959000 3696958.0 17.9 18.0 0.463 9.8
4 2011 Developing 59.2 275.0 71 0.01 7.097109 68.0 3013 17.2 ... 68.0 7.87 68.0 0.1 63.537231 2978599.0 18.2 18.2 0.454 9.5
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2933 2004 Developing 44.3 723.0 27 4.36 0.000000 68.0 31 27.1 ... 67.0 7.13 65.0 33.6 454.366654 12777511.0 9.4 9.4 0.407 9.2
2934 2003 Developing 44.5 715.0 26 4.06 0.000000 7.0 998 26.7 ... 7.0 6.52 68.0 36.7 453.351155 12633897.0 9.8 9.9 0.418 9.5
2935 2002 Developing 44.8 73.0 25 4.43 0.000000 73.0 304 26.3 ... 73.0 6.53 71.0 39.8 57.348340 125525.0 1.2 1.3 0.427 10.0
2936 2001 Developing 45.3 686.0 25 1.72 0.000000 76.0 529 25.9 ... 76.0 6.16 75.0 42.1 548.587312 12366165.0 1.6 1.7 0.427 9.8
2937 2000 Developing 46.0 665.0 24 1.68 0.000000 79.0 1483 25.5 ... 78.0 7.10 78.0 43.5 547.358879 12222251.0 11.0 11.2 0.434 9.8

1649 rows × 21 columns

In [21]:
df.isnull().sum().sum()
Out[21]:
0

Second Strategy

In [22]:
#Reloading the df
df=pd.read_csv('/Users/mekki/Python_Projects_Datasets/Practical Data Wrangling with Pandas/00-Life_Expectancy_Data.csv')
In [23]:
#Finding number of total null values
df.isnull().sum().sum()
Out[23]:
2563
In [24]:
#Finding number of null values in GDP
df['GDP'].isnull().sum().sum()
Out[24]:
448
In [25]:
#Finding GDP mean
df['GDP'].mean()
Out[25]:
7483.158469138474
In [26]:
#Replace GDP by its mean
df['GDP'].fillna(df['GDP'].mean(), inplace=True)
In [27]:
df['GDP'].isnull().sum().sum()
Out[27]:
0
In [28]:
#Plotting heatmap to check GDP column
sns.heatmap(df.isnull(),yticklabels=False, cbar=True,cmap="Reds")
Out[28]:
<Axes: >
No description has been provided for this image

Calculate the median total expenditure. Use the calculated median values to fill out missing data in the total expenditure column. Confirm that the process is successful

In [29]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2938 entries, 0 to 2937
Data columns (total 21 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Year                             2938 non-null   int64  
 1   Status                           2938 non-null   object 
 2   Life expectancy                  2928 non-null   float64
 3   Adult Mortality                  2928 non-null   float64
 4   infant deaths                    2938 non-null   int64  
 5   Alcohol                          2744 non-null   float64
 6   percentage expenditure           2938 non-null   float64
 7   Hepatitis B                      2385 non-null   float64
 8   Measles                          2938 non-null   int64  
 9    BMI                             2904 non-null   float64
 10  under-five deaths                2938 non-null   int64  
 11  Polio                            2919 non-null   float64
 12  Total expenditure                2712 non-null   float64
 13  Diphtheria                       2919 non-null   float64
 14   HIV/AIDS                        2938 non-null   float64
 15  GDP                              2938 non-null   float64
 16  Population                       2286 non-null   float64
 17   thinness  1-19 years            2904 non-null   float64
 18   thinness 5-9 years              2904 non-null   float64
 19  Income composition of resources  2771 non-null   float64
 20  Schooling                        2775 non-null   float64
dtypes: float64(16), int64(4), object(1)
memory usage: 482.1+ KB
In [30]:
df['Total expenditure'].median()
Out[30]:
5.755
In [31]:
df['Total expenditure'].isnull().sum()
Out[31]:
226
In [32]:
#Replace Total expenditure by its median
df['Total expenditure'].fillna(df['Total expenditure'].median(), inplace=True)
In [33]:
df['Total expenditure'].isnull().sum()
Out[33]:
0

Perform One-Hot Encoding

In [34]:
#Convert categorical data to numerical data
In [35]:
df
Out[35]:
Year Status Life expectancy Adult Mortality infant deaths Alcohol percentage expenditure Hepatitis B Measles BMI ... Polio Total expenditure Diphtheria HIV/AIDS GDP Population thinness 1-19 years thinness 5-9 years Income composition of resources Schooling
0 2015 Developing 65.0 263.0 62 0.01 71.279624 65.0 1154 19.1 ... 6.0 8.16 65.0 0.1 584.259210 33736494.0 17.2 17.3 0.479 10.1
1 2014 Developing 59.9 271.0 64 0.01 73.523582 62.0 492 18.6 ... 58.0 8.18 62.0 0.1 612.696514 327582.0 17.5 17.5 0.476 10.0
2 2013 Developing 59.9 268.0 66 0.01 73.219243 64.0 430 18.1 ... 62.0 8.13 64.0 0.1 631.744976 31731688.0 17.7 17.7 0.470 9.9
3 2012 Developing 59.5 272.0 69 0.01 78.184215 67.0 2787 17.6 ... 67.0 8.52 67.0 0.1 669.959000 3696958.0 17.9 18.0 0.463 9.8
4 2011 Developing 59.2 275.0 71 0.01 7.097109 68.0 3013 17.2 ... 68.0 7.87 68.0 0.1 63.537231 2978599.0 18.2 18.2 0.454 9.5
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2933 2004 Developing 44.3 723.0 27 4.36 0.000000 68.0 31 27.1 ... 67.0 7.13 65.0 33.6 454.366654 12777511.0 9.4 9.4 0.407 9.2
2934 2003 Developing 44.5 715.0 26 4.06 0.000000 7.0 998 26.7 ... 7.0 6.52 68.0 36.7 453.351155 12633897.0 9.8 9.9 0.418 9.5
2935 2002 Developing 44.8 73.0 25 4.43 0.000000 73.0 304 26.3 ... 73.0 6.53 71.0 39.8 57.348340 125525.0 1.2 1.3 0.427 10.0
2936 2001 Developing 45.3 686.0 25 1.72 0.000000 76.0 529 25.9 ... 76.0 6.16 75.0 42.1 548.587312 12366165.0 1.6 1.7 0.427 9.8
2937 2000 Developing 46.0 665.0 24 1.68 0.000000 79.0 1483 25.5 ... 78.0 7.10 78.0 43.5 547.358879 12222251.0 11.0 11.2 0.434 9.8

2938 rows × 21 columns

In [36]:
df['Status'].unique()
Out[36]:
array(['Developing', 'Developed'], dtype=object)
In [37]:
Status_Encoded = pd.get_dummies(df['Status']).astype(int)
In [38]:
Status_Encoded
Out[38]:
Developed Developing
0 0 1
1 0 1
2 0 1
3 0 1
4 0 1
... ... ...
2933 0 1
2934 0 1
2935 0 1
2936 0 1
2937 0 1

2938 rows × 2 columns

In [39]:
#Checking for Developed Countries
Status_Encoded.query('Developed == 1').head(5)
Out[39]:
Developed Developing
112 1 0
113 1 0
114 1 0
115 1 0
116 1 0

Perform Normalization

In [40]:
#We need to perform Feature Scaling
In [41]:
from sklearn.preprocessing import MinMaxScaler
In [42]:
#Reloading the df
df=pd.read_csv('/Users/mekki/Python_Projects_Datasets/Practical Data Wrangling with Pandas/00-Life_Expectancy_Data.csv')
In [43]:
df['Life expectancy '].values
Out[43]:
array([65. , 59.9, 59.9, ..., 44.8, 45.3, 46. ])
In [44]:
scaler=MinMaxScaler()
df['Life expectancy ']=scaler.fit_transform(df['Life expectancy '].values.reshape(-1,1))
In [45]:
df['Life expectancy '].values
Out[45]:
array([0.54459203, 0.44781784, 0.44781784, ..., 0.16129032, 0.17077799,
       0.18406072])
In [46]:
round(df['Life expectancy '].describe())
Out[46]:
count    2928.0
mean        1.0
std         0.0
min         0.0
25%         1.0
50%         1.0
75%         1.0
max         1.0
Name: Life expectancy , dtype: float64

Perform Standardization

In [47]:
#Mean of 0 and STD of 1
In [48]:
from sklearn.preprocessing import StandardScaler
In [49]:
scaler=StandardScaler()
In [50]:
#Reloading the df
df=pd.read_csv('/Users/mekki/Python_Projects_Datasets/Practical Data Wrangling with Pandas/00-Life_Expectancy_Data.csv')
In [51]:
df['Life expectancy '].values
Out[51]:
array([65. , 59.9, 59.9, ..., 44.8, 45.3, 46. ])
In [52]:
df['Life expectancy ']=scaler.fit_transform(df['Life expectancy '].values.reshape(-1,1))
In [53]:
df['Life expectancy '].values
Out[53]:
array([-0.4436909 , -0.97927911, -0.97927911, ..., -2.56504028,
       -2.51253163, -2.43901952])
In [54]:
round(df['Life expectancy '].describe())
Out[54]:
count    2928.0
mean       -0.0
std         1.0
min        -3.0
25%        -1.0
50%         0.0
75%         1.0
max         2.0
Name: Life expectancy , dtype: float64

Pandas With Functions

In [55]:
#Reloading the df
df=pd.read_csv('/Users/mekki/Python_Projects_Datasets/Practical Data Wrangling with Pandas/00-Life_Expectancy_Data.csv')
In [56]:
df
Out[56]:
Year Status Life expectancy Adult Mortality infant deaths Alcohol percentage expenditure Hepatitis B Measles BMI ... Polio Total expenditure Diphtheria HIV/AIDS GDP Population thinness 1-19 years thinness 5-9 years Income composition of resources Schooling
0 2015 Developing 65.0 263.0 62 0.01 71.279624 65.0 1154 19.1 ... 6.0 8.16 65.0 0.1 584.259210 33736494.0 17.2 17.3 0.479 10.1
1 2014 Developing 59.9 271.0 64 0.01 73.523582 62.0 492 18.6 ... 58.0 8.18 62.0 0.1 612.696514 327582.0 17.5 17.5 0.476 10.0
2 2013 Developing 59.9 268.0 66 0.01 73.219243 64.0 430 18.1 ... 62.0 8.13 64.0 0.1 631.744976 31731688.0 17.7 17.7 0.470 9.9
3 2012 Developing 59.5 272.0 69 0.01 78.184215 67.0 2787 17.6 ... 67.0 8.52 67.0 0.1 669.959000 3696958.0 17.9 18.0 0.463 9.8
4 2011 Developing 59.2 275.0 71 0.01 7.097109 68.0 3013 17.2 ... 68.0 7.87 68.0 0.1 63.537231 2978599.0 18.2 18.2 0.454 9.5
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2933 2004 Developing 44.3 723.0 27 4.36 0.000000 68.0 31 27.1 ... 67.0 7.13 65.0 33.6 454.366654 12777511.0 9.4 9.4 0.407 9.2
2934 2003 Developing 44.5 715.0 26 4.06 0.000000 7.0 998 26.7 ... 7.0 6.52 68.0 36.7 453.351155 12633897.0 9.8 9.9 0.418 9.5
2935 2002 Developing 44.8 73.0 25 4.43 0.000000 73.0 304 26.3 ... 73.0 6.53 71.0 39.8 57.348340 125525.0 1.2 1.3 0.427 10.0
2936 2001 Developing 45.3 686.0 25 1.72 0.000000 76.0 529 25.9 ... 76.0 6.16 75.0 42.1 548.587312 12366165.0 1.6 1.7 0.427 9.8
2937 2000 Developing 46.0 665.0 24 1.68 0.000000 79.0 1483 25.5 ... 78.0 7.10 78.0 43.5 547.358879 12222251.0 11.0 11.2 0.434 9.8

2938 rows × 21 columns

In [57]:
#Define a function
def percentage_expenditure_update(balance):
    return balance + 5
In [58]:
df['percentage expenditure']=df['percentage expenditure'].apply(percentage_expenditure_update)
df
Out[58]:
Year Status Life expectancy Adult Mortality infant deaths Alcohol percentage expenditure Hepatitis B Measles BMI ... Polio Total expenditure Diphtheria HIV/AIDS GDP Population thinness 1-19 years thinness 5-9 years Income composition of resources Schooling
0 2015 Developing 65.0 263.0 62 0.01 76.279624 65.0 1154 19.1 ... 6.0 8.16 65.0 0.1 584.259210 33736494.0 17.2 17.3 0.479 10.1
1 2014 Developing 59.9 271.0 64 0.01 78.523582 62.0 492 18.6 ... 58.0 8.18 62.0 0.1 612.696514 327582.0 17.5 17.5 0.476 10.0
2 2013 Developing 59.9 268.0 66 0.01 78.219243 64.0 430 18.1 ... 62.0 8.13 64.0 0.1 631.744976 31731688.0 17.7 17.7 0.470 9.9
3 2012 Developing 59.5 272.0 69 0.01 83.184215 67.0 2787 17.6 ... 67.0 8.52 67.0 0.1 669.959000 3696958.0 17.9 18.0 0.463 9.8
4 2011 Developing 59.2 275.0 71 0.01 12.097109 68.0 3013 17.2 ... 68.0 7.87 68.0 0.1 63.537231 2978599.0 18.2 18.2 0.454 9.5
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2933 2004 Developing 44.3 723.0 27 4.36 5.000000 68.0 31 27.1 ... 67.0 7.13 65.0 33.6 454.366654 12777511.0 9.4 9.4 0.407 9.2
2934 2003 Developing 44.5 715.0 26 4.06 5.000000 7.0 998 26.7 ... 7.0 6.52 68.0 36.7 453.351155 12633897.0 9.8 9.9 0.418 9.5
2935 2002 Developing 44.8 73.0 25 4.43 5.000000 73.0 304 26.3 ... 73.0 6.53 71.0 39.8 57.348340 125525.0 1.2 1.3 0.427 10.0
2936 2001 Developing 45.3 686.0 25 1.72 5.000000 76.0 529 25.9 ... 76.0 6.16 75.0 42.1 548.587312 12366165.0 1.6 1.7 0.427 9.8
2937 2000 Developing 46.0 665.0 24 1.68 5.000000 79.0 1483 25.5 ... 78.0 7.10 78.0 43.5 547.358879 12222251.0 11.0 11.2 0.434 9.8

2938 rows × 21 columns