Covid-19 Data Visualization with Python

Task 1. Discovering the Dataset

In [1]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
In [2]:
# Source CSV with one row per (Date, Country) and the columns
# Confirmed / Recovered / Deaths.
dataset_url = 'https://raw.githubusercontent.com/datasets/covid-19/master/data/countries-aggregated.csv'

# Dates are left as plain strings (no parse_dates is requested).
df = pd.read_csv(filepath_or_buffer=dataset_url)
df
Out[2]:
Date Country Confirmed Recovered Deaths
0 2020-01-22 Afghanistan 0 0 0
1 2020-01-23 Afghanistan 0 0 0
2 2020-01-24 Afghanistan 0 0 0
3 2020-01-25 Afghanistan 0 0 0
4 2020-01-26 Afghanistan 0 0 0
... ... ... ... ... ...
161563 2022-04-12 Zimbabwe 247094 0 5460
161564 2022-04-13 Zimbabwe 247160 0 5460
161565 2022-04-14 Zimbabwe 247208 0 5462
161566 2022-04-15 Zimbabwe 247237 0 5462
161567 2022-04-16 Zimbabwe 247237 0 5462

161568 rows × 5 columns

Data for Confirmed Cases

In [3]:
# Keep only the rows where at least one case has been confirmed.
df = df[df['Confirmed'].gt(0)]
df
Out[3]:
Date Country Confirmed Recovered Deaths
33 2020-02-24 Afghanistan 5 0 0
34 2020-02-25 Afghanistan 5 0 0
35 2020-02-26 Afghanistan 5 0 0
36 2020-02-27 Afghanistan 5 0 0
37 2020-02-28 Afghanistan 5 0 0
... ... ... ... ... ...
161563 2022-04-12 Zimbabwe 247094 0 5460
161564 2022-04-13 Zimbabwe 247160 0 5460
161565 2022-04-14 Zimbabwe 247208 0 5462
161566 2022-04-15 Zimbabwe 247237 0 5462
161567 2022-04-16 Zimbabwe 247237 0 5462

148455 rows × 5 columns

Data for Morocco

In [4]:
# Preview the rows belonging to Morocco (display only, nothing is assigned).
df[df['Country'].eq('Morocco')]
Out[4]:
Date Country Confirmed Recovered Deaths
99592 2020-03-02 Morocco 1 0 0
99593 2020-03-03 Morocco 1 0 0
99594 2020-03-04 Morocco 1 0 0
99595 2020-03-05 Morocco 2 0 0
99596 2020-03-06 Morocco 2 0 0
... ... ... ... ... ...
100363 2022-04-12 Morocco 1164135 0 16061
100364 2022-04-13 Morocco 1164189 0 16061
100365 2022-04-14 Morocco 1164259 0 16061
100366 2022-04-15 Morocco 1164296 0 16062
100367 2022-04-16 Morocco 1164345 0 16062

776 rows × 5 columns

Task 2. Visualizing the Global Spread of Covid-19 from the First Day of the Pandemic

Visualizing Global Spread of Covid-19

In [5]:
# Order rows by date so the animation frames below play in sequence.
# 'Date' holds 'YYYY-MM-DD' strings, so ascending string order is also date order.
df = df.sort_values(by='Date')
In [6]:
# Animated world map: one frame per date, countries shaded by confirmed cases.
fig = px.choropleth(
    df,
    locations='Country',
    locationmode='country names',
    color='Confirmed',
    animation_frame='Date',
    color_continuous_scale='reds',
    height=600,
)
fig.update_layout(title={'text': 'Global Spread of Covid-19'})
fig.show()

Visualizing Global Deaths due to Covid-19

In [7]:
# Animated world map: one frame per date, countries shaded by the Deaths column.
fig = px.choropleth(
    df,
    locations='Country',
    locationmode='country names',
    color='Deaths',
    animation_frame='Date',
    color_continuous_scale='dense',
    height=600,
)
fig.update_layout(title={'text': 'Global Deaths of Covid-19'})
fig.show()

Task 3. Visualizing the Intensity of Covid-19 Transmission

Data for Morocco

In [8]:
# Restrict the (date-sorted) data to Morocco for the per-country analysis.
df_morocco = df[df['Country'].eq('Morocco')]
df_morocco
Out[8]:
Date Country Confirmed Recovered Deaths
99592 2020-03-02 Morocco 1 0 0
99593 2020-03-03 Morocco 1 0 0
99594 2020-03-04 Morocco 1 0 0
99595 2020-03-05 Morocco 2 0 0
99596 2020-03-06 Morocco 2 0 0
... ... ... ... ... ...
100363 2022-04-12 Morocco 1164135 0 16061
100364 2022-04-13 Morocco 1164189 0 16061
100365 2022-04-14 Morocco 1164259 0 16061
100366 2022-04-15 Morocco 1164296 0 16062
100367 2022-04-16 Morocco 1164345 0 16062

776 rows × 5 columns

Filtering out the columns needed

In [9]:
# Only the date and the confirmed-case count are needed for this task.
columns_needed = ['Date', 'Confirmed']
df_morocco = df_morocco[columns_needed]
df_morocco
Out[9]:
Date Confirmed
99592 2020-03-02 1
99593 2020-03-03 1
99594 2020-03-04 1
99595 2020-03-05 2
99596 2020-03-06 2
... ... ...
100363 2022-04-12 1164135
100364 2022-04-13 1164189
100365 2022-04-14 1164259
100366 2022-04-15 1164296
100367 2022-04-16 1164345

776 rows × 2 columns

Calculating the Infection Rate (day-over-day change in confirmed cases)

In [10]:
# 'Infection Rate' = difference between each row's Confirmed value and the
# previous row's; the first row has no predecessor and is therefore NaN.
#
# df_morocco was derived from another frame by filtering and column selection.
# Writing a column into such a derived frame in place is the pattern pandas
# flags with SettingWithCopyWarning (hidden in this notebook by
# warnings.filterwarnings("ignore")). .assign() returns a new frame instead,
# so nothing is written into a possibly shared object and re-running the cell
# is safe.
df_morocco = df_morocco.assign(**{'Infection Rate': df_morocco['Confirmed'].diff()})
df_morocco
Out[10]:
Date Confirmed Infection Rate
99592 2020-03-02 1 NaN
99593 2020-03-03 1 0.0
99594 2020-03-04 1 0.0
99595 2020-03-05 2 1.0
99596 2020-03-06 2 0.0
... ... ... ...
100363 2022-04-12 1164135 83.0
100364 2022-04-13 1164189 54.0
100365 2022-04-14 1164259 70.0
100366 2022-04-15 1164296 37.0
100367 2022-04-16 1164345 49.0

776 rows × 3 columns

Visualizing the Confirmed Cases and Infection Rate

In [11]:
# Confirmed cases and their day-over-day change for Morocco on a single axis.
fig = px.line(
    df_morocco,
    x='Date',
    y=['Confirmed', 'Infection Rate'],
    color_discrete_sequence=['#c1272d', '#006233'],
    height=520,
)
# update_layout returns the figure, so leaving it last renders the plot.
fig.update_layout(plot_bgcolor='#ffffff')

Find the Maximum Infection Rate for Each Country

In [12]:
# Largest day-over-day increase in Confirmed for every country.
#
# A single grouped pass replaces a Python loop that built a full boolean mask
# over df once per country. sort=False keeps groups in order of first
# appearance, which is the same order df['Country'].unique() yields, so
# `countries` and `max_infection_rate` stay aligned and ordered as before.
# Assumes 'Country' has no missing values (groupby would drop such rows).
daily_increase = df.groupby('Country', sort=False)['Confirmed'].diff()
peak_increase = daily_increase.groupby(df['Country'], sort=False).max()

countries = peak_increase.index.tolist()
max_infection_rate = peak_increase.tolist()
max_infection_rate
Out[12]:
[104345.0,
 621317.0,
 52284.0,
 1383795.0,
 1348.0,
 77402.0,
 39252.0,
 63808.0,
 454212.0,
 503349.0,
 10052.0,
 33406.0,
 175271.0,
 527487.0,
 1130.0,
 11366.0,
 4471.0,
 28891.0,
 414188.0,
 38867.0,
 202211.0,
 848169.0,
 228123.0,
 138985.0,
 372766.0,
 133480.0,
 99.0,
 5516.0,
 50228.0,
 243295.0,
 10760.0,
 41651.0,
 8173.0,
 6913.0,
 13515.0,
 6146.0,
 3243.0,
 89462.0,
 2521.0,
 12073.0,
 11812.0,
 58583.0,
 26320.0,
 50182.0,
 40018.0,
 26109.0,
 287149.0,
 2332.0,
 380498.0,
 8438.0,
 55709.0,
 6158.0,
 39814.0,
 8921.0,
 7408.0,
 109895.0,
 4206.0,
 520.0,
 12968.0,
 5497.0,
 43199.0,
 491.0,
 7439.0,
 7779.0,
 57378.0,
 4388.0,
 17670.0,
 1722.0,
 12039.0,
 2313.0,
 5928.0,
 75276.0,
 64718.0,
 11992.0,
 139853.0,
 25502.0,
 45022.0,
 349.0,
 45047.0,
 19923.0,
 57659.0,
 5254.0,
 37875.0,
 23332.0,
 30356.0,
 2291.0,
 35575.0,
 1002.0,
 18188.0,
 28504.0,
 9668.0,
 7.0,
 36737.0,
 99645.0,
 1677.0,
 16230.0,
 25833.0,
 12399.0,
 2838.0,
 6199.0,
 7380.0,
 4789.0,
 1005.0,
 6494.0,
 24556.0,
 19722.0,
 4481.0,
 823225.0,
 23611.0,
 1968.0,
 2858.0,
 12890.0,
 1186.0,
 9907.0,
 13612.0,
 468.0,
 5185.0,
 534.0,
 3749.0,
 66121.0,
 1642.0,
 722.0,
 1211.0,
 4418.0,
 3072.0,
 1284.0,
 5826.0,
 2723.0,
 3268.0,
 4397.0,
 1259.0,
 1871.0,
 2521.0,
 1404.0,
 1188.0,
 4044.0,
 2068.0,
 1750.0,
 1478.0,
 24307.0,
 1066.0,
 1497.0,
 2566.0,
 587.0,
 2960.0,
 1329.0,
 447.0,
 30006.0,
 11505.0,
 5555.0,
 415.0,
 1854.0,
 718.0,
 596.0,
 12677.0,
 301.0,
 2295.0,
 1543.0,
 737.0,
 9185.0,
 1469.0,
 5035.0,
 282.0,
 21324.0,
 905.0,
 392.0,
 532.0,
 4947.0,
 902.0,
 1517.0,
 5694.0,
 3915.0,
 238.0,
 1217.0,
 191.0,
 7083.0,
 7.0,
 41576.0,
 192.0,
 4710.0,
 1316.0,
 503.0,
 319.0,
 287.0,
 275.0,
 407.0,
 6925.0,
 681.0,
 3.0,
 566.0,
 701.0,
 0.0,
 350.0,
 112.0,
 371.0,
 832.0,
 0.0,
 55.0]

Create a new DataFrame with only the columns needed

In [13]:
# Pair every country with its maximum infection rate in a two-column frame.
df_MIR = pd.DataFrame(
    {
        'Country': countries,
        'Max Infection Rate': max_infection_rate,
    }
)
df_MIR
Out[13]:
Country Max Infection Rate
0 Japan 104345.0
1 Korea, South 621317.0
2 Thailand 52284.0
3 US 1383795.0
4 Taiwan* 1348.0
... ... ...
193 Summer Olympics 2020 112.0
194 Palau 371.0
195 Tonga 832.0
196 Antarctica 0.0
197 Winter Olympics 2022 55.0

198 rows × 2 columns

Plot the Global Max Infection Rate

In [14]:
# One bar per country, linear y-axis.
fig = px.bar(
    df_MIR,
    x='Country',
    y='Max Infection Rate',
    color='Country',
    title='Global Max Infection Rate',
    height=600,
)
# update_layout returns the figure, so leaving it last renders the plot.
fig.update_layout(plot_bgcolor='#ffffff')

Log Scaling to get a better view of the Data

In [15]:
# Same bar chart as above, drawn with a logarithmic y-axis.
fig = px.bar(
    df_MIR,
    x='Country',
    y='Max Infection Rate',
    color='Country',
    title='Global Max Infection Rate',
    log_y=True,
    height=600,
)
# update_layout returns the figure, so leaving it last renders the plot.
fig.update_layout(plot_bgcolor='#ffffff')

Task 4. Covid-19 before and after the 1st lockdown

In [16]:
# Reference dates used by the vertical markers in the plots below.
# They use the same 'YYYY-MM-DD' format as the 'Date' column.
mlsd = "2020-03-20"  # Morocco lockdown start date
mlml = "2020-04-20"  # one month after the lockdown started
In [17]:
# Start again from the full set of columns for Morocco.
df_morocco = df[df['Country'].eq('Morocco')]
df_morocco
Out[17]:
Date Country Confirmed Recovered Deaths
99592 2020-03-02 Morocco 1 0 0
99593 2020-03-03 Morocco 1 0 0
99594 2020-03-04 Morocco 1 0 0
99595 2020-03-05 Morocco 2 0 0
99596 2020-03-06 Morocco 2 0 0
... ... ... ... ... ...
100363 2022-04-12 Morocco 1164135 0 16061
100364 2022-04-13 Morocco 1164189 0 16061
100365 2022-04-14 Morocco 1164259 0 16061
100366 2022-04-15 Morocco 1164296 0 16062
100367 2022-04-16 Morocco 1164345 0 16062

776 rows × 5 columns

Calculating the Infection Rate

In [18]:
# 'Infection Rate' = day-over-day change in Confirmed (NaN for the first row).
#
# df_morocco is a filtered selection of df. Assigning a column to it in place
# is the pattern pandas reports as SettingWithCopyWarning (suppressed in this
# notebook by warnings.filterwarnings("ignore")). .assign() builds a new frame
# with the extra column, which avoids the ambiguous write.
df_morocco = df_morocco.assign(**{'Infection Rate': df_morocco['Confirmed'].diff()})
df_morocco
Out[18]:
Date Country Confirmed Recovered Deaths Infection Rate
99592 2020-03-02 Morocco 1 0 0 NaN
99593 2020-03-03 Morocco 1 0 0 0.0
99594 2020-03-04 Morocco 1 0 0 0.0
99595 2020-03-05 Morocco 2 0 0 1.0
99596 2020-03-06 Morocco 2 0 0 0.0
... ... ... ... ... ... ...
100363 2022-04-12 Morocco 1164135 0 16061 83.0
100364 2022-04-13 Morocco 1164189 0 16061 54.0
100365 2022-04-14 Morocco 1164259 0 16061 70.0
100366 2022-04-15 Morocco 1164296 0 16062 37.0
100367 2022-04-16 Morocco 1164345 0 16062 49.0

776 rows × 6 columns

Plot the evolution of the Infection Rate in Morocco

In [19]:
# Day-over-day change in confirmed cases for Morocco over the whole period.
fig = px.line(
    df_morocco,
    x='Date',
    y='Infection Rate',
    title='Evolution of the Infection Rate - Morocco',
    color_discrete_sequence=['#c1272d', '#006233'],
    height=600,
)
# update_layout returns the figure, so leaving it last renders the plot.
fig.update_layout(plot_bgcolor='#ffffff')
In [20]:
# Infection-rate curve with a vertical marker on the lockdown start date.
fig = px.line(
    df_morocco,
    x='Date',
    y='Infection Rate',
    title='Starting day Lockdown - Morocco',
    color_discrete_sequence=['#c1272d', '#006233'],
    height=600,
)
fig.update_layout(plot_bgcolor='#ffffff')

# The marker runs from zero up to the largest value in the column.
peak_rate = df_morocco['Infection Rate'].max()
fig.add_shape(
    type='line',
    x0=mlsd, y0=0,
    x1=mlsd, y1=peak_rate,
    line={'color': '#333333', 'width': 2},
)
# Kept as the last expression: add_annotation returns the figure, which renders it.
fig.add_annotation(x=mlsd, y=peak_rate, text='Starting Date of the Lockdown')
In [21]:
# Infection-rate curve with markers on the lockdown start and one month later.
fig = px.line(
    df_morocco,
    x='Date',
    y='Infection Rate',
    title='Before and 1 Month After Lockdown - Morocco',
    color_discrete_sequence=['#c1272d'],
    height=600,
)
fig.update_layout(plot_bgcolor='#ffffff')

peak_rate = df_morocco['Infection Rate'].max()

# Two identical vertical lines, one per reference date, from zero to the peak.
for marker_date in (mlsd, mlml):
    fig.add_shape(
        type='line',
        x0=marker_date, y0=0,
        x1=marker_date, y1=peak_rate,
        line={'color': '#333333', 'width': 2},
    )

# The first label sits at the peak; the second is placed at the column mean.
fig.add_annotation(x=mlsd, y=peak_rate, text='Starting Date of the Lockdown')
# Kept as the last expression: add_annotation returns the figure, which renders it.
fig.add_annotation(
    x=mlml,
    y=df_morocco['Infection Rate'].mean(),
    text='1 month after Starting Date of the Lockdown',
)

Task 5. Death Rate Before and After Lockdown in Morocco

Calculating the Death Rate (day-over-day change in deaths)

In [22]:
# 'Death Rate' = day-over-day change in the Deaths column (NaN for the first row).
#
# .assign() returns a new frame rather than writing the column into
# df_morocco in place. df_morocco originates from a filtered selection of df,
# and in-place column writes on such frames are what pandas reports as
# SettingWithCopyWarning (suppressed in this notebook by
# warnings.filterwarnings("ignore")).
df_morocco = df_morocco.assign(**{'Death Rate': df_morocco['Deaths'].diff()})
df_morocco
Out[22]:
Date Country Confirmed Recovered Deaths Infection Rate Death Rate
99592 2020-03-02 Morocco 1 0 0 NaN NaN
99593 2020-03-03 Morocco 1 0 0 0.0 0.0
99594 2020-03-04 Morocco 1 0 0 0.0 0.0
99595 2020-03-05 Morocco 2 0 0 1.0 0.0
99596 2020-03-06 Morocco 2 0 0 0.0 0.0
... ... ... ... ... ... ... ...
100363 2022-04-12 Morocco 1164135 0 16061 83.0 0.0
100364 2022-04-13 Morocco 1164189 0 16061 54.0 0.0
100365 2022-04-14 Morocco 1164259 0 16061 70.0 0.0
100366 2022-04-15 Morocco 1164296 0 16062 37.0 1.0
100367 2022-04-16 Morocco 1164345 0 16062 49.0 0.0

776 rows × 7 columns

Plot the evolution of the Death Rate in Morocco

In [23]:
# Both day-over-day series for Morocco on one axis.
fig = px.line(
    df_morocco,
    x='Date',
    y=['Infection Rate', 'Death Rate'],
    title='Infection Rate vs. Death Rate - Morocco',
    color_discrete_sequence=['#c1272d', '#006233'],
    height=520,
)
fig.update_layout(plot_bgcolor='#ffffff')
fig.show()

Normalizing Each Rate by Its Maximum to Get a Better Understanding of the Data

In [24]:
# Preview only: the infection rate divided by its own maximum (nothing is stored).
df_morocco['Infection Rate'].div(df_morocco['Infection Rate'].max())
Out[24]:
99592          NaN
99593     0.000000
99594     0.000000
99595     0.000083
99596     0.000000
            ...   
100363    0.006894
100364    0.004485
100365    0.005814
100366    0.003073
100367    0.004070
Name: Infection Rate, Length: 776, dtype: float64
In [25]:
# Replace 'Infection Rate' with its value divided by the column maximum, so
# the largest value becomes 1.
# .assign() returns a new frame instead of writing into df_morocco in place;
# in-place column writes on a frame derived from a filtered selection are the
# pattern pandas reports as SettingWithCopyWarning.
df_morocco = df_morocco.assign(
    **{'Infection Rate': df_morocco['Infection Rate'] / df_morocco['Infection Rate'].max()}
)
In [26]:
# Replace 'Death Rate' with its value divided by the column maximum, so the
# largest value becomes 1.
# .assign() returns a new frame instead of writing into df_morocco in place;
# in-place column writes on a frame derived from a filtered selection are the
# pattern pandas reports as SettingWithCopyWarning.
df_morocco = df_morocco.assign(
    **{'Death Rate': df_morocco['Death Rate'] / df_morocco['Death Rate'].max()}
)
In [27]:
# Infection Rate and Death Rate drawn together on one axis.
fig = px.line(
    df_morocco,
    x='Date',
    y=['Infection Rate', 'Death Rate'],
    color_discrete_sequence=['#c1272d', '#006233'],
    height=520,
)
fig.update_layout(plot_bgcolor='#ffffff')
fig.show()

Plotting the Infection Rate vs. Death Rate before and after the 1st lockdown

In [28]:
# Infection Rate and Death Rate with markers on the lockdown start date and
# one month later.
fig = px.line(
    df_morocco,
    x='Date',
    y=['Infection Rate', 'Death Rate'],
    color_discrete_sequence=['#c1272d', '#006233'],
    height=520,
)
fig.update_layout(plot_bgcolor='#ffffff')  # white plot background

peak_rate = df_morocco['Infection Rate'].max()

# Two identical vertical lines, one per reference date, from zero to the peak.
for marker_date in (mlsd, mlml):
    fig.add_shape(
        type='line',
        x0=marker_date, y0=0,
        x1=marker_date, y1=peak_rate,
        line={'color': '#333333', 'width': 2},
    )

# The first label sits at the column maximum, the second at the column minimum.
fig.add_annotation(x=mlsd, y=peak_rate, text='Starting Date of the Lockdown')
# Kept as the last expression: add_annotation returns the figure, which renders it.
fig.add_annotation(
    x=mlml,
    y=df_morocco['Infection Rate'].min(),
    text='1 month later',
)