Skip to content

Commit 885c489

Browse files
committed
Create make_continents.py
1 parent 1ce82e6 commit 885c489

File tree

1 file changed

+71
-0
lines changed

1 file changed

+71
-0
lines changed

COVID19/features/make_continents.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import pandas as pd
2+
3+
from utils import rename_countries, BOATS
4+
5+
def read_data(path):
    """
    Load countries.csv from the directory `path` into a DataFrame.

    The file is parsed with pandas' default read_csv settings.
    """
    csv_file = f'{path}/countries.csv'
    return pd.read_csv(csv_file)
10+
11+
def get_continents(df):
    """
    Build a table of countries with their 3 letter codes and continents.

    Drops the 'Continent_Code', 'Two_Letter_Country_Code' and
    'Country_Number' columns, derives 'Country' from the part of
    'Country_Name' that precedes the first ", ", renames 'Continent_Name'
    to 'Continent' and 'Three_Letter_Country_Code' to 'Country Code', and
    keeps only the first row for each distinct 'Country'.

    The input frame is not modified; a new DataFrame is returned.
    """
    unused_columns = ['Continent_Code', 'Two_Letter_Country_Code', 'Country_Number']
    new_labels = {"Continent_Name": "Continent", 'Three_Letter_Country_Code': 'Country Code'}

    return (
        df.drop(columns=unused_columns)
        # Keep only the text before the first ", " of each name; names
        # without that separator are left unchanged.
        .assign(Country=lambda frame: frame['Country_Name'].apply(
            lambda name: name.partition(", ")[0]))
        .rename(columns=new_labels)
        .drop(columns=['Country_Name'])
        # Shortened names can collide; the first occurrence wins.
        .drop_duplicates(subset=['Country'])
    )
28+
29+
def switch_country_names(df):
    """
    Rename entries of the 'Country' column to the spellings used by the
    covid data.

    Each key of the table below is replaced by its value wherever it
    appears in df['Country']. The frame is edited in place and the same
    object is returned.
    """
    replacements = {
        'Russian Federation': 'Russia',
        'Slovakia (Slovak Republic)': 'Slovakia',
        'Kyrgyz Republic': 'Kyrgyzstan',
        'Syrian Arab Republic': 'Syria',
        'Libyan Arab Jamahiriya': 'Libya',
        'Korea, South': 'Korea',
        'Brunei Darussalam': 'Brunei',
        'Cabo Verde': 'Cape Verde',
        'Holy See (Vatican City State)': 'Holy See',
        'United States of America': 'US',
        'United Kingdom of Great Britain & Northern Ireland': 'United Kingdom',
        "Lao People's Democratic Republic": 'Laos',
        'Myanmar': 'Burma',
        'Czech Republic': 'Czechia',
        'Swaziland': 'Eswatini',
    }

    # Applied one at a time, in the order listed above.
    for old_name, new_name in replacements.items():
        matches = df['Country'] == old_name
        df.loc[matches, 'Country'] = new_name

    return df
54+
55+
def make_continents(in_path, out_path):
    """
    Build continents.csv from the raw countries.csv.

    Reads `{in_path}/countries.csv` via read_data, passes it through
    get_continents and switch_country_names, removes the rows whose
    'Continent' is 'Antarctica', and writes the result (without the index)
    to `{out_path}/continents.csv`.

    The output directory is created if it does not exist yet, so a fresh
    checkout without the processed-data folder no longer fails on write.
    """
    # Local import: only this function needs it, and the file's import
    # block is left untouched.
    import os

    df = read_data(in_path)
    df = get_continents(df=df)
    df = switch_country_names(df=df)

    df = df[df['Continent'] != 'Antarctica']

    # to_csv does not create missing parent directories. Skip an empty
    # out_path, for which os.makedirs would raise.
    if out_path:
        os.makedirs(out_path, exist_ok=True)
    df.to_csv(f'{out_path}/continents.csv', index=False)
64+
65+
if __name__ == '__main__':
    # Raw datahub input directory and processed output directory, both
    # relative to the current working directory.
    in_path, out_path = './data/raw/datahub', './data/processed'

    make_continents(in_path=in_path, out_path=out_path)

0 commit comments

Comments
 (0)