1
+ import pandas as pd
2
+
3
+ from utils import rename_countries , BOATS
4
+
5
def read_data(path):
    """
    Load the raw country table.

    Args:
        path: Directory that contains ``countries.csv``.

    Returns:
        The DataFrame parsed by ``pd.read_csv`` from ``<path>/countries.csv``.
    """

    # Keep the separator tight against the directory: any whitespace inside
    # the f-string becomes part of the path and the file is not found.
    df = pd.read_csv(f'{path}/countries.csv')

    return df
10
+
11
def get_continents(df):
    """
    Reduce the raw table to continent, 3-letter country code and short name.

    The short name ('Country') is the part of 'Country_Name' that precedes
    the first ", ". Rows are de-duplicated on that short name, keeping the
    first occurrence. The input frame is not modified; a new frame is
    returned.
    """

    def leading_part(full_name):
        # "Korea, Republic of" -> "Korea"; names without ", " pass through.
        return full_name.split(", ")[0]

    unused_columns = ['Continent_Code', 'Two_Letter_Country_Code', 'Country_Number']
    new_labels = {
        "Continent_Name": "Continent",
        'Three_Letter_Country_Code': 'Country Code',
    }

    return (
        df.drop(columns=unused_columns)
        # The derived column is appended last, matching a plain assignment.
        .assign(Country=lambda frame: frame['Country_Name'].apply(leading_part))
        .drop(columns=['Country_Name'])
        .rename(columns=new_labels)
        .drop_duplicates(subset=['Country'])
    )
28
+
29
def switch_country_names(df):
    """
    Rewrite selected 'Country' values to the spellings used by the covid data.

    The frame is updated in place and the same object is returned. Values
    that are not listed below are left untouched.
    """

    # Maps the name found in the 'Country' column to its replacement.
    replacements = {
        'Russian Federation': 'Russia',
        'Slovakia (Slovak Republic)': 'Slovakia',
        'Kyrgyz Republic': 'Kyrgyzstan',
        'Syrian Arab Republic': 'Syria',
        'Libyan Arab Jamahiriya': 'Libya',
        'Korea, South': 'Korea',
        'Brunei Darussalam': 'Brunei',
        'Cabo Verde': 'Cape Verde',
        'Holy See (Vatican City State)': 'Holy See',
        'United States of America': 'US',
        'United Kingdom of Great Britain & Northern Ireland': 'United Kingdom',
        "Lao People's Democratic Republic": 'Laos',
        'Myanmar': 'Burma',
        'Czech Republic': 'Czechia',
        'Swaziland': 'Eswatini',
    }

    for current_name, covid_name in replacements.items():
        matching_rows = df['Country'] == current_name
        df.loc[matching_rows, 'Country'] = covid_name

    return df
54
+
55
def make_continents(in_path, out_path):
    """
    Build ``continents.csv`` from the raw country table.

    Args:
        in_path: Directory passed to ``read_data`` to load the raw table.
        out_path: Directory in which ``continents.csv`` is written.

    The raw table is reduced with ``get_continents``, its country names are
    adjusted with ``switch_country_names``, rows whose 'Continent' equals
    'Antarctica' are removed, and the result is written without the index.
    """

    df = read_data(in_path)
    df = get_continents(df=df)
    df = switch_country_names(df=df)

    df = df[df['Continent'] != 'Antarctica']

    # Keep the separator tight against the directory: whitespace inside the
    # f-string would become part of the output directory name.
    df.to_csv(f'{out_path}/continents.csv', index=False)
64
+
65
if __name__ == '__main__':

    # Script entry point: raw datahub directory in, processed directory out.
    in_path, out_path = './data/raw/datahub', './data/processed'

    make_continents(in_path=in_path, out_path=out_path)
0 commit comments