yaph · connortodd21 · Aug 24, 2025 · Aug 24, 2025
diff --git a/Makefile b/Makefile
@@ -5,7 +5,10 @@ help:
 	@echo "clean-build - remove build artifacts"
 	@echo "clean-py - remove Python file artifacts"
 	@echo "clean-test - remove test and coverage artifacts"
+	@echo "clean-data - remove all downloaded files in data/"
 
+make_data:  # create the download directory; -p makes this a no-op when data/ already exists
+	mkdir -p data
 
 data/cities500.txt:
 	curl -o data/cities500.zip http://download.geonames.org/export/dump/cities500.zip
@@ -31,9 +34,9 @@ data/countryInfo.txt:
 	curl -o data/countryInfo.txt http://download.geonames.org/export/dump/countryInfo.txt
 
 data/us_counties.txt:
-	curl -o data/us_counties.txt https://www2.census.gov/geo/docs/reference/codes/files/national_county.txt
+	curl --fail -o data/us_counties.txt https://www2.census.gov/geo/docs/reference/codes2020/national_county2020.txt
 
-dl: data/cities500.txt data/cities1000.txt data/cities5000.txt data/cities15000.txt data/countryInfo.txt data/us_counties.txt
+dl: make_data data/cities500.txt data/cities1000.txt data/cities5000.txt data/cities15000.txt data/countryInfo.txt data/us_counties.txt  # make_data is listed first so data/ is created before the downloads in a serial (non -j) run
 
 json:
 	'./bin/continents.py'
@@ -42,7 +45,10 @@ json:
 	'./bin/us_counties.py'
 	mv data/*.json geonamescache/data/
 
-clean: clean-build clean-py clean-test
+clean: clean-build clean-py clean-test clean-data  # clean-data is included, so clean also deletes data/
+
+clean-data:  # delete the data/ directory and everything in it
+	rm -fr data/
 
 clean-build:
 	rm -fr build/

diff --git a/bin/us_counties.py b/bin/us_counties.py
@@ -6,7 +6,32 @@
 p_data = Path('data')
 
-reader = csv.reader(p_data.joinpath('us_counties.txt').open())
+# Column positions within each pipe-delimited row of data/us_counties.txt.
+state_name_idx = 0
+state_fips_idx = 1
+county_fips_idx = 2
+county_name_idx = 4
 
-counties = [{'fips': line[1] + line[2], 'name': line[3], 'state': line[0]} for line in reader]
+counties = []
+# Let csv split on '|' directly: with the default ',' delimiter a comma inside
+# a field would cut the row short before it was re-split on '|'.
+# The encoding is explicit so parsing does not depend on the machine's locale
+# (assumes the download is UTF-8 -- TODO confirm); newline='' is what csv expects.
+with p_data.joinpath('us_counties.txt').open(encoding='utf-8', newline='') as f:
+    reader = csv.reader(f, delimiter='|')
+    next(reader, None)  # skip header row; the default avoids StopIteration on an empty file
+    for row in reader:
+        if not row:  # csv yields [] for blank lines
+            continue
+        counties.append(
+            {
+                'fips': row[state_fips_idx] + row[county_fips_idx],
+                'name': row[county_name_idx],
+                'state': row[state_name_idx],
+            }
+        )
 
-p_data.joinpath('us_counties.json').write_text(json.dumps(counties))
+# ensure_ascii=False writes non-ASCII names (e.g. PR counties) as literal characters
+# rather than \uXXXX escapes, so the output encoding must be pinned to UTF-8 as well.
+p_data.joinpath('us_counties.json').write_text(
+    json.dumps(counties, ensure_ascii=False), encoding='utf-8'
+)