Skip to content

Commit

Permalink
Show most frequently occurring values of city field
Browse files Browse the repository at this point in the history
  • Loading branch information
wblondel committed Feb 20, 2021
1 parent c0d84fb commit a4c6870
Showing 1 changed file with 11 additions and 1 deletion.
12 changes: 11 additions & 1 deletion insee-deaths-manager/cli_check.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import click
from pathlib import Path
import csv
import pprint


@click.group(name='check')
Expand Down Expand Up @@ -109,6 +110,8 @@ def check_fields(csv_dir, full_name, gender, date, postcode, city):
else:
csv_files = [csv_dir]

counts = {}

for csv_file in csv_files:
click.echo(f"Opening {csv_file}")
with csv_file.open('r', encoding='utf-8', errors='strict') as csvfile:
Expand Down Expand Up @@ -143,12 +146,19 @@ def check_fields(csv_dir, full_name, gender, date, postcode, city):
"Le code lieu indique déjà que la commune est inconnue.")

if tests.city_not_known(row):
errors.append("Valeur du champ commune inutile.")
errors.append("Valeur du champ commune inutile ou incorrecte.")

if not tests.city_correctly_formated_when_arrondissement(row):
errors.append("Format requis: VILLE (X)X")
if row[4] in counts:
counts[row[4]] += 1
else:
counts[row[4]] = 1

if errors:
print(f"{deathreader.line_num} {row} {errors}")

click.echo(f"Closing {csv_file}")

if city:
pprint.pprint({k: v for k, v in sorted(counts.items(), key=lambda item: item[1])}, sort_dicts=False)

0 comments on commit a4c6870

Please sign in to comment.