Skip to content

Commit

Permalink
update to new parliamentary constituencies
Browse files Browse the repository at this point in the history
  • Loading branch information
drkane committed Jun 5, 2024
1 parent 4add5e4 commit 3168066
Show file tree
Hide file tree
Showing 10 changed files with 198 additions and 40 deletions.
10 changes: 4 additions & 6 deletions .github/workflows/pythonapp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Parse Python version
run: sed s/python-// runtime.txt | head > .python-version
- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version-file: ".python-version"
- name: Install dependencies
Expand All @@ -21,10 +21,8 @@ jobs:
pip install -r requirements.txt
- name: ruff
run: |
ruff .
- name: black
run: |
black . --check
ruff check .
ruff format . --check
- name: Test with pytest
run: |
pip install pytest
Expand Down
3 changes: 0 additions & 3 deletions .isort.cfg

This file was deleted.

3 changes: 2 additions & 1 deletion findthatpostcode/commands/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from flask.cli import AppGroup

from . import boundaries, codes, placenames, postcodes, stats
from . import boundaries, codes, new_pcon, placenames, postcodes, stats


def init_app(app):
Expand All @@ -13,6 +13,7 @@ def init_app(app):
import_cli.add_command(stats.import_imd2019)
import_cli.add_command(stats.import_imd2015)
import_cli.add_command(placenames.import_placenames)
import_cli.add_command(new_pcon.import_new_pcon)

app.cli.add_command(import_cli)

Expand Down
22 changes: 22 additions & 0 deletions findthatpostcode/commands/boundaries.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Import commands for the register of geographic codes and code history database
"""

import csv
import glob
import io
Expand All @@ -16,6 +17,10 @@
from elasticsearch.helpers import scan
from flask import current_app
from flask.cli import with_appcontext
from pyproj import Transformer
from shapely import to_geojson
from shapely.geometry import shape
from shapely.ops import transform

from .. import db
from .codes import AREA_INDEX
Expand Down Expand Up @@ -67,6 +72,11 @@ def import_boundary(client, url, examine=False, code_field=None):
boundaries = json.load(f)
errors = []

# Check the CRS
transformer = None
if boundaries.get("crs", {}).get("properties", {}).get("name") == "EPSG:27700":
transformer = Transformer.from_crs("EPSG:27700", "EPSG:4326", always_xy=True)

# find the code field for a boundary
if len(boundaries.get("features", [])) == 0:
errors.append("[ERROR][%s] Features not found in file" % (url,))
Expand Down Expand Up @@ -132,6 +142,18 @@ def import_boundary(client, url, examine=False, code_field=None):
):
area_code = i["properties"][code_field]
prefix = area_code[0:3]
if transformer:
# create a shapely object from the geometry
geometry = shape(i["geometry"])
i["geometry"] = json.loads(
to_geojson(
transform(
transformer.transform,
geometry,
)
)
)

client.upload_fileobj(
io.BytesIO(json.dumps(i).encode("utf-8")),
current_app.config["S3_BUCKET"],
Expand Down
151 changes: 151 additions & 0 deletions findthatpostcode/commands/new_pcon.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
import codecs
import csv
import io
import zipfile
from collections import defaultdict

import click
import requests
import requests_cache
import tqdm
from elasticsearch.helpers import bulk
from flask import current_app
from flask.cli import with_appcontext

from findthatpostcode.commands.codes import AREA_INDEX
from findthatpostcode.commands.postcodes import PC_INDEX

from .. import db

PCON_NAMES_AND_CODES_URL = "https://opendata.arcgis.com/api/v3/datasets/9a876e4777bc47e392e670a7b8bc3f5c_0/downloads/data?format=csv&spatialRefId=4326&where=1%3D1"
PCON_2010_LOOKUP_URL = "https://opendata.arcgis.com/api/v3/datasets/c776b66c0e534b849cae5a5121b7a16a_0/downloads/data?format=csv&spatialRefId=4326&where=1%3D1"
PCON_POSTCODE_URL = "https://www.arcgis.com/sharing/rest/content/items/f60c78533aa7462cb934bb4a81afc1e0/data"
PCON_BOUNDARIES_URL = "https://stg-arcgisazurecdataprod1.az.arcgis.com/exportfiles-1559-23529/Westminster_Parliamentary_Constituencies_July_2024_Boundaries_UK_BSC_7275719608364942765.geojson"


@click.command("new_pcon")
@click.option("--area-index", default=AREA_INDEX)
@click.option("--postcode-index", default=PC_INDEX)
@with_appcontext
def import_new_pcon(area_index=AREA_INDEX, postcode_index=PC_INDEX):
    """Import the new (July 2024) Westminster parliamentary constituencies.

    Steps:
      1. Download the PCON24 names-and-codes CSV and build an area record
         for each new constituency.
      2. Download the 2010 -> 2024 lookup CSV, recording predecessors on the
         new areas and successors on the old ones.
      3. Bulk-upsert the new areas and mark the 2010 areas inactive in the
         ``area_index`` Elasticsearch index.
      4. Download the postcode-to-PCON24 lookup zip and bulk-update the
         ``pcon`` field of every postcode in ``postcode_index``.

    Parameters (exposed as CLI options):
        area_index: Elasticsearch index holding area documents.
        postcode_index: Elasticsearch index holding postcode documents.
    """
    # Cache HTTP responses in debug mode so repeated runs don't re-download
    # the (large) source files.
    if current_app.config["DEBUG"]:
        requests_cache.install_cache()

    es = db.get_db()

    # 1. Names and codes for the new constituencies.
    r = requests.get(PCON_NAMES_AND_CODES_URL, stream=True)
    # utf-8-sig strips the BOM that the ArcGIS CSV export includes.
    reader = csv.DictReader(codecs.iterdecode(r.iter_lines(), "utf-8-sig"))
    areas = {}
    for row in reader:
        names = [row["PCON24NM"]]
        if row["PCON24NMW"]:
            # Welsh-language name, where one exists.
            names.append(row["PCON24NMW"])
        areas[row["PCON24CD"]] = {
            "code": row["PCON24CD"],
            "name": row["PCON24NM"],
            "name_welsh": row["PCON24NMW"] if row["PCON24NMW"] else None,
            # NOTE(review): SI reference and commencement date hard-coded;
            # presumably The Parliamentary Constituencies Order 2023 taking
            # effect 2024-07-05 — confirm against legislation.gov.uk.
            "statutory_instrument_id": "1230/2023",
            "statutory_instrument_title": "The Parliamentary Constituencies Order 2023",
            "date_start": "2024-07-05T00:00:00",
            "date_end": None,
            "parent": None,
            # First three characters of a GSS code identify the entity type.
            "entity": row["PCON24CD"][0:3],
            "owner": "LGBC",
            "active": True,
            "areaehect": None,
            "areachect": None,
            "areaihect": None,
            "arealhect": None,
            "sort_order": row["PCON24CD"],
            "predecessor": [],
            "successor": [],
            "equivalents": {},
            "type": "pcon",
            "alternative_names": names,
        }

    # 2. Lookup linking each 2024 constituency to its 2010 predecessors.
    r = requests.get(PCON_2010_LOOKUP_URL, stream=True)
    reader = csv.DictReader(codecs.iterdecode(r.iter_lines(), "utf-8-sig"))
    update_2010 = defaultdict(list)
    for row in reader:
        # Raises KeyError if the lookup references a PCON24 code missing from
        # the names file — failing loudly here is preferable to importing an
        # incomplete predecessor/successor graph.
        areas[row["PCON24CD"]]["predecessor"].append(row["PCON10CD"])
        update_2010[row["PCON10CD"]].append(row["PCON24CD"])

    # 3. Create the new areas and close off the old ones in one bulk request.
    to_update = [
        {
            "_index": area_index,
            "_type": "_doc",
            "_op_type": "update",
            "_id": area_id,
            "doc_as_upsert": True,
            "doc": area,
        }
        for area_id, area in areas.items()
    ] + [
        {
            "_index": area_index,
            "_type": "_doc",
            "_op_type": "update",
            "_id": area_id,
            "doc_as_upsert": True,
            "doc": {
                "active": False,
                "successor": successors,
                "date_end": "2024-07-05T00:00:00",
            },
        }
        for area_id, successors in update_2010.items()
    ]
    print(
        "[new parliamentary constituencies] Processed %s new parliamentary constituencies"
        % len(to_update)
    )
    print(
        "[elasticsearch] %s parliamentary constituencies to create or update"
        % len(to_update)
    )
    # raise_on_error=False (matching the postcode bulk call below) so that
    # individual failures are collected and reported by the print statements
    # that follow, instead of aborting the import with an exception.
    results = bulk(es, to_update, raise_on_error=False)
    print(
        "[elasticsearch] saved %s new parliamentary constituencies to %s index"
        % (results[0], area_index)
    )
    print("[elasticsearch] %s errors reported" % len(results[1]))

    # 4. Postcode-to-constituency lookup (a zip of CSVs).
    r = requests.get(PCON_POSTCODE_URL)
    z = zipfile.ZipFile(io.BytesIO(r.content))

    for f in z.namelist():
        if not f.endswith(".csv"):
            continue
        with z.open(f, "r") as infile:
            # Source files are Windows-1252 encoded, not UTF-8.
            reader = csv.DictReader(io.TextIOWrapper(infile, encoding="Windows-1252"))
            postcode_updates = []
            for row in tqdm.tqdm(reader):
                postcode = row["pcd"]
                # convert to "pcds" format: variable-length outward code,
                # single space, 3-character inward code.
                postcode = "%s %s" % (postcode[:-3].strip(), postcode[-3:])
                record = {
                    "_index": postcode_index,
                    "_type": "_doc",
                    "_op_type": "update",
                    "_id": postcode,
                    "doc": {
                        "pcon": row["pconcd"],
                    },
                }
                postcode_updates.append(record)
            print(
                "[new parliamentary constituencies] Processed %s postcodes to update"
                % len(postcode_updates)
            )
            print("[elasticsearch] %s postcodes to update" % len(postcode_updates))
            # Postcodes present in the lookup but not in the index produce
            # errors; tolerate and count them rather than aborting.
            results = bulk(es, postcode_updates, raise_on_error=False)
            print(
                "[elasticsearch] updated %s postcodes in %s index"
                % (results[0], postcode_index)
            )
            print("[elasticsearch] %s errors reported" % len(results[1]))
1 change: 1 addition & 0 deletions findthatpostcode/commands/placenames.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Import commands for placenames
"""

import csv
import io
import zipfile
Expand Down
5 changes: 3 additions & 2 deletions findthatpostcode/commands/postcodes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Import commands for the register of geographic codes and code history database
"""

import csv
import datetime
import hashlib
Expand All @@ -19,8 +20,8 @@
PC_INDEX = "geo_postcode"

NSPL_URL = {
2011: "https://www.arcgis.com/sharing/rest/content/items/782899bd01934a8099ae8516cc021f68/data",
2021: "https://www.arcgis.com/sharing/rest/content/items/b86748732a054592bcf0218e86a43870/data",
2011: "https://www.arcgis.com/sharing/rest/content/items/521edce4159a451a932539b7fc786322/data",
2021: "https://www.arcgis.com/sharing/rest/content/items/f7464f3658ba439ba577651b32014cfe/data",
}
DEFAULT_YEAR = 2021

Expand Down
3 changes: 0 additions & 3 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,6 @@ flask import boundaries "https://opendata.arcgis.com/datasets/094f326b0b1247e3bc

You can add more than one URL to each import script.

These imports will also take a while, and add significantly to the size of the
elasticsearch index. It may increase in size to over 5GB.

### 7. Import placenames (optional)

A further related dataset is placenames. The [ONS has a list of these](http://geoportal.statistics.gov.uk/datasets/a6c138d17ac54532b0ca8ee693922f10_0)
Expand Down
5 changes: 3 additions & 2 deletions requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ requests-cache
tqdm
python-dotenv
dictlib
black
ruff
ElasticMock
pytest
Expand All @@ -16,4 +15,6 @@ blinker
sqlite-utils
ua-parser
boto3
sentry-sdk[flask]
sentry-sdk[flask]
pyproj
shapely
Loading

0 comments on commit 3168066

Please sign in to comment.