Skip to content

Commit

Permalink
Update to census 2022
Browse files Browse the repository at this point in the history
The results of the DE census 2022 were published in June 2024.

Update the processing script. Since a lot more buildings are mapped in
OSM in the meantime, require buildings now, not just residential land
use.
  • Loading branch information
hfs committed Jul 13, 2024
1 parent fa27f15 commit 0a4f937
Show file tree
Hide file tree
Showing 21 changed files with 186 additions and 141 deletions.
12 changes: 7 additions & 5 deletions 01_download.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
#!/bin/bash -e
set -o pipefail

source env.sh

cd data
echo ">>> Downloading Census 2011 data"
wget 'https://www.zensus2011.de/SharedDocs/Downloads/DE/Pressemitteilung/DemografischeGrunddaten/csv_Bevoelkerung_100m_Gitter.zip?__blob=publicationFile&v=3' \
echo ">>> Downloading Census 2022 data"
wget 'https://www.zensus2022.de/static/Zensus_Veroeffentlichung/Zensus2022_Bevoelkerungszahl.zip' \
--timestamping
ln -sf csv_Bevoelkerung_100m_Gitter.zip\?* csv_Bevoelkerung_100m_Gitter.zip
unzip -o csv_Bevoelkerung_100m_Gitter.zip
unzip -o Zensus2022_Bevoelkerungszahl.zip

echo ">>> Downloading OpenStreetMap dump for Germany"
wget 'http://download.geofabrik.de/europe/germany-latest.osm.pbf' \
wget "http://download.geofabrik.de/europe/$REGION-latest.osm.pbf" \
--timestamping
7 changes: 3 additions & 4 deletions 03_import_census.sh
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
#!/bin/bash -e
set -o pipefail
source env.sh

echo ">>> Filter raw census data for inhabited cells"
sed -e '/;-1\r$/d; s/[^;]*;//' data/Zensus_Bevoelkerung_100m-Gitter.csv > data/Zensus_Bevoelkerung_100m-Gitter_filtered.csv

echo ">>> Import Census into PostgreSQL database '$PGDATABASE'"
psql -v ON_ERROR_STOP=1 --single-transaction <<EOF
DROP TABLE IF EXISTS census_germany;
CREATE TABLE census_germany (
id char(30),
x int8,
y int8,
population int8
);
\COPY census_germany FROM data/Zensus_Bevoelkerung_100m-Gitter_filtered.csv (FORMAT CSV, DELIMITER ';', NULL '', HEADER)
\COPY census_germany FROM data/Zensus2022_Bevoelkerungszahl_100m-Gitter.csv (FORMAT CSV, DELIMITER ';', NULL '', HEADER)
EOF
27 changes: 7 additions & 20 deletions 04_import_osm.sh
Original file line number Diff line number Diff line change
@@ -1,26 +1,13 @@
#!/bin/bash -e
source env.sh

REGION=germany
if [ data/$REGION-latest.osm.pbf -nt data/$REGION-filtered.osm.pbf ]; then
echo ">>> Filter OSM data"
osmium tags-filter --overwrite data/$REGION-latest.osm.pbf \
-o data/$REGION-filtered.osm.pbf -e filter_buildings.conf --progress
fi

echo ">>> Convert OSM dump into O5M format for filtering"
osmconvert data/$REGION-latest.osm.pbf -o=data/$REGION-latest.o5m
echo ">>> Filter OSM data"
osmfilter data/$REGION-latest.o5m \
--keep="building=yes =house =residential =apartments =detached =terrace =semidetached_house =static_caravan =bungalow =semi =dormitory =stilt_house =terraced_house =dwelling_house =chalet =summer_cottage =flats =semi-detached =row_house =summer_house =semi_detached =townhouse =houses =farm =hospital =construction" \
--keep="amenity=hospital =nursing_home =prison =school" \
--keep="(amenity=social_facility and social_facility=nursing_home)" \
--keep="building:part=yes =house =residential =apartments =detached =terrace =semidetached_house =static_caravan =bungalow =semi =dormitory =stilt_house =terraced_house =dwelling_house =chalet =summer_cottage =flats =semi-detached =row_house =summer_house =semi_detached =townhouse =houses =farm =hospital =construction" \
--keep="disused:building=yes =house =residential =apartments =detached =terrace =semidetached_house =static_caravan =bungalow =semi =dormitory =stilt_house =terraced_house =dwelling_house =chalet =summer_cottage =flats =semi-detached =row_house =summer_house =semi_detached =townhouse =houses =farm =hospital =construction" \
--keep="abandoned:building=yes =house =residential =apartments =detached =terrace =semidetached_house =static_caravan =bungalow =semi =dormitory =stilt_house =terraced_house =dwelling_house =chalet =summer_cottage =flats =semi-detached =row_house =summer_house =semi_detached =townhouse =houses =farm =hospital =construction" \
--keep="demolished:building=yes =house =residential =apartments =detached =terrace =semidetached_house =static_caravan =bungalow =semi =dormitory =stilt_house =terraced_house =dwelling_house =chalet =summer_cottage =flats =semi-detached =row_house =summer_house =semi_detached =townhouse =houses =farm =hospital =construction" \
--keep="removed:building=yes =house =residential =apartments =detached =terrace =semidetached_house =static_caravan =bungalow =semi =dormitory =stilt_house =terraced_house =dwelling_house =chalet =summer_cottage =flats =semi-detached =row_house =summer_house =semi_detached =townhouse =houses =farm =hospital =construction" \
--keep="razed:building=yes =house =residential =apartments =detached =terrace =semidetached_house =static_caravan =bungalow =semi =dormitory =stilt_house =terraced_house =dwelling_house =chalet =summer_cottage =flats =semi-detached =row_house =summer_house =semi_detached =townhouse =houses =farm =hospital =construction" \
--keep="landuse=residential =farmyard =allotments =brownfield =quarry =construction" \
--keep="tourism=camp_site =caravan_site" \
-o=data/$REGION-filtered.o5m
echo ">>> Import filtered OSM data into PostGIS database"
osm2pgsql --create --slim --cache $MEMORY --number-processes 8 \
--flat-nodes data/nodes.bin --style residential_and_buildings.lua \
--output flex --proj 3035 data/$REGION-filtered.o5m
rm data/nodes.bin
--flat-nodes data/nodes.bin --drop --style residential_and_buildings.lua \
--output flex data/$REGION-filtered.osm.pbf
1 change: 0 additions & 1 deletion 06_export_geojson.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,3 @@
source env.sh
echo ">>> Export results as GeoJSON file"
psql -f export_geojson.sql -t > data/unmapped_census.geojson
geojson-rewind data/unmapped_census.geojson > data/unmapped_census_fixed.geojson
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2020 Hermann Schwarting
Copyright (c) 2024 Hermann Schwarting

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
84 changes: 40 additions & 44 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,33 +1,28 @@
# [Maproulette: Unmapped residential areas in Germany](https://maproulette.org/browse/challenges/14893)
# [Maproulette: Germany – add missing buildings](https://maproulette.org/browse/challenges/48757)

Population data for Germany is available on a high-resolution 100 m grid from
the census 2011. By comparing this data to residential landuse areas and
buildings, one can find areas in Germany that are not yet mapped in
[OpenStreetMap](https://www.openstreetmap.org/). These are mostly small
villages and settlements.
the
[census 2022](https://www.zensus2022.de/DE/Ergebnisse-des-Zensus/_inhalt.html).
By comparing this data to buildings, one can find areas in Germany that are not
yet mapped in [OpenStreetMap](https://www.openstreetmap.org/). These are mostly
farms, weekend homes and some new construction.

The identified unmapped areas are fed as mapping tasks into
[Maproulette](https://maproulette.org/browse/challenges/14893), a micro-tasking
[Maproulette](https://maproulette.org/browse/challenges/48757), a micro-tasking
platform for OpenStreetMap contributors, where they can improve the map one
small issue at a time.


## UPDATE: All done! The challenge was completed on 2021-01-01.

Check out the follow-up projects:

* [Maproulette: Residential landuse areas without any buildings](https://github.com/hfs/landuse_without_buildings)
* [Maproulette: Buildings without landuse](https://github.com/hfs/buildings_without_landuse)
An earlier version of this project was run in 2020, see tag
[v1.0](https://github.com/hfs/unmapped-census/tree/v1.0) of this repository.


## Data sources

1. German Census 2011:
[Population on a 100 m grid](https://www.zensus2011.de/DE/Home/Aktuelles/DemografischeGrunddaten.html)
(“Demographie im 100 Meter-Gitter im CSV-Format”). Keep in mind that the
data is from 2011 and things have changed since then. E.g. new houses built
since then are not taken into account. But if some place was inhabited in
2011 it is very likely that it is still inhabited today.
1. German Census 2022:
[Population on 100 m grid](https://www.zensus2022.de/DE/Ergebnisse-des-Zensus/_inhalt.html)
(“Bevölkerungszahlen in Gitterzellen”). Keep in mind that the data reflects the
cut-off date 2022-05-15: things may have changed since then, and the latest
construction is not included in the data.
2. [OpenStreetMap dump for Germany from Geofabrik](https://download.geofabrik.de/europe/germany.html)


Expand All @@ -43,21 +38,20 @@ Create the PostGIS database where the data analysis will happen.

### [03_import_census.sh](03_import_census.sh) – Import census data

Filter the census data for grid cells with population > 0. Then import the data
into the PostGIS database.
Import the data into the PostGIS database.

The data looks like this:

![Map of census data on 100 m grid](doc/zensus2011_original.jpg)
![Map of census data on 100 m grid](doc/zensus2022_original.jpg)

### [04_import_osm.sh](04_import_osm.sh) – Import OSM data

Filter the OpenStreetMap data for residential and other relevant landuses and
buildings. OpenStreetMap contains all kinds of geospatial data, e.g. roads,
shops and schools. We are only interested in areas where people live like
residential areas or buildings. Some other types of places need to be included
like farms, allotments, nursing homes, hospitals or prisons, where people may
reside permanently.
Filter the OpenStreetMap data for buildings and some other relevant land uses.
OpenStreetMap contains all kinds of geospatial data, e.g. roads, shops and
schools. We are only interested in areas where people live, like residential
areas or buildings. Some other types of places need to be included, like nursing
homes, hospitals, prisons or military barracks, where people may reside
permanently.

On this map you can see how these areas overlap with the census cells. We are
interested in the cells which are not yet covered by residential areas.
Expand All @@ -75,10 +69,6 @@ should be looked at for sure.
Finally identify clusters of touching squares and merge them into single
polygons to identify each connected area that should become one task.

The polygons are filtered once more to include only those which are larger than
one cell and contain at least 12 people. This is done to avoid false positives
and to work on the most urgent cases first.

This map shows what the final output looks like:

![Map of output polygons](doc/missing_landuse.jpg)
Expand All @@ -98,24 +88,30 @@ the criteria.
The processing requires about 100 GB of temporary disk space and 1 hour of
computation time.

### Using Docker and Docker Compose

This is the easier way if you already have [Docker](https://www.docker.com/)
and don’t want to bother with the dependencies.
### Running via `podman`

```
docker-compose up
The easiest way to get all required dependencies and run the pipeline is to use
[podman](https://podman.io/). It should be readily available as a package on
recent Linux distributions. If you have `podman` installed, you can run:

```bash
podman play kube kube.yaml
```

The output file is `data/unmapped_census_fixed.geojson`.
This will first build the image defined in
[unmapped_census/](unmapped_census/Dockerfile) which contains all the required
dependencies.

### Running manually
It starts this image and PostgreSQL+PostGIS as two containers and runs
[run.sh](run.sh), which just calls the scripts 01 to 06.

The generated output data can be found in `data/unmapped_census.geojson`.

Install PostgreSQL, PostGIS, `osm2pgsql`, `osmconvert` and `osmfilter` (package
`osmctools`) and `npm`.

Install [geojson-rewind](https://github.com/mapbox/geojson-rewind)
using `npm install -g @mapbox/geojson-rewind`.
### Running manually

Install PostgreSQL, PostGIS, `osm2pgsql` and `osmium`.

Edit `env.sh` to set the PostgreSQL credentials.

Expand All @@ -131,7 +127,7 @@ The source code of this project is licensed under the terms of the
The census data may be used for derivative works, if you mention the source
“Statistisches Bundesamt ([www.destatis.de](https://www.destatis.de/))”. See
their
[imprint](https://www.zensus2011.de/DE/Service/Impressum/impressum_node.html)
[imprint](https://www.zensus2022.de/DE/Service/Impressum/_inhalt.html)
for details.

As the output data is a Derivative Work of OpenStreetMap data, it has to be
Expand Down
Binary file modified doc/maproulette.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified doc/missing_landuse.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified doc/overlay_landuse_buildings.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified doc/remaining_cells.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file removed doc/zensus2011_original.jpg
Binary file not shown.
Binary file added doc/zensus2022_original.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
15 changes: 0 additions & 15 deletions docker-compose.yml

This file was deleted.

4 changes: 3 additions & 1 deletion env.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
export PGHOST=census_postgres
export REGION=germany

export PGHOST=postgres
export PGPORT=5432
export PGUSER=postgres
export PGPASSWORD=postgres
Expand Down
1 change: 0 additions & 1 deletion export_geojson.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ FROM (
SELECT
id,
population,
-- No idea why ForceRHR doesn't help
ST_ForceRHR(ST_Transform(geom, 4326)) AS geom
FROM census_unmapped
) u
Expand Down
3 changes: 3 additions & 0 deletions filter_buildings.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Filter expressions for `osmium tags-filter` (passed with -e in 04_import_osm.sh).
# Objects matching any of these expressions are kept in the filtered extract.
# NOTE(review): the leading "/" presumably leaves the object type unrestricted
# (nodes, ways and relations) - confirm against the osmium-tags-filter manual.

# Building keys, including lifecycle-prefixed variants and building:part.
/building,disused:building,abandoned:building,demolished:building,removed:building,razed:building,building:part
# Selected amenity values.
/amenity=hospital,nursing_home,prison,school,social_facility
# Selected landuse values.
/landuse=brownfield,quarry,construction,military
50 changes: 50 additions & 0 deletions kube.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Pod manifest for `podman play kube kube.yaml`.
# Defines one pod with two containers: a PostGIS database ("postgres") and the
# pipeline runner ("run"), plus the two host directories they mount.
apiVersion: v1
kind: Pod
metadata:
creationTimestamp: "2024-07-08T00:00:00Z"
labels:
app: unmapped-census
name: unmapped-census
spec:
containers:
# Database container: PostGIS image, started as `postgres -c logging_collector=true`.
- name: postgres
image: docker.io/postgis/postgis:16-3.4
args:
- postgres
- -c
- logging_collector=true
env:
# Same value as PGPASSWORD in env.sh.
- name: POSTGRES_PASSWORD
value: postgres
securityContext:
capabilities:
drop:
- CAP_MKNOD
- CAP_NET_RAW
- CAP_AUDIT_WRITE
volumeMounts:
# Database files are kept on the host via the "postgresql-data" volume below.
- mountPath: /var/lib/postgresql/data
name: postgresql-data
# Pipeline container: runs ./run.sh from the locally built unmapped_census image.
- name: run
image: unmapped_census:latest
command:
- ./run.sh
securityContext:
capabilities:
drop:
- CAP_MKNOD
- CAP_NET_RAW
- CAP_AUDIT_WRITE
volumeMounts:
# The "app" volume (host path ".") is mounted at /app.
- mountPath: /app
name: app
volumes:
# Host directory backing the PostgreSQL data directory.
- hostPath:
path: ./data/postgres
type: Directory
name: postgresql-data
# Host directory "." mounted into the run container.
- hostPath:
path: .
type: Directory
name: app
# Containers are not restarted after they exit.
restartPolicy: Never
33 changes: 20 additions & 13 deletions residential_and_buildings.lua
Original file line number Diff line number Diff line change
@@ -1,34 +1,41 @@
osm2pgsql.srid = 3035

local srid = 3035
local tables = {}

tables.landuse = osm2pgsql.define_area_table('landuse', {
{ column = 'geom', type = 'geometry' },
{ column = 'geom', type = 'geometry', projection = srid },
})

tables.building = osm2pgsql.define_area_table('building', {
{ column = 'geom', type = 'geometry' },
{ column = 'geom', type = 'geometry', projection = srid },
})

create_area = { geom = { create = 'area' } }

local building_tags = { 'building', 'disused:building', 'abandoned:building', 'demolished:building', 'removed:building',
'razed:building', 'building:part', 'amenity' }

function osm2pgsql.process_way(object)
if object.tags.landuse then
tables.landuse:add_row(create_area)
tables.landuse:insert({ geom = object.as_polygon():transform(srid) })
end
if object.tags.building or object.tags['building:part'] then
tables.building:add_row(create_area)
for _, v in ipairs(building_tags) do
if object.tags[v] then
tables.building:insert({ geom = object.as_polygon():transform(srid) })
break
end
end
end

function osm2pgsql.process_relation(object)
if object.tags.type == 'multipolygon' and object.tags.landuse then
tables.landuse:add_row(create_area)
tables.landuse:insert({ geom = object.as_multipolygon():transform(srid) })
end
if object.tags.type == 'multipolygon' and
(object.tags.building or object.tags['building:part'])
then
tables.building:add_row(create_area)
if object.tags.type == 'multipolygon' then
for _, v in ipairs(building_tags) do
if object.tags[v] then
tables.building:insert({ geom = object.as_multipolygon():transform(srid) })
break
end
end
end
end

Loading

0 comments on commit 0a4f937

Please sign in to comment.