-
Notifications
You must be signed in to change notification settings - Fork 190
Replacing DataPusher with XLoader
Here we describe the changes needed to this (ckan-docker) repository to use the XLoader CKAN extension rather than DataPusher. Like DataPusher, XLoader automatically downloads tabular data files (such as CSV or Excel) from resources when they are added to the CKAN site, parses them to extract the actual data, and then uses the DataStore API to push that data into CKAN's DataStore database.
There is one caveat though: The NGINX reverse proxy container has been taken out of the configuration.
XLoader is installed into the same container as CKAN core.
At the time of writing (July 2023), the versions used are CKAN 2.10.1 and XLoader 1.0.1.
There are 3 file changes required:
docker-compose.yml
.env
Dockerfile
An xloader/ directory containing the Dockerfile described below is also needed.
- docker-compose.yml
# Compose stack running CKAN (with the XLoader extension) together with its
# PostgreSQL, Solr and Redis backing services. There is no NGINX service in
# this configuration.
version: "3"

# Named volumes that persist data across container re-creation.
volumes:
  ckan_storage:
  pg_data:
  solr_data:

services:
  # CKAN + XLoader, built from xloader/Dockerfile.
  ckan-xloader:
    container_name: ${XLOADER_CONTAINER_NAME}
    build:
      context: xloader/
      dockerfile: Dockerfile
      args:
        - TZ=${TZ}
    networks:
      - xloadernet
      - dbnet
      - solrnet
      - redisnet
    env_file:
      - .env
    # Start only after every backing service reports healthy.
    depends_on:
      db:
        condition: service_healthy
      solr:
        condition: service_healthy
      redis:
        condition: service_healthy
    volumes:
      - ckan_storage:/var/lib/ckan
    ports:
      - "0.0.0.0:${XLOADER_PORT_HOST}:${XLOADER_PORT}"
    restart: unless-stopped
    healthcheck:
      # The check runs inside this container, so probe localhost rather than
      # a hostname that only resolves while XLOADER_CONTAINER_NAME keeps its
      # default value. 5000 is the port the Dockerfile EXPOSEs.
      test: ["CMD", "wget", "-qO", "/dev/null", "http://localhost:5000"]

  # PostgreSQL, built from the postgresql/ build context.
  db:
    container_name: ${POSTGRESQL_CONTAINER_NAME}
    build:
      context: postgresql/
    networks:
      - dbnet
    # Bare names: the values are passed through from the environment in
    # which Compose is run (the .env file supplies them).
    environment:
      - POSTGRES_USER
      - POSTGRES_PASSWORD
      - POSTGRES_DB
      - PGDATA
      - CKAN_DB_USER
      - CKAN_DB_PASSWORD
      - CKAN_DB
      - DATASTORE_READONLY_USER
      - DATASTORE_READONLY_PASSWORD
      - DATASTORE_DB
    volumes:
      - pg_data:/var/lib/postgresql/data
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "pg_isready", "-U", "${POSTGRES_USER}", "-d", "${POSTGRES_DB}"]

  # Solr search index.
  solr:
    container_name: ${SOLR_CONTAINER_NAME}
    networks:
      - solrnet
    image: ckan/ckan-solr:${SOLR_IMAGE_VERSION}
    volumes:
      - solr_data:/var/solr
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "wget", "-qO", "/dev/null", "http://localhost:8983/solr/"]

  # Redis.
  redis:
    container_name: ${REDIS_CONTAINER_NAME}
    image: redis:${REDIS_VERSION}
    networks:
      - redisnet
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "redis-cli", "-e", "QUIT"]

# Only xloadernet is a regular network; the backing-service networks are
# marked internal.
networks:
  xloadernet:
  solrnet:
    internal: true
  dbnet:
    internal: true
  redisnet:
    internal: true
.env
# Environment for the CKAN + XLoader compose stack. The compose file loads
# this file into the ckan-xloader container via env_file and also uses the
# values for ${...} substitution.
# The credentials and secrets below are placeholders; change them before any
# non-local deployment.

# Container names
NGINX_CONTAINER_NAME=nginx
REDIS_CONTAINER_NAME=redis
POSTGRESQL_CONTAINER_NAME=db
SOLR_CONTAINER_NAME=solr
DATAPUSHER_CONTAINER_NAME=datapusher
CKAN_CONTAINER_NAME=ckan
XLOADER_CONTAINER_NAME=xloader

# Host Ports
CKAN_PORT_HOST=5000
NGINX_PORT_HOST=81
NGINX_SSLPORT_HOST=8443

# CKAN databases
POSTGRES_USER=postgres
POSTGRES_PASSWORD=postgres
POSTGRES_DB=postgres
POSTGRES_HOST=db
PGDATA=/var/lib/postgresql/data/db
CKAN_DB_USER=ckandbuser
CKAN_DB_PASSWORD=ckandbpassword
CKAN_DB=ckandb
DATASTORE_READONLY_USER=datastore_ro
DATASTORE_READONLY_PASSWORD=datastore
DATASTORE_DB=datastore
CKAN_SQLALCHEMY_URL=postgresql://ckandbuser:ckandbpassword@db/ckandb
CKAN_DATASTORE_WRITE_URL=postgresql://ckandbuser:ckandbpassword@db/datastore
CKAN_DATASTORE_READ_URL=postgresql://datastore_ro:datastore@db/datastore

# Test database connections
# Use the "postgresql://" scheme throughout: SQLAlchemy 1.4+ no longer
# accepts the legacy "postgres://" dialect name.
TEST_CKAN_SQLALCHEMY_URL=postgresql://ckan:ckan@db/ckan_test
TEST_CKAN_DATASTORE_WRITE_URL=postgresql://ckan:ckan@db/datastore_test
TEST_CKAN_DATASTORE_READ_URL=postgresql://datastore_ro:datastore@db/datastore_test

# CKAN core
# CKAN_PORT_HOST is defined once, under "Host Ports" above.
CKAN_VERSION=2.10.0
CKAN_SITE_ID=default
CKAN_SITE_URL=http://localhost:5000
CKAN_PORT=5000
CKAN___BEAKER__SESSION__SECRET=CHANGE_ME
# See https://docs.ckan.org/en/latest/maintaining/configuration.html#api-token-settings
CKAN___API_TOKEN__JWT__ENCODE__SECRET=string:CHANGE_ME
CKAN___API_TOKEN__JWT__DECODE__SECRET=string:CHANGE_ME
CKAN_SYSADMIN_NAME=ckan_admin
CKAN_SYSADMIN_PASSWORD=test1234
CKAN_SYSADMIN_EMAIL=your_email@example.com
CKAN_STORAGE_PATH=/var/lib/ckan
CKAN_SMTP_SERVER=smtp.corporateict.domain:25
CKAN_SMTP_STARTTLS=True
CKAN_SMTP_USER=user
CKAN_SMTP_PASSWORD=pass
CKAN_SMTP_MAIL_FROM=ckan@localhost
TZ=UTC

# Solr
SOLR_IMAGE_VERSION=2.9-solr8
CKAN_SOLR_URL=http://solr:8983/solr/ckan
TEST_CKAN_SOLR_URL=http://solr:8983/solr/ckan

# Redis
REDIS_VERSION=6
CKAN_REDIS_URL=redis://redis:6379/1
TEST_CKAN_REDIS_URL=redis://redis:6379/1

# Datapusher
DATAPUSHER_VERSION=0.0.20
CKAN_DATAPUSHER_URL=http://xloader:5000
CKAN__DATAPUSHER__CALLBACK_URL_BASE=http://xloader:5000

# Xloader
XLOADER_VERSION=1.0.1
XLOADER_PORT=5000
XLOADER_PORT_HOST=5000

# NGINX
NGINX_PORT=80
NGINX_SSLPORT=443

# Extensions
CKAN__PLUGINS="envvars image_view text_view recline_view datastore datapusher"
XLOADER__PLUGINS="envvars image_view text_view recline_view datastore xloader"
CKAN__HARVEST__MQ__TYPE=redis
CKAN__HARVEST__MQ__HOSTNAME=redis
CKAN__HARVEST__MQ__PORT=6379
CKAN__HARVEST__MQ__REDIS_DB=1
Dockerfile
# Image for the ckan-xloader service. The 1.0.1 tag matches XLOADER_VERSION
# in the .env file.
FROM ckan/ckan-base-xloader:1.0.1

# Time zone baked into the image. Declared as a build argument so that the
# TZ value passed by the compose file's build args (or --build-arg TZ=...)
# actually takes effect; defaults to UTC when no argument is given.
ARG TZ=UTC

# Set up environment variables
ENV APP_DIR=/srv/app
ENV TZ=${TZ}

RUN echo "${TZ}" > /etc/timezone

# Copy the zoneinfo file only when /etc/localtime is not already that same
# file (the -ef test), because cp refuses to copy a file onto itself.
RUN if ! [ "/usr/share/zoneinfo/${TZ}" -ef /etc/localtime ]; then \
        cp "/usr/share/zoneinfo/${TZ}" /etc/localtime; \
    fi

EXPOSE 5000