3 changes: 3 additions & 0 deletions .env_example
@@ -7,6 +7,9 @@ export VENV_NAME="ccdb-data-pipeline"
 # set this value to limit the number of records to load to reduce load time
 # export MAX_RECORDS=10000
 
+# set an optional batch size for bulk indexing runs; defaults to 2000
+# export BATCH_SIZE=5000
+
 # export INPUT_S3_BUCKET="foo"
 # export INPUT_S3_KEY="bar"
 # export INPUT_S3_KEY_METADATA="barrr"
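One caveat worth noting: os.getenv returns a string whenever the variable is set in the environment, so the value has to be cast to an integer before it can serve as a chunk size. Below is a minimal sketch of a defensive reader for this setting; the get_batch_size helper is hypothetical (the PR itself assigns the value directly at module level, as shown in the next file):

import os

def get_batch_size(default=2000):
    """Read BATCH_SIZE from the environment, falling back to a default.

    os.getenv returns strings for set variables, so the raw value is
    cast to int; a malformed value (hypothetical guard, not in the PR)
    falls back to the default instead of aborting the indexing run.
    """
    raw = os.getenv("BATCH_SIZE")
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        return default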
5 changes: 4 additions & 1 deletion complaints/ccdb/index_ccdb.py
@@ -1,4 +1,5 @@
 import json
+import os
 import sys
 from functools import partial
 
@@ -12,6 +13,8 @@
     get_es_connection)
 from common.log import setup_logging
 
+BATCH_SIZE = int(os.getenv("BATCH_SIZE", "2000"))
+
 # -----------------------------------------------------------------------------
 # Enhancing Functions
 # -----------------------------------------------------------------------------
@@ -137,7 +140,7 @@ def yield_chunked_docs(get_data_function, data, chunk_size):

 def index_json_data(
     es, logger, doc_type_name, settings_json, mapping_json, data, index_name,
-    backup_index_name, alias, chunk_size=2000, qas_timestamp=0
+    backup_index_name, alias, chunk_size=BATCH_SIZE, qas_timestamp=0
 ):
     settings = load_json(logger, settings_json)
     mapping = load_json(logger, mapping_json)
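For orientation, here is a minimal sketch of the chunking pattern that chunk_size drives. The generator name and signature match yield_chunked_docs from the hunk header above, but the body and the elasticsearch.helpers.bulk wiring are assumptions for illustration, not the repository's actual implementation:

from elasticsearch import helpers  # assumes the elasticsearch-py client

def yield_chunked_docs(get_data_function, data, chunk_size):
    # Accumulate transformed documents and emit them in fixed-size
    # batches so each bulk request stays bounded in memory and size.
    chunk = []
    for doc in get_data_function(data):
        chunk.append(doc)
        if len(chunk) >= chunk_size:
            yield chunk
            chunk = []
    if chunk:
        yield chunk  # flush the final partial batch

# Usage sketch: each chunk becomes one bulk request against the index.
# for chunk in yield_chunked_docs(transform, data, BATCH_SIZE):
#     helpers.bulk(es, chunk, index=index_name)

Keeping chunk_size as a keyword argument that defaults to BATCH_SIZE preserves existing call sites while letting the environment override the batch size per run.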