Skip to content

Commit c6adc6b

Browse files
jvanmalder and jvanmalder authored
configurable escapechar for csv reader (#55)
Co-authored-by: jvanmalder <jvanmalder@openanalytics.eu>
1 parent 746c585 commit c6adc6b

File tree

3 files changed

+6
-4
lines changed

3 files changed

+6
-4
lines changed

redisgraph_bulk_loader/bulk_insert.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -62,13 +62,14 @@ def process_entities(entities):
6262
@click.option('--skip-invalid-nodes', '-s', default=False, is_flag=True, help='ignore nodes that use previously defined IDs')
6363
@click.option('--skip-invalid-edges', '-e', default=False, is_flag=True, help='ignore invalid edges, print an error message and continue loading (True), or stop loading after an edge loading failure (False)')
6464
@click.option('--quote', '-q', default=0, help='the quoting format used in the CSV file. QUOTE_MINIMAL=0,QUOTE_ALL=1,QUOTE_NONNUMERIC=2,QUOTE_NONE=3')
65+
@click.option('--escapechar', '-x', default='\\', help='the escape char used for the CSV reader (default \\). Use "none" for None.')
6566
# Buffer size restrictions
6667
@click.option('--max-token-count', '-c', default=1024, help='max number of processed CSVs to send per query (default 1024)')
6768
@click.option('--max-buffer-size', '-b', default=2048, help='max buffer size in megabytes (default 2048)')
6869
@click.option('--max-token-size', '-t', default=500, help='max size of each token in megabytes (default 500, max 512)')
6970
@click.option('--index', '-i', multiple=True, help='Label:Propery on which to create an index')
7071
@click.option('--full-text-index', '-f', multiple=True, help='Label:Propery on which to create an full text search index')
71-
def bulk_insert(graph, host, port, password, unix_socket_path, nodes, nodes_with_label, relations, relations_with_type, separator, enforce_schema, skip_invalid_nodes, skip_invalid_edges, quote, max_token_count, max_buffer_size, max_token_size, index, full_text_index):
72+
def bulk_insert(graph, host, port, password, unix_socket_path, nodes, nodes_with_label, relations, relations_with_type, separator, enforce_schema, skip_invalid_nodes, skip_invalid_edges, escapechar, quote, max_token_count, max_buffer_size, max_token_size, index, full_text_index):
7273
if sys.version_info[0] < 3:
7374
raise Exception("Python 3 is required for the RedisGraph bulk loader.")
7475

@@ -81,7 +82,7 @@ def bulk_insert(graph, host, port, password, unix_socket_path, nodes, nodes_with
8182
store_node_identifiers = any(relations) or any(relations_with_type)
8283

8384
# Initialize configurations with command-line arguments
84-
config = Config(max_token_count, max_buffer_size, max_token_size, enforce_schema, skip_invalid_nodes, skip_invalid_edges, separator, int(quote), store_node_identifiers)
85+
config = Config(max_token_count, max_buffer_size, max_token_size, enforce_schema, skip_invalid_nodes, skip_invalid_edges, separator, int(quote), store_node_identifiers, escapechar)
8586

8687
# Attempt to connect to Redis server
8788
try:

redisgraph_bulk_loader/config.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
class Config:
2-
def __init__(self, max_token_count=1024 * 1023, max_buffer_size=2_048, max_token_size=512, enforce_schema=False, skip_invalid_nodes=False, skip_invalid_edges=False, separator=',', quoting=3, store_node_identifiers=False):
2+
def __init__(self, max_token_count=1024 * 1023, max_buffer_size=2_048, max_token_size=512, enforce_schema=False, skip_invalid_nodes=False, skip_invalid_edges=False, separator=',', quoting=3, store_node_identifiers=False, escapechar='\\'):
33
"""Settings for this run of the bulk loader"""
44
# Maximum number of tokens per query
55
# 1024 * 1024 is the hard-coded Redis maximum. We'll set a slightly lower limit so
@@ -17,6 +17,7 @@ def __init__(self, max_token_count=1024 * 1023, max_buffer_size=2_048, max_token
1717
self.skip_invalid_edges = skip_invalid_edges
1818
self.separator = separator
1919
self.quoting = quoting
20+
self.escapechar = None if escapechar.lower() == "none" else escapechar
2021

2122
# True if we are building relations as well as nodes
2223
self.store_node_identifiers = store_node_identifiers

redisgraph_bulk_loader/entity_file.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -175,7 +175,7 @@ def __init__(self, filename, label, config):
175175

176176
# Initialize CSV reader that ignores leading whitespace in each field
177177
# and does not modify input quote characters
178-
self.reader = csv.reader(self.infile, delimiter=config.separator, skipinitialspace=True, quoting=config.quoting, escapechar='\\')
178+
self.reader = csv.reader(self.infile, delimiter=config.separator, skipinitialspace=True, quoting=config.quoting, escapechar=config.escapechar)
179179

180180
self.packed_header = b''
181181
self.binary_entities = []

0 commit comments

Comments
 (0)