Commit 63cb8ce: Adjusted spacing
1 parent: 78625b7

1 file changed: +50 -56 lines

main.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python3
-import csv,datetime,decimal,json,gzip,os,pyodbc,shutil,string,tempfile,uuid
+import csv, datetime, decimal, json, gzip, os, pyodbc, shutil, string, tempfile, uuid
 import snowflake.connector as sf
 import multiprocessing
+
 #####################################################################
 ## Pre-job
 ## 1) Validate connections
@@ -10,7 +11,7 @@
 ## b) Replace environment variables ${something} style
 ## c) Convert to dict
 #####################################################################
-#1
+# 1
 # Test environment variables
 try:
     PYODBC_DRIVER = os.environ['PYODBC_DRIVER']
@@ -49,14 +50,10 @@
 # 2
 # load job file
 with open('job_list.json') as table_list_file:
-    table_list_raw \
-        = table_list_file.read()
-    table_list_template \
-        = string.Template(table_list_raw)
-    job_list_json \
-        = table_list_template.substitute({x: os.environ[x] for x in os.environ})
-    job_list \
-        = json.loads(job_list_json)[0]
+    table_list_raw = table_list_file.read()
+    table_list_template = string.Template(table_list_raw)
+    job_list_json = table_list_template.substitute({x: os.environ[x] for x in os.environ})
+    job_list = json.loads(job_list_json)[0]
 
 
 #####################################################################
@@ -71,86 +68,83 @@
 ## ...repeat for each job
 #####################################################################
 def write_data(chunk):
-    path = os.path.join(chunk[0],'')
+    path = os.path.join(chunk[0], '')
     job_name = chunk[1]
     header = chunk[2]
     rows = chunk[3]
     num = multiprocessing.current_process().name[16:]
     filename = f'{path}{job_name}.{num}.csv.gz'
     if os.path.exists(filename) is False:
         with gzip.open(filename, 'at', encoding='utf-8', newline='') as f:
-            csv_writer = csv.writer(f,
-                quoting=csv.QUOTE_NONNUMERIC)
+            csv_writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
             csv_writer.writerows(header)
     with gzip.open(filename, 'at', encoding='utf-8', newline='') as f:
-        csv_writer = csv.writer(f,quoting=csv.QUOTE_NONNUMERIC)
+        csv_writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
         csv_writer.writerows(rows)
 
-#1
+
+# 1
 if __name__ == '__main__':
     src_def = {}
     for job in job_list.keys():
         src_qry = job_list[job]['extract']['query']
-        probe_qry=f"""select * from ({src_qry}) subquery WHERE 0=1"""
+        probe_qry = f"""select * from ({src_qry}) subquery WHERE 0=1"""
         odbc_cursor = pyodbc.connect(odbc_connection_string).cursor()
         odbc_cursor.execute(probe_qry)
         odbc_cursor.fetchone()
         src_def[job] = odbc_cursor.description
         odbc_cursor.close()
-##    for item in src_def:
-##        print(item, src_def[item], '\n')
 
-    #2
+
+##    for item in src_def:
+##        print(item, src_def[item], '\n')
+
+    # 2
     def get_rows(cursor):
         while True:
             row = cursor.fetchmany(500)
             if len(row) != 0:
                 yield row
             else:
                 break
+
+
     tgt_sql = {}
     with tempfile.TemporaryDirectory() as tempdir:
         for job_name in job_list.keys():
             print(f'Extracting {job_name}')
             src_qry = job_list[job_name]['extract']['query']
-            header = []
-            header.append( \
-                tuple(x[0].upper() for x in src_def[job_name]))
-            odbc_cursor = \
-                pyodbc.connect(odbc_connection_string).cursor()
+            header = [tuple(x[0].upper() for x in src_def[job_name])]
+            odbc_cursor = pyodbc.connect(odbc_connection_string).cursor()
             odbc_cursor.execute(src_qry)
 
             with multiprocessing.Pool() as p:
                 while True:
                     try:
                         rows = next(get_rows(odbc_cursor))
-                        p.map(write_data,(
-                            (tempdir,job_name,header,rows),))
+                        p.map(write_data, ((tempdir, job_name, header, rows),))
                     except StopIteration:
                         break
-## pauses the tempdir clean up
-## can be used to inspect files
-##os.system("pause")
+## pauses the tempdir clean up
+## can be used to inspect files
+##os.system("pause")
 
-#####################################################################
-## Transform
-## 1) Covert source to target definition
-## 2) Generate sql for later use
-#####################################################################
+#####################################################################
+## Transform
+## 1) Covert source to target definition
+## 2) Generate sql for later use
+#####################################################################
             with open('type_conversion.json') as tc:
                 tc2 = tc.read()
                 tc3 = json.loads(tc2)[0]
-                tc4 = dict([(eval(f'type({k})'),tc3[k]) for k in tc3.keys()])
-                tgt_def = {}
-                tgt_def[job_name]=[(col[0],tc4[col[1]]) for col in src_def[job_name]]
-                col_name=','.join( \
-                    ['"'+i[0].upper()+'"'+' '+i[1]+'\n' for i in tgt_def[job_name]])
-                col_num=','.join(\
-                    ['t.$'+str(n)+'\n' for n in range(1,len(tgt_def[job_name])+1)])
+                tc4 = dict([(eval(f'type({k})'), tc3[k]) for k in tc3.keys()])
+                tgt_def = {job_name: [(col[0], tc4[col[1]]) for col in src_def[job_name]]}
+                col_name = ','.join(['"' + i[0].upper() + '"' + ' ' + i[1] + '\n' for i in tgt_def[job_name]])
+                col_num = ','.join(['t.$' + str(n) + '\n' for n in range(1, len(tgt_def[job_name]) + 1)])
             database = job_list[job_name]['load']['database'].upper()
             schema = job_list[job_name]['load']['schema'].upper()
             table = job_list[job_name]['load']['table'].upper()
-            path = os.path.join(tempdir,'').replace('\\','/')
+            path = os.path.join(tempdir, '').replace('\\', '/')
            stage = job_name.upper()
             tgt_sql[job_name] = [
                 f'CREATE DATABASE IF NOT EXISTS "{database}";',
@@ -175,34 +169,34 @@ def get_rows(cursor):
                 f'USE DATABASE "{database}";',
                 f'USE SCHEMA "{schema}";',
                 f'USE WAREHOUSE LOAD_WH;',
-                f'CREATE OR REPLACE TABLE "{schema}"."{table}"\n('+
-                col_name+
-                ') AS SELECT\n '+
-                col_num+
-                f'FROM @"{stage}" t;',
+                f'CREATE OR REPLACE TABLE "{schema}"."{table}"\n(' +
+                col_name +
+                ') AS SELECT\n ' +
+                col_num +
+                f'FROM @"{stage}" t;',
                 f'DROP STAGE "{stage}";'
             ]
-            #print(tgt_sql)
+            # print(tgt_sql)
 
-#####################################################################
-## Load
-## 1) Upload files
-## 2) Create tables on load
-#####################################################################
+#####################################################################
+## Load
+## 1) Upload files
+## 2) Create tables on load
+#####################################################################
         sf_cursor = sf.connect(**sf_dict).cursor()
         for job_name in job_list.keys():
             print(f'Uploading {job_name}')
             for stmt in tgt_sql[job_name][:7]:
                 sf_cursor.execute(stmt)
         sf_cursor.close()
-
+
         sf_cursor = sf.connect(**sf_dict).cursor()
         for job_name in job_list.keys():
             print(f'Loading {job_name}')
             for stmt in tgt_sql[job_name][7:]:
                 sf_cursor.execute(stmt)
         for job_name in job_list.keys():
-            print(f'Completed {job_name}')
+            print(f'Completed {job_name}')
         sf_cursor.close()
-
+
 ### END ###
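For readers skimming the diff, here is a minimal, self-contained sketch of the batch-fetch pattern that get_rows() and the multiprocessing.Pool loop above implement. FakeCursor is a hypothetical stand-in for the pyodbc cursor, and the plain for-loop is a simplified form of the next()/StopIteration loop in main.py; it is an illustration only, not part of the commit.

import multiprocessing


def get_rows(cursor, size=500):
    # Yield batches of up to `size` rows until the cursor is exhausted,
    # mirroring the fetchmany(500) generator in main.py.
    while True:
        batch = cursor.fetchmany(size)
        if not batch:
            break
        yield batch


class FakeCursor:
    # Hypothetical stand-in for the pyodbc cursor used in main.py.
    def __init__(self, rows):
        self._rows = list(rows)

    def fetchmany(self, size):
        batch, self._rows = self._rows[:size], self._rows[size:]
        return batch


if __name__ == '__main__':
    cursor = FakeCursor([(i, f'row {i}') for i in range(1200)])
    with multiprocessing.Pool() as p:
        for rows in get_rows(cursor):
            # main.py hands each batch to write_data via p.map; here each
            # worker just reports the batch size (500, 500, 200).
            print(p.map(len, (rows,)))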
