Skip to content

Commit

Permalink
data generators
Browse files Browse the repository at this point in the history
  • Loading branch information
stevelowenthal committed Apr 18, 2015
1 parent 6130d3c commit 882beac
Show file tree
Hide file tree
Showing 6 changed files with 12 additions and 11 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
.idea/
*.iml
cache/
4 changes: 2 additions & 2 deletions generate_data/1.seed_zipcode_data/1.zipcodes-to-cassandra.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def init_cassandra():

# grab ip address information from application.cfg
app = Flask(__name__)
app.config.from_pyfile('/cornerstone/web/datastax/cornerstone-python/Cornerstone/application.cfg')
app.config.from_pyfile('../../web/application.cfg')
ip_addresses = app.config['DSE_CLUSTER'].split(',')

# connect to Cassandra
Expand Down Expand Up @@ -109,7 +109,7 @@ def parse_zipcodes(futures, session):
(?, ?, ?, ?, ?, ?, ?)''')

# read zipcodes from csv file
with open('/cornerstone/scripts/datastax/black-friday/1.seed_zipcode_data/free-zipcode-database.csv', 'rb') as csvfile:
with open('free-zipcode-database.csv', 'rb') as csvfile:
csv_reader = csv.DictReader(csvfile, delimiter=',', quotechar='"')
for row in csv_reader:
# create insert values object
Expand Down
4 changes: 2 additions & 2 deletions generate_data/2.seed_retail_data/1.download-data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
set -x # echo on

# http://snap.stanford.edu/data/amazon/
wget -c http://snap.stanford.edu/data/amazon/Electronics.txt.gz -P /cache/downloads
wget -c http://snap.stanford.edu/data/amazon/brands.txt.gz -P /cache/downloads
wget -c http://snap.stanford.edu/data/amazon/Electronics.txt.gz -P ../../cache/downloads
wget -c http://snap.stanford.edu/data/amazon/brands.txt.gz -P ../../cache/downloads
6 changes: 3 additions & 3 deletions generate_data/2.seed_retail_data/2.data-to-cassandra.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def init_cassandra():

# grab ip address information from application.cfg
app = Flask(__name__)
app.config.from_pyfile('/cornerstone/web/datastax/cornerstone-python/Cornerstone/application.cfg')
app.config.from_pyfile('../../web/application.cfg')
ip_addresses = app.config['DSE_CLUSTER'].split(',')

# connect to Cassandra
Expand Down Expand Up @@ -104,7 +104,7 @@ def parse_products(futures, session):
'INSERT INTO retail.products (product_id, title, price) '
'VALUES (?, ?, ?)')

with gzip.open('/cache/downloads/Electronics.txt.gz') as f:
with gzip.open('../../cache/downloads/Electronics.txt.gz') as f:
# compile regular expressions
p = re.compile('^product')
p_id = re.compile('^product/productId: (.*)')
Expand Down Expand Up @@ -172,7 +172,7 @@ def parse_brands(futures, session):
'INSERT INTO retail.brands (brand) '
'VALUES (?)')

with gzip.open('/cache/downloads/brands.txt.gz') as f:
with gzip.open('../../cache/downloads/brands.txt.gz') as f:
# compile regular expressions
p = re.compile('(\w*) (.*)')

Expand Down
4 changes: 2 additions & 2 deletions generate_data/3.scan_data/1.extract-ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from cassandra.query import ordered_dict_factory

app = Flask(__name__)
app.config.from_pyfile('/cornerstone/web/datastax/cornerstone-python/Cornerstone/application.cfg')
app.config.from_pyfile('../../web/application.cfg')
ip_addresses = app.config['DSE_CLUSTER'].split(',')

cluster = Cluster(ip_addresses)
Expand All @@ -17,6 +17,6 @@

response = session.execute('SELECT product_id FROM retail.products')

with open('/cache/product_ids.txt', 'w') as f:
with open('../../cache/product_ids.txt', 'w') as f:
for row in response:
f.write('%s\n' % row['product_id'])
4 changes: 2 additions & 2 deletions generate_data/3.scan_data/2.extract-zipcodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from cassandra.query import ordered_dict_factory

app = Flask(__name__)
app.config.from_pyfile('/cornerstone/web/datastax/cornerstone-python/Cornerstone/application.cfg')
app.config.from_pyfile('../../web/application.cfg')
ip_addresses = app.config['DSE_CLUSTER'].split(',')

cluster = Cluster(ip_addresses)
Expand All @@ -15,6 +15,6 @@

response = session.execute('SELECT zipcode FROM retail.zipcodes')

with open('/cache/zipcodes.txt', 'w') as f:
with open('../../cache/zipcodes.txt', 'w') as f:
for row in response:
f.write('%s\n' % row['zipcode'])

0 comments on commit 882beac

Please sign in to comment.