Skip to content

Commit 83569f6

Browse files
Added walmart scraper
1 parent 043ec50 commit 83569f6

File tree

1 file changed

+133
-0
lines changed

1 file changed

+133
-0
lines changed

walmart-scraper/walmart_scrape.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
import codecs
2+
from datetime import datetime
3+
import json
4+
import requests
5+
from string import split
6+
import MySQLdb
7+
8+
# Maximum number of calls allowed per day
MAX_CALLS = 50000

# Key sent as the ``apiKey`` query parameter of the feed request; the
# placeholder must be replaced with a real key before running.
my_api_key = 'your_api_key'

# Local wall-clock time captured once when the script starts, formatted as
# 'YYYY-MM-DD HH:MM:SS'.
date_time = datetime.today().strftime('%Y-%m-%d %H:%M:%S')
14+
15+
# Download the item feed for category 4044_90548_90791.
response = requests.get('http://api.walmartlabs.com/v1/feeds/items?apiKey=' + my_api_key + '&categoryId=4044_90548_90791')

# Start from an empty feed so the name is always bound, even when the request
# fails or the body cannot be decoded; the later loop then simply finds nothing.
products_dict = {}

if response.status_code != 200:
    # Report the failing status and do not treat an error body as a feed.
    print('%s' % (response.status_code))
else:
    try:
        products_dict = response.json()
    except ValueError:
        # response.json() raises ValueError (or a subclass) on invalid JSON.
        print('Response body is not valid JSON')
    # The record-building loop indexes the payload by key, so anything other
    # than a JSON object is treated as an empty feed.
    if not isinstance(products_dict, dict):
        products_dict = {}
25+
26+
# Only items filed under exactly this category path are considered.
_TARGET_CATEGORY_PATH = "Home/Appliances/Refrigerators and Freezers"

# Placeholder stored when a price is absent, unparseable or zero.
_MISSING_PRICE = '0.00'


def _format_price(item, field):
    """Return item[field] as a two-decimal string, or '0.00' when unusable."""
    try:
        return '%.2f' % float(item[field])
    except (KeyError, TypeError, ValueError):
        return _MISSING_PRICE


def _is_refrigerator(item):
    """Return True if item is in the target category and named a refrigerator."""
    if item.get('categoryPath') != _TARGET_CATEGORY_PATH:
        return False
    # Whole-word match on the whitespace-split name, as in the original check.
    words = (item.get('name') or '').split()
    return 'Refrigerator' in words or 'refrigerator' in words


def _build_record(item, timestamp):
    """Return the 18-value row for item, ordered like the INSERT column list.

    Absent fields become '' (prices become '0.00').  The long description is
    reduced to ASCII by dropping every other character.
    """
    description = item.get('longDescription') or ''
    if description:
        # Round-trip through ASCII so the value stays a text string on both
        # Python 2 and Python 3 while still discarding non-ASCII characters.
        description = description.encode('ascii', 'ignore').decode('ascii')
    return [
        item.get('upc', ''),
        item.get('modelNumber', ''),
        _format_price(item, 'msrp'),
        _format_price(item, 'salePrice'),
        item.get('customerRating', ''),
        str(item['numReviews']) if 'numReviews' in item else '',
        'Walmart',
        (item.get('categoryPath') or '').split('/')[-1],
        str(item['itemId']) if 'itemId' in item else '',
        item.get('brandName', ''),
        item.get('color', ''),
        item.get('name', ''),
        item.get('productUrl', ''),
        item.get('thumbnailImage', ''),
        item.get('mediumImage', ''),
        item.get('largeImage', ''),
        description,
        timestamp,
    ]


def _collect_products(products, timestamp):
    """Return (rows, seen_upcs) for every qualifying item in the feed.

    An item qualifies when it is a refrigerator in the target category, has a
    non-empty UPC not seen before, and has both a regular and a sale price
    that are present and non-zero.
    """
    rows = []
    seen_upcs = {}
    for entries in products.values():
        # Feed values that are not lists of items are ignored.
        if not isinstance(entries, list):
            continue
        for item in entries:
            if not isinstance(item, dict) or not _is_refrigerator(item):
                continue
            record = _build_record(item, timestamp)
            upc = record[0]
            if not upc or upc in seen_upcs:
                continue
            # The original test was "x != '0.00' or x != ''", which is always
            # true; the intended filter rejects rows lacking either price.
            if _MISSING_PRICE in (record[2], record[3]):
                continue
            rows.append(record)
            seen_upcs[upc] = 1
    return rows, seen_upcs


# all_products: rows to insert; unique_upcs: UPCs already accepted.
all_products, unique_upcs = _collect_products(products_dict, date_time)
123+
124+
125+
# Store every collected row in the ``products`` table of the local database.
connection = MySQLdb.connect(host='localhost', port=3306, db='products_poc', user='your_username', passwd='your_password', use_unicode=True, charset="utf8")
try:
    c = connection.cursor()
    try:
        for row in all_products:
            print(row)
            # Values are passed separately so the driver does the quoting.
            c.execute("""INSERT INTO products (upc, model_number, regular_price, sale_price, review_score, review_count, retailer,
                department, sku, brand, color, name, url, image_small, image_medium, image_large, long_description, date_time)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);""", row)
        # Commit once, after every row has been accepted, so a failure part
        # way through does not leave a partial batch behind.
        connection.commit()
    except Exception:
        connection.rollback()
        raise
    finally:
        c.close()
finally:
    # Always release the connection, even when an insert fails.
    connection.close()

0 commit comments

Comments
 (0)