-
Notifications
You must be signed in to change notification settings - Fork 0
/
co-relation_process.py
151 lines (113 loc) · 5.45 KB
/
co-relation_process.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
from Utility import Utility
import os
import json
def insert_excel(row):
"""
This function will add a row in excel
"""
#load the excel, if exists
try:
df_ = pd.read_excel('sample_big_data.xlsx', sheet=1)
df_.loc[len(df_)] = row
except:
print("excel not exists, creating one")
df_ = pd.DataFrame(columns=columns)
df_.loc[0] = row
#initiate excel writer to write altered frame in excel
writer = pd.ExcelWriter('sample_big_data.xlsx', engine='xlsxwriter')
try:
df_.to_excel(writer,'Sheet1')
writer.save()
except:
print("excel found/created not able to append row")
location_list_footfall = {}
def insert_data():
#insert data in CR_Dataset
values = ""
column_names = set(location_list_footfall.keys())
for location_footfall in location_list_footfall:
values += location_list_footfall[location_footfall]
print column_names, values
# sql = "INSERT INTO CR_Reports {} VALUES {}".format(column_names)
def generate_report(unprocessed_data_list):
utility = Utility(log_file="big_data_process_log_file.txt", debug=1, db='Analytics')
for data in unprocessed_data_list:
image_key = data[0]
sql = "SELECT location, similarity, count(*) FROM BIG_DATA WHERE image_key = '{}' and device_type = '{}' GROUP BY location;".format(image_key,'footfall')
matched_location_list, matched_location_list_count = utility.query_database(sql)
if not matched_location_list_count:
print("passenger not found at any of the shops")
continue
for matched_location in matched_location_list_count:
similarity, location = matched_location[0], matched_location[1]
if int(similarity) < 85:
continue
location = "".join(location.split(" "))
try:
location_list_footfall[location] += 1
except:
print("appending new location in list")
location_list_footfall[location] = 1
#insert row into the database #cursor.execute('SELECT last_insert_id()')
print(location_list_footfall)
insert_data()
def main():
utility = Utility(log_file='big_data_process_log_file.txt', debug=1, db='Analytics')
sql = "SELECT ReportID FROM CR_Reports WHERE {}".format(filter_cr_reports)
print sql
report_list, report_list_count = utility.query_database(sql)
print report_list, report_list_count
if report_list_count:
print("report already processed")
return None
sql_1 = "SELECT distinct(image_key) FROM BIG_DATA WHERE {}".format(filter_big_data)
print sql_1
unprocessed_data_list, unprocessed_data_list_count = utility.query_database(sql_1)
print unprocessed_data_list_count
if unprocessed_data_list_count:
generate_report(unprocessed_data_list)
# read settings for process
f_path = os.path.abspath("Config.json")
print f_path
if f_path:
with open(f_path) as read_file:
list_ = json.load(read_file) # read configurations from file
# database credentials
big_data_host = list_["_mysql_big_data"]["host"]
big_data_user = list_["_mysql_big_data"]["user"]
big_data_passwd = list_["_mysql_big_data"]["passwd"]
big_data_db = list_["_mysql_big_data"]["db"]
# read filters
shop_filter = list_["report_filters"]["shop_filter"]
flight_filter = list_["report_filters"]["flight_filter"]
destination_filter = list_["report_filters"]["destination_filter"]
#create filter
if shop_filter:
filter_big_data = "location = '{}'".format(shop_filter)
filter_cr_reports = "shop_filter = '{}'".format(shop_filter)
if flight_filter:
filter_big_data += "and flight_name = '{}'".format(flight_filter)
filter_cr_reports += "and flight_filter = '{}'".format(flight_filter)
if destination_filter:
filter_big_data += "and destination = '{}'".format(destination_filter)
filter_cr_reports += "and destination_filter = '{}'".format(destination_filter)
print filter_big_data, filter_cr_reports
threshold_similarity = list_["big_data_config"]["threshold_similarity"]
debug = True
if shop_filter:
main()
else:
print("mandatory filters not available")
# try:
# # create a database connection for processing using sqlalchmey
# engine = create_engine('mysql://{}:{}@{}/{}'.format(big_data_db, big_data_user, big_data_host, big_data_passwd))
# Session = sessionmaker(engine)
# session = Session()
# except:
# print("Error 2: Not able to connect to BIG DATA database")
else:
print("Not able to read database credentials")
# CREATE TABLE CR_Reports (ReportID int(11) auto_increment PRIMARY KEY, ShopFilter VARCHAR(50), FlightFilter VARCHAR(50), DestinationFilter VARCHAR(50), DataSetId INT NOT NULL) ENGINE=INNODB;
# CREATE TABLE CR_DataSet (DataSetId int(11) AUTO_INCREMENT PRIMARY KEY, KarachiBakery int(11) default 0, TifinExpress int(11) default 0, KFC int(11)) ENGINE=INNODB;
# ALTER TABLE CR_Reports ADD CONSTRAINT DataSetId FOREIGN KEY (DataSetId) REFERENCES CR_DataSet (DataSetId) ON DELETE CASCADE;
#CREATE TABLE DeviceMapper (id int auto_increment PRIMARY KEY, DeviceName VARCHAR(50) NOT NULL, DeviceType VARCHAR(50) NOT NULL, DeviceLocation VARCHAR(50) NOT NULL);