|
86 | 86 | spell = json.dumps(kw["spell"])
|
87 | 87 | item_types = json.dumps(kw["item_types"])
|
88 | 88 |
|
89 |
| - for item in kw["items"]: |
| 89 | + try: |
| 90 | + for item in kw["items"]: |
| 91 | + row = dict() |
| 92 | + row["task_id"] = task_id |
| 93 | + row["status"] = status |
| 94 | + row["request"] = keyword |
| 95 | + row["request_type"] = request_type |
| 96 | + row["domain"] = domain |
| 97 | + row["location_code"] = location_code |
| 98 | + row["language_code"] = language_code |
| 99 | + row["timestamp"] = timestamp |
| 100 | + row["spell"] = spell |
| 101 | + row["item_types"] = item_types |
| 102 | + row["results_count"] = results_count |
| 103 | + row["item_type"] = item["type"] |
| 104 | + row["rank_group"] = item["rank_group"] |
| 105 | + row["rank_absolute"] = item["rank_absolute"] |
| 106 | + if "title" in item.keys(): |
| 107 | + row["title"] = item["title"] |
| 108 | + if "description" in item.keys(): |
| 109 | + row["description"] = item["description"] |
| 110 | + if "url" in item.keys(): |
| 111 | + row["url"] = item["url"] |
| 112 | + if "breadcrumb" in item.keys(): |
| 113 | + row["breadcrumb"] = item["breadcrumb"] |
| 114 | + |
| 115 | + if args.advanced: |
| 116 | + if "is_image" in item.keys(): |
| 117 | + row["is_image"] = item["is_image"] |
| 118 | + if "is_video" in item.keys(): |
| 119 | + row["is_video"] = item["is_video"] |
| 120 | + if "is_featured_snippet" in item.keys(): |
| 121 | + row["is_featured_snippet"] = item["is_featured_snippet"] |
| 122 | + if "is_malicious" in item.keys(): |
| 123 | + row["is_malicious"] = item["is_malicious"] |
| 124 | + if "is_web_story" in item.keys(): |
| 125 | + row["is_web_story"] = item["is_web_story"] |
| 126 | + if "amp_version" in item.keys(): |
| 127 | + row["amp_version"] = item["amp_version"] |
| 128 | + if "rating" in item.keys(): |
| 129 | + row["rating"] = json.dumps(item["rating"]) |
| 130 | + if "links" in item.keys(): |
| 131 | + row["sitelinks"] = json.dumps(item["links"]) |
| 132 | + if "faq" in item.keys(): |
| 133 | + row["faq"] = json.dumps(item["faq"]) |
| 134 | + if "items" in item.keys(): |
| 135 | + row["items"] = json.dumps(item["items"]) |
| 136 | + if ("rectangle" in item.keys()) and (item["rectangle"] is not None): |
| 137 | + row["pixels_from_top"] = item["rectangle"]["y"] |
| 138 | + |
| 139 | + if (args.knowledge_graph) and (item["type"] == "knowledge_graph"): |
| 140 | + if "sub_title" in item.keys(): |
| 141 | + row["sub_title"] = item["sub_title"] |
| 142 | + for i in item["items"]: |
| 143 | + if "data_attrid" in i.keys(): |
| 144 | + if "address" in str(i["data_attrid"]): |
| 145 | + row["address"] = i["text"] |
| 146 | + elif "phone" in str(i["data_attrid"]): |
| 147 | + row["phone"] = i["text"] |
| 148 | + |
| 149 | + with open(filename,'a',newline='') as file: |
| 150 | + writer = csv.DictWriter(file, fieldnames=fields, delimiter=";") |
| 151 | + writer.writerow(row) |
| 152 | + file.close() |
| 153 | + |
| 154 | + except TypeError as e: |
90 | 155 | row = dict()
|
91 | 156 | row["task_id"] = task_id
|
92 |
| - row["status"] = status |
| 157 | + row["status"] = "error or no results" |
93 | 158 | row["request"] = keyword
|
94 | 159 | row["request_type"] = request_type
|
95 | 160 | row["domain"] = domain
|
96 | 161 | row["location_code"] = location_code
|
97 | 162 | row["language_code"] = language_code
|
98 | 163 | row["timestamp"] = timestamp
|
99 |
| - row["spell"] = spell |
100 |
| - row["item_types"] = item_types |
101 |
| - row["results_count"] = results_count |
102 |
| - row["item_type"] = item["type"] |
103 |
| - row["rank_group"] = item["rank_group"] |
104 |
| - row["rank_absolute"] = item["rank_absolute"] |
105 |
| - if "title" in item.keys(): |
106 |
| - row["title"] = item["title"] |
107 |
| - if "description" in item.keys(): |
108 |
| - row["description"] = item["description"] |
109 |
| - if "url" in item.keys(): |
110 |
| - row["url"] = item["url"] |
111 |
| - if "breadcrumb" in item.keys(): |
112 |
| - row["breadcrumb"] = item["breadcrumb"] |
113 |
| - |
114 |
| - if args.advanced: |
115 |
| - if "is_image" in item.keys(): |
116 |
| - row["is_image"] = item["is_image"] |
117 |
| - if "is_video" in item.keys(): |
118 |
| - row["is_video"] = item["is_video"] |
119 |
| - if "is_featured_snippet" in item.keys(): |
120 |
| - row["is_featured_snippet"] = item["is_featured_snippet"] |
121 |
| - if "is_malicious" in item.keys(): |
122 |
| - row["is_malicious"] = item["is_malicious"] |
123 |
| - if "is_web_story" in item.keys(): |
124 |
| - row["is_web_story"] = item["is_web_story"] |
125 |
| - if "amp_version" in item.keys(): |
126 |
| - row["amp_version"] = item["amp_version"] |
127 |
| - if "rating" in item.keys(): |
128 |
| - row["rating"] = json.dumps(item["rating"]) |
129 |
| - if "links" in item.keys(): |
130 |
| - row["sitelinks"] = json.dumps(item["links"]) |
131 |
| - if "faq" in item.keys(): |
132 |
| - row["faq"] = json.dumps(item["faq"]) |
133 |
| - if "items" in item.keys(): |
134 |
| - row["items"] = json.dumps(item["items"]) |
135 |
| - if ("rectangle" in item.keys()) and (item["rectangle"] is not None): |
136 |
| - row["pixels_from_top"] = item["rectangle"]["y"] |
137 |
| - |
138 |
| - if (args.knowledge_graph) and (item["type"] == "knowledge_graph"): |
139 |
| - if "sub_title" in item.keys(): |
140 |
| - row["sub_title"] = item["sub_title"] |
141 |
| - for i in item["items"]: |
142 |
| - if "data_attrid" in i.keys(): |
143 |
| - if "address" in str(i["data_attrid"]): |
144 |
| - row["address"] = i["text"] |
145 |
| - elif "phone" in str(i["data_attrid"]): |
146 |
| - row["phone"] = i["text"] |
147 |
| - |
148 | 164 |
|
149 | 165 | with open(filename,'a',newline='') as file:
|
150 | 166 | writer = csv.DictWriter(file, fieldnames=fields, delimiter=";")
|
|
0 commit comments