
Commit abf68e2

Merge pull request #14 from HenryDashwood/master
Fix #12: replace record.asctime with self.format_time
2 parents: 6e0e211 + b38b026
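
Background on the fix: a logging.LogRecord only gains an asctime attribute after a logging.Formatter has formatted it, so reading record.asctime inside a handler's emit() can raise AttributeError (the crash reported in #12). A minimal sketch of the failure mode and of the two code paths the new format_time() helper relies on (the record fields below are illustrative, not from the commit):

    import logging
    import time

    # A freshly created record, as a handler's emit() would receive it.
    record = logging.LogRecord(
        name="scrapy.core.engine", level=logging.ERROR,
        pathname="spider.py", lineno=42,
        msg="boom", args=(), exc_info=None,
    )

    print(hasattr(record, "asctime"))    # False -- record.asctime would raise AttributeError

    # Path 1: a formatter is available -- formatTime() works on any record.
    formatter = logging.Formatter()
    print(formatter.formatTime(record))  # e.g. "2026-02-03 12:00:00,123"

    # Path 2: no formatter -- fall back to the current wall-clock time.
    print(time.strftime("%Y-%m-%d %H:%M:%S"))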

File tree

1 file changed: +104 −110

scrapeops_scrapy/core/error_logger.py

@@ -1,18 +1,20 @@
-from scrapeops_scrapy.core.api import SOPSRequest
-from scrapeops_scrapy.normalizer.domains import DomainNormalizer
-from scrapeops_scrapy.utils import utils
 import json
 import logging
 import re
+import time
+
+from scrapeops_scrapy.core.api import SOPSRequest
+from scrapeops_scrapy.normalizer.domains import DomainNormalizer
+from scrapeops_scrapy.utils import utils
 
-class ErrorLogger(object):
 
+class ErrorLogger(object):
     ERROR_LOGGER_ACTIVE = True
 
     def __init__(self, spider, crawler, spider_settings, server_hostname, server_ip, start_time, log_file):
         self.spider = spider
         self.crawler = crawler
-        self.bot_name = crawler.settings.get('BOT_NAME', 'None')
+        self.bot_name = crawler.settings.get("BOT_NAME", "None")
         self.spider_settings = spider_settings
         self.server_hostname = server_hostname
         self.server_ip = server_ip
@@ -28,72 +30,70 @@ def update_error_logger(self, job_name, job_id):
 
     def log_error(self, reason=None, error=None, data=None, request_type=None):
         if ErrorLogger.ERROR_LOGGER_ACTIVE:
-            self._error_history.append({
-                'time': utils.current_time(),
-                'reason': reason,
-                'error': str(error),
-                'data': data,
-                'request_type': request_type,
-            })
-
+            self._error_history.append(
+                {
+                    "time": utils.current_time(),
+                    "reason": reason,
+                    "error": str(error),
+                    "data": data,
+                    "request_type": request_type,
+                }
+            )
 
     def send_error_report(self, error_type=None, body=None, log_data=False):
         if ErrorLogger.ERROR_LOGGER_ACTIVE:
             try:
                 data, status = SOPSRequest().error_report_request(error_type=error_type, body=body)
                 if status.valid:
-                    if log_data and self.log_file is not None and data.get('sdk_error_id') is not None:
-                        with open(self.log_file, 'rb') as f:
+                    if log_data and self.log_file is not None and data.get("sdk_error_id") is not None:
+                        with open(self.log_file, "rb") as f:
                             post_body = {
-                                'sops_sdk': 'scrapy',
-                                'spider_name': self.spider.name,
-                                'job_group_id': self.job_group_id,
-                                'job_group_name': self.job_group_name,
-                                'sdk_error_id': data.get('sdk_error_id')
-                            }
-                            _, status = SOPSRequest().error_report_request(error_type=error_type, body=post_body, files={'file': f})
+                                "sops_sdk": "scrapy",
+                                "spider_name": self.spider.name,
+                                "job_group_id": self.job_group_id,
+                                "job_group_name": self.job_group_name,
+                                "sdk_error_id": data.get("sdk_error_id"),
+                            }
+                            _, status = SOPSRequest().error_report_request(
+                                error_type=error_type, body=post_body, files={"file": f}
+                            )
                             if status.valid is False:
-                                self.log_error(reason='send_error_logs_failed', error=status.error)
+                                self.log_error(reason="send_error_logs_failed", error=status.error)
 
                 if status.valid is False:
-                    self.log_error(reason='send_error_report_failed', error=status.error)
+                    self.log_error(reason="send_error_report_failed", error=status.error)
             except Exception:
                 pass
 
-
     def sdk_error_close(self, reason=None, error=None, request_type=None, data=None):
         if ErrorLogger.ERROR_LOGGER_ACTIVE:
             self.log_error(reason=reason, error=error, data=data, request_type=request_type)
             error_data = {
-                'final_reason': reason,
-                'sops_sdk': 'scrapy',
-                'spider_name': self.spider.name,
-                'bot_name': self.bot_name,
-                'server_ip': self.server_ip,
-                'server_hostname': self.server_hostname,
-                'job_group_id': self.job_group_id,
-                'job_group_name': self.job_group_name,
-                'job_args': utils.get_args(),
-                'job_start_time': self.start_time,
-                'sops_scrapeops_version': utils.get_scrapeops_version(),
-                'sops_scrapy_version': utils.get_scrapy_version(),
-                'sops_python_version': utils.get_python_version(),
-                'sops_system_version': utils.get_system_version(),
-                'sops_middleware_enabled': utils.scrapeops_middleware_installed(self.spider_settings),
-                'error_history': self._error_history,
+                "final_reason": reason,
+                "sops_sdk": "scrapy",
+                "spider_name": self.spider.name,
+                "bot_name": self.bot_name,
+                "server_ip": self.server_ip,
+                "server_hostname": self.server_hostname,
+                "job_group_id": self.job_group_id,
+                "job_group_name": self.job_group_name,
+                "job_args": utils.get_args(),
+                "job_start_time": self.start_time,
+                "sops_scrapeops_version": utils.get_scrapeops_version(),
+                "sops_scrapy_version": utils.get_scrapy_version(),
+                "sops_python_version": utils.get_python_version(),
+                "sops_system_version": utils.get_system_version(),
+                "sops_middleware_enabled": utils.scrapeops_middleware_installed(self.spider_settings),
+                "error_history": self._error_history,
             }
-
-            self.send_error_report(error_type='sdk_close', body=error_data, log_data=True)
-
 
-
+            self.send_error_report(error_type="sdk_close", body=error_data, log_data=True)
 
-class TailLogHandler(logging.Handler):
 
+class TailLogHandler(logging.Handler):
     retryErrors = [
         "Couldn't bind",
-        "Hostname couldn't be looked up'"
-        "No route to host",
+        "Hostname couldn't be looked up'" "No route to host",
         "Connection was refused by other side",
         "TCP connection timed out",
         "File used for UNIX socket is no good",
@@ -124,123 +124,117 @@ def __init__(self, log_dict, log_dict_cumulative):
         self.log_dict = log_dict
         self.log_dict_cumulative = log_dict_cumulative
 
-
     def flush(self):
         self.log_dict.clear()
-
 
     def emit(self, record):
-
         try:
-
-            if(record.levelname == "ERROR" or record.levelname == "WARNING" or record.levelname == "CRITICAL"):
-
-                if hasattr(record, 'message'):
+            if record.levelname == "ERROR" or record.levelname == "WARNING" or record.levelname == "CRITICAL":
+                if hasattr(record, "message"):
                     errorMessage = record.message
-                    fileAndLine = record.pathname + ', line: ' + str(record.lineno)
-                    dateTime = record.asctime
+                    fileAndLine = record.pathname + ", line: " + str(record.lineno)
+                    dateTime = self.format_time(record)
                     type = record.levelname
                     engine = record.name
 
-
-                    #covering warnings/probableCause/traceback missing
-                    traceback = 'No traceback available'
-                    probableCause = ''
+                    # covering warnings/probableCause/traceback missing
+                    traceback = "No traceback available"
+                    probableCause = ""
 
                     if record.exc_text is not None:
                         traceback = record.exc_text
-                        splitTraceback = traceback.split('\n')
+                        splitTraceback = traceback.split("\n")
                         probableCause = splitTraceback[len(splitTraceback) - 1]
 
-
-                    #covering retrys
-                    if("Gave up retrying <" in record.message):
-
+                    # covering retrys
+                    if "Gave up retrying <" in record.message:
                         for retryError in self.retryErrors:
-                            if(retryError in record.message):
-                                method = record.message.split('<')[1].split(' ')[0]
+                            if retryError in record.message:
+                                method = record.message.split("<")[1].split(" ")[0]
                                 errorMessage = "Error: Gave up retrying " + method + " request - " + retryError
-                                fileAndLine = ''
+                                fileAndLine = ""
                                 probableCause = retryError
                                 break
-
+
                     # Deprecation Warnings
                     if "ScrapyDeprecationWarning:" in record.message and record.message[0] == "/":
                         splitString = record.message.split("ScrapyDeprecationWarning:")
                         errorMessage = "ScrapyDeprecationWarning: " + splitString[1]
                         probableCause = splitString[0]
 
-
                     # "Some Other Error Occurred"
-                    if "Some other error occurred: " in record.message:
-                        splitError = record.message.split(' /')
+                    if "Some other error occurred: " in record.message:
+                        splitError = record.message.split(" /")
                         cleanError = splitError[0].split(">: ")[1]
                         errorMessage = "Some other error occurred: " + cleanError
                         probableCause = cleanError
                         traceback = record.message
 
-
                     # Convert Urls To Domains in Error Messages
-                    urls = re.findall(r'(https?://[^\s]+)', errorMessage)
+                    urls = re.findall(r"(https?://[^\s]+)", errorMessage)
                     for url in urls:
                         domain = DomainNormalizer.get_domain(url)
                         errorMessage = errorMessage.replace(url, domain)
 
-
                     if errorMessage in self.log_dict:
-                        self.log_dict[errorMessage]['count'] = self.log_dict[errorMessage]['count'] + 1
+                        self.log_dict[errorMessage]["count"] = self.log_dict[errorMessage]["count"] + 1
                     else:
                         self.log_dict[errorMessage] = {
-                            'type': type,
-                            'engine': engine,
-                            'name': errorMessage,
-                            'count': 1,
-                            'traceback': traceback,
-                            'message' : probableCause,
-                            'filepath': fileAndLine,
-                            'dateTime': dateTime
-                        }
-
-                    if(SOPSRequest.HIGH_FREQ_ACC == True):
-
-                        if(errorMessage in self.log_dict_cumulative):
-                            self.log_dict_cumulative[errorMessage]['count'] = self.log_dict_cumulative[errorMessage]['count'] + 1
+                            "type": type,
+                            "engine": engine,
+                            "name": errorMessage,
+                            "count": 1,
+                            "traceback": traceback,
+                            "message": probableCause,
+                            "filepath": fileAndLine,
+                            "dateTime": dateTime,
+                        }
+
+                    if SOPSRequest.HIGH_FREQ_ACC == True:
+                        if errorMessage in self.log_dict_cumulative:
+                            self.log_dict_cumulative[errorMessage]["count"] = (
+                                self.log_dict_cumulative[errorMessage]["count"] + 1
+                            )
                         else:
-
-                            self.log_dict_cumulative[errorMessage] = {
-                                'type': type,
-                                'engine': engine,
-                                'name': errorMessage,
-                                'count': 1,
-                                'traceback': traceback,
-                                'message' : probableCause,
-                                'filepath': fileAndLine,
-                                'dateTime': dateTime
+                            self.log_dict_cumulative[errorMessage] = {
+                                "type": type,
+                                "engine": engine,
+                                "name": errorMessage,
+                                "count": 1,
+                                "traceback": traceback,
+                                "message": probableCause,
+                                "filepath": fileAndLine,
+                                "dateTime": dateTime,
                             }
-
+
         except Exception as e:
-            logging.info('Error: Error in error logger')
+            logging.info("Error: Error in error logger")
             logging.info(e, exc_info=True)
 
-class TailLogger(object):
+    def format_time(self, record):
+        if self.formatter:
+            return self.formatter.formatTime(record)
+        else:
+            # Fallback to a basic time format if no formatter is set
+            return time.strftime("%Y-%m-%d %H:%M:%S")
+
 
+class TailLogger(object):
     def __init__(self):
         self._log_dict = {}
         self._log_dict_cumulative = {}
         self._log_handler = TailLogHandler(self._log_dict, self._log_dict_cumulative)
 
-    def contents(self, type = "diff"):
-
-        if(type == "cumulative"):
-            jsonLogsCumulative = json.dumps(self._log_dict_cumulative, indent= 2)
+    def contents(self, type="diff"):
+        if type == "cumulative":
+            jsonLogsCumulative = json.dumps(self._log_dict_cumulative, indent=2)
             return jsonLogsCumulative
 
         else:
-            jsonLogs = json.dumps(self._log_dict, indent= 2)
+            jsonLogs = json.dumps(self._log_dict, indent=2)
             self._log_handler.flush()
             return jsonLogs
 
     @property
     def log_handler(self):
         return self._log_handler
-
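
For context, a minimal sketch of how TailLogger and its handler would be wired into Python's logging module (the logger name and format string here are assumptions for illustration, not part of this commit):

    import logging

    from scrapeops_scrapy.core.error_logger import TailLogger

    tail_logger = TailLogger()
    handler = tail_logger.log_handler

    # With a formatter attached, format_time() delegates to it;
    # without one, the handler falls back to time.strftime().
    handler.setFormatter(logging.Formatter("%(asctime)s [%(name)s] %(levelname)s: %(message)s"))
    logging.getLogger().addHandler(handler)

    logging.getLogger("scrapy.core.scraper").error("Something went wrong")

    # contents() serialises the aggregated errors to JSON; in the default
    # "diff" mode it also clears the buffer for the next reporting interval.
    print(tail_logger.contents())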