Skip to content

Commit 822eeff

Browse files
committed
Improved the readability and the logic to retry
1 parent 46c62bf commit 822eeff

File tree

1 file changed

+38
-27
lines changed

1 file changed

+38
-27
lines changed

aws/logs_monitoring/lambda_function.py

Lines changed: 38 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -142,46 +142,57 @@ class DatadogHTTPClient(object):
142142

143143
def __init__(self, host, apiKey, max_retries=1, backoff=0.5):
144144
self._url = "https://{}/v1/input/{}".format(host, apiKey)
145-
self._max_retries = max_retries
146-
self._backoff = backoff
147-
148-
def send(self, content, metadata=None):
149-
retries = 0
150-
body = json.dumps(content)
151-
while retries <= self._max_retries:
145+
self._max_retries = max_retries if max_retries >= 0 else 1
146+
self._backoff = backoff if backoff > 0 else 0.5
147+
148+
def send(self, batch, metadata=None):
149+
"""
150+
Attempts to send a log with a linear retry strategy,
151+
only retries on server and network errors.
152+
"""
153+
body = json.dumps(batch)
154+
for retry in range(1 + self._max_retries):
155+
if retry > 0:
156+
print("Retrying, retry: {}, max: {}".format(retry, self._max_retries))
157+
time.sleep(self._backoff)
152158
try:
153159
resp = requests.post(
154160
self._url, headers=self._HEADERS, data=body, params=metadata
155161
)
156-
if resp.status_code >= 500:
157-
# server error
158-
print(
159-
"Server error, status: {}, reason {}".format(
160-
resp.status_code, resp.reason
161-
)
162-
)
163-
elif resp.status_code >= 400:
164-
# client error
165-
print(
166-
"Client error, status: {}, reason {}".format(
167-
resp.status_code, resp.reason
168-
)
169-
)
170-
break
171-
else:
172-
# success
173-
return
174162
except Exception as e:
163+
# most likely a network error
175164
print("Unexpected exception: {}".format(str(e)))
176-
retries += 1
177-
print("Could not send batch, dropping it")
165+
continue
166+
if resp.status_code >= 500:
167+
# server error
168+
print(
169+
"Server error, status {}, reason: {}".format(
170+
resp.status_code, resp.reason
171+
)
172+
)
173+
continue
174+
elif resp.status_code >= 400:
175+
# client error
176+
raise Exception(
177+
"client error, status: {}, reason {}".format(
178+
resp.status_code, resp.reason
179+
)
180+
)
181+
else:
182+
# success
183+
return
184+
raise Exception("max number of retries reached: {}".format(self._max_retries))
178185

179186

180187
class DatadogBatcher(object):
181188
def __init__(self, max_size=25):
182189
self._max_size = max_size
183190

184191
def batch(self, logs):
192+
"""
193+
Returns an array of batches,
194+
each batch contains at most max_size logs.
195+
"""
185196
batches = []
186197
if len(logs) % self._max_size != 0:
187198
nb_batchs = int(len(logs) / self._max_size) + 1

0 commit comments

Comments
 (0)