Skip to content

Commit 071065d

Browse files
committed
Improved the readability and the logic to retry
1 parent 46c62bf commit 071065d

File tree

1 file changed

+39
-27
lines changed

1 file changed

+39
-27
lines changed

aws/logs_monitoring/lambda_function.py

Lines changed: 39 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import os
1212
import socket
1313
import requests
14+
import time
1415
import ssl
1516
import re
1617
from io import BytesIO, BufferedReader
@@ -142,46 +143,57 @@ class DatadogHTTPClient(object):
142143

143144
def __init__(self, host, apiKey, max_retries=1, backoff=0.5):
144145
self._url = "https://{}/v1/input/{}".format(host, apiKey)
145-
self._max_retries = max_retries
146-
self._backoff = backoff
147-
148-
def send(self, content, metadata=None):
149-
retries = 0
150-
body = json.dumps(content)
151-
while retries <= self._max_retries:
146+
self._max_retries = max_retries if max_retries >= 0 else 1
147+
self._backoff = backoff if backoff > 0 else 0.5
148+
149+
def send(self, batch, metadata=None):
150+
"""
151+
Attempts to send a log with a linear retry strategy,
152+
only retries on server and network errors.
153+
"""
154+
body = json.dumps(batch)
155+
for retry in range(1 + self._max_retries):
156+
if retry > 0:
157+
print("Retrying, retry: {}, max: {}".format(retry, self._max_retries))
158+
time.sleep(self._backoff)
152159
try:
153160
resp = requests.post(
154161
self._url, headers=self._HEADERS, data=body, params=metadata
155162
)
156-
if resp.status_code >= 500:
157-
# server error
158-
print(
159-
"Server error, status: {}, reason {}".format(
160-
resp.status_code, resp.reason
161-
)
162-
)
163-
elif resp.status_code >= 400:
164-
# client error
165-
print(
166-
"Client error, status: {}, reason {}".format(
167-
resp.status_code, resp.reason
168-
)
169-
)
170-
break
171-
else:
172-
# success
173-
return
174163
except Exception as e:
164+
# most likely a network error
175165
print("Unexpected exception: {}".format(str(e)))
176-
retries += 1
177-
print("Could not send batch, dropping it")
166+
continue
167+
if resp.status_code >= 500:
168+
# server error
169+
print(
170+
"Server error, status {}, reason: {}".format(
171+
resp.status_code, resp.reason
172+
)
173+
)
174+
continue
175+
elif resp.status_code >= 400:
176+
# client error
177+
raise Exception(
178+
"client error, status: {}, reason {}".format(
179+
resp.status_code, resp.reason
180+
)
181+
)
182+
else:
183+
# success
184+
return
185+
raise Exception("max number of retries reached: {}".format(self._max_retries))
178186

179187

180188
class DatadogBatcher(object):
181189
def __init__(self, max_size=25):
182190
self._max_size = max_size
183191

184192
def batch(self, logs):
193+
"""
194+
Returns an array of batches,
195+
each batch contains at most max_size logs.
196+
"""
185197
batches = []
186198
if len(logs) % self._max_size != 0:
187199
nb_batchs = int(len(logs) / self._max_size) + 1

0 commit comments

Comments
 (0)