Skip to content

Commit e696dac

Browse files
Merge pull request #39 from palantir/pk/error_handling
handle failed result post with error post
2 parents cc5b9e8 + f2adb41 commit e696dac

File tree

2 files changed

+40
-8
lines changed

2 files changed

+40
-8
lines changed

changelog/@unreleased/pr-39.v2.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
type: fix
2+
fix:
3+
description: 'After this fix: If the job posting fails 5 times (e.g. result too
4+
large) then the client tries 3 more times to post just a simple error message.'
5+
links:
6+
- https://github.com/palantir/python-compute-module/pull/39

compute_modules/client/internal_query_client.py

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
from .encoder import CustomJSONEncoder
3434

3535
POST_RESULT_MAX_ATTEMPTS = 5
36+
POST_ERROR_MAX_ATTEMPTS = 3
3637
POST_SCHEMAS_MAX_ATTEMPTS = 5
3738

3839

@@ -162,6 +163,28 @@ def get_job_or_none(self) -> Any:
162163
self.logger.error(traceback.format_exc())
163164
return None
164165

166+
def report_job_result_failed(self, post_result_url: str, error: str) -> None:
167+
for _ in range(POST_ERROR_MAX_ATTEMPTS):
168+
try:
169+
with self.session.request(
170+
method="POST",
171+
url=post_result_url,
172+
headers=self.post_result_headers,
173+
data=json.dumps({"error": error}).encode("utf-8"),
174+
verify=self.certPath,
175+
) as response:
176+
if response.status_code == 204:
177+
self.logger.debug("Successfully reported that job result posting has failed")
178+
return
179+
else:
180+
self.logger.error(
181+
f"Failed to post result: {response.status_code} {response.reason} {response.text}"
182+
)
183+
except Exception as e:
184+
self.logger.error(f"Failed to report that post result has failed: {str(e)}")
185+
186+
raise RuntimeError(f"Unable to report that post result has failed after {POST_ERROR_MAX_ATTEMPTS} attempts")
187+
165188
def report_job_result(self, job_id: str, body: Any) -> None:
166189
post_result_path = f"{self.post_result_path}/{job_id}"
167190
post_result_url = self.build_url(post_result_path)
@@ -179,17 +202,20 @@ def report_job_result(self, job_id: str, body: Any) -> None:
179202
self.logger.debug("Successfully reported job result")
180203
return
181204
else:
182-
self.logger.error(
183-
f"Failed to post result: {response.status_code} {response.reason} {response.text}"
184-
)
205+
error = f"Failed to post result: {response.status_code} {response.reason} {response.text}"
206+
self.logger.error(error)
185207
except TypeError as e:
186-
self.logger.error(f"Failed to serialize result to json: {str(e)}")
187-
self.report_job_result(job_id, json.dumps(self.get_failed_query(e)).encode("utf-8"))
208+
error = f"Failed to serialize result to json: {self.get_failed_query(e)}"
209+
self.logger.error(error)
210+
self.report_job_result_failed(post_result_url, error)
188211
return
189212
except Exception as e:
190-
self.logger.error(f"POST of job result failed, attempting to re-establish connection: {str(e)}")
191-
self.logger.error(traceback.format_exc())
192-
raise RuntimeError(f"Unable to post job result after {POST_RESULT_MAX_ATTEMPTS} attempts")
213+
error = f"POST of job result failed, attempting to re-establish connection: {str(e)} \n {traceback.format_exc()}"
214+
self.logger.error(error)
215+
216+
error = f"Unable to post job result after {POST_RESULT_MAX_ATTEMPTS} attempts; \n Now attempting to return the error as the result: {error}"
217+
self.logger.error(error)
218+
self.report_job_result_failed(post_result_url, error)
193219

194220
def handle_job(self, job: Dict[str, Any]) -> None:
195221
self.logger.info("handling job")

0 commit comments

Comments
 (0)