From f4a3f85c9342f81665809b2aba6e5b8ca0f685dd Mon Sep 17 00:00:00 2001 From: Neil Kakkar Date: Thu, 29 Aug 2024 15:26:12 +0530 Subject: [PATCH] feat(errors): Add django integration and in app frames --- posthog/__init__.py | 5 +- posthog/client.py | 6 +- posthog/exception_capture.py | 62 +++++++++++++++---- posthog/exception_integrations/__init__.py | 5 ++ posthog/exception_integrations/django.py | 69 ++++++++++++++++++++++ posthog/test/test_exception_capture.py | 30 ++++++++++ 6 files changed, 165 insertions(+), 12 deletions(-) create mode 100644 posthog/exception_integrations/__init__.py create mode 100644 posthog/exception_integrations/django.py diff --git a/posthog/__init__.py b/posthog/__init__.py index a8e6416..1c0de18 100644 --- a/posthog/__init__.py +++ b/posthog/__init__.py @@ -1,7 +1,8 @@ import datetime # noqa: F401 -from typing import Callable, Dict, Optional, Tuple # noqa: F401 +from typing import Callable, Dict, List, Optional, Tuple # noqa: F401 from posthog.client import Client +from posthog.exception_capture import Integrations # noqa: F401 from posthog.version import VERSION __version__ = VERSION @@ -21,6 +22,7 @@ feature_flags_request_timeout_seconds = 3 # type: int # Currently alpha, use at your own risk enable_exception_autocapture = False # type: bool +exception_autocapture_integrations = [] # type: List[Integrations] default_client = None # type: Optional[Client] @@ -460,6 +462,7 @@ def _proxy(method, *args, **kwargs): # This kind of initialisation is very annoying for exception capture. We need to figure out a way around this, # or deprecate this proxy option fully (it's already in the process of deprecation, no new clients should be using this method since like 5-6 months) enable_exception_autocapture=enable_exception_autocapture, + exception_autocapture_integrations=exception_autocapture_integrations, ) # always set incase user changes it diff --git a/posthog/client.py b/posthog/client.py index 374b26b..bc64bf7 100644 --- a/posthog/client.py +++ b/posthog/client.py @@ -53,6 +53,7 @@ def __init__( historical_migration=False, feature_flags_request_timeout_seconds=3, enable_exception_autocapture=False, + exception_autocapture_integrations=None, ): self.queue = queue.Queue(max_queue_size) @@ -65,6 +66,8 @@ def __init__( self.debug = debug self.send = send self.sync_mode = sync_mode + # Used for session replay URL generation - we don't want the server host here. + self.raw_host = host self.host = determine_server_host(host) self.gzip = gzip self.timeout = timeout @@ -80,6 +83,7 @@ def __init__( self.disable_geoip = disable_geoip self.historical_migration = historical_migration self.enable_exception_autocapture = enable_exception_autocapture + self.exception_autocapture_integrations = exception_autocapture_integrations # personal_api_key: This should be a generated Personal API Key, private self.personal_api_key = personal_api_key @@ -92,7 +96,7 @@ def __init__( self.log.setLevel(logging.WARNING) if self.enable_exception_autocapture: - self.exception_capture = ExceptionCapture(self) + self.exception_capture = ExceptionCapture(self, integrations=self.exception_autocapture_integrations) if sync_mode: self.consumers = None diff --git a/posthog/exception_capture.py b/posthog/exception_capture.py index d26aa2a..307ef3d 100644 --- a/posthog/exception_capture.py +++ b/posthog/exception_capture.py @@ -1,24 +1,46 @@ import logging import sys import threading -from typing import TYPE_CHECKING +from enum import Enum +from typing import TYPE_CHECKING, List, Optional -from posthog.exception_utils import exceptions_from_error_tuple +from posthog.exception_utils import exceptions_from_error_tuple, handle_in_app +from posthog.utils import remove_trailing_slash if TYPE_CHECKING: from posthog.client import Client +class Integrations(str, Enum): + Django = "django" + + +DEFAULT_DISTINCT_ID = "python-exceptions" + + class ExceptionCapture: # TODO: Add client side rate limiting to prevent spamming the server with exceptions log = logging.getLogger("posthog") - def __init__(self, client: "Client"): + def __init__(self, client: "Client", integrations: Optional[List[Integrations]] = None): self.client = client self.original_excepthook = sys.excepthook sys.excepthook = self.exception_handler threading.excepthook = self.thread_exception_handler + self.enabled_integrations = [] + + for integration in integrations or []: + # TODO: Maybe find a better way of enabling integrations + # This is very annoying currently if we had to add any configuration per integration + if integration == Integrations.Django: + try: + from posthog.exception_integrations.django import DjangoIntegration + + enabled_integration = DjangoIntegration(self.exception_receiver) + self.enabled_integrations.append(enabled_integration) + except Exception as e: + self.log.exception(f"Failed to enable Django integration: {e}") def exception_handler(self, exc_type, exc_value, exc_traceback): # don't affect default behaviour. @@ -28,7 +50,14 @@ def exception_handler(self, exc_type, exc_value, exc_traceback): def thread_exception_handler(self, args): self.capture_exception(args.exc_type, args.exc_value, args.exc_traceback) - def capture_exception(self, exc_type, exc_value, exc_traceback): + def exception_receiver(self, exc_info, extra_properties): + if "distinct_id" in extra_properties: + metadata = {"distinct_id": extra_properties["distinct_id"]} + else: + metadata = None + self.capture_exception(exc_info[0], exc_info[1], exc_info[2], metadata) + + def capture_exception(self, exc_type, exc_value, exc_traceback, metadata=None): try: # if hasattr(sys, "ps1"): # # Disable the excepthook for interactive Python shells @@ -37,17 +66,30 @@ def capture_exception(self, exc_type, exc_value, exc_traceback): # Format stack trace like sentry all_exceptions_with_trace = exceptions_from_error_tuple((exc_type, exc_value, exc_traceback)) + # Add in-app property to frames in the exceptions + event = handle_in_app( + { + "exception": { + "values": all_exceptions_with_trace, + }, + } + ) + all_exceptions_with_trace_and_in_app = event["exception"]["values"] + + distinct_id = metadata.get("distinct_id") if metadata else DEFAULT_DISTINCT_ID + # Make sure we have a distinct_id if its empty in metadata + distinct_id = distinct_id or DEFAULT_DISTINCT_ID + properties = { - "$exception_type": all_exceptions_with_trace[0].get("type"), - "$exception_message": all_exceptions_with_trace[0].get("value"), - "$exception_list": all_exceptions_with_trace, - # TODO: Can we somehow get distinct_id from context here? Stateless lib makes this much harder? 😅 - # '$exception_personURL': f'{self.client.posthog_host}/project/{self.client.token}/person/{self.client.get_distinct_id()}' + "$exception_type": all_exceptions_with_trace_and_in_app[0].get("type"), + "$exception_message": all_exceptions_with_trace_and_in_app[0].get("value"), + "$exception_list": all_exceptions_with_trace_and_in_app, + "$exception_personURL": f"{remove_trailing_slash(self.client.raw_host)}/project/{self.client.api_key}/person/{distinct_id}", } # TODO: What distinct id should we attach these server-side exceptions to? # Any heuristic seems prone to errors - how can we know if exception occurred in the context of a user that captured some other event? - self.client.capture("python-exceptions", "$exception", properties=properties) + self.client.capture(distinct_id, "$exception", properties=properties) except Exception as e: self.log.exception(f"Failed to capture exception: {e}") diff --git a/posthog/exception_integrations/__init__.py b/posthog/exception_integrations/__init__.py new file mode 100644 index 0000000..90b8931 --- /dev/null +++ b/posthog/exception_integrations/__init__.py @@ -0,0 +1,5 @@ +class IntegrationEnablingError(Exception): + """ + The integration could not be enabled due to a user error like + `django` not being installed for the `DjangoIntegration`. + """ diff --git a/posthog/exception_integrations/django.py b/posthog/exception_integrations/django.py new file mode 100644 index 0000000..6d4f2d2 --- /dev/null +++ b/posthog/exception_integrations/django.py @@ -0,0 +1,69 @@ +import sys +from typing import TYPE_CHECKING + +from posthog.exception_integrations import IntegrationEnablingError + +try: + from django import VERSION as DJANGO_VERSION + from django.core import signals + +except ImportError: + raise IntegrationEnablingError("Django not installed") + + +if TYPE_CHECKING: + from typing import Any, Dict # noqa: F401 + + from django.core.handlers.wsgi import WSGIRequest # noqa: F401 + + +class DjangoIntegration: + # TODO: Abstract integrations one we have more and can see patterns + """ + Autocapture errors from a Django application. + """ + + identifier = "django" + + def __init__(self, capture_exception_fn=None): + + if DJANGO_VERSION < (4, 2): + raise IntegrationEnablingError("Django 4.2 or newer is required.") + + # TODO: Right now this seems too complicated / overkill for us, but seems like we can automatically plug in middlewares + # which is great for users (they don't need to do this) and everything should just work. + # We should consider this in the future, but for now we can just use the middleware and signals handlers. + # See: https://github.com/getsentry/sentry-python/blob/269d96d6e9821122fbff280e6a26956e5ed03c0b/sentry_sdk/integrations/django/__init__.py + + self.capture_exception_fn = capture_exception_fn + + def _got_request_exception(request=None, **kwargs): + # type: (WSGIRequest, **Any) -> None + + extra_props = {} + if request is not None: + # get headers metadata + extra_props = DjangoRequestExtractor(request).extract_person_data() + + self.capture_exception_fn(sys.exc_info(), extra_props) + + signals.got_request_exception.connect(_got_request_exception) + + +class DjangoRequestExtractor: + + def __init__(self, request): + # type: (Any) -> None + self.request = request + + def extract_person_data(self): + headers = self.headers() + return { + "distinct_id": headers.get("X-PostHog-Distinct-ID"), + "ip": headers.get("X-Forwarded-For"), + "user_agent": headers.get("User-Agent"), + } + + def headers(self): + # type: () -> Dict[str, str] + return dict(self.request.headers) diff --git a/posthog/test/test_exception_capture.py b/posthog/test/test_exception_capture.py index a4bbee7..ceead26 100644 --- a/posthog/test/test_exception_capture.py +++ b/posthog/test/test_exception_capture.py @@ -32,3 +32,33 @@ def test_excepthook(tmpdir): b'"$exception_list": [{"mechanism": {"type": "generic", "handled": true}, "module": null, "type": "ZeroDivisionError", "value": "division by zero", "stacktrace": {"frames": [{"filename": "app.py", "abs_path"' in output ) + + +def test_trying_to_use_django_integration_when_django_isnt_installed(tmpdir): + app = tmpdir.join("app.py") + app.write( + dedent( + """ + from posthog import Posthog, Integrations + posthog = Posthog('phc_x', host='https://eu.i.posthog.com', enable_exception_autocapture=True, exception_autocapture_integrations=[Integrations.Django], debug=True, on_error=lambda e, batch: print('error handling batch: ', e, batch)) + + # frame_value = "LOL" + + 1/0 + """ + ) + ) + + with pytest.raises(subprocess.CalledProcessError) as excinfo: + subprocess.check_output([sys.executable, str(app)], stderr=subprocess.STDOUT) + + output = excinfo.value.output + + assert b"ZeroDivisionError" in output + assert b"LOL" in output + assert b"DEBUG:posthog:data uploaded successfully" in output + assert ( + b'"$exception_list": [{"mechanism": {"type": "generic", "handled": true}, "module": null, "type": "ZeroDivisionError", "value": "division by zero", "stacktrace": {"frames": [{"filename": "app.py", "abs_path"' + in output + ) + assert b"Failed to enable Django integration: Django not installed" in output