Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(errors): Add django integration and in app frames #131

Merged
merged 8 commits into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion posthog/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import datetime # noqa: F401
from typing import Callable, Dict, Optional, Tuple # noqa: F401
from typing import Callable, Dict, List, Optional, Tuple # noqa: F401

from posthog.client import Client
from posthog.exception_capture import Integrations # noqa: F401
from posthog.version import VERSION

__version__ = VERSION
Expand All @@ -21,6 +22,7 @@
feature_flags_request_timeout_seconds = 3 # type: int
# Currently alpha, use at your own risk
enable_exception_autocapture = False # type: bool
exception_autocapture_integrations = [] # type: List[Integrations]

default_client = None # type: Optional[Client]

Expand Down Expand Up @@ -460,6 +462,7 @@ def _proxy(method, *args, **kwargs):
# This kind of initialisation is very annoying for exception capture. We need to figure out a way around this,
# or deprecate this proxy option fully (it's already in the process of deprecation, no new clients should be using this method since like 5-6 months)
enable_exception_autocapture=enable_exception_autocapture,
exception_autocapture_integrations=exception_autocapture_integrations,
)

# always set incase user changes it
Expand Down
10 changes: 9 additions & 1 deletion posthog/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def __init__(
historical_migration=False,
feature_flags_request_timeout_seconds=3,
enable_exception_autocapture=False,
exception_autocapture_integrations=None,
):
self.queue = queue.Queue(max_queue_size)

Expand All @@ -65,6 +66,8 @@ def __init__(
self.debug = debug
self.send = send
self.sync_mode = sync_mode
# Used for session replay URL generation - we don't want the server host here.
self.raw_host = host
self.host = determine_server_host(host)
self.gzip = gzip
self.timeout = timeout
Expand All @@ -80,6 +83,8 @@ def __init__(
self.disable_geoip = disable_geoip
self.historical_migration = historical_migration
self.enable_exception_autocapture = enable_exception_autocapture
self.exception_autocapture_integrations = exception_autocapture_integrations
self.exception_capture = None

# personal_api_key: This should be a generated Personal API Key, private
self.personal_api_key = personal_api_key
Expand All @@ -92,7 +97,7 @@ def __init__(
self.log.setLevel(logging.WARNING)

if self.enable_exception_autocapture:
self.exception_capture = ExceptionCapture(self)
self.exception_capture = ExceptionCapture(self, integrations=self.exception_autocapture_integrations)

if sync_mode:
self.consumers = None
Expand Down Expand Up @@ -432,6 +437,9 @@ def shutdown(self):
self.flush()
self.join()

if self.exception_capture:
self.exception_capture.close()

def _load_feature_flags(self):
try:
response = get(
Expand Down
67 changes: 57 additions & 10 deletions posthog/exception_capture.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,51 @@
import logging
import sys
import threading
from typing import TYPE_CHECKING
from enum import Enum
from typing import TYPE_CHECKING, List, Optional

from posthog.exception_utils import exceptions_from_error_tuple
from posthog.exception_utils import exceptions_from_error_tuple, handle_in_app
from posthog.utils import remove_trailing_slash

if TYPE_CHECKING:
from posthog.client import Client


class Integrations(str, Enum):
Django = "django"


DEFAULT_DISTINCT_ID = "python-exceptions"


class ExceptionCapture:
# TODO: Add client side rate limiting to prevent spamming the server with exceptions

log = logging.getLogger("posthog")

def __init__(self, client: "Client"):
def __init__(self, client: "Client", integrations: Optional[List[Integrations]] = None):
self.client = client
self.original_excepthook = sys.excepthook
sys.excepthook = self.exception_handler
threading.excepthook = self.thread_exception_handler
self.enabled_integrations = []

for integration in integrations or []:
# TODO: Maybe find a better way of enabling integrations
# This is very annoying currently if we had to add any configuration per integration
if integration == Integrations.Django:
try:
from posthog.exception_integrations.django import DjangoIntegration

enabled_integration = DjangoIntegration(self.exception_receiver)
self.enabled_integrations.append(enabled_integration)
except Exception as e:
self.log.exception(f"Failed to enable Django integration: {e}")
Comment on lines +33 to +43
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it'd be cool if the integration could configure itself.

integrations = [DjangoIntegration]
init(client, integrations)

The Integration interface has a install|uninstall method that is called after the SDK is init.
the install method in the DjangoIntegration class does all the config.
if the SDK close is called, we also call all uninstall methods from all integrations
an example https://github.com/PostHog/posthog-android/blob/bb0f3d30cc3d9819000b43c281296a30483841fa/posthog-android/src/main/java/com/posthog/android/internal/PostHogActivityLifecycleCallbackIntegration.kt#L73-L77

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice, will do

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe integrations can be also a property in the client

client.integrations = [DjangoIntegration]
init(client)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

was a bit reluctant to spend more time on this interface, given i'm not yet sure how most integrations will look like. If they need to be initialised post client initialisation / need some config options from users to happen first, would make sense. We can make breaking changes here though as long as its in alpha, so not too worried.


def close(self):
sys.excepthook = self.original_excepthook
for integration in self.enabled_integrations:
integration.uninstall()

def exception_handler(self, exc_type, exc_value, exc_traceback):
# don't affect default behaviour.
Expand All @@ -28,7 +55,14 @@ def exception_handler(self, exc_type, exc_value, exc_traceback):
def thread_exception_handler(self, args):
self.capture_exception(args.exc_type, args.exc_value, args.exc_traceback)

def capture_exception(self, exc_type, exc_value, exc_traceback):
def exception_receiver(self, exc_info, extra_properties):
if "distinct_id" in extra_properties:
metadata = {"distinct_id": extra_properties["distinct_id"]}
else:
metadata = None
self.capture_exception(exc_info[0], exc_info[1], exc_info[2], metadata)

def capture_exception(self, exc_type, exc_value, exc_traceback, metadata=None):
try:
# if hasattr(sys, "ps1"):
# # Disable the excepthook for interactive Python shells
Expand All @@ -37,17 +71,30 @@ def capture_exception(self, exc_type, exc_value, exc_traceback):
# Format stack trace like sentry
all_exceptions_with_trace = exceptions_from_error_tuple((exc_type, exc_value, exc_traceback))

# Add in-app property to frames in the exceptions
event = handle_in_app(
{
"exception": {
"values": all_exceptions_with_trace,
},
}
)
all_exceptions_with_trace_and_in_app = event["exception"]["values"]

distinct_id = metadata.get("distinct_id") if metadata else DEFAULT_DISTINCT_ID
# Make sure we have a distinct_id if its empty in metadata
distinct_id = distinct_id or DEFAULT_DISTINCT_ID

properties = {
"$exception_type": all_exceptions_with_trace[0].get("type"),
"$exception_message": all_exceptions_with_trace[0].get("value"),
"$exception_list": all_exceptions_with_trace,
# TODO: Can we somehow get distinct_id from context here? Stateless lib makes this much harder? 😅
# '$exception_personURL': f'{self.client.posthog_host}/project/{self.client.token}/person/{self.client.get_distinct_id()}'
"$exception_type": all_exceptions_with_trace_and_in_app[0].get("type"),
"$exception_message": all_exceptions_with_trace_and_in_app[0].get("value"),
"$exception_list": all_exceptions_with_trace_and_in_app,
"$exception_personURL": f"{remove_trailing_slash(self.client.raw_host)}/project/{self.client.api_key}/person/{distinct_id}",
}

# TODO: What distinct id should we attach these server-side exceptions to?
# Any heuristic seems prone to errors - how can we know if exception occurred in the context of a user that captured some other event?

self.client.capture("python-exceptions", "$exception", properties=properties)
self.client.capture(distinct_id, "$exception", properties=properties)
except Exception as e:
self.log.exception(f"Failed to capture exception: {e}")
5 changes: 5 additions & 0 deletions posthog/exception_integrations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
class IntegrationEnablingError(Exception):
"""
The integration could not be enabled due to a user error like
`django` not being installed for the `DjangoIntegration`.
"""
88 changes: 88 additions & 0 deletions posthog/exception_integrations/django.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import re
import sys
from typing import TYPE_CHECKING

from posthog.exception_integrations import IntegrationEnablingError

try:
from django import VERSION as DJANGO_VERSION
from django.core import signals

except ImportError:
raise IntegrationEnablingError("Django not installed")


if TYPE_CHECKING:
from typing import Any, Dict # noqa: F401

from django.core.handlers.wsgi import WSGIRequest # noqa: F401


class DjangoIntegration:
# TODO: Abstract integrations one we have more and can see patterns
"""
Autocapture errors from a Django application.
"""

identifier = "django"

def __init__(self, capture_exception_fn=None):

if DJANGO_VERSION < (4, 2):
raise IntegrationEnablingError("Django 4.2 or newer is required.")

# TODO: Right now this seems too complicated / overkill for us, but seems like we can automatically plug in middlewares
# which is great for users (they don't need to do this) and everything should just work.
# We should consider this in the future, but for now we can just use the middleware and signals handlers.
# See: https://github.com/getsentry/sentry-python/blob/269d96d6e9821122fbff280e6a26956e5ed03c0b/sentry_sdk/integrations/django/__init__.py

self.capture_exception_fn = capture_exception_fn

def _got_request_exception(request=None, **kwargs):
# type: (WSGIRequest, **Any) -> None

extra_props = {}
if request is not None:
# get headers metadata
extra_props = DjangoRequestExtractor(request).extract_person_data()

self.capture_exception_fn(sys.exc_info(), extra_props)

signals.got_request_exception.connect(_got_request_exception)

def uninstall(self):
pass


class DjangoRequestExtractor:

def __init__(self, request):
# type: (Any) -> None
self.request = request

def extract_person_data(self):
headers = self.headers()

# Extract traceparent and tracestate headers
traceparent = headers.get("traceparent")
tracestate = headers.get("tracestate")

# Extract the distinct_id from tracestate
distinct_id = None
if tracestate:
# TODO: Align on the format of the distinct_id in tracestate
# We can't have comma or equals in header values here, so maybe we should base64 encode it?
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i dont remember the spec by heart but I know its possible base64 in some cases
https://www.w3.org/TR/trace-context/

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we are just reading from tracestate so we don't need to encode anything, or rather, should we decode? it depends on what the client does

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah I meant client should encode (and thus decode here) as otherwise distinct ids with , or = in them will go wonky

match = re.search(r"posthog-distinct-id=([^,]+)", tracestate)
if match:
distinct_id = match.group(1)

return {
"distinct_id": distinct_id,
"ip": headers.get("X-Forwarded-For"),
"user_agent": headers.get("User-Agent"),
"traceparent": traceparent,
}

def headers(self):
# type: () -> Dict[str, str]
return dict(self.request.headers)
3 changes: 3 additions & 0 deletions posthog/test/exception_integrations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import pytest

pytest.importorskip("django")
68 changes: 68 additions & 0 deletions posthog/test/exception_integrations/test_django.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from posthog.exception_integrations.django import DjangoRequestExtractor

DEFAULT_USER_AGENT = (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
)


def mock_request_factory(override_headers):
class Request:
META = {}
# TRICKY: Actual django request dict object has case insensitive matching, and strips http from the names
headers = {
"User-Agent": DEFAULT_USER_AGENT,
"Referrer": "http://example.com",
"X-Forwarded-For": "193.4.5.12",
**(override_headers or {}),
}

return Request()


def test_request_extractor_with_no_trace():
request = mock_request_factory(None)
extractor = DjangoRequestExtractor(request)
assert extractor.extract_person_data() == {
"ip": "193.4.5.12",
"user_agent": DEFAULT_USER_AGENT,
"traceparent": None,
"distinct_id": None,
}


def test_request_extractor_with_trace():
request = mock_request_factory({"traceparent": "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01"})
extractor = DjangoRequestExtractor(request)
assert extractor.extract_person_data() == {
"ip": "193.4.5.12",
"user_agent": DEFAULT_USER_AGENT,
"traceparent": "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01",
"distinct_id": None,
}


def test_request_extractor_with_tracestate():
request = mock_request_factory(
{
"traceparent": "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01",
"tracestate": "posthog-distinct-id=1234",
}
)
extractor = DjangoRequestExtractor(request)
assert extractor.extract_person_data() == {
"ip": "193.4.5.12",
"user_agent": DEFAULT_USER_AGENT,
"traceparent": "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01",
"distinct_id": "1234",
}


def test_request_extractor_with_complicated_tracestate():
request = mock_request_factory({"tracestate": "posthog-distinct-id=alohaMountainsXUYZ,rojo=00f067aa0ba902b7"})
extractor = DjangoRequestExtractor(request)
assert extractor.extract_person_data() == {
"ip": "193.4.5.12",
"user_agent": DEFAULT_USER_AGENT,
"traceparent": None,
"distinct_id": "alohaMountainsXUYZ",
}
29 changes: 29 additions & 0 deletions posthog/test/test_exception_capture.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,32 @@ def test_excepthook(tmpdir):
b'"$exception_list": [{"mechanism": {"type": "generic", "handled": true}, "module": null, "type": "ZeroDivisionError", "value": "division by zero", "stacktrace": {"frames": [{"filename": "app.py", "abs_path"'
in output
)


def test_trying_to_use_django_integration(tmpdir):
app = tmpdir.join("app.py")
app.write(
dedent(
"""
from posthog import Posthog, Integrations
posthog = Posthog('phc_x', host='https://eu.i.posthog.com', enable_exception_autocapture=True, exception_autocapture_integrations=[Integrations.Django], debug=True, on_error=lambda e, batch: print('error handling batch: ', e, batch))

# frame_value = "LOL"

1/0
"""
)
)

with pytest.raises(subprocess.CalledProcessError) as excinfo:
subprocess.check_output([sys.executable, str(app)], stderr=subprocess.STDOUT)

output = excinfo.value.output

assert b"ZeroDivisionError" in output
assert b"LOL" in output
assert b"DEBUG:posthog:data uploaded successfully" in output
assert (
b'"$exception_list": [{"mechanism": {"type": "generic", "handled": true}, "module": null, "type": "ZeroDivisionError", "value": "division by zero", "stacktrace": {"frames": [{"filename": "app.py", "abs_path"'
in output
)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
"flake8-print",
"pre-commit",
],
"test": ["mock>=2.0.0", "freezegun==0.3.15", "pylint", "flake8", "coverage", "pytest", "pytest-timeout"],
"test": ["mock>=2.0.0", "freezegun==0.3.15", "pylint", "flake8", "coverage", "pytest", "pytest-timeout", "django"],
"sentry": ["sentry-sdk", "django"],
}

Expand Down
Loading