Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add callsite info to event dictionary #378

Closed
aucampia opened this issue Dec 5, 2021 · 3 comments · Fixed by #385
Closed

Add callsite info to event dictionary #378

aucampia opened this issue Dec 5, 2021 · 3 comments · Fixed by #385

Comments

@aucampia
Copy link
Contributor

aucampia commented Dec 5, 2021

I would like to have the following values in the event dictionary (taken from logging module docs):

Attribute name Description
filename Filename portion of pathname.
pathname Full pathname of the source file where the logging call was issued (if available).
funcName Name of function containing the logging call.
module Module (name portion of filename).
process Process ID (if available).
processName Process name (if available).
thread Thread ID (if available).
threadName Thread name (if available).
lineno Source line number where the logging call was issued (if available).

These values can be added with custom processors, but it would be nice to add these to structlog to reduce the amount of code that has to be copied around.

If you are open to it, I would like to add the following processor to structlog:

class CallsiteParameter(str, enum.Enum):
    FUNC_NAME = "funcName"
    THREAD = "thread"
    THREAD_NAME = "threadName"
    PATHNAME = "pathname"
    FILENAME = "filename"
    LINENO = "lineno"
    MODULE = "module"
    PROCESS = "process"
    PROCESS_NAME = "processName"


call_site_parameters: Set[CallsiteParameter] = set(CallsiteParameter)


def _get_processname() -> str:
    processName = "MainProcess"
    mp: Any = sys.modules.get("multiprocessing")
    if mp is not None:
        # Errors may occur if multiprocessing has not finished loading
        # yet - e.g. if a custom import hook causes third-party code
        # to run when multiprocessing calls import. See issue 8200
        # for an example
        try:
            processName = mp.current_process().name
        except Exception:
            pass
    return processName


class CallsiteInfoAdder:
    __slots__ = ["_parameters", "_active_handlers"]

    _handlers: Dict[CallsiteParameter, Callable[[str, inspect.Traceback], Any]] = {
        CallsiteParameter.FUNC_NAME: lambda module, frame_info: frame_info.function,
        CallsiteParameter.THREAD: lambda module, frame_info: threading.get_ident(),
        CallsiteParameter.THREAD_NAME: lambda module, frame_info: threading.current_thread().name,
        CallsiteParameter.PATHNAME: lambda module, frame_info: frame_info.filename,
        CallsiteParameter.FILENAME: lambda module, frame_info: os.path.basename(
            frame_info.filename
        ),
        CallsiteParameter.LINENO: lambda module, frame_info: frame_info.lineno,
        CallsiteParameter.MODULE: lambda module, frame_info: module,
        CallsiteParameter.PROCESS: lambda module, frame_info: os.getpid(),
        CallsiteParameter.PROCESS_NAME: lambda module, frame_info: _get_processname,
    }

    def __init__(
        self, parameters: Set[CallsiteParameter] = call_site_parameters
    ) -> None:
        self._parameters = parameters
        self._active_handlers: List[
            Tuple[CallsiteParameter, Callable[[str, inspect.Traceback], Any]]
        ] = []
        for parameter in self._parameters:
            self._active_handlers.append((parameter, self._handlers[parameter]))

    def __call__(
        self, logger: logging.Logger, name: str, event_dict: EventDict
    ) -> EventDict:
        record: Optional[logging.LogRecord] = event_dict.get("_record")
        if record is not None:
            for parameter in self._parameters:
                event_dict[parameter.value] = record.__dict__[parameter.value]
        else:
            frame, module = structlog._frames._find_first_app_frame_and_name(
                additional_ignores=[__name__]
            )
            frame_info = inspect.getframeinfo(frame)
            for parameter, handler in self._active_handlers:
                handler(module, frame_info)
                event_dict[parameter.value] = handler(module, frame_info)
        return event_dict

Another alternative is a simple non configurable process like this:

def add_callsite_info(
    logger: logging.Logger, method_name: str, event_dict: EventDict
) -> EventDict:
    record: Optional[logging.LogRecord] = event_dict.get("_record")
    if record is not None:
        event_dict["funcName"] = record.funcName
        event_dict["thread"] = record.thread
        event_dict["threadName"] = record.threadName
        event_dict["pathname"] = record.pathname
        event_dict["filename"] = record.filename
        event_dict["lineno"] = record.lineno
        event_dict["module"] = record.module
        event_dict["process"] = record.process
        event_dict["processName"] = record.processName
    else:
        frame, module = structlog._frames._find_first_app_frame_and_name(
            additional_ignores=[__name__]
        )
        frame_info = inspect.getframeinfo(frame)
        event_dict["funcName"] = frame_info.function
        event_dict["thread"] = threading.get_ident()
        event_dict["threadName"] = threading.current_thread().name
        event_dict["pathname"] = frame_info.filename
        event_dict["filename"] = os.path.basename(frame_info.filename)
        event_dict["lineno"] = frame_info.lineno
        event_dict["module"] = module
        event_dict["process"] = os.getpid()
        processName = "MainProcess"
        mp: Any = sys.modules.get("multiprocessing")
        if mp is not None:
            # Errors may occur if multiprocessing has not finished loading
            # yet - e.g. if a custom import hook causes third-party code
            # to run when multiprocessing calls import. See issue 8200
            # for an example
            try:
                processName = mp.current_process().name
            except Exception:
                pass

        event_dict["processName"] = processName

    return event_dict

However the result in this case will be very noisy and may include things people do not care for.

@aucampia
Copy link
Contributor Author

aucampia commented Dec 7, 2021

I created a draft with code and tests, still need to add docs and other things, but would like some feedback before I proceed much further.

@hynek
Copy link
Owner

hynek commented Dec 8, 2021

Wow you're putting me really to shame with your industriousness (and generosity – thank you so much!)!

Since – if I'm not mistaken – all this data is available via logging extras, this is about making this data independent of stdlib logging, correct?

You're entirely correct, that this has to be configurable and it would be great if we could get both PRs in, because that would allow me to cut a 21.5 before new year and get the logfmt out to the people.

@aucampia
Copy link
Contributor Author

aucampia commented Dec 8, 2021

Since – if I'm not mistaken – all this data is available via logging extras, this is about making this data independent of stdlib logging, correct?

Yes, the specific use case I have is that I want:

  • all log events to include function name, filename, lineno, thread.
  • logging to work via the standard library logger and via structlog.

To make this work I configured structlog as can be seen here: https://gitlab.com/aucampia/try/python-structlog/-/blob/f1cac2a89d17f58d5ec14a3db6930414d1d3b8b2/src/aucampia/trx/structlog/cli.py#L175

def setup_structlog_formatter() -> None:
    shared_processors: List[Processor] = []
    structlog.configure(
        processors=shared_processors
        + [
            structlog.stdlib.filter_by_level,
            structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
        ],
        logger_factory=structlog.stdlib.LoggerFactory(),
        cache_logger_on_first_use=True,
    )

    formatter = structlog.stdlib.ProcessorFormatter(
        foreign_pre_chain=[add_extra, *shared_processors],
        keep_stack_info=True,
        processors=[
            CallsiteInfoAdder(
                {
                    CallsiteParameter.FUNC_NAME,
                    CallsiteParameter.FILENAME,
                    CallsiteParameter.LINENO,
                    CallsiteParameter.THREAD,
                }
            ),
            structlog.stdlib.add_log_level,
            structlog.stdlib.add_logger_name,
            structlog.stdlib.PositionalArgumentsFormatter(),
            structlog.processors.TimeStamper(fmt="iso"),
            structlog.processors.StackInfoRenderer(),
            structlog.processors.format_exc_info,
            structlog.processors.UnicodeDecoder(),
            structlog.stdlib.ProcessorFormatter.remove_processors_meta,
            structlog.processors.JSONRenderer(),
        ],
    )
    log_handler = logging.StreamHandler(stream=sys.stderr)
    log_handler.setFormatter(formatter)
    root_logger = logging.getLogger("")
    root_logger.propagate = True
    root_logger.setLevel(os.environ.get("PYLOGGING_LEVEL", logging.INFO))
    root_logger.addHandler(log_handler)

In summary:

  • Minimal config in structlog.configure, just enough to get it to ProcessorFormatter
  • Most config in ProcessorFormatter, including CallsiteInfoAdder

This in practice works correctly, messages from both logging and structlog end up with callsite info. I'm not sure if this is the optimal config though.

Example output messages with the above config:

$ pipx run --no-cache --spec git+https://gitlab.com/aucampia/try/python-structlog@f1cac2a89d17f58d5ec14a3db6930414d1d3b8b2 aucampia.trx.structlog.py -vvv run -s structlog_formatter -g stdlib_extra -g structlog 2>&1
{"setup": ["structlog_formatter"], "genlog": ["stdlib_extra", "structlog"]}
{"event": "entry", "foo": "bar", "what": "stdlib_extra", "thread": 139763534812992, "filename": "cli.py", "funcName": "genlog_stdlib_extra", "lineno": 220, "level": "info", "logger": "aucampia.trx.structlog.cli.genlog_stdlib_extra", "timestamp": "2021-12-08T08:42:05.179313Z"}
{"event": "fmt is a thing", "thread": 139763534812992, "filename": "cli.py", "funcName": "genlog_stdlib_extra", "lineno": 221, "level": "info", "logger": "aucampia.trx.structlog.cli.genlog_stdlib_extra", "timestamp": "2021-12-08T08:42:05.179427Z"}
{"event": "caught", "foo": "bar", "what": "stdlib_extra", "thread": 139763534812992, "filename": "cli.py", "funcName": "genlog_stdlib_extra", "lineno": 225, "level": "info", "logger": "aucampia.trx.structlog.cli.genlog_stdlib_extra", "timestamp": "2021-12-08T08:42:05.179512Z", "exception": "Traceback (most recent call last):\n  File \"/home/iwana/.local/pipx/.cache/78617db6b8f5638/lib64/python3.9/site-packages/aucampia/trx/structlog/cli.py\", line 223, in genlog_stdlib_extra\n    raise RuntimeError(\"is here ...\")\nRuntimeError: is here ..."}
{"foo": "bar", "what": "stdlib", "event": "entry", "thread": 139763534812992, "filename": "cli.py", "funcName": "genlog_structlog", "lineno": 232, "level": "info", "logger": "aucampia.trx.structlog.cli.genlog_structlog", "timestamp": "2021-12-08T08:42:05.179955Z"}
{"event": "fmt is a thing", "thread": 139763534812992, "filename": "cli.py", "funcName": "genlog_structlog", "lineno": 233, "level": "info", "logger": "aucampia.trx.structlog.cli.genlog_structlog", "timestamp": "2021-12-08T08:42:05.180045Z"}
{"foo": "bar", "what": "stdlib", "event": "caught", "thread": 139763534812992, "filename": "cli.py", "funcName": "genlog_structlog", "lineno": 237, "level": "info", "logger": "aucampia.trx.structlog.cli.genlog_structlog", "timestamp": "2021-12-08T08:42:05.180131Z", "exception": "Traceback (most recent call last):\n  File \"/home/iwana/.local/pipx/.cache/78617db6b8f5638/lib64/python3.9/site-packages/aucampia/trx/structlog/cli.py\", line 235, in genlog_structlog\n    raise RuntimeError(\"is here ...\")\nRuntimeError: is here ..."}
  • line 1 is just from sys.stderr.write()
  • line 2, 3 and 4 comes from stdlib's logger (ref)
    def genlog_stdlib_extra() -> None:
        logger = logging.getLogger(__name__ + ".genlog_stdlib_extra")
        logger.info("entry", extra={"foo": "bar", "what": "stdlib_extra"})
        logger.info("fmt %s", "is a thing")
        try:
            raise RuntimeError("is here ...")
        except Exception:
            logger.info(
                "caught", exc_info=True, extra={"foo": "bar", "what": "stdlib_extra"}
            )
  • line 5, 6, and 7 comes from logging (ref)
    def genlog_structlog() -> None:
        logger: FilteringBoundLogger = structlog.getLogger(__name__ + ".genlog_structlog")
        logger.info("entry", foo="bar", what="stdlib")
        logger.info("fmt is a thing")
        try:
            raise RuntimeError("is here ...")
        except Exception:
            logger.info("caught", exc_info=True, foo="bar", what="stdlib")
        logger.bind()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
2 participants