Skip to content

Commit

Permalink
CDK: airbytehq#3764 obfuscate secrets in logging (airbytehq#8211)
Browse files Browse the repository at this point in the history
* Secure logger implementation minus still broken new tests

* Secure logger implementation and unit tests

* code review changes

* filter text on uncaught exceptions

* auto-formatting

* Mistaken change left in code

* filter text on uncaught exceptions

* Simplify code, remove LoggingFilter and move obfuscation functionality to Formatter

* Update airbyte-cdk/python/airbyte_cdk/entrypoint.py

Co-authored-by: Eugene Kulak <widowmakerreborn@gmail.com>

* Obfuscate Secrets in Logging, code review changes

* Obfuscate Secrets in Logging, code review changes, unit test fixes

* CHANGELOG.md

Co-authored-by: Eugene Kulak <widowmakerreborn@gmail.com>
  • Loading branch information
eliziario and keu authored Dec 7, 2021
1 parent 487f9e7 commit 1d493ec
Show file tree
Hide file tree
Showing 9 changed files with 346 additions and 4 deletions.
3 changes: 3 additions & 0 deletions airbyte-cdk/python/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

## 0.1.40
Filter airbyte_secrets values at logger and other logging refactorings.

## 0.1.39
Add `__init__.py` to mark the directory `airbyte_cdk/utils` as a package.

Expand Down
12 changes: 10 additions & 2 deletions airbyte-cdk/python/airbyte_cdk/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@
import tempfile
from typing import Iterable, List

from airbyte_cdk.logger import init_logger
from airbyte_cdk.logger import AirbyteLogFormatter, init_logger
from airbyte_cdk.models import AirbyteMessage, Status, Type
from airbyte_cdk.sources import Source
from airbyte_cdk.sources.utils.schema_helpers import check_config_against_spec_or_exit, split_config
from airbyte_cdk.utils.airbyte_secrets_utils import get_secrets

logger = init_logger("airbyte")

Expand All @@ -24,7 +25,8 @@ def __init__(self, source: Source):
self.source = source
self.logger = logging.getLogger(f"airbyte.{getattr(source, 'name', '')}")

def parse_args(self, args: List[str]) -> argparse.Namespace:
@staticmethod
def parse_args(args: List[str]) -> argparse.Namespace:
# set up parent parsers
parent_parser = argparse.ArgumentParser(add_help=False)
main_parser = argparse.ArgumentParser()
Expand Down Expand Up @@ -72,6 +74,12 @@ def run(self, parsed_args: argparse.Namespace) -> Iterable[str]:
else:
raw_config = self.source.read_config(parsed_args.config)
config = self.source.configure(raw_config, temp_dir)

# Now that we have the config, we can use it to get a list of airbyte_secrets
# that we should filter in logging to avoid leaking secrets
config_secrets = get_secrets(self.source, config, self.logger)
AirbyteLogFormatter.update_secrets(config_secrets)

# Remove internal flags from config before validating so
# jsonschema's additionalProperties flag wont fail the validation
config, internal_config = split_config(config)
Expand Down
29 changes: 29 additions & 0 deletions airbyte-cdk/python/airbyte_cdk/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@

import logging
import logging.config
import sys
import traceback
from functools import partial
from typing import List

from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage

Expand All @@ -29,19 +32,43 @@
}


def init_unhandled_exception_output_filtering(logger: logging.Logger) -> None:
    """
    Replace sys.excepthook so that uncaught exceptions are emitted through the
    given logger instead of being printed straight to the console.

    The intent is that the exception text passes through the Airbyte logging
    pipeline (where secrets are obfuscated) before it is written anywhere.

    :param logger: logger that receives the text of uncaught exceptions at CRITICAL level
    """

    def _report_uncaught(bound_logger, exc_type, exc_value, exc_traceback):
        if not issubclass(exc_type, KeyboardInterrupt):
            # Only the exception text is logged; the traceback is not emitted.
            bound_logger.critical(str(exc_value))
            return

        # For developer ergonomics, a ctrl-c still goes to the default hook
        # so the stack trace is shown.
        sys.__excepthook__(exc_type, exc_value, exc_traceback)

    sys.excepthook = partial(_report_uncaught, logger)


def init_logger(name: str = None) -> logging.Logger:
    """Initial set up of logger

    Registers AirbyteNativeLogger as the logger class and "TRACE" as the name of
    TRACE_LEVEL_NUM, fetches the logger called `name`, sets its level to
    TRACE_LEVEL_NUM, applies LOGGING_CONFIG and installs the uncaught-exception
    hook bound to that logger.

    :param name: logger name handed to logging.getLogger
    :return: the configured logger
    """
    # Must be registered before getLogger so that newly created loggers use this class
    logging.setLoggerClass(AirbyteNativeLogger)
    logging.addLevelName(TRACE_LEVEL_NUM, "TRACE")
    logger = logging.getLogger(name)
    logger.setLevel(TRACE_LEVEL_NUM)
    logging.config.dictConfig(LOGGING_CONFIG)
    # Route uncaught exceptions through this logger rather than the default console output
    init_unhandled_exception_output_filtering(logger)
    return logger


class AirbyteLogFormatter(logging.Formatter):
"""Output log records using AirbyteMessage"""

_secrets = []

@classmethod
def update_secrets(cls, secrets: List[str]):
"""Update the list of secrets to be replaced in the log message"""
cls._secrets = secrets

# Transforming Python log levels to Airbyte protocol log levels
level_mapping = {
logging.FATAL: "FATAL",
Expand All @@ -56,6 +83,8 @@ def format(self, record: logging.LogRecord) -> str:
"""Return a JSON representation of the log message"""
message = super().format(record)
airbyte_level = self.level_mapping.get(record.levelno, "INFO")
for secret in AirbyteLogFormatter._secrets:
message = message.replace(secret, "****")
log_message = AirbyteMessage(type="LOG", log=AirbyteLogMessage(level=airbyte_level, message=message))
return log_message.json(exclude_unset=True)

Expand Down
24 changes: 24 additions & 0 deletions airbyte-cdk/python/airbyte_cdk/utils/airbyte_secrets_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#
# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
#

import logging
from typing import Any, List, Mapping

from airbyte_cdk.sources import Source
from airbyte_cdk.utils.mapping_utils import all_key_pairs_dot_notation, get_value_by_dot_notation


def get_secrets(source: Source, config: Mapping[str, Any], logger: logging.Logger) -> List[Any]:
    """
    Collect the config values that the source specification marks as secret.

    The "properties" of the connection specification are flattened to dot
    notation; every entry whose key ends in "airbyte_secret" and whose value is
    truthy identifies a secret property. Only the first path segment of such a
    key is used as the config key, and a value is returned (as a string) only
    when the config holds a truthy value under that key.

    :param source: source whose spec() describes the configuration
    :param config: configuration to read the secret values from
    :param logger: logger passed through to source.spec()
    :return: stringified secret values found in the config
    """
    spec_properties = source.spec(logger).connectionSpecification.get("properties", {})

    secret_values = []
    for dotted_key, flag in all_key_pairs_dot_notation(spec_properties).items():
        if not (flag and dotted_key.endswith("airbyte_secret")):
            continue
        # Keep only the leading segment, i.e. the top-level property name.
        property_name = dotted_key.split(".")[0]
        if config.get(property_name):
            secret_values.append(str(get_value_by_dot_notation(config, property_name)))
    return secret_values
41 changes: 41 additions & 0 deletions airbyte-cdk/python/airbyte_cdk/utils/mapping_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#
# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
#

from functools import reduce
from typing import Any, List, Mapping, Optional


def all_key_pairs_dot_notation(dict_obj: Mapping) -> Mapping[str, Any]:
    """
    Recursively iterate through a dictionary and return a dictionary of all key-value pairs in dot notation.

    Nested dict values are descended into, and each leaf is reported under the
    "."-joined path of the (stringified) keys leading to it. Empty nested
    dicts contribute no entries.

    dict_obj: Mapping to flatten
    returns: dict mapping each dotted path to its leaf value
    ex.:
        all_key_pairs_dot_notation({"a": {"b": 1}, "c": 2}) == {"a.b": 1, "c": 2} -> True
    """

    def _walk(node: Mapping, path: List[str]):
        # Yields (dotted_path, leaf_value) tuples. A fresh path list is built
        # for every child instead of mutating shared state, so an aborted
        # iteration cannot leak a partial prefix into later calls.
        for key, value in node.items():
            child_path = path + [str(key)]
            if isinstance(value, dict):
                yield from _walk(value, child_path)
            else:
                yield ".".join(child_path), value

    return dict(_walk(dict_obj, []))


def get_value_by_dot_notation(dict_obj: Mapping, key: str, default: Optional[Any] = ...) -> Any:
    """
    Return the value of a key in dot notation in an arbitrarily nested Mapping.

    dict_obj: Mapping to look the key up in
    key: str, path segments separated by "."
    default: Any, returned when the path cannot be resolved (a segment is
        missing or an intermediate value is not a Mapping); when omitted the
        lookup is strict
    raises: KeyError if default is not provided and the key is not found
    ex.:
        dict_obj = {"nested": {"key": "value"}}
        get_value_by_dot_notation(dict_obj, "nested.key") == "value" -> True
        get_value_by_dot_notation(dict_obj, "missing.key", None) is None -> True
    """
    if default is ...:
        # Strict mode: plain subscripting, so a missing key raises KeyError.
        return reduce(lambda d, key_name: d[key_name], key.split("."), dict_obj)

    # Private marker so that stored values equal to `default` (or None) are
    # still distinguished from an absent key.
    missing = object()
    current = dict_obj
    for key_name in key.split("."):
        # Stop as soon as the path cannot continue instead of calling .get()
        # on the default or on a scalar.
        if not isinstance(current, Mapping):
            return default
        current = current.get(key_name, missing)
        if current is missing:
            return default
    return current
2 changes: 1 addition & 1 deletion airbyte-cdk/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

setup(
name="airbyte-cdk",
version="0.1.39",
version="0.1.40",
description="A framework for writing Airbyte Connectors.",
long_description=README,
long_description_content_type="text/markdown",
Expand Down
1 change: 1 addition & 0 deletions airbyte-cdk/python/unit_tests/test_entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ def config_mock(mocker, request):
({"username": "fake"}, {"type": "object", "properties": {"name": {"type": "string"}}, "additionalProperties": False}, False),
({"username": "fake"}, {"type": "object", "properties": {"username": {"type": "string"}}, "additionalProperties": False}, True),
({"username": "fake"}, {"type": "object", "properties": {"user": {"type": "string"}}}, True),
({"username": "fake"}, {"type": "object", "properties": {"user": {"type": "string", "airbyte_secret": True}}}, True),
(
{"username": "fake", "_limit": 22},
{"type": "object", "properties": {"username": {"type": "string"}}, "additionalProperties": False},
Expand Down
3 changes: 2 additions & 1 deletion airbyte-cdk/python/unit_tests/test_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@


import json
import logging
from typing import Dict

import pytest
Expand All @@ -12,7 +13,7 @@

@pytest.fixture(scope="session")
def logger():
logger = init_logger("Test logger")
logger = logging.getLogger("Test logger")
return logger


Expand Down
Loading

0 comments on commit 1d493ec

Please sign in to comment.