Skip to content
This repository was archived by the owner on Jun 5, 2025. It is now read-only.

If the LLM uses ellipsis in place of the REDACTED string, print the original version #614

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions src/codegate/pipeline/secrets/secrets.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,8 @@ class SecretUnredactionStep(OutputPipelineStep):
"""Pipeline step that unredacts protected content in the stream"""

def __init__(self):
self.redacted_pattern = re.compile(r"REDACTED<\$([^>]+)>")
self.marker_start = "REDACTED<$"
self.redacted_pattern = re.compile(r"REDACTED<(\$?[^>]+)>")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As with all regexes, this will get harder to maintain over time. Mind adding a couple of examples of the expected patterns?

self.marker_start = "REDACTED<"
self.marker_end = ">"

@property
Expand Down Expand Up @@ -365,6 +365,10 @@ async def process_chunk(
if match:
# Found a complete marker, process it
encrypted_value = match.group(1)
# Strip the $ if it exists before trying to decrypt
if encrypted_value.startswith('$'):
encrypted_value = encrypted_value[1:]

original_value = input_context.sensitive.manager.get_original_value(
encrypted_value,
input_context.sensitive.session_id,
Expand Down
Loading