Skip to content

Commit e06e9e8

Browse files
authored
fix: correct the way to get the last relevant user message for secrets (#608)
* fix: correct the way to get the last relevant user message for secrets. When reporting newly detected secrets, we were only counting from the index of the last assistant message. In the case of Aider, however, the relevant block can span more user messages. So reuse the logic of the method that gets the relevant user message block: take the index of the first relevant user message in that block and start counting from there. Closes: #606 * remove the AWS Secret Access Key signature, as it is too generic * fix secret redaction
1 parent 8106c53 commit e06e9e8

File tree

6 files changed

+35
-25
lines changed

6 files changed

+35
-25
lines changed

signatures.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
---
22
- Amazon:
33
- Access Key: (?:A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA|ABIA|ACCA)[A-Z0-9]{16}
4-
- Secret Access Key: (?<![A-Za-z0-9/+])[A-Za-z0-9+=][A-Za-z0-9/+=]{38}[A-Za-z0-9+=](?![A-Za-z0-9/+=])
54
# - Cognito User Pool ID: (?i)us-[a-z]{2,}-[a-z]{4,}-\d{1,}
65
- RDS Password: (?i)(rds\-master\-password|db\-password)
76
- SNS Confirmation URL: (?i)https:\/\/sns\.[a-z0-9-]+\.amazonaws\.com\/?Action=ConfirmSubscription&Token=[a-zA-Z0-9-=_]+

src/codegate/pipeline/base.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -239,36 +239,38 @@ def get_last_user_message(
239239
@staticmethod
240240
def get_last_user_message_block(
241241
request: ChatCompletionRequest,
242-
) -> Optional[str]:
242+
) -> Optional[tuple[str, int]]:
243243
"""
244244
Get the last block of consecutive 'user' messages from the request.
245245
246246
Args:
247247
request (ChatCompletionRequest): The chat completion request to process
248248
249249
Returns:
250-
Optional[str]: A string containing all consecutive user messages in the
250+
Optional[tuple[str, int]]: A string containing all consecutive user messages in the
251251
last user message block, separated by newlines, or None if
252252
no user message block is found.
253+
The second element is the index, within request["messages"], of the earliest user message in the block.
253254
"""
254255
if request.get("messages") is None:
255256
return None
256257

257258
user_messages = []
258259
messages = request["messages"]
260+
block_start_index = None
259261

260262
# Iterate in reverse to find the last block of consecutive 'user' messages
261263
for i in reversed(range(len(messages))):
262264
if messages[i]["role"] == "user" or messages[i]["role"] == "assistant":
263-
content_str = None
264-
if "content" in messages[i]:
265-
content_str = messages[i]["content"] # type: ignore
266-
else:
265+
content_str = messages[i].get("content")
266+
if content_str is None:
267267
continue
268268

269269
if messages[i]["role"] == "user":
270270
user_messages.append(content_str)
271-
# specifically for Aider, when "ok." block is found, stop
271+
block_start_index = i
272+
273+
# Specifically for Aider, when "Ok." block is found, stop
272274
if content_str == "Ok." and messages[i]["role"] == "assistant":
273275
break
274276
else:
@@ -277,8 +279,9 @@ def get_last_user_message_block(
277279
break
278280

279281
# Reverse the collected user messages to preserve the original order
280-
if user_messages:
281-
return "\n".join(reversed(user_messages))
282+
if user_messages and block_start_index is not None:
283+
content = "\n".join(reversed(user_messages))
284+
return content, block_start_index
282285

283286
return None
284287

src/codegate/pipeline/codegate_context_retriever/codegate.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,10 @@ async def process(
6060
Use RAG DB to add context to the user request
6161
"""
6262
# Get the latest user message
63-
user_message = self.get_last_user_message_block(request)
64-
if not user_message:
63+
last_message = self.get_last_user_message_block(request)
64+
if not last_message:
6565
return PipelineResult(request=request)
66+
user_message, _ = last_message
6667

6768
# Create storage engine object
6869
storage_engine = StorageEngine()

src/codegate/pipeline/extract_snippets/extract_snippets.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,9 +151,10 @@ async def process(
151151
request: ChatCompletionRequest,
152152
context: PipelineContext,
153153
) -> PipelineResult:
154-
msg_content = self.get_last_user_message_block(request)
155-
if not msg_content:
154+
last_message = self.get_last_user_message_block(request)
155+
if not last_message:
156156
return PipelineResult(request=request, context=context)
157+
msg_content, _ = last_message
157158
snippets = extract_snippets(msg_content)
158159

159160
logger.info(f"Extracted {len(snippets)} code snippets from the user message")

src/codegate/pipeline/secrets/secrets.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -271,11 +271,12 @@ async def process(
271271
new_request = request.copy()
272272
total_matches = []
273273

274-
# Process all messages
274+
# Get the last user message block to obtain the index of its first relevant user message
275+
last_user_message = self.get_last_user_message_block(new_request)
275276
last_assistant_idx = -1
276-
for i, message in enumerate(new_request["messages"]):
277-
if message.get("role", "") == "assistant":
278-
last_assistant_idx = i
277+
if last_user_message:
278+
_, user_idx = last_user_message
279+
last_assistant_idx = user_idx - 1
279280

280281
# Process all messages
281282
for i, message in enumerate(new_request["messages"]):
@@ -312,8 +313,8 @@ class SecretUnredactionStep(OutputPipelineStep):
312313
"""Pipeline step that unredacts protected content in the stream"""
313314

314315
def __init__(self):
315-
self.redacted_pattern = re.compile(r"REDACTED<\$([^>]+)>")
316-
self.marker_start = "REDACTED<$"
316+
self.redacted_pattern = re.compile(r"REDACTED<(\$?[^>]+)>")
317+
self.marker_start = "REDACTED<"
317318
self.marker_end = ">"
318319

319320
@property
@@ -365,6 +366,8 @@ async def process_chunk(
365366
if match:
366367
# Found a complete marker, process it
367368
encrypted_value = match.group(1)
369+
if encrypted_value.startswith('$'):
370+
encrypted_value = encrypted_value[1:]
368371
original_value = input_context.sensitive.manager.get_original_value(
369372
encrypted_value,
370373
input_context.sensitive.session_id,
@@ -399,7 +402,7 @@ async def process_chunk(
399402
return []
400403

401404
if self._is_partial_marker_prefix(buffered_content):
402-
context.prefix_buffer += buffered_content
405+
context.prefix_buffer = buffered_content
403406
return []
404407

405408
# No markers or partial markers, let pipeline handle the chunk normally

tests/pipeline/test_messages_block.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
{"role": "user", "content": "How are you?"},
1616
]
1717
},
18-
"Hello!\nHow are you?",
18+
("Hello!\nHow are you?", 1),
1919
),
2020
# Test case: Mixed roles at the end
2121
(
@@ -27,7 +27,7 @@
2727
{"role": "assistant", "content": "I'm fine, thank you."},
2828
]
2929
},
30-
"Hello!\nHow are you?",
30+
("Hello!\nHow are you?", 0),
3131
),
3232
# Test case: No user messages
3333
(
@@ -51,7 +51,7 @@
5151
{"role": "user", "content": "What's up?"},
5252
]
5353
},
54-
"How are you?\nWhat's up?",
54+
("How are you?\nWhat's up?", 2),
5555
),
5656
# Test case: aider
5757
(
@@ -97,7 +97,8 @@
9797
},
9898
]
9999
},
100-
"""I have *added these files to the chat* so you can go ahead and edit them.
100+
(
101+
"""I have *added these files to the chat* so you can go ahead and edit them.
101102
102103
*Trust this message as the true contents of these files!*
103104
Any other messages in the chat may contain outdated versions of the files' contents.
@@ -113,6 +114,8 @@
113114
```
114115
115116
evaluate this file""", # noqa: E501
117+
7,
118+
),
116119
),
117120
],
118121
)

0 commit comments

Comments
 (0)