Skip to content

Commit d424ceb

Browse files
fix: address diff generation, security, and file handling issues for PR #1192
Co-Authored-By: Patched <tech@patched.codes>
1 parent 737f080 commit d424ceb

File tree

4 files changed

+129
-39
lines changed

4 files changed

+129
-39
lines changed

patchwork/steps/FixIssue/FixIssue.py

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
import re
2+
import shlex
23
from pathlib import Path
34
from typing import Any, Optional
45

56
from git import Repo
7+
from git.exc import GitCommandError
68
from openai.types.chat import ChatCompletionMessageParam
79

810
from patchwork.common.client.llm.aio import AioLlmClient
@@ -131,13 +133,38 @@ def run(self):
131133
modified_files_with_diffs = []
132134
repo = Repo(cwd, search_parent_directories=True)
133135
for file in modified_files:
134-
file_path = Path(file)
135-
if file_path.exists():
136-
# Get the diff using git
137-
diff = repo.git.diff('HEAD', str(file))
138-
modified_files_with_diffs.append({
139-
"path": str(file),
140-
"diff": diff
141-
})
136+
# Sanitize the file path to prevent command injection
137+
safe_file = shlex.quote(str(file))
138+
try:
139+
# Check if file is tracked by git, even if deleted
140+
is_tracked = str(file) in repo.git.ls_files('--', safe_file).splitlines()
141+
is_staged = str(file) in repo.git.diff('--cached', '--name-only', safe_file).splitlines()
142+
is_unstaged = str(file) in repo.git.diff('--name-only', safe_file).splitlines()
143+
144+
if is_tracked or is_staged or is_unstaged:
145+
# Get both staged and unstaged changes
146+
staged_diff = repo.git.diff('--cached', safe_file) if is_staged else ""
147+
unstaged_diff = repo.git.diff(safe_file) if is_unstaged else ""
148+
149+
# Combine both diffs
150+
combined_diff = staged_diff + ('\n' + unstaged_diff if unstaged_diff else '')
151+
152+
if combined_diff.strip():
153+
# Validate dictionary structure before adding
154+
modified_file = {
155+
"path": str(file),
156+
"diff": combined_diff
157+
}
158+
# Ensure all required fields are present with correct types
159+
if not isinstance(modified_file["path"], str):
160+
raise TypeError(f"path must be str, got {type(modified_file['path'])}")
161+
if not isinstance(modified_file["diff"], str):
162+
raise TypeError(f"diff must be str, got {type(modified_file['diff'])}")
163+
modified_files_with_diffs.append(modified_file)
164+
except GitCommandError as e:
165+
# Log the error but continue processing other files
166+
print(f"Warning: Failed to generate diff for {safe_file}: {str(e)}")
167+
continue
168+
142169
return dict(modified_files=modified_files_with_diffs)
143170
return dict()

patchwork/steps/FixIssue/typed.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,14 @@ class FixIssueInputs(__FixIssueRequiredInputs, total=False):
3636

3737

3838
class ModifiedFile(TypedDict):
39+
"""Represents a file that has been modified by the FixIssue step.
40+
41+
Attributes:
42+
path: The relative path to the modified file from the repository root
43+
diff: A unified diff string showing the changes made to the file.
44+
The diff includes both staged and unstaged changes, and is
45+
generated using git diff commands with proper path sanitization.
46+
"""
3947
path: str
4048
diff: str
4149

patchwork/steps/ModifyCode/ModifyCode.py

Lines changed: 71 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,15 @@
88
from patchwork.step import Step, StepStatus
99

1010

11-
def save_file_contents(file_path, content):
12-
"""Utility function to save content to a file."""
13-
with open(file_path, "w") as file:
11+
def save_file_contents(file_path: str | Path, content: str) -> None:
12+
"""Utility function to save content to a file.
13+
14+
Args:
15+
file_path: Path to the file to save content to (str or Path)
16+
content: Content to write to the file
17+
"""
18+
path = Path(file_path)
19+
with path.open("w") as file:
1420
file.write(content)
1521

1622

@@ -36,20 +42,26 @@ def handle_indent(src: list[str], target: list[str], start: int, end: int) -> li
3642

3743

3844
def replace_code_in_file(
39-
file_path: str,
45+
file_path: str | Path,
4046
start_line: int | None,
4147
end_line: int | None,
4248
new_code: str,
4349
) -> None:
50+
"""Replace code in a file at the specified line range.
51+
52+
Args:
53+
file_path: Path to the file to modify (str or Path)
54+
start_line: Starting line number (1-based)
55+
end_line: Ending line number (1-based)
56+
new_code: New code to insert
57+
"""
4458
path = Path(file_path)
4559
new_code_lines = new_code.splitlines(keepends=True)
4660
if len(new_code_lines) > 0 and not new_code_lines[-1].endswith("\n"):
4761
new_code_lines[-1] += "\n"
4862

4963
if path.exists() and start_line is not None and end_line is not None:
50-
"""Replaces specified lines in a file with new code."""
5164
text = path.read_text()
52-
5365
lines = text.splitlines(keepends=True)
5466

5567
# Insert the new code at the start line after converting it into a list of lines
@@ -58,7 +70,7 @@ def replace_code_in_file(
5870
lines = new_code_lines
5971

6072
# Save the modified contents back to the file
61-
save_file_contents(file_path, "".join(lines))
73+
save_file_contents(path, "".join(lines))
6274

6375

6476
class ModifyCode(Step):
@@ -84,7 +96,8 @@ def run(self) -> dict:
8496
return dict(modified_code_files=[])
8597

8698
for code_snippet, extracted_response in sorted_list:
87-
uri = code_snippet.get("uri")
99+
# Use Path for consistent path handling
100+
file_path = Path(code_snippet.get("uri", ""))
88101
start_line = code_snippet.get("startLine")
89102
end_line = code_snippet.get("endLine")
90103
new_code = extracted_response.get("patch")
@@ -93,41 +106,68 @@ def run(self) -> dict:
93106
continue
94107

95108
# Get the original content for diffing
96-
file_path = Path(uri)
97-
original_path = None
98109
diff = ""
99110

100111
if file_path.exists():
101-
# Create a temporary copy of the original file
102-
with tempfile.NamedTemporaryFile(mode='w', delete=False) as tmp_file:
103-
shutil.copy2(uri, tmp_file.name)
104-
original_path = tmp_file.name
105-
106-
# Apply the changes
107-
replace_code_in_file(uri, start_line, end_line, new_code)
108-
109-
# Generate a proper unified diff
110-
with open(original_path, 'r') as f1, open(uri, 'r') as f2:
111-
diff = ''.join(difflib.unified_diff(
112-
f1.readlines(),
113-
f2.readlines(),
114-
fromfile='a/' + str(file_path),
115-
tofile='b/' + str(file_path)
116-
))
117-
118-
# Clean up temporary file
119-
Path(original_path).unlink()
112+
try:
113+
# Create a temporary directory with restricted permissions
114+
with tempfile.TemporaryDirectory(prefix='modifycode_') as temp_dir:
115+
# Create temporary file path within the secure directory
116+
temp_path = Path(temp_dir) / 'original_file'
117+
118+
# Copy original file with same permissions
119+
shutil.copy2(file_path, temp_path)
120+
121+
# Store original content
122+
with temp_path.open('r') as f1:
123+
original_lines = f1.readlines()
124+
125+
# Apply the changes
126+
replace_code_in_file(file_path, start_line, end_line, new_code)
127+
128+
# Read modified content
129+
with file_path.open('r') as f2:
130+
modified_lines = f2.readlines()
131+
132+
# Generate a proper unified diff
133+
# Use Path for consistent path handling
134+
relative_path = str(file_path)
135+
diff = ''.join(difflib.unified_diff(
136+
original_lines,
137+
modified_lines,
138+
fromfile=str(Path('a') / relative_path),
139+
tofile=str(Path('b') / relative_path)
140+
))
141+
142+
# temp_dir and its contents are automatically cleaned up
143+
except (OSError, IOError) as e:
144+
print(f"Warning: Failed to generate diff for {file_path}: {str(e)}")
145+
# Still proceed with the modification even if diff generation fails
146+
replace_code_in_file(file_path, start_line, end_line, new_code)
120147
else:
121148
# If file doesn't exist, just store the new code as the diff
122-
diff = f"+++ {file_path}\n{new_code}"
149+
# Use Path for consistent path handling
150+
relative_path = str(file_path)
151+
diff = f"+++ {Path(relative_path)}\n{new_code}"
123152

153+
# Create and validate the modified code file dictionary
124154
modified_code_file = dict(
125-
path=uri,
155+
path=str(file_path),
126156
start_line=start_line,
127157
end_line=end_line,
128158
diff=diff,
129159
**extracted_response
130160
)
161+
162+
# Ensure all required fields are present with correct types
163+
if not isinstance(modified_code_file["path"], str):
164+
raise TypeError(f"path must be str, got {type(modified_code_file['path'])}")
165+
if not isinstance(modified_code_file["start_line"], (int, type(None))):
166+
raise TypeError(f"start_line must be int or None, got {type(modified_code_file['start_line'])}")
167+
if not isinstance(modified_code_file["end_line"], (int, type(None))):
168+
raise TypeError(f"end_line must be int or None, got {type(modified_code_file['end_line'])}")
169+
if not isinstance(modified_code_file["diff"], str):
170+
raise TypeError(f"diff must be str, got {type(modified_code_file['diff'])}")
131171
modified_code_files.append(modified_code_file)
132172

133173
return dict(modified_code_files=modified_code_files)

patchwork/steps/ModifyCode/typed.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,21 @@ class ModifyCodeOutputs(TypedDict):
1111

1212

1313
class ModifiedCodeFile(TypedDict, total=False):
14+
"""Represents a file that has been modified by the ModifyCode step.
15+
16+
Attributes:
17+
path: The path to the modified file
18+
start_line: The starting line number of the modification (1-based)
19+
end_line: The ending line number of the modification (1-based)
20+
diff: A unified diff string showing the changes made to the file.
21+
Generated using Python's difflib with proper file handling
22+
and cleanup of temporary files.
23+
24+
Note:
25+
The diff field is generated securely using temporary files that
26+
are automatically cleaned up. File paths are properly sanitized
27+
to prevent potential security issues.
28+
"""
1429
path: str
1530
start_line: int
1631
end_line: int

0 commit comments

Comments
 (0)