Skip to content

Commit a441dfd

Browse files
committed
stdout comparison
1 parent 5851ec2 commit a441dfd

File tree

4 files changed

+44
-14
lines changed

4 files changed

+44
-14
lines changed

codeflash/verification/equivalence.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,27 @@
1+
import re
12
import sys
23

34
from codeflash.cli_cmds.console import logger
45
from codeflash.verification.comparator import comparator
56
from codeflash.verification.test_results import TestResults, TestType, VerificationType
67

78
INCREASED_RECURSION_LIMIT = 5000
9+
# Progress marker such as ". [100%]" or ". [ 50%]": a dot, whitespace, then a
# bracketed percentage. The percentage may be left-padded with spaces, so
# optional whitespace is allowed after the opening bracket.
percentage_pattern = re.compile(r"\.\s+\[\s*\d+%\]")
# Summary fragment such as "3 passed in 0.12s".
passed_pattern = re.compile(r"\d+\s+passed\s+in\s+\d+\.\d+s")
# Any line containing one of these substrings is dropped before comparison.
not_allowed = {"test", "codeflash"}


def cleanup_stdout(stdout: "str | None") -> str:
    """Return *stdout* with runner-generated lines removed, ready for comparison.

    A line is dropped when it contains any substring in ``not_allowed`` or
    matches ``percentage_pattern`` or ``passed_pattern``. The surviving lines
    are joined with newlines and a single trailing newline is appended.

    Args:
        stdout: Captured standard output, or ``None`` when nothing was
            captured. ``None`` is treated exactly like an empty string so
            that results without recorded output do not raise.

    Returns:
        The filtered text, always terminated by ``"\\n"`` (so empty or missing
        input yields ``"\\n"``).
    """
    if not stdout:
        # Matches what the join below produces for an empty string.
        return "\n"

    kept_lines = [
        line
        for line in stdout.splitlines()
        if not any(word in line for word in not_allowed)
        and not percentage_pattern.search(line)
        and not passed_pattern.search(line)
    ]
    return "\n".join(kept_lines) + "\n"
825

926

1027
def compare_test_results(original_results: TestResults, candidate_results: TestResults) -> bool:
@@ -22,6 +39,7 @@ def compare_test_results(original_results: TestResults, candidate_results: TestR
2239
for test_id in test_ids_superset:
2340
original_test_result = original_results.get_by_unique_invocation_loop_id(test_id)
2441
cdd_test_result = candidate_results.get_by_unique_invocation_loop_id(test_id)
42+
2543
if cdd_test_result is not None and original_test_result is None:
2644
continue
2745
# If helper function instance_state verification is not present, that's ok. continue
@@ -66,6 +84,10 @@ def compare_test_results(original_results: TestResults, candidate_results: TestR
6684
):
6785
are_equal = False
6886
break
87+
if not comparator(cleanup_stdout(original_test_result.stdout), cleanup_stdout(cdd_test_result.stdout)):
88+
are_equal = False
89+
break
90+
6991
sys.setrecursionlimit(original_recursion_limit)
7092
if did_all_timeout:
7193
return False

codeflash/verification/parse_test_output.py

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
def parse_func(file_path: Path) -> XMLParser:
    """Parse *file_path* using an ``XMLParser`` created with ``huge_tree=True``.

    The parser instance is built per call and handed to ``parse`` together
    with the path; whatever ``parse`` returns is returned unchanged.
    """
    return parse(file_path, XMLParser(huge_tree=True))  # type: ignore # noqa: PGH003, S320
4343

4444

4545
def parse_test_return_values_bin(file_location: Path, test_files: TestFiles, test_config: TestConfig) -> TestResults:
@@ -259,6 +259,10 @@ def parse_test_xml(
259259
message = testcase.result[0].message.lower()
260260
if "timed out" in message:
261261
timed_out = True
262+
263+
stdout = run_result.stdout if run_result and run_result.stdout else None
264+
stderr = run_result.stderr if run_result and run_result.stderr else None
265+
262266
matches = re.findall(r"!######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######!", testcase.system_out or "")
263267
if not matches or not len(matches):
264268
test_results.add(
@@ -278,9 +282,10 @@ def parse_test_xml(
278282
test_type=test_type,
279283
return_value=None,
280284
timed_out=timed_out,
285+
stdout=stdout,
286+
stderr=stderr,
281287
)
282288
)
283-
284289
else:
285290
for match in matches:
286291
split_val = match[5].split(":")
@@ -306,21 +311,17 @@ def parse_test_xml(
306311
test_type=test_type,
307312
return_value=None,
308313
timed_out=timed_out,
314+
stdout=stdout,
315+
stderr=stderr,
309316
)
310317
)
311318

312319
if not test_results:
313320
logger.info(
314321
f"Tests '{[test_file.original_file_path for test_file in test_files.test_files]}' failed to run, skipping"
315322
)
316-
if run_result is not None:
317-
stdout, stderr = "", ""
318-
try:
319-
stdout = run_result.stdout.decode()
320-
stderr = run_result.stderr.decode()
321-
except AttributeError:
322-
stdout = run_result.stderr
323-
logger.debug(f"Test log - STDOUT : {stdout} \n STDERR : {stderr}")
323+
# run_result can be None (the stdout/stderr reads earlier in this function
# guard against that), so only dump the captured output when it exists.
if run_result is not None:
    stdout, stderr = run_result.stdout, run_result.stderr
    logger.debug(f"Test log - STDOUT : {stdout} \n STDERR : {stderr}")
324325
return test_results
325326

326327

@@ -335,7 +336,11 @@ def merge_test_results(
335336
# This is done to match the right iteration_id which might not be available in the xml
336337
for result in xml_test_results:
337338
if test_framework == "pytest":
338-
if result.id.test_function_name.endswith("]") and "[" in result.id.test_function_name: # parameterized test
339+
if (
340+
result.id.test_function_name
341+
and result.id.test_function_name.endswith("]")
342+
and "[" in result.id.test_function_name
343+
): # parameterized test
339344
test_function_name = result.id.test_function_name[: result.id.test_function_name.index("[")]
340345
else:
341346
test_function_name = result.id.test_function_name

codeflash/verification/test_results.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,10 @@ class FunctionTestInvocation:
8585
test_framework: str # unittest or pytest
8686
test_type: TestType
8787
return_value: Optional[object] # The return value of the function invocation
88-
timed_out: Optional[bool]
89-
verification_type: Optional[str] = VerificationType.FUNCTION_CALL
88+
timed_out: Optional[bool]  # must stay `timed_out`: invocations are constructed with timed_out=...
89+
verification_type: Optional[VerificationType] = VerificationType.FUNCTION_CALL
90+
stdout: Optional[str] = None
91+
stderr: Optional[str] = None
9092

9193
@property
9294
def unique_invocation_loop_id(self) -> str:

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,8 @@ ignore = [
174174
"TD002",
175175
"TD003",
176176
"TD004",
177-
"PLR2004"
177+
"PLR2004",
178+
"UP007" # remove once we drop 3.9 support.
178179
]
179180

180181
[tool.ruff.lint.flake8-type-checking]

0 commit comments

Comments
 (0)