Skip to content

Test perf only after behavior passes #9

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next commit
stdout comparison
  • Loading branch information
KRRT7 committed Feb 14, 2025
commit a441dfdce8aa8d1d09d49b8e76dcf10da5d7f9f0
22 changes: 22 additions & 0 deletions codeflash/verification/equivalence.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,27 @@
import re
import sys
from typing import Optional

from codeflash.cli_cmds.console import logger
from codeflash.verification.comparator import comparator
from codeflash.verification.test_results import TestResults, TestType, VerificationType

INCREASED_RECURSION_LIMIT = 5000
# Matches test-runner progress markers such as ". [100%]".
percentage_pattern = re.compile(r"\.\s+\[\d+%\]")
# Matches run-summary fragments such as "3 passed in 0.12s".
passed_pattern = re.compile(r"\d+\s+passed\s+in\s+\d+\.\d+s")
# Any stdout line containing one of these substrings is discarded.
not_allowed = {"test", "codeflash"}


def cleanup_stdout(stdout: Optional[str]) -> str:
    """Strip test-runner noise from captured stdout so two runs can be compared.

    A line is dropped when it contains any substring listed in ``not_allowed``
    or when it matches ``percentage_pattern`` or ``passed_pattern``. Note that
    the ``not_allowed`` check is a plain substring test, so a line that merely
    contains e.g. "latest" is dropped as well.

    Args:
        stdout: Captured standard output, or ``None`` when nothing was
            captured for the invocation.

    Returns:
        The surviving lines joined by newlines and terminated by exactly one
        trailing newline. ``None`` and the empty string both yield a single
        newline, so two invocations without output compare as equal.
    """
    # Invocation records default their stdout to None; treat that as "no output"
    # instead of failing on None.splitlines().
    if not stdout:
        return "\n"

    kept_lines = [
        line
        for line in stdout.splitlines()
        if not any(word in line for word in not_allowed)
        and not percentage_pattern.search(line)
        and not passed_pattern.search(line)
    ]
    return "\n".join(kept_lines) + "\n"


def compare_test_results(original_results: TestResults, candidate_results: TestResults) -> bool:
Expand All @@ -22,6 +39,7 @@ def compare_test_results(original_results: TestResults, candidate_results: TestR
for test_id in test_ids_superset:
original_test_result = original_results.get_by_unique_invocation_loop_id(test_id)
cdd_test_result = candidate_results.get_by_unique_invocation_loop_id(test_id)

if cdd_test_result is not None and original_test_result is None:
continue
# If helper function instance_state verification is not present, that's ok. continue
Expand Down Expand Up @@ -66,6 +84,10 @@ def compare_test_results(original_results: TestResults, candidate_results: TestR
):
are_equal = False
break
if not comparator(cleanup_stdout(original_test_result.stdout), cleanup_stdout(cdd_test_result.stdout)):
are_equal = False
break

sys.setrecursionlimit(original_recursion_limit)
if did_all_timeout:
return False
Expand Down
27 changes: 16 additions & 11 deletions codeflash/verification/parse_test_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
def parse_func(file_path: Path) -> XMLParser:
    """Parse the XML file at ``file_path`` with lxml.etree.XMLParser as the backend.

    The parser is created with ``huge_tree=True`` and handed to ``parse``
    together with the path.

    Args:
        file_path: Location of the XML file to parse.

    Returns:
        Whatever ``parse`` returns for the given file.
        NOTE(review): the annotation says ``XMLParser``, but the value comes
        from ``parse``; the ``type: ignore`` below suggests the types do not
        line up - confirm and tighten the annotation.
    """
    xml_parser = XMLParser(huge_tree=True)
    return parse(file_path, xml_parser)  # type: ignore # noqa: PGH003, S320


def parse_test_return_values_bin(file_location: Path, test_files: TestFiles, test_config: TestConfig) -> TestResults:
Expand Down Expand Up @@ -259,6 +259,10 @@ def parse_test_xml(
message = testcase.result[0].message.lower()
if "timed out" in message:
timed_out = True

stdout = run_result.stdout if run_result and run_result.stdout else None
stderr = run_result.stderr if run_result and run_result.stderr else None

matches = re.findall(r"!######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######!", testcase.system_out or "")
if not matches or not len(matches):
test_results.add(
Expand All @@ -278,9 +282,10 @@ def parse_test_xml(
test_type=test_type,
return_value=None,
timed_out=timed_out,
stdout=stdout,
stderr=stderr,
)
)

else:
for match in matches:
split_val = match[5].split(":")
Expand All @@ -306,21 +311,17 @@ def parse_test_xml(
test_type=test_type,
return_value=None,
timed_out=timed_out,
stdout=stdout,
stderr=stderr,
)
)

if not test_results:
logger.info(
f"Tests '{[test_file.original_file_path for test_file in test_files.test_files]}' failed to run, skipping"
)
if run_result is not None:
stdout, stderr = "", ""
try:
stdout = run_result.stdout.decode()
stderr = run_result.stderr.decode()
except AttributeError:
stdout = run_result.stderr
logger.debug(f"Test log - STDOUT : {stdout} \n STDERR : {stderr}")
stdout, stderr = run_result.stdout, run_result.stderr
logger.debug(f"Test log - STDOUT : {stdout} \n STDERR : {stderr}")
return test_results


Expand All @@ -335,7 +336,11 @@ def merge_test_results(
# This is done to match the right iteration_id which might not be available in the xml
for result in xml_test_results:
if test_framework == "pytest":
if result.id.test_function_name.endswith("]") and "[" in result.id.test_function_name: # parameterized test
if (
result.id.test_function_name
and result.id.test_function_name.endswith("]")
and "[" in result.id.test_function_name
): # parameterized test
test_function_name = result.id.test_function_name[: result.id.test_function_name.index("[")]
else:
test_function_name = result.id.test_function_name
Expand Down
6 changes: 4 additions & 2 deletions codeflash/verification/test_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,10 @@ class FunctionTestInvocation:
test_framework: str  # unittest or pytest
test_type: TestType
return_value: Optional[object]  # The return value of the function invocation
# Spelled ``timed_out`` to match the ``timed_out=`` keyword passed where
# invocation records are constructed; ``time_out`` would break those calls.
timed_out: Optional[bool]
verification_type: Optional[VerificationType] = VerificationType.FUNCTION_CALL
stdout: Optional[str] = None  # captured standard output, None when not recorded
stderr: Optional[str] = None  # captured standard error, None when not recorded

@property
def unique_invocation_loop_id(self) -> str:
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,8 @@ ignore = [
"TD002",
"TD003",
"TD004",
"PLR2004"
"PLR2004",
"UP007" # remove once we drop 3.9 support.
]

[tool.ruff.lint.flake8-type-checking]
Expand Down