Skip to content

Commit

Permalink
consider .ans and .out for deduplication of testcases
Browse files Browse the repository at this point in the history
  • Loading branch information
mzuenni committed Feb 7, 2024
1 parent e185db8 commit ea268d3
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 23 deletions.
17 changes: 12 additions & 5 deletions bin/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,13 @@
FILE_NAME_REGEX = '[a-zA-Z0-9][a-zA-Z0-9_.-]*[a-zA-Z0-9]'
COMPILED_FILE_NAME_REGEX = re.compile(FILE_NAME_REGEX)

KNOWN_DATA_EXTENSIONS = [
KNOWN_TESTCASE_EXTENSIONS = [
'.in',
'.ans',
'.out',
]

KNOWN_DATA_EXTENSIONS = KNOWN_TESTCASE_EXTENSIONS + [
'.interaction',
'.hint',
'.desc',
Expand All @@ -46,15 +49,19 @@
'.gif',
]

KNOWN_TEXT_DATA_EXTENSIONS = [
'.in',
'.ans',
'.out',
KNOWN_TEXT_DATA_EXTENSIONS = KNOWN_TESTCASE_EXTENSIONS + [
'.interaction',
'.hint',
'.desc',
]

# Top-level data directories that hold invalid test cases.
# NOTE: the order of the entries is significant. bin/generate.py slices this
# list when selecting the files that are hashed for test case deduplication:
# roots in INVALID_CASE_DIRECTORIES[1:] keep `.ans`, and roots in
# INVALID_CASE_DIRECTORIES[2:] keep `.out`. 'invalid_inputs' must therefore
# stay first and 'invalid_answers' second.
INVALID_CASE_DIRECTORIES = [
'invalid_inputs',
'invalid_answers',
'invalid_outputs',
'bad',
]


SEED_DEPENDENCY_RETRIES = 10

Expand Down
57 changes: 39 additions & 18 deletions bin/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@ def __init__(self, problem, generator_config, key, name: str, yaml, parent, list

# Hash of testcase for caching.
self.hash = None
hashes = {}

# Filled during generate(), since `self.config.solution` will only be set later for the default solution.
self.cache_data = {}

Expand All @@ -438,6 +438,14 @@ def __init__(self, problem, generator_config, key, name: str, yaml, parent, list
# root in /data
self.root = self.path.parts[0]

# files to consider for hashing
hashes = {}
extensions = config.KNOWN_TESTCASE_EXTENSIONS.copy()
if self.root not in config.INVALID_CASE_DIRECTORIES[1:]:
extensions.remove('.ans')
if self.root not in config.INVALID_CASE_DIRECTORIES[2:]:
extensions.remove('.out')

if yaml is None:
self.inline = True
yaml = dict()
Expand Down Expand Up @@ -491,7 +499,7 @@ def __init__(self, problem, generator_config, key, name: str, yaml, parent, list
if self.copy.is_file():
self.in_is_generated = False
self.rule['copy'] = str(self.copy)
for ext in ['.in', '.ans', '.out']:
for ext in extensions:
if self.copy.with_suffix(ext).is_file():
hashes[ext] = hash_file(self.copy.with_suffix(ext))

Expand All @@ -507,7 +515,7 @@ def __init__(self, problem, generator_config, key, name: str, yaml, parent, list
if '.in' in self.hardcoded:
self.in_is_generated = False
self.rule['in'] = self.hardcoded['.in']
for ext in ['.in', '.ans', '.out']:
for ext in extensions:
if ext in self.hardcoded:
hashes[ext] = hash_string(self.hardcoded[ext])

Expand All @@ -523,17 +531,8 @@ def __init__(self, problem, generator_config, key, name: str, yaml, parent, list
# An error is shown during generate.
return

# remove files that should not be considered for hashing
if '.ans' in hashes and self.root not in ['invalid_answers', 'invalid_outputs']:
hashes.pop('.ans')
if '.out' in hashes and self.root not in ['invalid_outputs']:
hashes.pop('.out')

# build ordered list of hashes we want to consider
self.hash = []
for ext in ['.in', '.ans', '.out']:
if ext in hashes:
self.hash.append(hashes[ext])
self.hash = [hashes[ext] for ext in config.KNOWN_TESTCASE_EXTENSIONS if ext in hashes]

# combine hashes
if len(self.hash) == 1:
Expand Down Expand Up @@ -785,7 +784,29 @@ def move_generated():

def add_testdata_to_cache():
# Store the generated testdata so that duplicate test cases can be detected.
test_hash = hash_file(target_infile)
hashes = {}

# remove files that should not be considered for this testcase
extensions = config.KNOWN_TESTCASE_EXTENSIONS.copy()
if t.root not in config.INVALID_CASE_DIRECTORIES[1:]:
extensions.remove('.ans')
if t.root not in config.INVALID_CASE_DIRECTORIES[2:]:
extensions.remove('.out')

for ext in extensions:
if target_infile.with_suffix(ext).is_file():
hashes[ext] = hash_file(target_infile.with_suffix(ext))

# build ordered list of hashes we want to consider
test_hash = [hashes[ext] for ext in extensions if ext in hashes]

# combine hashes
if len(test_hash) == 1:
test_hash = test_hash[0]
else:
test_hash = combine_hashes(test_hash)

# check for duplicates
if test_hash not in generator_config.generated_testdata:
generator_config.generated_testdata[test_hash] = t
else:
Expand Down Expand Up @@ -823,7 +844,7 @@ def add_testdata_to_cache():

# Step 3: Write hardcoded files.
for ext, contents in t.hardcoded.items():
if contents == '' and t.root in ['bad', 'invalid_inputs', 'invalid_answers', 'invalid_outputs']:
if contents == '' and t.root in config.INVALID_CASE_DIRECTORIES:
bar.error(f'Hardcoded {ext} data must not be empty!')
return
else:
Expand Down Expand Up @@ -871,7 +892,7 @@ def add_testdata_to_cache():

if not generator_up_to_date:
# Generate .ans and .interaction if needed.
if not config.args.no_solution and testcase.root not in ["invalid_inputs", "invalid_answers"]:
if not config.args.no_solution and testcase.root not in config.INVALID_CASE_DIRECTORIES:
if not problem.interactive:
# Generate a .ans if not already generated by earlier steps.
if not testcase.ans_path.is_file():
Expand Down Expand Up @@ -1316,7 +1337,7 @@ def parse(key, name, yaml, parent, listed=True):
nonlocal testcase_id
# Skip unlisted `data/bad` directory: we should not generate .ans files there.
if (
name in ['bad', 'invalid_inputs', 'invalid_answers']
name in config.INVALID_CASE_DIRECTORIES
and parent.path == Path('.')
and listed is False
):
Expand Down Expand Up @@ -1799,7 +1820,7 @@ def clean_testcase(t):
if (
not process_testcase(self.problem, t.path)
or t.listed
or (len(t.path.parts) > 0 and t.path.parts[0] in ['bad', 'invalid_inputs', 'invalid_answers'])
or (len(t.path.parts) > 0 and t.path.parts[0] in config.INVALID_CASE_DIRECTORIES)
):
bar.done()
return
Expand Down

0 comments on commit ea268d3

Please sign in to comment.