diff --git a/bin/config.py b/bin/config.py index 051a409c..28eb893a 100644 --- a/bin/config.py +++ b/bin/config.py @@ -32,10 +32,13 @@ FILE_NAME_REGEX = '[a-zA-Z0-9][a-zA-Z0-9_.-]*[a-zA-Z0-9]' COMPILED_FILE_NAME_REGEX = re.compile(FILE_NAME_REGEX) -KNOWN_DATA_EXTENSIONS = [ +KNOWN_TESTCASE_EXTENSIONS = [ '.in', '.ans', '.out', +] + +KNOWN_DATA_EXTENSIONS = KNOWN_TESTCASE_EXTENSIONS + [ '.interaction', '.hint', '.desc', @@ -46,15 +49,19 @@ '.gif', ] -KNOWN_TEXT_DATA_EXTENSIONS = [ - '.in', - '.ans', - '.out', +KNOWN_TEXT_DATA_EXTENSIONS = KNOWN_TESTCASE_EXTENSIONS + [ '.interaction', '.hint', '.desc', ] +INVALID_CASE_DIRECTORIES = [ + 'invalid_inputs', + 'invalid_answers', + 'invalid_outputs', + 'bad', +] + SEED_DEPENDENCY_RETRIES = 10 diff --git a/bin/generate.py b/bin/generate.py index cac47473..64fe9f14 100644 --- a/bin/generate.py +++ b/bin/generate.py @@ -423,7 +423,7 @@ def __init__(self, problem, generator_config, key, name: str, yaml, parent, list # Hash of testcase for caching. self.hash = None - hashes = {} + # Filled during generate(), since `self.config.solution` will only be set later for the default solution. self.cache_data = {} @@ -438,6 +438,14 @@ def __init__(self, problem, generator_config, key, name: str, yaml, parent, list # root in /data self.root = self.path.parts[0] + # files to consider for hashing + hashes = {} + extensions = config.KNOWN_TESTCASE_EXTENSIONS.copy() + if self.root not in config.INVALID_CASE_DIRECTORIES[1:]: + extensions.remove('.ans') + if self.root not in config.INVALID_CASE_DIRECTORIES[2:]: + extensions.remove('.out') + if yaml is None: self.inline = True yaml = dict() @@ -491,7 +499,7 @@ def __init__(self, problem, generator_config, key, name: str, yaml, parent, list if self.copy.is_file(): self.in_is_generated = False self.rule['copy'] = str(self.copy) - for ext in ['.in', '.ans', '.out']: + for ext in extensions: if self.copy.with_suffix(ext).is_file(): hashes[ext] = hash_file(self.copy.with_suffix(ext)) @@ -507,7 +515,7 @@ def __init__(self, problem, generator_config, key, name: str, yaml, parent, list if '.in' in self.hardcoded: self.in_is_generated = False self.rule['in'] = self.hardcoded['.in'] - for ext in ['.in', '.ans', '.out']: + for ext in extensions: if ext in self.hardcoded: hashes[ext] = hash_string(self.hardcoded[ext]) @@ -523,17 +531,8 @@ def __init__(self, problem, generator_config, key, name: str, yaml, parent, list # An error is shown during generate. return - # remove filed that should not be considered for hashing - if '.ans' in hashes and self.root not in ['invalid_answers', 'invalid_outputs']: - hashes.pop('.ans') - if '.out' in hashes and self.root not in ['invalid_outputs']: - hashes.pop('.out') - # build ordered list of hashes we want to consider - self.hash = [] - for ext in ['.in', '.ans', '.out']: - if ext in hashes: - self.hash.append(hashes[ext]) + self.hash = [hashes[ext] for ext in config.KNOWN_TESTCASE_EXTENSIONS if ext in hashes] # combine hashes if len(self.hash) == 1: @@ -785,7 +784,29 @@ def move_generated(): def add_testdata_to_cache(): # Store the generated testdata for deduplication test cases. - test_hash = hash_file(target_infile) + hashes = {} + + # remove files that should not be considered for this testcase + extensions = config.KNOWN_TESTCASE_EXTENSIONS.copy() + if t.root not in config.INVALID_CASE_DIRECTORIES[1:]: + extensions.remove('.ans') + if t.root not in config.INVALID_CASE_DIRECTORIES[2:]: + extensions.remove('.out') + + for ext in extensions: + if target_infile.with_suffix(ext).is_file(): + hashes[ext] = hash_file(target_infile.with_suffix(ext)) + + # build ordered list of hashes we want to consider + test_hash = [hashes[ext] for ext in extensions if ext in hashes] + + # combine hashes + if len(test_hash) == 1: + test_hash = test_hash[0] + else: + test_hash = combine_hashes(test_hash) + + # check for duplicates if test_hash not in generator_config.generated_testdata: generator_config.generated_testdata[test_hash] = t else: @@ -823,7 +844,7 @@ def add_testdata_to_cache(): # Step 3: Write hardcoded files. for ext, contents in t.hardcoded.items(): - if contents == '' and t.root in ['bad', 'invalid_inputs', 'invalid_answers', 'invalid_outputs']: + if contents == '' and t.root in config.INVALID_CASE_DIRECTORIES: bar.error(f'Hardcoded {ext} data must not be empty!') return else: @@ -871,7 +892,7 @@ def add_testdata_to_cache(): if not generator_up_to_date: # Generate .ans and .interaction if needed. - if not config.args.no_solution and testcase.root not in ["invalid_inputs", "invalid_answers"]: + if not config.args.no_solution and testcase.root not in config.INVALID_CASE_DIRECTORIES: if not problem.interactive: # Generate a .ans if not already generated by earlier steps. if not testcase.ans_path.is_file(): @@ -1316,7 +1337,7 @@ def parse(key, name, yaml, parent, listed=True): nonlocal testcase_id # Skip unlisted `data/bad` directory: we should not generate .ans files there. if ( - name in ['bad', 'invalid_inputs', 'invalid_answers'] + name in config.INVALID_CASE_DIRECTORIES and parent.path == Path('.') and listed is False ): @@ -1799,7 +1820,7 @@ def clean_testcase(t): if ( not process_testcase(self.problem, t.path) or t.listed - or (len(t.path.parts) > 0 and t.path.parts[0] in ['bad', 'invalid_inputs', 'invalid_answers']) + or (len(t.path.parts) > 0 and t.path.parts[0] in config.INVALID_CASE_DIRECTORIES) ): bar.done() return