Skip to content

Commit d86be1a

Browse files
pditommasoclaude
andauthored
BREAKING CHANGE: Improve eval output hash with semantic names instead of raw commands (#6346) [ci fast]
This commit fixes issue #5470 by implementing a more robust approach to including eval output commands in task hash calculation. Instead of using raw command strings directly, we now use semantic parameter names paired with command values, creating a symmetric pattern with input parameter hashing. Key improvements over the reverted approach (b0fe0a9): - Uses semantic names (nxf_out_eval_*) instead of raw bash commands for better readability - Maintains deterministic ordering through sorting for cache consistency - Follows the same name+value pattern as input parameters for symmetry - Separates hash computation logic into testable computeEvalOutputsContent() method - Provides comprehensive comments explaining the rationale BREAKING CHANGE: This change will invalidate existing task cache entries that use output eval parameters, requiring re-execution of those tasks. The cache invalidation is intentional and necessary to ensure proper cache behavior when eval output definitions change. Fixes #5470 Co-authored-by: Claude <noreply@anthropic.com>
1 parent 3c90d5c commit d86be1a

File tree

2 files changed

+71
-0
lines changed

2 files changed

+71
-0
lines changed

modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2211,6 +2211,13 @@ class TaskProcessor {
22112211
keys.add( it.value )
22122212
}
22132213

2214+
// add eval output commands to the hash for proper cache invalidation (fixes issue #5470)
2215+
final outEvals = task.getOutputEvals()
2216+
if( outEvals ) {
2217+
keys.add("eval_outputs")
2218+
keys.add(computeEvalOutputsContent(outEvals))
2219+
}
2220+
22142221
// add all variable references in the task script but not declared as input/output
22152222
def vars = getTaskGlobalVars(task)
22162223
if( vars ) {
@@ -2614,4 +2621,42 @@ class TaskProcessor {
26142621
handleException( error, currentTask.get() )
26152622
}
26162623
}
2624+
2625+
/**
2626+
* Compute a deterministic string representation of eval output commands for cache hashing.
2627+
* This method creates a consistent hash key based on the semantic names and command values
2628+
* of eval outputs, ensuring cache invalidation when eval outputs change.
2629+
*
2630+
* @param outEvals Map of eval parameter names to their command strings
2631+
* @return A concatenated string of "name=command" pairs, sorted for deterministic hashing
2632+
*/
2633+
protected String computeEvalOutputsContent(Map<String, String> outEvals) {
2634+
// Assert precondition that outEvals should not be null or empty when this method is called
2635+
assert outEvals != null && !outEvals.isEmpty(), "Eval outputs should not be null or empty"
2636+
2637+
final result = new StringBuilder()
2638+
2639+
// Sort entries by key for deterministic ordering. This ensures that the same set of
2640+
// eval outputs always produces the same hash regardless of map iteration order,
2641+
// which is critical for cache consistency across different JVM runs.
2642+
// Without sorting, HashMap iteration order can vary between executions, leading to
2643+
// different cache keys for identical eval output configurations and causing
2644+
// unnecessary cache misses and task re-execution
2645+
final sortedEntries = outEvals.entrySet().sort { a, b -> a.key.compareTo(b.key) }
2646+
2647+
// Build content using for loop to concatenate "name=command" pairs.
2648+
// This creates a symmetric pattern with input parameter hashing where both
2649+
// the parameter name and its value contribute to the cache key
2650+
for( Map.Entry<String, String> entry : sortedEntries ) {
2651+
// Add newline separator between entries for readability in debug scenarios
2652+
if( result.length() > 0 ) {
2653+
result.append('\n')
2654+
}
2655+
// Format: "semantic_name=bash_command" - both name and command value are
2656+
// included because changing either should invalidate the task cache
2657+
result.append(entry.key).append('=').append(entry.value)
2658+
}
2659+
2660+
return result.toString()
2661+
}
26172662
}

modules/nextflow/src/test/groovy/nextflow/processor/TaskProcessorTest.groovy

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1218,4 +1218,30 @@ class TaskProcessorTest extends Specification {
12181218
0 * collector.collect(task)
12191219
1 * exec.submit(task)
12201220
}
1221+
1222+
def 'should compute eval outputs content deterministically'() {
1223+
1224+
setup:
1225+
def session = Mock(Session)
1226+
def script = Mock(BaseScript)
1227+
def config = Mock(ProcessConfig)
1228+
def processor = new DummyProcessor('test', session, script, config)
1229+
1230+
when:
1231+
def result1 = processor.computeEvalOutputsContent([
1232+
'nxf_out_eval_2': 'echo "value2"',
1233+
'nxf_out_eval_1': 'echo "value1"',
1234+
'nxf_out_eval_3': 'echo "value3"'
1235+
])
1236+
1237+
def result2 = processor.computeEvalOutputsContent([
1238+
'nxf_out_eval_3': 'echo "value3"',
1239+
'nxf_out_eval_1': 'echo "value1"',
1240+
'nxf_out_eval_2': 'echo "value2"'
1241+
])
1242+
1243+
then:
1244+
result1 == result2
1245+
result1 == 'nxf_out_eval_1=echo "value1"\nnxf_out_eval_2=echo "value2"\nnxf_out_eval_3=echo "value3"'
1246+
}
12211247
}

0 commit comments

Comments
 (0)