JIT: initial support for reinforcement learning of CSE heuristic (#96880)

Adds special CSE heuristic modes to the JIT to support learning a good CSE heuristic via Policy Gradient, a form of reinforcement learning. The learning must be orchestrated by an external process, but the JIT does all of the actual gradient computations. The orchestration program will be added to jitutils. The overall process also relies on SPMI and the goal is to minimize perf score.

Introduce two new CSE heuristic policies:
* Replay: simply perform indicated sequence of CSEs
* RL: used for the Policy Gradient, with 3 modes:
  * Stochastic: based on current parameters but allows random variation
  * Greedy: based on current parameters, deterministic
  * Update: compute updated parameters per Policy Gradient

Also rework the Random policy to be a bit more random; it now alters both the CSEs performed and the order they are performed in.

Add the ability to have jit config options that specify sequences of ints or doubles.

Add the ability to just dump metric info for a jitted method, and add more details (perhaps considerably more) for CSEs. This is all still simple text format.

Also factor out a common check for "non-viable" candidates -- these are CSE candidates that won't actually be CSEs. This leads to some minor diffs as the check is now slightly different for CSEs with zero uses and/or zero weighted uses.

Contributes to #92915.
dotnet · Jan 29, 2024 · 8a0b3f3 · 8a0b3f3
1 parent fb953cc
commit 8a0b3f3
Show file tree

Hide file tree

Showing 9 changed files with 1,833 additions and 217 deletions.
diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp
@@ -28,6 +28,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #endif
 
 #include "patchpointinfo.h"
+#include "optcse.h" // for cse metrics
 
 /*****************************************************************************/
 
@@ -2024,16 +2025,36 @@ void CodeGen::genEmitMachineCode()
     }
 
 #ifdef DEBUG
-    if (compiler->opts.disAsm || verbose)
+    const bool dspMetrics     = compiler->opts.dspMetrics;
+    const bool dspSummary     = compiler->opts.disAsm || verbose;
+    const bool dspMetricsOnly = dspMetrics && !dspSummary;
+
+    if (dspSummary || dspMetrics)
     {
-        printf("\n; Total bytes of code %d, prolog size %d, PerfScore %.2f, instruction count %d, allocated bytes for "
+        if (!dspMetricsOnly)
+        {
+            printf("\n");
+        }
+
+        printf("Total bytes of code %d, prolog size %d, PerfScore %.2f, instruction count %d, allocated bytes for "
                "code %d",
                codeSize, prologSize, compiler->info.compPerfScore, instrCount,
                GetEmitter()->emitTotalHotCodeSize + GetEmitter()->emitTotalColdCodeSize);
 
-        if (JitConfig.JitMetrics() > 0)
+        if (dspMetrics)
         {
-            printf(", num cse %d", compiler->optCSEcount);
+            printf(", num cse %d num cand %d", compiler->optCSEcount, compiler->optCSECandidateCount);
+
+            CSE_HeuristicCommon* const cseHeuristic = compiler->optGetCSEheuristic();
+            if (cseHeuristic != nullptr)
+            {
+                cseHeuristic->DumpMetrics();
+            }
+
+            if (compiler->info.compMethodSuperPMIIndex >= 0)
+            {
+                printf(" spmi index %d", compiler->info.compMethodSuperPMIIndex);
+            }
         }
 
 #if TRACK_LSRA_STATS
@@ -2046,7 +2067,10 @@ void CodeGen::genEmitMachineCode()
         printf(" (MethodHash=%08x) for method %s (%s)\n", compiler->info.compMethodHash(), compiler->info.compFullName,
                compiler->compGetTieringName(true));
 
-        printf("; ============================================================\n\n");
+        if (!dspMetricsOnly)
+        {
+            printf("; ============================================================\n\n");
+        }
         printf(""); // in our logic this causes a flush
     }
 

diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
@@ -2862,6 +2862,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
     opts.dspEHTable      = false;
     opts.dspDebugInfo    = false;
     opts.dspGCtbls       = false;
+    opts.dspMetrics      = false;
     opts.disAsm2         = false;
     opts.dspUnwind       = false;
     opts.compLongAddress = false;
@@ -2951,6 +2952,8 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
         {
             opts.optRepeat = true;
         }
+
+        opts.dspMetrics = (JitConfig.JitMetrics() != 0);
     }
 
     if (verboseDump)

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
@@ -2482,6 +2482,8 @@ class Compiler
     friend class CSE_DataFlow;
     friend class CSE_HeuristicCommon;
     friend class CSE_HeuristicRandom;
+    friend class CSE_HeuristicReplay;
+    friend class CSE_HeuristicRL;
     friend class CSE_Heuristic;
     friend class CodeGenInterface;
     friend class CodeGen;
@@ -9782,6 +9784,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
         bool compLongAddress;          // Force using large pseudo instructions for long address
                                        // (IF_LARGEJMP/IF_LARGEADR/IF_LARGLDC)
         bool dspGCtbls;                // Display the GC tables
+        bool dspMetrics;               // Display metrics
 #endif
 
 // Default numbers used to perform loop alignment. All the numbers are chosen

diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h
@@ -380,8 +380,8 @@ CONFIG_INTEGER(JitConstCSE, W("JitConstCSE"), 0)
 // Allow fine-grained controls of CSEs done in a particular method
 //
 // Specify method that will respond to the CSEMask.
-// 0 means feature disabled and all methods run CSE normally.
-CONFIG_INTEGER(JitCSEHash, W("JitCSEHash"), 0)
+// -1 means feature disabled and all methods run CSE normally.
+CONFIG_INTEGER(JitCSEHash, W("JitCSEHash"), -1)
 
 // Bitmask of allowed CSEs in methods specified by JitCSEHash.
 // These bits control the "cse attempts" made by normal jitting,
@@ -401,10 +401,40 @@ CONFIG_INTEGER(JitCSEMask, W("JitCSEMask"), 0)
 // Enable metric output in jit disasm & elsewhere
 CONFIG_INTEGER(JitMetrics, W("JitMetrics"), 0)
 
-// When nonzero, choose CSE candidates randomly, with probability
+// When nonzero, choose CSE candidates randomly, with hash salt
 // specified by the (decimal) value of the config
 CONFIG_INTEGER(JitRandomCSE, W("JitRandomCSE"), 0)
 
+// When set, specifies the exact CSEs to perform
+// as a sequence of CSE candidate numbers
+CONFIG_STRING(JitReplayCSE, W("JitReplayCSE"))
+
+// When set, specifies the sequence of rewards from the CSE replay.
+// There should be one reward per step in the sequence.
+CONFIG_STRING(JitReplayCSEReward, W("JitReplayCSEReward"))
+
+// When set, specifies the initial parameter string for
+// a reinforcement-learning based CSE heuristic.
+//
+// Note you can also set JitReplayCSE and JitReplayCSEReward
+// along with this, in which case we are asking for a policy
+// evaluation/update based on the provided sequence.
+CONFIG_STRING(JitRLCSE, W("JitRLCSE"))
+
+// When set, specifies the alpha value (step size) to
+// use in learning.
+CONFIG_STRING(JitRLCSEAlpha, W("JitRLCSEAlpha"))
+
+// If nonzero, dump out details of policy evaluation and
+// gradient updates
+CONFIG_INTEGER(JitRLCSEVerbose, W("JitRLCSEVerbose"), 0)
+
+// If nonzero, dump candidate feature values
+CONFIG_INTEGER(JitRLCSECandidateFeatures, W("JitRLCSECandidateFeatures"), 0)
+
+// If nonzero, use the greedy policy with current parameters.
+CONFIG_INTEGER(JitRLCSEGreedy, W("JitRLCSEGreedy"), 0)
+
 #endif
 
 ///