openxla · trevor-m · Oct 17, 2024
diff --git a/xla/python/xla_client_test.py b/xla/python/xla_client_test.py
@@ -3378,6 +3378,8 @@ def testCompileOptionsSerialization(self):
       deb_opt.xla_gpu_kernel_cache_file = "/foo/bar"
       deb_opt.xla_gpu_enable_llvm_module_compilation_parallelism = True
       deb_opt.xla_gpu_per_fusion_autotune_cache_dir = "/bar/foo/"
+      deb_opt.xla_gpu_experimental_autotune_cache_mode = AutotuneCacheMode.READ
+
 
       b = options.SerializeAsString()
       restored = xla_client.CompileOptions.ParseFromString(b)
@@ -3398,6 +3400,7 @@ def testCompileOptionsSerialization(self):
           "xla_gpu_kernel_cache_file",
           "xla_gpu_enable_llvm_module_compilation_parallelism",
           "xla_gpu_per_fusion_autotune_cache_dir",
+          "xla_gpu_experimental_autotune_cache_mode",
       ):
         self.assertEqual(
             getattr(options.executable_build_options.debug_options, name),

diff --git a/xla/python/xla_compiler.cc b/xla/python/xla_compiler.cc
@@ -1093,6 +1093,11 @@ void BuildXlaCompilerSubmodule(nb::module_& m) {
       },
       nb::arg("platform"));
 
+  nb::enum_<DebugOptions::AutotuneCacheMode>(m, "AutotuneCacheMode")
+      .value("UNSPECIFIED", DebugOptions::AUTOTUNE_CACHE_MODE_UNSPECIFIED)
+      .value("UPDATE", DebugOptions::AUTOTUNE_CACHE_MODE_UPDATE)
+      .value("READ", DebugOptions::AUTOTUNE_CACHE_MODE_READ);
+
   nb::class_<DebugOptions>(m, "DebugOptions")
       .def("__repr__", &DebugOptions::DebugString)
       .def_prop_rw("xla_backend_optimization_level",
@@ -1239,7 +1244,10 @@ void BuildXlaCompilerSubmodule(nb::module_& m) {
                    &DebugOptions::xla_gpu_per_fusion_autotune_cache_dir,
                    [](DebugOptions* self, std::string value) {
                      self->set_xla_gpu_per_fusion_autotune_cache_dir(value);
-                   });
+                   })
+      .def_prop_rw("xla_gpu_experimental_autotune_cache_mode",
+                   &DebugOptions::xla_gpu_experimental_autotune_cache_mode,
+                   &DebugOptions::set_xla_gpu_experimental_autotune_cache_mode);
 
   nb::class_<ExecutableBuildOptions>(m, "ExecutableBuildOptions")
       .def(nb::init<>())

diff --git a/xla/python/xla_extension/__init__.pyi b/xla/python/xla_extension/__init__.pyi
@@ -282,6 +282,11 @@ def register_custom_call_partitioner(
 ) -> None: ...
 def encode_inspect_sharding_callback(handler: Any) -> bytes: ...
 
+class AutotuneCacheMode(enum.IntEnum):  # Bound in xla_compiler.cc.
+  UNSPECIFIED: AutotuneCacheMode  # AUTOTUNE_CACHE_MODE_UNSPECIFIED
+  UPDATE: AutotuneCacheMode  # AUTOTUNE_CACHE_MODE_UPDATE
+  READ: AutotuneCacheMode  # AUTOTUNE_CACHE_MODE_READ
+
 class DebugOptions:
   def __repr__(self) -> str: ...
   xla_cpu_enable_fast_math: bool
@@ -322,6 +327,7 @@ class DebugOptions:
   xla_gpu_kernel_cache_file: str
   xla_gpu_enable_llvm_module_compilation_parallelism: bool
   xla_gpu_per_fusion_autotune_cache_dir: str
+  xla_gpu_experimental_autotune_cache_mode: AutotuneCacheMode
 
 class CompiledMemoryStats:
   generated_code_size_in_bytes: int