Update on "Use llm_config instead of args in export_llama functions"

jackzhxng · jackzhxng · commit a9ae56a036c3 · 2025-05-22T18:46:24.000-07:00
Differential Revision: [D75263988](https://our.internmc.facebook.com/intern/diff/D75263988) [ghstack-poisoned]
diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py
@@ -1107,7 +1107,7 @@ def _export_llama(llm_config, args) -> LLMEdgeManager:  # noqa: C901
             use_kv_cache=llm_config.model.use_kv_cache,
             embedding_quantize=llm_config.quantization.embedding_quantize,
             pt2e_quantize=llm_config.quantization.pt2e_quantize,
-            coreml_ios=llm_config.backend.coreml.ios_version,
+            coreml_ios=llm_config.backend.coreml.ios,
             coreml_quantize=llm_config.backend.coreml.quantize,
             coreml_compute_units=llm_config.backend.coreml.compute_units,
             use_qnn_sha=llm_config.backend.qnn.use_sha,
diff --git a/examples/models/llama/tests/test_export_llama_lib.py b/examples/models/llama/tests/test_export_llama_lib.py
@@ -7,6 +7,7 @@
 import unittest
 
 from executorch.devtools.backend_debug import get_delegation_info
+from executorch.examples.models.llama.config.llm_config import LlmConfig
 from executorch.examples.models.llama.export_llama_lib import (
     _export_llama,
     build_args_parser,
@@ -34,13 +35,20 @@ def test_has_expected_ops_and_op_counts(self):
         # we cannot test quantization args in this way
         # since quantization requires promoting meta tensors
         # to device=cpu, which requires real weights.
+
+        llm_config = LlmConfig()
+        llm_config.model.use_sdpa_with_kv_cache = True
+        llm_config.model.use_kv_cache = True
+        llm_config.debug.verbose = True
+
+        # _export_llama still takes legacy args during the transition; mirror them.
         parser = build_args_parser()
         args = parser.parse_args([])
         args.use_sdpa_with_kv_cache = True
         args.use_kv_cache = True
         args.verbose = True
 
-        builder = _export_llama(args)
+        builder = _export_llama(llm_config, args)
         graph_module = builder.edge_manager.exported_program().graph_module
         delegation_info = get_delegation_info(graph_module)