jehna · jehna · Aug 24, 2024 · Aug 24, 2024 · Aug 24, 2024 · 0xdevalias
diff --git a/src/commands/local.ts b/src/commands/local.ts
@@ -25,9 +25,11 @@ export const local = cli()
       verbose.enabled = true;
     }
 
+    verbose.log("Starting local inference with options: ", opts);
+
     const prompt = await llama({
       model: opts.model,
-      disableGPU: opts.disableGPU,
+      disableGpu: opts.disableGpu,
       seed: opts.seed ? parseInt(opts.seed) : undefined
     });
     await unminify(filename, opts.outputDir, [

diff --git a/src/plugins/local-llm-rename/llama.ts b/src/plugins/local-llm-rename/llama.ts
@@ -1,6 +1,12 @@
-import { getLlama, LlamaChatSession, LlamaGrammar } from "node-llama-cpp";
+import {
+  getLlama,
+  LlamaChatSession,
+  LlamaGrammar,
+  LlamaModelOptions
+} from "node-llama-cpp";
 import { Gbnf } from "./gbnf.js";
 import { getModelPath, getModelWrapper } from "../../local-models.js";
+import { verbose } from "../../verbose.js";
 
 export type Prompt = (
   systemPrompt: string,
@@ -13,13 +19,16 @@ const IS_CI = process.env["CI"] === "true";
 export async function llama(opts: {
   seed?: number;
   model: string;
-  disableGPU?: boolean;
+  disableGpu?: boolean;
 }): Promise<Prompt> {
-  const llama = await getLlama();
-  const model = await llama.loadModel({
+  const disableGpu = opts.disableGpu ?? IS_CI;
+  const llama = await getLlama({ gpu: disableGpu ? false : "auto" });
+  const modelOpts: LlamaModelOptions = {
     modelPath: getModelPath(opts?.model),
-    gpuLayers: (opts?.disableGPU ?? IS_CI) ? 0 : undefined
-  });
+    gpuLayers: disableGpu ? 0 : undefined
+  };
+  verbose.log("Loading model with options", modelOpts);
+  const model = await llama.loadModel(modelOpts);
 
   const context = await model.createContext({ seed: opts?.seed });