feat: save and restore a context sequence state (#460)
* feat: save and restore a context sequence state
* feat: stream function call parameters
* feat: configure Hugging Face remote endpoint for resolving URIs
* feat: Qwen 3 support
* feat(`QwenChatWrapper`): support discouraging the generation of thoughts
* feat(`getLlama`): `dryRun` option
* feat: `getLlamaGpuTypes` function
* fix: adapt to breaking `llama.cpp` changes
* fix: capture multi-token segment separators
* fix: race condition when reading extremely long gguf metadata
* fix: adapt memory estimation to new added model architectures
* fix: skip binary testing on certain problematic conditions
* fix: improve GPU backend loading error description
* fix: update gguf types
* fix: performance improvements
* docs: update the awesome list
* docs: solutions to more CUDA issues
Changes in `docs/guide/CUDA.md` (+27 lines), in the section ending with:

```cmd
set NODE_LLAMA_CPP_CMAKE_OPTION_CMAKE_GENERATOR_TOOLSET=%CUDA_PATH%
```
Then run the build command again to check whether setting the `CMAKE_GENERATOR_TOOLSET` cmake option fixed the issue.
### Fix the `forward compatibility was attempted on non supported HW` Error {#fix-cuda-forward-compatibility}
This error usually happens when the CUDA version installed on your machine is older than the CUDA version the prebuilt binaries supplied by `node-llama-cpp` were built with.
To resolve this issue, you can either [update your CUDA installation](https://developer.nvidia.com/cuda-downloads) to the latest version (recommended) or [build `node-llama-cpp` on your machine](#building) against the CUDA version you have installed.
### Fix the `Binary GPU type mismatch. Expected: cuda, got: false` Error {#fix-cuda-gpu-type-mismatch}
This error usually happens when you have multiple conflicting CUDA versions installed on your machine.
To fix it, uninstall any older CUDA versions and then restart your machine (restarting is important).
:::: details Check which CUDA libraries are picked up by `node-llama-cpp`'s prebuilt binaries on your machine