Merge pull request #1506 from vespa-engine/kkraune/icl

Add ICL example
vespa-engine · Oct 14, 2024 · 1e5cded · 1e5cded
2 parents 1328b3a + 1132f91
commit 1e5cded
Show file tree

Hide file tree

Showing 9 changed files with 10,586 additions and 0 deletions.
diff --git a/examples/in-context-learning/app/.gitignore b/examples/in-context-learning/app/.gitignore
@@ -0,0 +1,3 @@
+application.zip
+src/main/application/security/
+.idea/
diff --git a/examples/in-context-learning/app/.vespaignore b/examples/in-context-learning/app/.vespaignore
@@ -0,0 +1,6 @@
+# This file excludes unnecessary files from the application package. See
+# https://docs.vespa.ai/en/reference/vespaignore.html for more information.
+.DS_Store
+.gitignore
+README.md
+ext/
diff --git a/examples/in-context-learning/app/README.md b/examples/in-context-learning/app/README.md
@@ -0,0 +1,64 @@
+
+<!-- Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+
+<picture>
+  <source media="(prefers-color-scheme: dark)" srcset="https://vespa.ai/assets/vespa-ai-logo-heather.svg">
+  <source media="(prefers-color-scheme: light)" srcset="https://vespa.ai/assets/vespa-ai-logo-rock.svg">
+  <img alt="#Vespa" width="200" src="https://vespa.ai/assets/vespa-ai-logo-rock.svg" style="margin-bottom: 25px;">
+</picture>
+
+# Vespa sample applications - Categorize using an LLM
+This is a set of scripts and setup instructions accompanying our presentations at:
+* [MLCon](https://mlconference.ai/machine-learning-advanced-development/adaptive-incontext-learning/)
+* [data science connect COLLIDE](https://datasciconnect.com/events/collide/agenda/)
+
+For any questions, please register at the
+[Vespa Slack](https://join.slack.com/t/vespatalk/shared_invite/zt-nq61o73o-Lsun7Fnm5N8uA6UAfIycIg)
+and discuss in the _general_ channel.
+
+
+### Setup
+
+Install [Ollama](https://ollama.com/) and run models like: 
+```shell
+ollama run llama3.1
+```
+
+Use the [quick start](https://docs.vespa.ai/en/vespa-quick-start.html) or
+[Vespa getting started](https://cloud.vespa.ai/en/getting-started) to deploy this - laptop example:
+```shell
+podman run --detach --name vespa --hostname vespa-container \
+  --publish 8080:8080 --publish 19071:19071 \
+  vespaengine/vespa
+
+vespa deploy app --wait 600
+```
+Use _podman_, _docker_ or any other tool to run Vespa on your laptop.
+
+
+### Generate data
+
+[feed_examples.py](feed_examples.py) converts the train data set to vespa feed format -
+feed this to the Vespa instance:
+```shell
+python3 feed_examples.py > samples.jsonl
+vespa feed samples.jsonl
+```
+
+
+### Evaluate data
+
+[categorize_group.py](categorize_group.py) runs through the test set
+and classifies based on examples retrieved from Vespa.
+
+See the `inference` function for how to set up queries and ranking profiles for the different options.
+
+Example script output:
+```
+category	size	relevance	retrieved_label	predicted_label	label_text	text
+3	10	13.75663952228003	get_physical_card	get_physical_card	card_arrival	How do I locate my card?
+0	10	19.146904249529296	card_arrival	card_arrival	card_arrival	I still have not received my new card, I ordered over a week ago.
+```
+
+### Other
+Use the `@timer_decorator` to measure the runtime of the functions.
diff --git a/examples/in-context-learning/app/schemas/train.sd b/examples/in-context-learning/app/schemas/train.sd
@@ -0,0 +1,89 @@
+# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+schema train {
+    # Schema for the labeled train-set examples; the rank profiles below define the retrieval options.
+    document train {
+        # Example text: returned in summaries and indexed for text matching with BM25 enabled.
+        field text type string {
+            indexing: summary | index
+            index: enable-bm25 
+        }
+        # Label of the example: returned in summaries and kept as an attribute.
+        field label type string {
+            indexing: summary | attribute 
+        }
+    }
+    # Fields outside the document: 384-dim embeddings of 'text' produced by the 'arctic' and 'e5' embedders.
+    field doc_embedding_arctic type tensor<float>(x[384]) {
+        indexing: input text | embed arctic | attribute
+        attribute {
+            distance-metric: angular
+        }
+    }
+
+    field doc_embedding_e5 type tensor<float>(x[384]) {
+        indexing: input text | embed e5 | attribute
+        attribute {
+            distance-metric: angular
+        }
+    }
+
+    fieldset default {
+        fields: text
+    }
+    # Lexical ranking: bm25(text) only. NOTE(review): query(user_profile) is declared but unused here - confirm and remove.
+    rank-profile bm25 inherits default {
+        inputs {
+            query(user_profile) tensor<float>(cat{})
+        }
+        first-phase {
+            expression: bm25(text) 
+        }
+    }
+    # Hybrid ranking: e5 closeness plus bm25(text) squashed by normalized(), i.e. 2*atan(val/8)/pi, roughly (-1, 1).
+    rank-profile hybrid_e5_normalized_bm25 inherits default {
+        inputs {
+            query(query_embedding) tensor<float>(x[384])
+        }
+        function normalized(val) {
+            expression: 2*atan(val/8)/(3.14159)
+        }
+        first-phase {
+            expression {
+                closeness(field, doc_embedding_e5) + normalized(bm25(text))
+            }
+        }
+        match-features {
+            closeness(field, doc_embedding_e5)
+            bm25(text)
+        }
+    }
+    # Vector-only ranking: closeness to the e5 document embedding, also exposed as a match feature.
+    rank-profile sim_e5 inherits default {
+        inputs {
+            query(query_embedding) tensor<float>(x[384])
+        }
+        first-phase {
+            expression {
+                closeness(field, doc_embedding_e5)
+            }
+        }
+        match-features {
+            closeness(field, doc_embedding_e5)
+        }
+    }
+    # Vector-only ranking: closeness to the arctic document embedding, also exposed as a match feature.
+    rank-profile sim_arctic inherits default {
+        inputs {
+            query(query_embedding) tensor<float>(x[384])
+        }
+        first-phase {
+            expression {
+                closeness(field, doc_embedding_arctic)
+            }
+        }
+        match-features {
+            closeness(field, doc_embedding_arctic)
+        }
+    }
+}
diff --git a/examples/in-context-learning/app/services.xml b/examples/in-context-learning/app/services.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<!-- Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<services version="1.0" xmlns:deploy="vespa" xmlns:preprocess="properties">
+    <!-- One container cluster (embedders, document API, search) and one content cluster, both on hostalias node1. -->
+
+    <container id="default" version="1.0">
+        <component id="e5" type="hugging-face-embedder">
+            <transformer-model url="https://huggingface.co/intfloat/e5-small-v2/resolve/main/model.onnx"/>
+            <tokenizer-model url="https://huggingface.co/intfloat/e5-small-v2/raw/main/tokenizer.json"/>
+            <!-- E5 prompt instructions: prefixes configured for query text and document text respectively -->
+            <prepend>
+                <query>query:</query>
+                <document>passage:</document>
+            </prepend>
+        </component>
+        <component id="arctic" type="hugging-face-embedder">
+            <transformer-model url="https://huggingface.co/Snowflake/snowflake-arctic-embed-xs/resolve/main/onnx/model_quantized.onnx"/>
+            <tokenizer-model url="https://huggingface.co/Snowflake/snowflake-arctic-embed-xs/raw/main/tokenizer.json"/>
+            <pooling-strategy>cls</pooling-strategy>
+            <prepend>
+                <query>Represent this sentence for searching relevant passages: </query>
+            </prepend>
+        </component>
+        <!-- The embedder ids above ("e5", "arctic") are the names referenced by "embed" in schemas/train.sd. -->
+        <document-api/>
+
+        <search/>
+        <nodes>
+            <node hostalias="node1" />
+        </nodes>
+    </container>
+    <!-- Content cluster holding documents of type "train" in index mode. -->
+    <content id="content" version="1.0">
+        <min-redundancy>2</min-redundancy>
+        <documents>
+            <document type="train" mode="index" />
+        </documents>
+        <nodes>
+            <node hostalias="node1" distribution-key="0" />
+        </nodes>
+    </content>
+
+</services>