Skip to content

Commit

Permalink
download models (clip = minilm) for embedding service + quickstart wi…
Browse files Browse the repository at this point in the history
…th data ingestion with -vector option
  • Loading branch information
atarora authored and epugh committed Mar 15, 2023
1 parent 638893f commit 13ac31a
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 5 deletions.
6 changes: 6 additions & 0 deletions embeddings/app/clip/loadModel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
## This script downloads the CLIP model for the embeddings service.

from sentence_transformers import SentenceTransformer

# Name of the pretrained checkpoint to load, and the location the saved copy
# is written to (a relative path, so it resolves against the current working
# directory).
MODEL_NAME = 'clip-ViT-L-14'
MODEL_DIR = 'embeddings/app/clip-ViT-L-14.model'

# Instantiate the model by name, then persist it to MODEL_DIR.
model = SentenceTransformer(MODEL_NAME)
model.save(MODEL_DIR)
6 changes: 6 additions & 0 deletions embeddings/app/minilm/loadModel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
## This script downloads the MiniLM model for the embeddings service.

from sentence_transformers import SentenceTransformer

# Name of the pretrained checkpoint to load, and the location the saved copy
# is written to (a relative path, so it resolves against the current working
# directory).
MODEL_NAME = 'all-MiniLM-L12-v2'
MODEL_DIR = 'embeddings/app/all-MiniLM-L12-v2.model'

# Instantiate the model by name, then persist it to MODEL_DIR.
model = SentenceTransformer(MODEL_NAME)
model.save(MODEL_DIR)
23 changes: 18 additions & 5 deletions quickstart.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ observability=false
shutdown=false
offline_lab=false
local_deploy=true
vector_search=false

while [ ! $# -eq 0 ]
do
Expand All @@ -32,6 +33,10 @@ do
local_deploy=false
log_major "Configuring Chorus for chorus.dev.o19s.com environment"
;;
--with-vector-search | -vector)
vector_search=true
echo -e "${MAJOR}Configuring Chorus with vector search services enabled${RESET}"
;;
--shutdown | -s)
shutdown=true
log_major "Shutting down Chorus"
Expand Down Expand Up @@ -108,14 +113,22 @@ curl --user solr:SolrRocks -X POST http://localhost:8983/api/collections -H 'Con
}
}
'

if [ ! -f ./icecat-products-150k-20200809.tar.gz ]; then
log_major "Downloading the sample product data."
# Populate product data. Exactly one of the two ingestion paths runs,
# selected by the --with-vector-search flag.
#
# The flag holds the literal string "true" or "false", so it must be compared
# as a string: `[ $vector_search ]` only checks that the string is non-empty
# and is therefore true for both values (and `[ ! $vector_search ]` is false
# for both).
if [ "$vector_search" = true ]; then
  # Populating product data for vector search
  echo -e "${MAJOR}Populating products for vector search, please give it a few minutes!${RESET}"
  ./index-vectors.sh
else
  # Populating product data for non-vector search
  # Only the download is skipped when the archive is already present;
  # indexing still has to run, otherwise a re-run with a cached archive would
  # never load the products.
  if [ ! -f ./icecat-products-150k-20200809.tar.gz ]; then
    echo -e "${MAJOR}Downloading the sample product data.${RESET}"
    curl --progress-bar -o icecat-products-150k-20200809.tar.gz -k https://querqy.org/datasets/icecat/icecat-products-150k-20200809.tar.gz
  fi

  echo -e "${MAJOR}Populating products, please give it a few minutes!${RESET}"
  tar xzf icecat-products-150k-20200809.tar.gz --to-stdout | curl --user solr:SolrRocks 'http://localhost:8983/solr/ecommerce/update?commit=true' --data-binary @- -H 'Content-type:application/json'
fi
log_major "Populating products, please give it a few minutes!"
tar xzf icecat-products-150k-20200809.tar.gz --to-stdout | curl --user solr:SolrRocks 'http://localhost:8983/solr/ecommerce/update?commit=true' --data-binary @- -H 'Content-type:application/json'

# Embedding service for vector search
echo -e "${MAJOR}Preparing embeddings rewriter.${RESET}"

curl --user solr:SolrRocks -X POST http://localhost:8983/solr/ecommerce/querqy/rewriter/embtxt?action=save -H 'Content-type:application/json' -d '{
Expand Down

0 comments on commit 13ac31a

Please sign in to comment.