
Commit 3504ab9

query time changes with improved querqy rewriter version, thanks to matthias #151

atarora authored and epugh committed Mar 29, 2023
1 parent df88366 commit 3504ab9
Showing 12 changed files with 42 additions and 22 deletions.
6 changes: 3 additions & 3 deletions data-encoder/ecommerce/vectors/products.py
@@ -13,9 +13,9 @@


# Currently you need to unzip the 4.json.zip file first.
-PATH_PRODUCTS_DATASET = "data-encoder/ecommerce/vectors/data/1.json"
+PATH_PRODUCTS_DATASET = "data-encoder/ecommerce/vectors/data/test.json"
PATH_PRODUCTS_MODEL = "all-MiniLM-L6-v2"
-PATH_PRODUCTS_VECTORS_JSON = "data-encoder/ecommerce/vectors/data/products-vectors-1.json"
+PATH_PRODUCTS_VECTORS_JSON = "data-encoder/ecommerce/vectors/data/products-vectors-test.json"

# Load the CLIP model
device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -71,7 +71,7 @@ def calculate_product_image_vectors(product):
        preprocess_image = preprocess(validated_image).unsqueeze(0).to(device)
        # Encode the image
        with torch.no_grad():
-            image_encoding = model.encode_image(preprocess_image)
+            image_encoding = model.encode_image(preprocess_image)[0]
        #print(image_encoding)
        return image_encoding
    except Exception:
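A note on the [0] added above (my inference, not stated in the commit): openai/CLIP's encode_image works on batches, so an image preprocessed with .unsqueeze(0) comes back as a (1, D) tensor, and indexing with [0] unwraps it to the flat D-dimensional vector a vector index expects. A minimal sketch, assuming ViT-L/14 and a hypothetical example.jpg:

# Sketch, not part of the commit: why the trailing [0] matters.
import torch
import clip
from PIL import Image

device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load('ViT-L/14', device)

# A single image still travels as a batch of one.
image = preprocess(Image.open("example.jpg")).unsqueeze(0).to(device)  # example.jpg is hypothetical
with torch.no_grad():
    batch = model.encode_image(image)  # shape (1, 768) for ViT-L/14
embedding = batch[0]                   # shape (768,): the single vector
print(batch.shape, embedding.shape)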
15 changes: 14 additions & 1 deletion data-encoder/ecommerce/vectors/query_vector.py
@@ -5,4 +5,17 @@
query_text = "Sony Portable Bluetooth Speaker MBS-100 docking speaker 1.0 channels Black Sony"
model = SentenceTransformer(PATH_PRODUCTS_MODEL)
text_emb = model.encode(query_text)
-print(text_emb)
\ No newline at end of file
+print(text_emb)
+
+
+#import torch
+#import torchvision.transforms as transforms
+#import clip
+
+# Load the CLIP model
+#device = "cuda" if torch.cuda.is_available() else "cpu"
+#model, preprocess = clip.load('ViT-L/14', device)
+#qry_text = "mobilephone"
+#qry_encoding = model.encode_text(clip.tokenize(qry_text),normalize_embeddings=True, convert_to_numpy=True)
+#print(qry_encoding)
+#print (qry_encoding.shape)
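One caveat on the commented-out block above: normalize_embeddings and convert_to_numpy are keyword arguments of sentence-transformers' encode(), not of openai/CLIP's encode_text(), which only accepts the token tensor. If the block is ever re-enabled, a sketch of a working variant, doing both steps by hand, could look like this:

import torch
import clip

device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load('ViT-L/14', device)

qry_text = "mobilephone"
with torch.no_grad():
    enc = model.encode_text(clip.tokenize(qry_text).to(device))
enc = enc / enc.norm(dim=-1, keepdim=True)  # L2-normalize by hand
qry_encoding = enc[0].cpu().numpy()         # convert to numpy by hand
print(qry_encoding)
print(qry_encoding.shape)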
2 changes: 1 addition & 1 deletion embeddings/Dockerfile
@@ -7,7 +7,7 @@ COPY ./app /code/app
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Download models from the internet and store in Docker image
-RUN python code/app/clip/loadModel.py
+#RUN python code/app/clip/loadModel.py
RUN python code/app/minilm/loadModel.py

WORKDIR /code/app
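Presumably (an inference from the diff, not stated in the commit), commenting out this RUN step means the ViT-L/14 weights are no longer baked into the Docker image at build time; the clip.load call in clipL14/model.py would then download them on first use inside the running container.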
6 changes: 0 additions & 6 deletions embeddings/app/clip/loadModel.py

This file was deleted.

9 changes: 0 additions & 9 deletions embeddings/app/clip/model.py

This file was deleted.

File renamed without changes.
10 changes: 10 additions & 0 deletions embeddings/app/clipL14/loadModel.py
@@ -0,0 +1,10 @@
+## This script downloads the clip model for embeddings service.
+
+import torch
+import torchvision.transforms as transforms
+import clip
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model, preprocess = clip.load('ViT-L/14', device)
+
+#model.save('/code/app/clip-ViT-L-14.model')
11 changes: 11 additions & 0 deletions embeddings/app/clipL14/model.py
@@ -0,0 +1,11 @@
+import torch
+import torchvision.transforms as transforms
+import clip
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model, preprocess = clip.load('ViT-L/14', device)
+
+def get_text_sentence_embedding(text: str, normalize: bool = True):
+    return model.encode_text(clip.tokenize(text))[0]
+
+
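An observation, not a change in the commit: get_text_sentence_embedding declares a normalize parameter but never reads it, and the forward pass runs without torch.no_grad(). A sketch of a variant that honors the flag:

import torch
import clip

device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load('ViT-L/14', device)

def get_text_sentence_embedding(text: str, normalize: bool = True):
    with torch.no_grad():
        emb = model.encode_text(clip.tokenize(text).to(device))[0]
    if normalize:
        emb = emb / emb.norm()  # L2-normalize so dot product equals cosine similarity
    return emb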
@@ -1,6 +1,6 @@
from fastapi import APIRouter

-from clip.model import get_text_sentence_embedding
+from clipL14.model import get_text_sentence_embedding

from embeddings import EmbeddingsTextRequest, OutputFormat

2 changes: 1 addition & 1 deletion embeddings/app/main.py
@@ -1,5 +1,5 @@
from fastapi import FastAPI
-from clip import router as router_clip
+from clipL14 import router as router_clip
from minilm import router as router_minilm

app = FastAPI()
1 change: 1 addition & 0 deletions embeddings/requirements.txt
@@ -3,5 +3,6 @@ fastapi
uvicorn[standard]
gunicorn
pydantic
+torch
+git+https://github.com/openai/CLIP.git
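The two added entries track the switch to openai/CLIP: the package is installed straight from GitHub (as far as I know it is not distributed on PyPI under that name), and it assumes torch is already present, hence the explicit torch line.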

Binary file modified solr/lib/querqy-embeddings-rewriter-1.0.0-SNAPSHOT.jar
Binary file not shown.
