Skip to content

Commit

Permalink
Component to query weaviate (#20)
Browse files Browse the repository at this point in the history
  • Loading branch information
Hakimovich99 authored Nov 22, 2023
1 parent 8f9f875 commit e38b296
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/components/load_from_csv/fondant_component.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ image: ghcr.io/ml6team/load_from_csv:dev
produces:
text: #TODO: fill in here
fields:
question:
data:
type: string

args:
Expand Down
18 changes: 18 additions & 0 deletions src/components/retrieve_from_weaviate/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Image for the retrieve_from_weaviate component.
FROM --platform=linux/amd64 python:3.8-slim AS base

# System dependencies.
# The apt package index is removed in the same layer so it does not end up
# in the final image.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install git -y && \
    rm -rf /var/lib/apt/lists/*

# Install requirements (copied on their own, before the sources, so this
# layer stays cached when only the sources change)
COPY requirements.txt /
RUN pip3 install --no-cache-dir -r requirements.txt

# Set the working directory to the component folder
WORKDIR /component/src

# Copy over src-files
COPY src/ .

# Start the component through the fondant CLI, pointing it at the `main`
# module copied into the working directory above.
ENTRYPOINT ["fondant", "execute", "main"]
38 changes: 38 additions & 0 deletions src/components/retrieve_from_weaviate/fondant_component.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Metadata: `name` and `image` must match the docker image built for this component.
name: retrieve_from_weaviate
description: Component that retrieves chunks from a weaviate vectorDB
image: ghcr.io/ml6team/retrieve_from_weaviate:dev

consumes:
  text: # query text together with the embedding used for the vector search
    fields:
      data:
        type: string
      embedding:
        type: array
        items:
          type: float32

produces:
  text: # query text plus the chunks retrieved for it
    fields:
      data:
        type: string
      retrieved_chunks:
        type: array
        items:
          type: string

args:
  weaviate_url:
    description: The URL of the weaviate instance.
    type: str
    default: http://localhost:8080
  class_name:
    description:
      The name of the weaviate class to retrieve the chunks from.
      Should follow the weaviate naming conventions.
    type: str
  top_k:
    description: Number of chunks to retrieve
    type: int
2 changes: 2 additions & 0 deletions src/components/retrieve_from_weaviate/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
weaviate-client==3.24.1
fondant[component]==0.7.0
37 changes: 37 additions & 0 deletions src/components/retrieve_from_weaviate/src/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from typing import List

import pandas as pd
import weaviate
from fondant.component import PandasTransformComponent


class RetrieveChunks(PandasTransformComponent):
    """Retrieve, for every embedding in the dataframe, the nearest chunks from Weaviate."""

    def __init__(self, *_, weaviate_url: str, class_name: str, top_k: int) -> None:
        """
        Args:
            weaviate_url: URL of the Weaviate instance to query.
            class_name: Name of the Weaviate class that is queried.
            top_k: Limit on the number of chunks retrieved per embedding.
        """
        self.client = weaviate.Client(weaviate_url)
        self.class_name = class_name
        self.k = top_k

    def retrieve_chunks(self, vector_query: List[float]) -> List[str]:
        """Return the `passage` of the chunks nearest to `vector_query`.

        Args:
            vector_query: Embedding vector used for the near-vector search.

        Returns:
            The `passage` property of every retrieved object, in the order
            Weaviate returned them. Empty when nothing was found.

        Raises:
            RuntimeError: If the Weaviate response reports query errors.
        """
        result = (
            self.client.query.get(self.class_name, ["passage"])
            .with_near_vector({"vector": vector_query})
            .with_limit(self.k)
            # NOTE: the distance is requested but not used by this component.
            .with_additional(["distance"])
            .do()
        )

        # GraphQL-level failures (unknown class, bad vector size, ...) come
        # back inside the response body; surface them explicitly instead of
        # failing later with an opaque KeyError/TypeError.
        if "errors" in result:
            msg = (
                f"Weaviate query on class '{self.class_name}' failed: "
                f"{result['errors']}"
            )
            raise RuntimeError(msg)

        # A null class entry is treated as "no hits".
        hits = result["data"]["Get"][self.class_name] or []
        return [hit["passage"] for hit in hits]

    def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
        """Add a `("text", "retrieved_chunks")` column to the dataframe.

        Each row's `("text", "embedding")` value is sent to Weaviate and the
        retrieved passages are stored as that row's `retrieved_chunks`.
        """
        dataframe[("text", "retrieved_chunks")] = dataframe[
            ("text", "embedding")
        ].apply(self.retrieve_chunks)
        return dataframe

0 comments on commit e38b296

Please sign in to comment.