Skip to content

Commit

Permalink
Update db_build.py
Browse files Browse the repository at this point in the history
  • Loading branch information
kennethleungty committed Jul 11, 2023
1 parent 3b5f178 commit 7bb6133
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 10 deletions.
Binary file added assets/illustrations.pptx
Binary file not shown.
13 changes: 3 additions & 10 deletions db_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,19 @@

# Build vector database
def run_db_build():
    """Build the FAISS vector store from the PDF files under ``cfg.DATA_PATH``.

    Steps, in order:

    1. Load every file matching ``*.pdf`` in ``cfg.DATA_PATH`` with
       ``PyPDFLoader`` (via ``DirectoryLoader``).
    2. Split the loaded documents into chunks using
       ``RecursiveCharacterTextSplitter`` configured with ``cfg.CHUNK_SIZE``
       and ``cfg.CHUNK_OVERLAP``.
    3. Embed the chunks with the ``sentence-transformers/all-MiniLM-L6-v2``
       model, requesting the ``cpu`` device.
    4. Build a FAISS index from the chunks and save it to
       ``cfg.DB_FAISS_PATH``.

    Progress is reported on stdout at the start and at the end of the build.
    The function takes no arguments and returns ``None``.
    """
    print('Start DB Build')

    # ``glob`` must be passed exactly once; a repeated keyword argument is a
    # SyntaxError.
    loader = DirectoryLoader(cfg.DATA_PATH,
                             glob='*.pdf',
                             loader_cls=PyPDFLoader)
    documents = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=cfg.CHUNK_SIZE,
                                                   chunk_overlap=cfg.CHUNK_OVERLAP)
    texts = text_splitter.split_documents(documents)

    # Construct the embedding model once, with its settings inlined, rather
    # than via throwaway ``model_name`` / ``model_kwargs`` locals.
    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
                                       model_kwargs={'device': 'cpu'})

    vectorstore = FAISS.from_documents(texts, embeddings)
    vectorstore.save_local(cfg.DB_FAISS_PATH)
    print('FAISS Vectorstore - Build Complete')


if __name__ == "__main__":
    # Run the build only when this file is executed as a script, so that
    # importing the module does not trigger a database rebuild.
    run_db_build()

0 comments on commit 7bb6133

Please sign in to comment.