Updating langchain-neo4j package #891

Merged: 12 commits, Dec 3, 2024

206 changes: 40 additions & 166 deletions backend/requirements.txt
@@ -1,183 +1,57 @@
aiohttp==3.9.3
aiosignal==1.3.1
annotated-types==0.6.0
antlr4-python3-runtime==4.9.3
anyio==4.3.0
async-timeout==4.0.3
asyncio==3.4.3
attrs==23.2.0
backoff==2.2.1
beautifulsoup4==4.12.3
boto3==1.34.140
botocore==1.34.140
cachetools==5.3.3
certifi==2024.2.2
cffi==1.16.0
chardet==5.2.0
charset-normalizer==3.3.2
click==8.1.7
coloredlogs==15.0.1
contourpy==1.2.0
cryptography==42.0.2
cycler==0.12.1
dataclasses-json==0.6.4
dataclasses-json-speakeasy==0.5.11
Deprecated==1.2.14
distro==1.9.0
docstring_parser==0.16
effdet==0.4.1
emoji==2.10.1
exceptiongroup==1.2.0
fastapi==0.111.0
boto3==1.35.69
botocore==1.35.69
certifi==2024.8.30
fastapi==0.115.5
fastapi-health==0.4.0
filelock==3.13.1
filetype==1.2.0
flatbuffers==23.5.26
fonttools==4.49.0
frozenlist==1.4.1
fsspec==2024.2.0
google-api-core==2.18.0
google-auth==2.29.0
google_auth_oauthlib==1.2.0
google-cloud-aiplatform==1.58.0
google-cloud-bigquery==3.19.0
google-api-core==2.23.0
google-auth==2.36.0
google_auth_oauthlib==1.2.1
google-cloud-core==2.4.1
google-cloud-resource-manager==1.12.3
google-cloud-storage==2.17.0
google-crc32c==1.5.0
google-resumable-media==2.7.0
googleapis-common-protos==1.63.0
greenlet==3.0.3
grpc-google-iam-v1==0.13.0
grpcio==1.62.1
google-ai-generativelanguage==0.6.6
grpcio-status==1.62.1
h11==0.14.0
httpcore==1.0.4
httpx==0.27.0
huggingface-hub
humanfriendly==10.0
idna==3.6
importlib-resources==6.1.1
json-repair==0.30.2
pip-install==1.3.5
iopath==0.1.10
Jinja2==3.1.3
jmespath==1.0.1
joblib==1.3.2
jsonpatch==1.33
jsonpath-python==1.0.6
jsonpointer==2.4
json-repair==0.25.2
kiwisolver==1.4.5
langchain==0.3.0
langchain-aws==0.2.1
langchain-anthropic==0.2.1
langchain-fireworks==0.2.0
langchain-google-genai==2.0.0
langchain-community==0.3.0
langchain-core==0.3.5
langchain-experimental==0.3.1
langchain-google-vertexai==2.0.1
langchain-groq==0.2.0
langchain-openai==0.2.0
langchain-text-splitters==0.3.0
langchain==0.3.8
langchain-aws==0.2.7
langchain-anthropic==0.3.0
langchain-fireworks==0.2.5
langchain-community==0.3.8
langchain-core==0.3.21
langchain-experimental==0.3.3
langchain-google-vertexai==2.0.7
langchain-groq==0.2.1
langchain-openai==0.2.9
langchain-text-splitters==0.3.2
langchain-huggingface==0.1.2
langdetect==1.0.9
langsmith==0.1.128
layoutparser==0.3.4
langsmith==0.1.146
langserve==0.3.0
#langchain-cli==0.0.25
lxml==5.1.0
MarkupSafe==2.1.5
marshmallow==3.20.2
matplotlib==3.7.2
mpmath==1.3.0
multidict==6.0.5
mypy-extensions==1.0.0
neo4j-rust-ext
networkx==3.2.1
nltk==3.8.1
numpy==1.26.4
omegaconf==2.3.0
onnx==1.16.1
onnxruntime==1.18.1
openai==1.47.1
opencv-python==4.8.0.76
orjson==3.9.15
packaging==23.2
pandas==2.2.0
pdf2image==1.17.0
pdfminer.six==20221105
pdfplumber==0.10.4
pikepdf==8.11.0
pillow==10.2.0
pillow_heif==0.15.0
portalocker==2.8.2
proto-plus==1.23.0
protobuf==4.23.4
psutil==6.0.0
pyasn1==0.6.0
pyasn1_modules==0.4.0
pycocotools==2.0.7
pycparser==2.21
pydantic==2.8.2
pydantic_core==2.20.1
pyparsing==3.0.9
pypdf==4.0.1
PyPDF2==3.0.1
pypdfium2==4.27.0
pytesseract==0.3.10
python-dateutil==2.8.2
nltk==3.9.1
openai==1.55.1
opencv-python==4.10.0.84
psutil==6.1.0
pydantic==2.9.0
python-dotenv==1.0.1
python-iso639==2024.2.7
python-magic==0.4.27
python-multipart==0.0.9
pytube==15.0.0
pytz==2024.1
PyYAML==6.0.1
rapidfuzz==3.6.1
regex==2023.12.25
requests==2.32.3
rsa==4.9
s3transfer==0.10.1
safetensors==0.4.1
shapely==2.0.3
six==1.16.0
sniffio==1.3.1
soupsieve==2.5
starlette==0.37.2
sse-starlette==2.1.2
PyPDF2==3.0.1
PyMuPDF==1.24.14
starlette==0.41.3
sse-starlette==2.1.3
starlette-session==0.4.3
sympy==1.12
tabulate==0.9.0
tenacity==8.2.3
tiktoken==0.7.0
timm==0.9.12
tokenizers==0.19
tqdm==4.66.2
transformers==4.42.3
types-protobuf
types-requests
typing-inspect==0.9.0
typing_extensions==4.12.2
tzdata==2024.1
unstructured==0.14.9
unstructured-client==0.23.8
unstructured-inference==0.7.36
unstructured.pytesseract==0.3.12
unstructured[all-docs]==0.14.9
tqdm==4.67.1
unstructured[all-docs]==0.16.6
urllib3==2.2.2
uvicorn==0.30.1
gunicorn==22.0.0
uvicorn==0.32.1
gunicorn==23.0.0
wikipedia==1.4.0
wrapt==1.16.0
yarl==1.9.4
youtube-transcript-api==0.6.2
youtube-transcript-api==0.6.3
zipp==3.17.0
sentence-transformers==3.0.1
google-cloud-logging==3.10.0
PyMuPDF==1.24.5
sentence-transformers==3.3.1
google-cloud-logging==3.11.3
pypandoc==1.13
graphdatascience==1.10
graphdatascience==1.12
Secweb==1.11.0
ragas==0.2.2
ragas==0.2.6
rouge_score==0.1.2
langchain-neo4j==0.1.1
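
The core of this dependency update is the new langchain-neo4j==0.1.1 pin, which takes over the Neo4j integrations previously shipped in langchain-community. A minimal smoke test, assuming the updated requirements are installed, is to confirm that the classes migrated in the files below now resolve from the new package:

```python
# Minimal sketch (not part of the PR): verify that the classes this PR
# migrates now import from langchain_neo4j instead of langchain_community.
from langchain_neo4j import (
    Neo4jGraph,               # used in score.py, create_chunks.py, diffbot_transformer.py
    Neo4jVector,              # used in QA_integration.py
    Neo4jChatMessageHistory,  # used in QA_integration.py
    GraphCypherQAChain,       # used in QA_integration.py
)
```
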
6 changes: 3 additions & 3 deletions backend/score.py
@@ -36,6 +36,7 @@
from src.ragas_eval import *
from starlette.types import ASGIApp, Message, Receive, Scope, Send
import gzip
from langchain_neo4j import Neo4jGraph

logger = CustomLogger()
CHUNK_DIR = os.path.join(os.path.dirname(__file__), "chunks")
@@ -581,7 +582,6 @@ async def generate():
graph = create_graph_database_connection(uri, userName, decoded_password, database)
graphDb_data_Access = graphDBdataAccess(graph)
result = graphDb_data_Access.get_current_status_document_node(file_name)
# print(f'Result of document status in SSE : {result}')
if len(result) > 0:
status = json.dumps({'fileName':file_name,
'status':result[0]['Status'],
@@ -668,7 +668,7 @@ async def get_document_status(file_name, url, userName, password, database):
}
else:
status = {'fileName':file_name, 'status':'Failed'}
print(f'Result of document status in refresh : {result}')
logging.info(f'Result of document status in refresh : {result}')
return create_api_response('Success',message="",file_name=status)
except Exception as e:
message=f"Unable to get the document status"
@@ -961,7 +961,7 @@ async def fetch_chunktext(
async def backend_connection_configuation():
try:
graph = Neo4jGraph()
print(f'login connection status of object: {graph}')
logging.info(f'login connection status of object: {graph}')
if graph is not None:
graph_connection = True
isURI = os.getenv('NEO4J_URI')
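
For context on the `Neo4jGraph()` call in the health check above: when constructed without arguments, the class falls back to connection settings from the environment, which is consistent with the `os.getenv('NEO4J_URI')` check in the same function. A hedged sketch with placeholder local credentials:

```python
import os
import logging
from langchain_neo4j import Neo4jGraph

# Placeholder values for illustration only; in the app these come from the
# deployment environment rather than being set in code.
os.environ.setdefault("NEO4J_URI", "neo4j://localhost:7687")
os.environ.setdefault("NEO4J_USERNAME", "neo4j")
os.environ.setdefault("NEO4J_PASSWORD", "password")

graph = Neo4jGraph()  # reads NEO4J_URI / NEO4J_USERNAME / NEO4J_PASSWORD
logging.info(f'login connection status of object: {graph}')
```
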
11 changes: 3 additions & 8 deletions backend/src/QA_integration.py
@@ -4,15 +4,13 @@
import logging

import threading
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from typing import Any
from dotenv import load_dotenv


# LangChain imports
from langchain_community.vectorstores.neo4j_vector import Neo4jVector
from langchain_community.chat_message_histories import Neo4jChatMessageHistory
from langchain_neo4j import Neo4jVector
from langchain_neo4j import Neo4jChatMessageHistory
from langchain_neo4j import GraphCypherQAChain
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableBranch
@@ -21,7 +19,6 @@
from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline
from langchain_text_splitters import TokenTextSplitter
from langchain_core.messages import HumanMessage, AIMessage
from langchain.chains import GraphCypherQAChain
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.callbacks import StdOutCallbackHandler, BaseCallbackHandler

@@ -38,8 +35,6 @@
from src.llm import get_llm
from src.shared.common_fn import load_embedding_model
from src.shared.constants import *
from src.graphDB_dataAccess import graphDBdataAccess
from src.ragas_eval import get_ragas_metrics
load_dotenv()

EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL')
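
The net effect of these import changes is that Neo4jVector, Neo4jChatMessageHistory, and GraphCypherQAChain now come from langchain_neo4j rather than langchain_community and langchain.chains. A hedged usage sketch of the migrated chat-history class, assuming it keeps the langchain_community signature; the session id is hypothetical and credentials fall back to the NEO4J_* environment variables:

```python
from langchain_neo4j import Neo4jChatMessageHistory

# "demo-session" is a hypothetical id; each session's turns are stored
# as nodes in Neo4j, keyed by this value.
history = Neo4jChatMessageHistory(session_id="demo-session")
history.add_user_message("What does this graph contain?")
messages = history.messages  # list of HumanMessage / AIMessage objects
```
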
4 changes: 1 addition & 3 deletions backend/src/create_chunks.py
@@ -1,8 +1,7 @@
from langchain_text_splitters import TokenTextSplitter
from langchain.docstore.document import Document
from langchain_community.graphs import Neo4jGraph
from langchain_neo4j import Neo4jGraph
import logging
import os
from src.document_sources.youtube import get_chunks_with_timestamps, get_calculated_timestamps
import re

@@ -25,7 +24,6 @@ def split_file_into_chunks(self):
A list of chunks each of which is a langchain Document.
"""
logging.info("Split file into smaller chunks")
# number_of_chunks_allowed = int(os.environ.get('NUMBER_OF_CHUNKS_ALLOWED'))
text_splitter = TokenTextSplitter(chunk_size=200, chunk_overlap=20)
if 'page' in self.pages[0].metadata:
chunks = []
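
The splitter configuration itself is unchanged by this PR; a self-contained sketch of the same 200-token / 20-token-overlap setup on a sample document (the text is illustrative only):

```python
from langchain_text_splitters import TokenTextSplitter
from langchain.docstore.document import Document

# Same parameters as split_file_into_chunks above:
# 200-token chunks with a 20-token overlap.
text_splitter = TokenTextSplitter(chunk_size=200, chunk_overlap=20)
pages = [Document(page_content="some long source text " * 200)]
chunks = text_splitter.split_documents(pages)
```
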
3 changes: 2 additions & 1 deletion backend/src/diffbot_transformer.py
@@ -1,5 +1,6 @@
from langchain_experimental.graph_transformers.diffbot import DiffbotGraphTransformer
from langchain_community.graphs import Neo4jGraph
#from langchain_community.graphs import Neo4jGraph
from langchain_neo4j import Neo4jGraph
from langchain.docstore.document import Document
from typing import List
import os
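
Only the Neo4jGraph import changes here; the transformer still comes from langchain_experimental. A hedged sketch of how DiffbotGraphTransformer is typically driven (the DIFFBOT_API_KEY variable name is an assumption for illustration):

```python
import os
from langchain_experimental.graph_transformers.diffbot import DiffbotGraphTransformer
from langchain.docstore.document import Document

# The env var name is hypothetical; the constructor takes a Diffbot API token.
transformer = DiffbotGraphTransformer(diffbot_api_key=os.getenv("DIFFBOT_API_KEY"))
docs = [Document(page_content="Neo4j develops a graph database of the same name.")]
graph_documents = transformer.convert_to_graph_documents(docs)
```
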
5 changes: 1 addition & 4 deletions backend/src/document_sources/gcs_bucket.py
@@ -101,15 +101,12 @@ def merge_file_gcs(bucket_name, original_file_name: str, folder_name_sha1_hashed
try:
storage_client = storage.Client()
bucket = storage_client.bucket(bucket_name)
# Retrieve chunks from GCS
# blobs = storage_client.list_blobs(bucket_name, prefix=folder_name_sha1_hashed)
# print(f'before sorted blobs: {blobs}')
chunks = []
for i in range(1,total_chunks+1):
blob_name = folder_name_sha1_hashed + '/' + f"{original_file_name}_part_{i}"
blob = bucket.blob(blob_name)
if blob.exists():
print(f'Blob Name: {blob.name}')
logging.info(f'Blob Name: {blob.name}')
chunks.append(blob.download_as_bytes())
blob.delete()

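
For reference, the loop above follows the standard google-cloud-storage pattern: download each numbered part blob, then delete it. A self-contained sketch with placeholder names:

```python
import logging
from google.cloud import storage

# Placeholder identifiers for illustration only.
bucket_name = "example-bucket"
folder_name_sha1_hashed = "abc123"
original_file_name = "report.pdf"
total_chunks = 3

storage_client = storage.Client()
bucket = storage_client.bucket(bucket_name)
chunks = []
for i in range(1, total_chunks + 1):
    blob = bucket.blob(f"{folder_name_sha1_hashed}/{original_file_name}_part_{i}")
    if blob.exists():
        logging.info(f'Blob Name: {blob.name}')
        chunks.append(blob.download_as_bytes())
        blob.delete()  # each part is removed once it has been read
merged = b"".join(chunks)
```
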
2 changes: 0 additions & 2 deletions backend/src/document_sources/local_file.py
@@ -20,10 +20,8 @@

def load_document_content(file_path):
if Path(file_path).suffix.lower() == '.pdf':
print("in if")
return PyMuPDFLoader(file_path)
else:
print("in else")
return UnstructuredFileLoader(file_path, mode="elements",autodetect_encoding=True)

def get_documents_from_file_by_path(file_path,file_name):
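
The change above only removes debug prints; the loader-selection logic is unchanged. A runnable sketch of that logic, with a hypothetical file path:

```python
from pathlib import Path
from langchain_community.document_loaders import PyMuPDFLoader, UnstructuredFileLoader

def load_document_content(file_path):
    # PDFs go through PyMuPDF; everything else through Unstructured's
    # element-mode loader with encoding autodetection, as in the file above.
    if Path(file_path).suffix.lower() == '.pdf':
        return PyMuPDFLoader(file_path)
    return UnstructuredFileLoader(file_path, mode="elements", autodetect_encoding=True)

documents = load_document_content("example.pdf").load()  # "example.pdf" is hypothetical
```
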