Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Knowledge #2966

Merged
merged 11 commits into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ backend/.python-version
**/.pnp
.pnp.js

Pipfile

# testing
**/coverage

Expand Down
11 changes: 11 additions & 0 deletions Pipfile
chloedia marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"

[packages]

[dev-packages]

[requires]
python_version = "3.11"
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from langchain_core.pydantic_v1 import Field as FieldV1
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI

from quivr_api.logger import get_logger
from quivr_api.modules.brain.knowledge_brain_qa import KnowledgeBrainQA

Expand Down Expand Up @@ -113,7 +114,6 @@ def __init__(
)

def get_chain(self):

list_files_array = (
self.knowledge_qa.knowledge_service.get_all_knowledge_in_brain(
self.brain_id
Expand Down Expand Up @@ -176,7 +176,6 @@ def get_chain(self):
api_base=api_base,
) # pyright: ignore reportPrivateUsage=none
if self.model_compatible_with_function_calling(self.model):

# And finally, we do the part that returns the answers
llm_function = ChatOpenAI(
max_tokens=self.max_tokens,
Expand Down
6 changes: 2 additions & 4 deletions backend/api/quivr_api/modules/brain/service/brain_service.py
chloedia marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from uuid import UUID

from fastapi import HTTPException

from quivr_api.celery_config import celery
from quivr_api.logger import get_logger
from quivr_api.modules.brain.dto.inputs import (
Expand All @@ -17,13 +18,10 @@
IntegrationBrain,
IntegrationDescription,
)
from quivr_api.modules.knowledge.service.knowledge_service import KnowledgeService
from quivr_api.vectorstore.supabase import CustomSupabaseVectorStore

logger = get_logger(__name__)

knowledge_service = KnowledgeService()


class BrainService:
# brain_repository: BrainsInterface
Expand Down Expand Up @@ -151,7 +149,7 @@ def delete_brain(self, brain_id: UUID) -> dict[str, str]:
if brain_to_delete is None:
raise HTTPException(status_code=404, detail="Brain not found.")

knowledge_service.remove_brain_all_knowledge(brain_id)
# knowledge_service.remove_brain_all_knowledge(brain_id)  # FIXME(@amine): we don't really want to delete the knowledge if a knowledge can belong to multiple brains

self.brain_vector.delete_brain_vector(str(brain_id))
self.brain_user_repository.delete_brain_users(str(brain_id))
Expand Down
25 changes: 13 additions & 12 deletions backend/api/quivr_api/modules/chat/controller/chat_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,23 @@

from fastapi import APIRouter, Depends, HTTPException, Query, Request
from fastapi.responses import StreamingResponse

from quivr_api.logger import get_logger
from quivr_api.middlewares.auth import AuthBearer, get_current_user
from quivr_api.modules.brain.entity.brain_entity import RoleEnum
from quivr_api.modules.brain.service.brain_authorization_service import (
validate_brain_authorization,
)
from quivr_api.modules.brain.service.brain_authorization_service import \
validate_brain_authorization
from quivr_api.modules.brain.service.brain_service import BrainService
from quivr_api.modules.chat.dto.chats import ChatItem, ChatQuestion
from quivr_api.modules.chat.dto.inputs import (
ChatMessageProperties,
ChatUpdatableProperties,
CreateChatProperties,
QuestionAndAnswer,
)
from quivr_api.modules.chat.dto.inputs import (ChatMessageProperties,
ChatUpdatableProperties,
CreateChatProperties,
QuestionAndAnswer)
from quivr_api.modules.chat.entity.chat import Chat
from quivr_api.modules.chat.service.chat_service import ChatService
from quivr_api.modules.chat_llm_service.chat_llm_service import ChatLLMService
from quivr_api.modules.dependencies import get_service
from quivr_api.modules.knowledge.repository.knowledges import KnowledgeRepository
from quivr_api.modules.knowledge.service.knowledge_service import \
KnowledgeService
from quivr_api.modules.models.service.model_service import ModelService
from quivr_api.modules.prompt.service.prompt_service import PromptService
from quivr_api.modules.rag_service import RAGService
Expand All @@ -34,7 +31,9 @@

chat_router = APIRouter()
brain_service = BrainService()
knowledge_service = KnowledgeRepository()
KnowledgeServiceDep = Annotated[
KnowledgeService, Depends(get_service(KnowledgeService))
]
prompt_service = PromptService()


Expand Down Expand Up @@ -166,6 +165,7 @@ async def create_question_handler(
chat_id: UUID,
current_user: UserIdentityDep,
chat_service: ChatServiceDep,
knowledge_service: KnowledgeServiceDep,
model_service: ModelServiceDep,
brain_id: Annotated[UUID | None, Query()] = None,
):
Expand Down Expand Up @@ -231,6 +231,7 @@ async def create_stream_question_handler(
chat_id: UUID,
chat_service: ChatServiceDep,
current_user: UserIdentityDep,
knowledge_service: KnowledgeServiceDep,
model_service: ModelServiceDep,
brain_id: Annotated[UUID | None, Query()] = None,
) -> StreamingResponse:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from typing import Annotated
from uuid import UUID

from fastapi import APIRouter, Depends, HTTPException, Query

from quivr_api.logger import get_logger
from quivr_api.middlewares.auth import AuthBearer, get_current_user
from quivr_api.modules.brain.entity.brain_entity import RoleEnum
Expand All @@ -9,6 +11,7 @@
validate_brain_authorization,
)
from quivr_api.modules.brain.service.brain_vector_service import BrainVectorService
from quivr_api.modules.dependencies import get_service
from quivr_api.modules.knowledge.service.knowledge_service import KnowledgeService
from quivr_api.modules.upload.service.generate_file_signed_url import (
generate_file_signed_url,
Expand All @@ -18,13 +21,16 @@
knowledge_router = APIRouter()
logger = get_logger(__name__)

knowledge_service = KnowledgeService()
KnowledgeServiceDep = Annotated[
KnowledgeService, Depends(get_service(KnowledgeService))
]


@knowledge_router.get(
"/knowledge", dependencies=[Depends(AuthBearer())], tags=["Knowledge"]
)
async def list_knowledge_in_brain_endpoint(
knowledge_service: KnowledgeServiceDep,
brain_id: UUID = Query(..., description="The ID of the brain"),
current_user: UserIdentity = Depends(get_current_user),
):
Expand All @@ -34,7 +40,7 @@ async def list_knowledge_in_brain_endpoint(

validate_brain_authorization(brain_id=brain_id, user_id=current_user.id)

knowledges = knowledge_service.get_all_knowledge(brain_id)
knowledges = await knowledge_service.get_all_knowledge(brain_id)

return {"knowledges": knowledges}

Expand All @@ -49,16 +55,17 @@ async def list_knowledge_in_brain_endpoint(
)
async def delete_endpoint(
knowledge_id: UUID,
knowledge_service: KnowledgeServiceDep,
current_user: UserIdentity = Depends(get_current_user),
brain_id: UUID = Query(..., description="The ID of the brain"),
):
"""
Delete a specific knowledge from a brain.
"""

knowledge = knowledge_service.get_knowledge(knowledge_id)
knowledge = await knowledge_service.get_knowledge(knowledge_id)
file_name = knowledge.file_name if knowledge.file_name else knowledge.url
knowledge_service.remove_knowledge(knowledge_id)
await knowledge_service.remove_knowledge(knowledge_id)

brain_vector_service = BrainVectorService(brain_id)
if knowledge.file_name:
Expand All @@ -78,13 +85,14 @@ async def delete_endpoint(
)
async def generate_signed_url_endpoint(
knowledge_id: UUID,
knowledge_service: KnowledgeServiceDep,
current_user: UserIdentity = Depends(get_current_user),
):
"""
Generate a signed url to download the file from storage.
"""

knowledge = knowledge_service.get_knowledge(knowledge_id)
knowledge = await knowledge_service.get_knowledge(knowledge_id)

validate_brain_authorization(brain_id=knowledge.brain_id, user_id=current_user.id)

Expand Down
13 changes: 7 additions & 6 deletions backend/api/quivr_api/modules/knowledge/dto/inputs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from enum import Enum
from typing import Optional
from typing import Dict, Optional
from uuid import UUID

from pydantic import BaseModel
Expand All @@ -9,16 +9,17 @@ class KnowledgeStatus(str, Enum):
PROCESSING = "PROCESSING"
UPLOADED = "UPLOADED"
ERROR = "ERROR"


class CreateKnowledgeProperties(BaseModel):
brain_id: UUID
file_name: Optional[str] = None
url: Optional[str] = None
extension: str = "txt"
integration: Optional[str] = None
integration_link: Optional[str] = None
mime_type: str = "txt"
chloedia marked this conversation as resolved.
Show resolved Hide resolved
status: KnowledgeStatus = KnowledgeStatus.PROCESSING
source: Optional[str] = None
source_link: Optional[str] = None
file_size: Optional[int] = None
file_sha1: Optional[str] = None
metadata: Optional[Dict[str, str]] = None

def dict(self, *args, **kwargs):
knowledge_dict = super().dict(*args, **kwargs)
Expand Down
59 changes: 56 additions & 3 deletions backend/api/quivr_api/modules/knowledge/entity/knowledge.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
from typing import Optional
from datetime import datetime
from typing import Dict, Optional
from uuid import UUID

from pydantic import BaseModel
from sqlalchemy import JSON, TIMESTAMP, Column, text
from sqlmodel import UUID as PGUUID
from sqlmodel import Field, SQLModel


class Knowledge(BaseModel):
Expand All @@ -11,5 +15,54 @@ class Knowledge(BaseModel):
url: Optional[str] = None
extension: str = "txt"
status: str
integration: Optional[str] = None
integration_link: Optional[str] = None
source: Optional[str] = None
source_link: Optional[str] = None
file_size: Optional[int] = None
file_sha1: Optional[str] = None
updated_at: Optional[datetime] = None
created_at: Optional[datetime] = None
metadata: Optional[Dict[str, str]] = None

def dict(self, *args, **kwargs):
knowledge_dict = super().dict(*args, **kwargs)
knowledge_dict["brain_id"] = str(knowledge_dict.get("brain_id"))
return knowledge_dict


class KnowledgeDB(SQLModel, table=True):
__tablename__ = "knowledge" # type: ignore

id: UUID | None = Field(
default=None,
sa_column=Column(
PGUUID,
server_default=text("uuid_generate_v4()"),
primary_key=True,
),
)
brain_id: UUID = Field(nullable=False)
file_name: Optional[str] = Field(default=None, max_length=255)
url: Optional[str] = Field(default=None, max_length=2048)
mime_type: str = Field(default="txt", max_length=100)
status: str = Field(max_length=50)
source: str = Field(max_length=255)
source_link: str = Field(max_length=2048)
file_size: Optional[int] = Field(gt=0) # FIXME: Should not be optional @chloedia
file_sha1: Optional[str] = Field(
chloedia marked this conversation as resolved.
Show resolved Hide resolved
max_length=40
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider `bytea` or a fixed-size binary type for this column instead of a string; see:
https://www.postgresql.org/docs/current/datatype-binary.html

) # FIXME: Should not be optional @chloedia
updated_at: datetime | None = Field(
default=None,
sa_column=Column(
TIMESTAMP(timezone=False),
server_default=text("CURRENT_TIMESTAMP"),
),
)
created_at: datetime | None = Field(
default=None,
sa_column=Column(
TIMESTAMP(timezone=False),
server_default=text("CURRENT_TIMESTAMP"),
),
)
metadata_: Optional[Dict[str, str]] = Field(default=None, sa_column=Column(JSON))
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,15 @@
from typing import List
from uuid import UUID

from quivr_api.modules.knowledge.dto.inputs import (
CreateKnowledgeProperties,
KnowledgeStatus,
)
from quivr_api.modules.knowledge.dto.inputs import (CreateKnowledgeProperties,
KnowledgeStatus)
from quivr_api.modules.knowledge.dto.outputs import DeleteKnowledgeResponse
from quivr_core.models import QuivrKnowledge as Knowledge
from quivr_api.modules.knowledge.entity.knowledge import KnowledgeDB


class KnowledgeInterface(ABC):
@abstractmethod
def insert_knowledge(self, knowledge: CreateKnowledgeProperties) -> Knowledge:
def insert_knowledge(self, knowledge: CreateKnowledgeProperties) -> KnowledgeDB:
"""
Add a knowledge
"""
Expand All @@ -34,7 +32,7 @@ def remove_knowledge_by_id(
pass

@abstractmethod
def get_knowledge_by_id(self, knowledge_id: UUID) -> Knowledge:
def get_knowledge_by_id(self, knowledge_id: UUID) -> KnowledgeDB:
"""
Get a knowledge by its id
Args:
Expand All @@ -43,7 +41,7 @@ def get_knowledge_by_id(self, knowledge_id: UUID) -> Knowledge:
pass

@abstractmethod
def get_all_knowledge_in_brain(self, brain_id: UUID) -> List[Knowledge]:
def get_all_knowledge_in_brain(self, brain_id: UUID) -> List[KnowledgeDB]:
"""
Get all the knowledge in a brain
Args:
Expand Down
Loading
Loading