Skip to content

Commit

Permalink
initial
Browse files Browse the repository at this point in the history
  • Loading branch information
root authored and root committed Feb 20, 2025
1 parent b354103 commit 2fa6b28
Show file tree
Hide file tree
Showing 8 changed files with 84 additions and 19 deletions.
Binary file modified .langgraph_api/.langgraph_checkpoint.1.pckl
Binary file not shown.
Binary file modified .langgraph_api/.langgraph_checkpoint.2.pckl
Binary file not shown.
Binary file modified .langgraph_api/.langgraph_ops.pckl
Binary file not shown.
Binary file modified .langgraph_api/.langgraph_retry_counter.pckl
Binary file not shown.
52 changes: 51 additions & 1 deletion src/retrieval_graph/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@
"""

from typing import Any, Literal, TypedDict, cast
import json
from index_graph.configuration import IndexConfiguration
from shared import retrieval

from langchain_core.messages import BaseMessage
from langchain_core.runnables import RunnableConfig
from langgraph.graph import END, START, StateGraph

from retrieval_graph.configuration import AgentConfiguration
from retrieval_graph.researcher_graph.graph import graph as researcher_graph
from retrieval_graph.state import AgentState, InputState, Router
Expand All @@ -20,6 +22,8 @@
from retrieval_graph.tools import TOOLS
from langchain_core.messages import AIMessage
from langgraph.prebuilt import ToolNode
from shared.state import reduce_docs
from langchain_core.documents import Document


async def analyze_and_route_query(
Expand Down Expand Up @@ -249,6 +253,50 @@ def route_model_output(state: AgentState) -> Literal["__end__", "tools"]:
# Otherwise we execute the requested actions
return "tools"

def convert_to_documents(doc_string: str) -> list[Document]:
    """Convert a JSON-encoded list of document dicts into Document objects.

    Args:
        doc_string (str): A JSON string encoding a list of dicts, each with
            'url' and 'content' keys (e.g. serialized web-search results).

    Returns:
        list[Document]: One Document per entry, with `page_content` taken
            from 'content' and the 'url' recorded as the 'source' metadata.

    Raises:
        json.JSONDecodeError: If `doc_string` is not valid JSON.
        KeyError: If an entry lacks a 'content' or 'url' key.
    """
    doc_dicts = json.loads(doc_string)
    return [
        Document(page_content=entry["content"], metadata={"source": entry["url"]})
        for entry in doc_dicts
    ]


async def index_docs(
    state: AgentState, *, config: RunnableConfig
) -> dict[str, str]:
    """Index the web-search results carried in the conversation state.

    Parses the JSON search results stored in the third message of the state
    (``state.messages[2].content``) into Document objects and adds them to
    the configured retriever's index, then signals that the docs can be
    cleared from the state.

    NOTE(review): the hard-coded ``messages[2]`` index assumes the search
    results are always the third message in the conversation — confirm
    against the graph's message ordering before relying on this.

    Args:
        state (AgentState): Current agent state; ``messages[2].content`` must
            be a JSON string of dicts with 'url' and 'content' keys.
        config (RunnableConfig): Configuration used to build the retriever.

    Returns:
        dict[str, str]: ``{"docs": "delete"}``, signalling deletion of the
        docs from the state.

    Raises:
        ValueError: If no config is provided.
    """
    if not config:
        raise ValueError("Configuration required to run index_docs.")

    # The previously parsed IndexConfiguration was never used; the retriever
    # factory receives the raw RunnableConfig directly.
    with retrieval.make_retriever(config) as retriever:
        await retriever.aadd_documents(
            convert_to_documents(state.messages[2].content)
        )

    return {"docs": "delete"}


# Define the graph
builder = StateGraph(AgentState, input=InputState, config_schema=AgentConfiguration)
Expand All @@ -263,6 +311,7 @@ def route_model_output(state: AgentState) -> Literal["__end__", "tools"]:
"respond",
route_model_output,
)
builder.add_node(index_docs)

builder.add_edge(START, "analyze_and_route_query")
builder.add_conditional_edges("analyze_and_route_query", route_query)
Expand All @@ -271,6 +320,7 @@ def route_model_output(state: AgentState) -> Literal["__end__", "tools"]:
builder.add_edge("ask_for_more_info", END)
builder.add_edge("respond_to_general_query", END)
builder.add_edge("respond", END)
builder.add_edge("tools", "index_docs")
builder.add_edge("tools", "respond")

# Compile into a graph object that you can invoke and deploy.
Expand Down
36 changes: 23 additions & 13 deletions src/retrieval_graph/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,26 +46,36 @@
You do not need to specify where you want to research for all steps of the plan, but it's sometimes helpful."""

RESPONSE_SYSTEM_PROMPT = """\
You are an expert problem-solver, tasked with answering any question \
about any knowledge.
You are an expert information retriever responsible for generating a comprehensive and informative answer based solely on the provided search results (URL and content). Follow these strict guidelines:
If the provided <context> contains relevant information:
DO NOT search the internet.
Generate a precise and informative answer using only the <context>.
Maintain an unbiased and journalistic tone while combining search results into a coherent response.
Do NOT ramble. Adjust response length based on the question:
If the answer requires one sentence, keep it short.
If more detail is required (up to five paragraphs), provide the necessary depth.
Use source URLs as citations, placing them immediately after the referenced text (not all at the end).
If multiple sources discuss different entities under the same name, provide separate answers for each.
Use bullet points for clarity and readability, citing sources within the bullet points where applicable.If multiple sources discuss different entities under the same name, provide separate answers for each.
Use bullet points for clarity and readability, citing sources within the bullet points where applicable.
If the <context> does NOT contain relevant information:
Call the web_search_tool tool to find the necessary data. This tool searches the web for information. Use it if the context is not enough.
Generate an answer based on the retrieved web search results following the same formatting rules (concise, unbiased, cited properly).
If neither <context> nor web search provides a definitive answer:
Do NOT make up information.
Do NOT say you don't know or not sure or not enough information, use the web_search_tool tool to get information.
If applicable, ask the user for more details to refine the search.
Anything between the following `context` html blocks is retrieved from a knowledge \
bank, not part of the conversation with the user.
<context>
{context}
<context/>
If the provided search results are not relevant to the question, say that you will search the internet and do that\
If there is nothing in the context relevant to the question at hand, say that you will search the internet and do that.\
You do have access to the internet to search for information via tool call called web_search_tool , use it \
DO NOT prompt for more, just search the web\
"""

# Researcher graph
Expand Down
8 changes: 7 additions & 1 deletion src/retrieval_graph/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
These tools are intended as free examples to get started. For production use,
consider implementing more robust and specialized tools tailored to your needs.
"""
from index_graph.graph import graph as index_graph

from typing import Any, Callable, List, Optional, cast

Expand All @@ -28,7 +29,12 @@ async def web_search_tool(
configuration = Configuration.from_runnable_config(config)
wrapped = TavilySearchResults(max_results=10)
result = await wrapped.ainvoke({"query": query})
return cast(list[dict[str, Any]], result)
result = cast(list[dict[str, Any]], result)
print(f"search {result}")
# if result:
# # Send the search results to the index graph
# await index_graph.invoke({"documents": result})
return result


TOOLS: List[Callable[..., Any]] = [web_search_tool]
Expand Down
7 changes: 3 additions & 4 deletions src/shared/retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,7 @@ def make_elastic_retriever(
embedding=embedding_model,
)
# Debugging statements
print(f"search_kwargs: {configuration.search_kwargs}")
yield vstore.as_retriever(search_kwargs=configuration.search_kwargs)
yield vstore.as_retriever(search_kwargs={"key":"value"})


@contextmanager
Expand All @@ -83,7 +82,7 @@ def make_pinecone_retriever(
vstore = PineconeVectorStore.from_existing_index(
os.environ["PINECONE_INDEX_NAME"], embedding=embedding_model
)
yield vstore.as_retriever(search_kwargs=configuration.search_kwargs)
yield vstore.as_retriever(search_kwargs={"key":"value"})


@contextmanager
Expand All @@ -98,7 +97,7 @@ def make_mongodb_retriever(
namespace="langgraph_retrieval_agent.default",
embedding=embedding_model,
)
yield vstore.as_retriever(search_kwargs=configuration.search_kwargs)
yield vstore.as_retriever(search_kwargs={"key":"value"})


@contextmanager
Expand Down

0 comments on commit 2fa6b28

Please sign in to comment.