Skip to content

Commit

Permalink
x
Browse files (browse the repository at this point in the history)
  • Loading branch information
eyurtsev committed Nov 16, 2023
1 parent 7b6823d commit 9894475
Show file tree
Hide file tree
Showing 18 changed files with 181 additions and 159 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number | Diff line number | Diff line change
Expand Up @@ -21,7 +21,7 @@ test_watch:
######################

# Define a variable for Python and notebook files.
lint format: PYTHON_FILES=langchain_benchmarks tests
lint format: PYTHON_FILES=.
lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=. --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')

lint lint_diff:
Expand Down
60 changes: 34 additions & 26 deletions csv-qa/custom_agent.py
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,25 @@
from langchain.agents import OpenAIFunctionsAgent, AgentExecutor
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.tools import PythonAstREPLTool
import pandas as pd
from langchain.agents import AgentExecutor, OpenAIFunctionsAgent
from langchain.agents.agent_toolkits.conversational_retrieval.tool import (
create_retriever_tool,
)
from langchain.chat_models import ChatOpenAI
from langsmith import Client
from langchain.smith import RunEvalConfig, run_on_dataset
from pydantic import BaseModel, Field
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.smith import RunEvalConfig, run_on_dataset
from langchain.tools import PythonAstREPLTool
from langchain.vectorstores import FAISS
from langchain.agents.agent_toolkits.conversational_retrieval.tool import create_retriever_tool

from langsmith import Client
from pydantic import BaseModel, Field

pd.set_option('display.max_rows', 20)
pd.set_option('display.max_columns', 20)
pd.set_option("display.max_rows", 20)
pd.set_option("display.max_columns", 20)

embedding_model = OpenAIEmbeddings()
vectorstore = FAISS.load_local("titanic_data", embedding_model)
retriever_tool = create_retriever_tool(vectorstore.as_retriever(), "person_name_search", "Search for a person by name")
retriever_tool = create_retriever_tool(
vectorstore.as_retriever(), "person_name_search", "Search for a person by name"
)


TEMPLATE = """You are working with a pandas dataframe in Python. The name of the dataframe is `df`.
Expand All @@ -42,7 +45,6 @@
"""



class PythonInputs(BaseModel):
query: str = Field(description="code snippet to run")

Expand All @@ -51,27 +53,33 @@ class PythonInputs(BaseModel):
df = pd.read_csv("titanic.csv")
template = TEMPLATE.format(dhead=df.head().to_markdown())

prompt = ChatPromptTemplate.from_messages([
("system", template),
MessagesPlaceholder(variable_name="agent_scratchpad"),
("human", "{input}")
])
prompt = ChatPromptTemplate.from_messages(
[
("system", template),
MessagesPlaceholder(variable_name="agent_scratchpad"),
("human", "{input}"),
]
)

def get_chain():
repl = PythonAstREPLTool(locals={"df": df}, name="python_repl",
description="Runs code and returns the output of the final line",
args_schema=PythonInputs)
repl = PythonAstREPLTool(
locals={"df": df},
name="python_repl",
description="Runs code and returns the output of the final line",
args_schema=PythonInputs,
)
tools = [repl, retriever_tool]
agent = OpenAIFunctionsAgent(llm=ChatOpenAI(temperature=0, model="gpt-4"), prompt=prompt, tools=tools)
agent_executor = AgentExecutor(agent=agent, tools=tools, max_iterations=5, early_stopping_method="generate")
agent = OpenAIFunctionsAgent(
llm=ChatOpenAI(temperature=0, model="gpt-4"), prompt=prompt, tools=tools
)
agent_executor = AgentExecutor(
agent=agent, tools=tools, max_iterations=5, early_stopping_method="generate"
)
return agent_executor


client = Client()
eval_config = RunEvalConfig(
evaluators=[
"qa"
],
evaluators=["qa"],
)
chain_results = run_on_dataset(
client,
Expand Down
13 changes: 5 additions & 8 deletions csv-qa/pandas_agent_gpt_35.py
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
import pandas as pd
from langchain.chat_models import ChatOpenAI
from langchain.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain.agents.agent_types import AgentType
from langsmith import Client
from langchain.chat_models import ChatOpenAI
from langchain.smith import RunEvalConfig, run_on_dataset
from langsmith import Client

if __name__ == "__main__":
df = pd.read_csv("titanic.csv")
Expand All @@ -18,20 +18,17 @@ def get_chain():
df,
agent_type=AgentType.OPENAI_FUNCTIONS,
agent_executor_kwargs=agent_executor_kwargs,
max_iterations=5
max_iterations=5,
)
return agent


client = Client()
eval_config = RunEvalConfig(
evaluators=[
"qa"
],
evaluators=["qa"],
)
chain_results = run_on_dataset(
client,
dataset_name="Titanic CSV Data",
llm_or_chain_factory=get_chain,
evaluation=eval_config,
)
)
14 changes: 5 additions & 9 deletions csv-qa/pandas_agent_gpt_4.py
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,13 @@
import pandas as pd
from langchain.chat_models import ChatOpenAI
from langchain.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain.agents.agent_types import AgentType
from langsmith import Client
from langchain.chat_models import ChatOpenAI
from langchain.smith import RunEvalConfig, run_on_dataset
from langsmith import Client

if __name__ == "__main__":
df = pd.read_csv("titanic.csv")


def get_chain():
llm = ChatOpenAI(temperature=0, model="gpt-4")
agent_executor_kwargs = {
Expand All @@ -19,20 +18,17 @@ def get_chain():
df,
agent_type=AgentType.OPENAI_FUNCTIONS,
agent_executor_kwargs=agent_executor_kwargs,
max_iterations=5
max_iterations=5,
)
return agent


client = Client()
eval_config = RunEvalConfig(
evaluators=[
"qa"
],
evaluators=["qa"],
)
chain_results = run_on_dataset(
client,
dataset_name="Titanic CSV Data",
llm_or_chain_factory=get_chain,
evaluation=eval_config,
)
)
51 changes: 29 additions & 22 deletions csv-qa/pandas_agent_instruct.py
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,25 @@
from langchain.agents import ZeroShotAgent, AgentExecutor
from langchain.prompts import PromptTemplate
from langchain.tools import PythonAstREPLTool
import pandas as pd
from langchain.agents import AgentExecutor, ZeroShotAgent
from langchain.agents.agent_toolkits.conversational_retrieval.tool import (
create_retriever_tool,
)
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langsmith import Client
from langchain.prompts import PromptTemplate
from langchain.smith import RunEvalConfig, run_on_dataset
from pydantic import BaseModel, Field
from langchain.embeddings import OpenAIEmbeddings
from langchain.tools import PythonAstREPLTool
from langchain.vectorstores import FAISS
from langchain.agents.agent_toolkits.conversational_retrieval.tool import create_retriever_tool

from langsmith import Client
from pydantic import BaseModel, Field

pd.set_option('display.max_rows', 20)
pd.set_option('display.max_columns', 20)
pd.set_option("display.max_rows", 20)
pd.set_option("display.max_columns", 20)

embedding_model = OpenAIEmbeddings()
vectorstore = FAISS.load_local("titanic_data", embedding_model)
retriever_tool = create_retriever_tool(vectorstore.as_retriever(), "person_name_search", "Search for a person by name")
retriever_tool = create_retriever_tool(
vectorstore.as_retriever(), "person_name_search", "Search for a person by name"
)


TEMPLATE = """You are working with a pandas dataframe in Python. The name of the dataframe is `df`.
Expand All @@ -41,7 +44,6 @@
<logic>Use `python_repl` since even though the question is about a person, you don't know their name so you can't include it.</logic>"""



class PythonInputs(BaseModel):
query: str = Field(description="code snippet to run")

Expand All @@ -50,22 +52,27 @@ class PythonInputs(BaseModel):
df = pd.read_csv("titanic.csv")
template = TEMPLATE.format(dhead=df.head().to_markdown())


def get_chain():
repl = PythonAstREPLTool(locals={"df": df}, name="python_repl",
description="Runs code and returns the output of the final line",
args_schema=PythonInputs)
repl = PythonAstREPLTool(
locals={"df": df},
name="python_repl",
description="Runs code and returns the output of the final line",
args_schema=PythonInputs,
)
tools = [repl, retriever_tool]
agent = ZeroShotAgent.from_llm_and_tools(llm=OpenAI(temperature=0, model="gpt-3.5-turbo-instruct"), tools=tools, prefix=template)
agent_executor = AgentExecutor(agent=agent, tools=tools, max_iterations=5, early_stopping_method="generate")
agent = ZeroShotAgent.from_llm_and_tools(
llm=OpenAI(temperature=0, model="gpt-3.5-turbo-instruct"),
tools=tools,
prefix=template,
)
agent_executor = AgentExecutor(
agent=agent, tools=tools, max_iterations=5, early_stopping_method="generate"
)
return agent_executor


client = Client()
eval_config = RunEvalConfig(
evaluators=[
"qa"
],
evaluators=["qa"],
)
chain_results = run_on_dataset(
client,
Expand Down
45 changes: 24 additions & 21 deletions csv-qa/pandas_ai.py
Original file line number | Diff line number | Diff line change
@@ -1,44 +1,47 @@
import pandas as pd
from langchain.chat_models import ChatOpenAI
from langchain.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain.agents.agent_types import AgentType
from langsmith import Client
from langchain.smith import RunEvalConfig, run_on_dataset
import pandas as pd
from pandasai import PandasAI

from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.smith import RunEvalConfig, run_on_dataset
from langsmith import Client
from pandasai import PandasAI

if __name__ == "__main__":
df = pd.read_csv("titanic.csv")

pandas_ai = PandasAI(ChatOpenAI(temperature=0, model="gpt-4"), enable_cache=False)
prompt = ChatPromptTemplate.from_messages([
("system",
"Answer the users question about some data. A data scientist will run some code and the results will be returned to you to use in your answer"),
("human", "Question: {input}"),
("human", "Data Scientist Result: {result}"),
])
prompt = ChatPromptTemplate.from_messages(
[
(
"system",
"Answer the users question about some data. A data scientist will run some code and the results will be returned to you to use in your answer",
),
("human", "Question: {input}"),
("human", "Data Scientist Result: {result}"),
]
)

def get_chain():
chain = {
"input": lambda x: x["input_question"],
"result": lambda x: pandas_ai(df, prompt=x['input_question'])
} | prompt | ChatOpenAI(temperature=0, model="gpt-4") | StrOutputParser()
chain = (
{
"input": lambda x: x["input_question"],
"result": lambda x: pandas_ai(df, prompt=x["input_question"]),
}
| prompt
| ChatOpenAI(temperature=0, model="gpt-4")
| StrOutputParser()
)
return chain


client = Client()
eval_config = RunEvalConfig(
evaluators=[
"qa"
],
evaluators=["qa"],
)
chain_results = run_on_dataset(
client,
dataset_name="Titanic CSV Data",
llm_or_chain_factory=get_chain,
evaluation=eval_config,
)
)
Loading

0 comments on commit 9894475

Please sign in to comment.