@@ -129,7 +129,7 @@ class LangchainLLMWrapper(BaseRagasLLM):
     def __init__(
         self,
-        langchain_llm: BaseLanguageModel,
+        langchain_llm: BaseLanguageModel[BaseMessage],
         run_config: t.Optional[RunConfig] = None,
         is_finished_parser: t.Optional[t.Callable[[LLMResult], bool]] = None,
         cache: t.Optional[CacheInterface] = None,
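Narrowing the parameter to `BaseLanguageModel[BaseMessage]` documents that the wrapper expects a chat-style model (one whose outputs are `BaseMessage`s) rather than any language model. A minimal sketch of a call site under the new annotation, assuming `langchain_openai` is installed and that ragas exposes the wrapper as `ragas.llms.LangchainLLMWrapper`:

```python
from langchain_openai import ChatOpenAI  # assumed installed
from ragas.llms import LangchainLLMWrapper  # assumed import path

# ChatOpenAI subclasses BaseChatModel, which is declared as
# BaseLanguageModel[BaseMessage], so it satisfies the narrowed type;
# a completion-style LLM (BaseLanguageModel[str]) would now be flagged
# by the type checker.
wrapper = LangchainLLMWrapper(langchain_llm=ChatOpenAI(model="gpt-4o-mini"))
```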
@@ -198,29 +198,36 @@ def generate_text(
         callbacks: Callbacks = None,
     ) -> LLMResult:
         # figure out the temperature to set
+        old_temperature: float | None = None
         if temperature is None:
             temperature = self.get_temperature(n=n)
+        if hasattr(self.langchain_llm, "temperature"):
+            old_temperature = self.langchain_llm.temperature  # type: ignore
+            self.langchain_llm.temperature = temperature  # type: ignore
 
         if is_multiple_completion_supported(self.langchain_llm):
-            return self.langchain_llm.generate_prompt(
+            result = self.langchain_llm.generate_prompt(
                 prompts=[prompt],
                 n=n,
-                temperature=temperature,
                 stop=stop,
                 callbacks=callbacks,
             )
         else:
             result = self.langchain_llm.generate_prompt(
                 prompts=[prompt] * n,
-                temperature=temperature,
                 stop=stop,
                 callbacks=callbacks,
             )
             # make LLMResult.generation appear as if it was n_completions
             # note that LLMResult.runs is still a list that represents each run
             generations = [[g[0] for g in result.generations]]
             result.generations = generations
-            return result
+
+        # reset the temperature to the original value
+        if old_temperature is not None:
+            self.langchain_llm.temperature = old_temperature  # type: ignore
+
+        return result
 
     async def agenerate_text(
         self,
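Since `generate_text` now mutates `temperature` on the shared `langchain_llm` instance and restores it before returning, the same save/restore idiom can be isolated for clarity. A minimal sketch using only the standard library; `temporary_attr` is a hypothetical helper, not part of ragas or this diff:

```python
from contextlib import contextmanager
from typing import Any, Iterator

@contextmanager
def temporary_attr(obj: Any, name: str, value: Any) -> Iterator[None]:
    """Temporarily set obj.<name> to value, restoring the old value on exit."""
    old = getattr(obj, name)
    setattr(obj, name, value)
    try:
        yield
    finally:
        # restored even if the body raises, unlike the inline version above
        setattr(obj, name, old)

# usage sketch (hypothetical llm object):
# with temporary_attr(llm, "temperature", 0.01):
#     result = llm.generate_prompt(...)
```

The `try/finally` form would also restore the value when `generate_prompt` raises, which the inline set/reset in the diff does not guard against.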
@@ -230,29 +237,38 @@ async def agenerate_text(
         stop: t.Optional[t.List[str]] = None,
         callbacks: Callbacks = None,
     ) -> LLMResult:
+        # handle temperature
+        old_temperature: float | None = None
         if temperature is None:
             temperature = self.get_temperature(n=n)
+        if hasattr(self.langchain_llm, "temperature"):
+            old_temperature = self.langchain_llm.temperature  # type: ignore
+            self.langchain_llm.temperature = temperature  # type: ignore
 
-        if is_multiple_completion_supported(self.langchain_llm):
-            return await self.langchain_llm.agenerate_prompt(
+        # handle n
+        if hasattr(self.langchain_llm, "n"):
+            self.langchain_llm.n = n  # type: ignore
+            result = await self.langchain_llm.agenerate_prompt(
                 prompts=[prompt],
-                n=n,
-                temperature=temperature,
                 stop=stop,
                 callbacks=callbacks,
             )
         else:
             result = await self.langchain_llm.agenerate_prompt(
                 prompts=[prompt] * n,
-                temperature=temperature,
                 stop=stop,
                 callbacks=callbacks,
             )
             # make LLMResult.generation appear as if it was n_completions
             # note that LLMResult.runs is still a list that represents each run
             generations = [[g[0] for g in result.generations]]
             result.generations = generations
-            return result
+
+        # reset the temperature to the original value
+        if old_temperature is not None:
+            self.langchain_llm.temperature = old_temperature  # type: ignore
+
+        return result
 
     def set_run_config(self, run_config: RunConfig):
         self.run_config = run_config
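In the async path the diff switches from `is_multiple_completion_supported` to probing for an `n` attribute: models that expose `n` (for example `ChatOpenAI`) produce all completions in a single call, while other models get the prompt duplicated `n` times and their per-prompt generations regrouped. A toy illustration of that regrouping, with assumed placeholder texts:

```python
from langchain_core.outputs import Generation, LLMResult

# three duplicated prompts, one generation each (placeholder texts)
result = LLMResult(
    generations=[
        [Generation(text="a")],
        [Generation(text="b")],
        [Generation(text="c")],
    ]
)

# the wrapper's regrouping: one prompt that appears to have n completions
result.generations = [[g[0] for g in result.generations]]
assert [g.text for g in result.generations[0]] == ["a", "b", "c"]
```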