From a0e1080dc498f5bbdf73ae47f50dd7c23798043b Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Fri, 17 Nov 2023 15:09:05 +0100
Subject: [PATCH 1/4] Add warning filter for pydantic warning emitted by HF
 import (#375)

* Add warning filter for pydantic warning emitted by HF import.

* Add warning filter for Apple Paravirtual warning.
---
 pyproject.toml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 54def2f1..e5dfbc7c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,7 +24,9 @@ filterwarnings = [
     "ignore:^.*The `dict` method is deprecated; use `model_dump` instead.*",
     "ignore:^.*The `parse_obj` method is deprecated; use `model_validate` instead.*",
     "ignore:^.*`__get_validators__` is deprecated.*",
-    "ignore:^.*The `construct` method is deprecated.*"
+    "ignore:^.*The `construct` method is deprecated.*",
+    "ignore:^.*Skipping device Apple Paravirtual device that does not support Metal 2.0.*",
+    "ignore:^.*Pydantic V1 style `@validator` validators are deprecated."
 ]
 markers = [
     "external: interacts with a (potentially cost-incurring) third-party API",

From 6f2b2416cd23f28fac4b1feeec24c7cff9e83ad2 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Fri, 17 Nov 2023 15:15:28 +0100
Subject: [PATCH 2/4] Add workaround for `langchain` model ID issue (#374)

* Add workaround for langchain model ID issue.

* Refactor.

* Extend filterwarnings.

* Revert filterwarnings.

* Extend filterwarnings.

* Fix pydantic imports.

* Extend filterwarnings.
---
 pyproject.toml                           |  2 +-
 spacy_llm/compat.py                      |  5 +-
 spacy_llm/models/langchain/model.py      | 69 ++++++++++++++++--------
 spacy_llm/tests/models/test_langchain.py | 11 +++-
 4 files changed, 60 insertions(+), 27 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index e5dfbc7c..1ba77808 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,7 +26,7 @@ filterwarnings = [
     "ignore:^.*`__get_validators__` is deprecated.*",
     "ignore:^.*The `construct` method is deprecated.*",
     "ignore:^.*Skipping device Apple Paravirtual device that does not support Metal 2.0.*",
-    "ignore:^.*Pydantic V1 style `@validator` validators are deprecated."
+    "ignore:^.*Pydantic V1 style `@validator` validators are deprecated.*"
 ]
 markers = [
     "external: interacts with a (potentially cost-incurring) third-party API",
diff --git a/spacy_llm/compat.py b/spacy_llm/compat.py
index 76148b77..1554622f 100644
--- a/spacy_llm/compat.py
+++ b/spacy_llm/compat.py
@@ -55,6 +55,7 @@
 PYDANTIC_V2 = VERSION.startswith("2.")
 
 if PYDANTIC_V2:
-    from pydantic.v1 import BaseModel, ValidationError, validator  # noqa: F401
+    from pydantic.v1 import BaseModel, ExtraError, ValidationError  # noqa: F401
+    from pydantic.v1 import validator
 else:
-    from pydantic import BaseModel, ValidationError, validator  # noqa: F401
+    from pydantic import BaseModel, ExtraError, ValidationError, validator  # noqa: F401
diff --git a/spacy_llm/models/langchain/model.py b/spacy_llm/models/langchain/model.py
index 03657cdf..2e4be55f 100644
--- a/spacy_llm/models/langchain/model.py
+++ b/spacy_llm/models/langchain/model.py
@@ -2,11 +2,10 @@
 
 from confection import SimpleFrozenDict
 
-from ...compat import has_langchain, langchain
+from ...compat import ExtraError, ValidationError, has_langchain, langchain
 from ...registry import registry
 
 try:
-    from langchain import base_language  # noqa: F401
     from langchain import llms  # noqa: F401
 except (ImportError, AttributeError):
     llms = None
@@ -18,30 +17,59 @@ def __init__(
         name: str,
         api: str,
         config: Dict[Any, Any],
-        query: Callable[
-            ["langchain.base_language.BaseLanguageModel", Iterable[Any]],
-            Iterable[Any],
-        ],
+        query: Callable[["langchain.llms.BaseLLM", Iterable[Any]], Iterable[Any]],
     ):
         """Initializes model instance for integration APIs.
         name (str): Name of LangChain model to instantiate.
         api (str): Name of class/API.
         config (Dict[Any, Any]): Config passed on to LangChain model.
-        query (Callable[[Any, Iterable[_PromptType]], Iterable[_ResponseType]]): Callable executing LLM prompts when
+        query (Callable[[langchain.llms.BaseLLM, Iterable[Any]], Iterable[Any]]): Callable executing LLM prompts when
             supplied with the `integration` object.
         """
-        self._langchain_model = LangChain.get_type_to_cls_dict()[api](
-            model_name=name, **config
-        )
+        self._langchain_model = LangChain._init_langchain_model(name, api, config)
         self.query = query
         self._check_installation()
 
+    @classmethod
+    def _init_langchain_model(
+        cls, name: str, api: str, config: Dict[Any, Any]
+    ) -> "langchain.llms.BaseLLM":
+        """Initializes langchain model. langchain expects different model ID argument names ("model", "model_name",
+        "model_id") depending on the model class. There doesn't seem to be a clean way to determine the right one
+        from the outset, so we try them in order and return the first model that passes validation.
+        Raises a ValueError if the last candidate is rejected as an extraneous argument, i.e. none of the known
+        model ID arguments could be used; any other validation error for the last candidate is re-raised as-is.
+        name (str): Name of LangChain model to instantiate.
+        api (str): Name of class/API.
+        config (Dict[Any, Any]): Config passed on to LangChain model.
+        RETURNS (langchain.llms.BaseLLM): Initialized LangChain model.
+        """
+        model_cls = cls.get_type_to_cls_dict()[api]
+        model_init_args = ("model", "model_name", "model_id")
+        for model_init_arg in model_init_args:
+            try:
+                return model_cls(**{model_init_arg: name}, **config)
+            except ValidationError as err:
+                # Validation errors for all but the last candidate are ignored: the next candidate is tried.
+                if model_init_arg != model_init_args[-1]:
+                    continue
+                # If init error indicates that model ID arg is extraneous: raise error with hint on how to proceed.
+                if any(
+                    isinstance(rerr.exc, ExtraError)
+                    and model_init_arg in rerr.loc_tuple()
+                    for rerr in err.raw_errors
+                ):
+                    raise ValueError(
+                        "Couldn't initialize LangChain model with known model ID arguments. Please report this to "
+                        "https://github.com/explosion/spacy-llm/issues. Thank you!"
+                    ) from err
+                # Otherwise: re-raise the validation error as-is (bare `raise` keeps the original traceback).
+                raise
+
     @staticmethod
-    def get_type_to_cls_dict() -> Dict[
-        str, Type["langchain.base_language.BaseLanguageModel"]
-    ]:
+    def get_type_to_cls_dict() -> Dict[str, Type["langchain.llms.BaseLLM"]]:
         """Returns langchain.llms.type_to_cls_dict.
-        RETURNS (Dict[str, Type[langchain.base_language.BaseLanguageModel]]): langchain.llms.type_to_cls_dict.
+        RETURNS (Dict[str, Type[langchain.llms.BaseLLM]]): langchain.llms.type_to_cls_dict.
         """
         return getattr(langchain.llms, "type_to_cls_dict")
 
@@ -54,10 +82,10 @@ def __call__(self, prompts: Iterable[Any]) -> Iterable[Any]:
 
     @staticmethod
     def query_langchain(
-        model: "langchain.base_language.BaseLanguageModel", prompts: Iterable[Any]
+        model: "langchain.llms.BaseLLM", prompts: Iterable[Any]
     ) -> Iterable[Any]:
         """Query LangChain model naively.
-        model (langchain.base_language.BaseLanguageModel): LangChain model.
+        model (langchain.llms.BaseLLM): LangChain model.
         prompts (Iterable[Any]): Prompts to execute.
         RETURNS (Iterable[Any]): LLM responses.
         """
@@ -77,10 +105,7 @@ def _langchain_model_maker(class_id: str):
         def langchain_model(
             name: str,
             query: Optional[
-                Callable[
-                    ["langchain.base_language.BaseLanguageModel", Iterable[str]],
-                    Iterable[str],
-                ]
+                Callable[["langchain.llms.BaseLLM", Iterable[str]], Iterable[str]]
             ] = None,
             config: Dict[Any, Any] = SimpleFrozenDict(),
             langchain_class_id: str = class_id,
@@ -123,9 +148,7 @@ def register_models() -> None:
 
 @registry.llm_queries("spacy.CallLangChain.v1")
 def query_langchain() -> (
-    Callable[
-        ["langchain.base_language.BaseLanguageModel", Iterable[Any]], Iterable[Any]
-    ]
+    Callable[["langchain.llms.BaseLLM", Iterable[Any]], Iterable[Any]]
 ):
     """Returns query Callable for LangChain.
     RETURNS (Callable[["langchain.llms.BaseLLM", Iterable[Any]], Iterable[Any]]:): Callable executing simple prompts on
diff --git a/spacy_llm/tests/models/test_langchain.py b/spacy_llm/tests/models/test_langchain.py
index fd48e0bb..57e984dc 100644
--- a/spacy_llm/tests/models/test_langchain.py
+++ b/spacy_llm/tests/models/test_langchain.py
@@ -1,9 +1,11 @@
 import os
+from typing import List
 
 import pytest
 import spacy
 
 from spacy_llm.compat import has_langchain
+from spacy_llm.models.langchain import LangChain
 from spacy_llm.tests.compat import has_azure_openai_key
 
 PIPE_CFG = {
@@ -11,12 +13,19 @@
         "@llm_models": "langchain.OpenAI.v1",
         "name": "ada",
         "config": {"temperature": 0.3},
-        "query": {"@llm_queries": "spacy.CallLangChain.v1"},
     },
     "task": {"@llm_tasks": "spacy.NoOp.v1"},
 }
 
 
+def langchain_model_reg_handles() -> List[str]:
+    """Returns a list of all LangChain model reg handles.
+    RETURNS (List[str]): One `langchain.{class name}.v1` handle per class in `LangChain.get_type_to_cls_dict()`.
+    """
+    model_classes = LangChain.get_type_to_cls_dict().values()
+    return [f"langchain.{cls.__name__}.v1" for cls in model_classes]
+
+
 @pytest.mark.external
 @pytest.mark.skipif(has_langchain is False, reason="LangChain is not installed")
 def test_initialization():

From 230ec00bbe6fb023359b1482e35b21f3257bf5b3 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Fri, 17 Nov 2023 18:03:37 +0100
Subject: [PATCH 3/4] Bump version. (#376)

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index 1495c6a2..03acef17 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [metadata]
-version = 0.6.3
+version = 0.6.4
 description = Integrating LLMs into structured NLP pipelines
 author = Explosion
 author_email = contact@explosion.ai

From b3bae16e481e549ec2c6ed6e43712e2f626bbfe2 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Wed, 22 Nov 2023 13:00:03 +0100
Subject: [PATCH 4/4] Add merch link [ci skip]

---
 .github/FUNDING.yml | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 .github/FUNDING.yml

diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644
index 00000000..a9faa302
--- /dev/null
+++ b/.github/FUNDING.yml
@@ -0,0 +1 @@
+custom: https://explosion.ai/merch