From a0e1080dc498f5bbdf73ae47f50dd7c23798043b Mon Sep 17 00:00:00 2001 From: Raphael Mitsch Date: Fri, 17 Nov 2023 15:09:05 +0100 Subject: [PATCH 1/4] Add warning filter for pydantic warning emitted by HF import (#375) * Add warning filter for pydantic warning emitted by HF import. * Add warning filter for Apple Paravirtual warning. --- pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 54def2f1..e5dfbc7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,9 @@ filterwarnings = [ "ignore:^.*The `dict` method is deprecated; use `model_dump` instead.*", "ignore:^.*The `parse_obj` method is deprecated; use `model_validate` instead.*", "ignore:^.*`__get_validators__` is deprecated.*", - "ignore:^.*The `construct` method is deprecated.*" + "ignore:^.*The `construct` method is deprecated.*", + "ignore:^.*Skipping device Apple Paravirtual device that does not support Metal 2.0.*", + "ignore:^.*Pydantic V1 style `@validator` validators are deprecated." ] markers = [ "external: interacts with a (potentially cost-incurring) third-party API", From 6f2b2416cd23f28fac4b1feeec24c7cff9e83ad2 Mon Sep 17 00:00:00 2001 From: Raphael Mitsch Date: Fri, 17 Nov 2023 15:15:28 +0100 Subject: [PATCH 2/4] Add workaround for `langchain` model ID issue (#374) * Add workaround for langchain model ID issue. * Refactor. * Extend filterwarnings. * Revert filterwarnings. * Extend filterwarnings. * Fix pydantic imports. * Extend filterwarnings. --- pyproject.toml | 2 +- spacy_llm/compat.py | 5 +- spacy_llm/models/langchain/model.py | 69 ++++++++++++++++-------- spacy_llm/tests/models/test_langchain.py | 11 +++- 4 files changed, 60 insertions(+), 27 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e5dfbc7c..1ba77808 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ filterwarnings = [ "ignore:^.*`__get_validators__` is deprecated.*", "ignore:^.*The `construct` method is deprecated.*", "ignore:^.*Skipping device Apple Paravirtual device that does not support Metal 2.0.*", - "ignore:^.*Pydantic V1 style `@validator` validators are deprecated." + "ignore:^.*Pydantic V1 style `@validator` validators are deprecated.*" ] markers = [ "external: interacts with a (potentially cost-incurring) third-party API", diff --git a/spacy_llm/compat.py b/spacy_llm/compat.py index 76148b77..1554622f 100644 --- a/spacy_llm/compat.py +++ b/spacy_llm/compat.py @@ -55,6 +55,7 @@ PYDANTIC_V2 = VERSION.startswith("2.") if PYDANTIC_V2: - from pydantic.v1 import BaseModel, ValidationError, validator # noqa: F401 + from pydantic.v1 import BaseModel, ExtraError, ValidationError # noqa: F401 + from pydantic.v1 import validator else: - from pydantic import BaseModel, ValidationError, validator # noqa: F401 + from pydantic import BaseModel, ExtraError, ValidationError, validator # noqa: F401 diff --git a/spacy_llm/models/langchain/model.py b/spacy_llm/models/langchain/model.py index 03657cdf..2e4be55f 100644 --- a/spacy_llm/models/langchain/model.py +++ b/spacy_llm/models/langchain/model.py @@ -2,11 +2,10 @@ from confection import SimpleFrozenDict -from ...compat import has_langchain, langchain +from ...compat import ExtraError, ValidationError, has_langchain, langchain from ...registry import registry try: - from langchain import base_language # noqa: F401 from langchain import llms # noqa: F401 except (ImportError, AttributeError): llms = None @@ -18,30 +17,59 @@ def __init__( name: str, api: str, config: Dict[Any, Any], - query: Callable[ - ["langchain.base_language.BaseLanguageModel", Iterable[Any]], - Iterable[Any], - ], + query: Callable[["langchain.llms.BaseLLM", Iterable[Any]], Iterable[Any]], ): """Initializes model instance for integration APIs. name (str): Name of LangChain model to instantiate. api (str): Name of class/API. config (Dict[Any, Any]): Config passed on to LangChain model. - query (Callable[[Any, Iterable[_PromptType]], Iterable[_ResponseType]]): Callable executing LLM prompts when + query (Callable[[langchain.llms.BaseLLM, Iterable[Any]], Iterable[Any]]): Callable executing LLM prompts when supplied with the `integration` object. """ - self._langchain_model = LangChain.get_type_to_cls_dict()[api]( - model_name=name, **config - ) + self._langchain_model = LangChain._init_langchain_model(name, api, config) self.query = query self._check_installation() + @classmethod + def _init_langchain_model( + cls, name: str, api: str, config: Dict[Any, Any] + ) -> "langchain.llms.BaseLLM": + """Initializes langchain model. langchain expects a range of different model ID argument names, depending on the + model class. There doesn't seem to be a clean way to determine those from the outset, we'll fail our way through + them. + Includes error checks for model ID arguments. + name (str): Name of LangChain model to instantiate. + api (str): Name of class/API. + config (Dict[Any, Any]): Config passed on to LangChain model. + """ + model_init_args = ["model", "model_name", "model_id"] + for model_init_arg in model_init_args: + try: + return cls.get_type_to_cls_dict()[api]( + **{model_init_arg: name}, **config + ) + except ValidationError as err: + if model_init_arg == model_init_args[-1]: + # If init error indicates that model ID arg is extraneous: raise error with hint on how to proceed. + if any( + [ + rerr + for rerr in err.raw_errors + if isinstance(rerr.exc, ExtraError) + and model_init_arg in rerr.loc_tuple() + ] + ): + raise ValueError( + "Couldn't initialize LangChain model with known model ID arguments. Please report this to " + "https://github.com/explosion/spacy-llm/issues. Thank you!" + ) from err + # Otherwise: raise error as-is. + raise err + @staticmethod - def get_type_to_cls_dict() -> Dict[ - str, Type["langchain.base_language.BaseLanguageModel"] - ]: + def get_type_to_cls_dict() -> Dict[str, Type["langchain.llms.BaseLLM"]]: """Returns langchain.llms.type_to_cls_dict. - RETURNS (Dict[str, Type[langchain.base_language.BaseLanguageModel]]): langchain.llms.type_to_cls_dict. + RETURNS (Dict[str, Type[langchain.llms.BaseLLM]]): langchain.llms.type_to_cls_dict. """ return getattr(langchain.llms, "type_to_cls_dict") @@ -54,10 +82,10 @@ def __call__(self, prompts: Iterable[Any]) -> Iterable[Any]: @staticmethod def query_langchain( - model: "langchain.base_language.BaseLanguageModel", prompts: Iterable[Any] + model: "langchain.llms.BaseLLM", prompts: Iterable[Any] ) -> Iterable[Any]: """Query LangChain model naively. - model (langchain.base_language.BaseLanguageModel): LangChain model. + model (langchain.llms.BaseLLM): LangChain model. prompts (Iterable[Any]): Prompts to execute. RETURNS (Iterable[Any]): LLM responses. """ @@ -77,10 +105,7 @@ def _langchain_model_maker(class_id: str): def langchain_model( name: str, query: Optional[ - Callable[ - ["langchain.base_language.BaseLanguageModel", Iterable[str]], - Iterable[str], - ] + Callable[["langchain.llms.BaseLLM", Iterable[str]], Iterable[str]] ] = None, config: Dict[Any, Any] = SimpleFrozenDict(), langchain_class_id: str = class_id, @@ -123,9 +148,7 @@ def register_models() -> None: @registry.llm_queries("spacy.CallLangChain.v1") def query_langchain() -> ( - Callable[ - ["langchain.base_language.BaseLanguageModel", Iterable[Any]], Iterable[Any] - ] + Callable[["langchain.llms.BaseLLM", Iterable[Any]], Iterable[Any]] ): """Returns query Callable for LangChain. RETURNS (Callable[["langchain.llms.BaseLLM", Iterable[Any]], Iterable[Any]]:): Callable executing simple prompts on diff --git a/spacy_llm/tests/models/test_langchain.py b/spacy_llm/tests/models/test_langchain.py index fd48e0bb..57e984dc 100644 --- a/spacy_llm/tests/models/test_langchain.py +++ b/spacy_llm/tests/models/test_langchain.py @@ -1,9 +1,11 @@ import os +from typing import List import pytest import spacy from spacy_llm.compat import has_langchain +from spacy_llm.models.langchain import LangChain from spacy_llm.tests.compat import has_azure_openai_key PIPE_CFG = { @@ -11,12 +13,19 @@ "@llm_models": "langchain.OpenAI.v1", "name": "ada", "config": {"temperature": 0.3}, - "query": {"@llm_queries": "spacy.CallLangChain.v1"}, }, "task": {"@llm_tasks": "spacy.NoOp.v1"}, } +def langchain_model_reg_handles() -> List[str]: + """Returns a list of all LangChain model reg handles.""" + return [ + f"langchain.{cls.__name__}.v1" + for class_id, cls in LangChain.get_type_to_cls_dict().items() + ] + + @pytest.mark.external @pytest.mark.skipif(has_langchain is False, reason="LangChain is not installed") def test_initialization(): From 230ec00bbe6fb023359b1482e35b21f3257bf5b3 Mon Sep 17 00:00:00 2001 From: Raphael Mitsch Date: Fri, 17 Nov 2023 18:03:37 +0100 Subject: [PATCH 3/4] Bump version. (#376) --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 1495c6a2..03acef17 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [metadata] -version = 0.6.3 +version = 0.6.4 description = Integrating LLMs into structured NLP pipelines author = Explosion author_email = contact@explosion.ai From b3bae16e481e549ec2c6ed6e43712e2f626bbfe2 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Wed, 22 Nov 2023 13:00:03 +0100 Subject: [PATCH 4/4] Add merch link [ci skip] --- .github/FUNDING.yml | 1 + 1 file changed, 1 insertion(+) create mode 100644 .github/FUNDING.yml diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 00000000..a9faa302 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +custom: https://explosion.ai/merch