diff --git a/spacy_llm/cache.py b/spacy_llm/cache.py
index 3e1559e1..92f4e80b 100644
--- a/spacy_llm/cache.py
+++ b/spacy_llm/cache.py
@@ -72,7 +72,7 @@ def initialize(self, vocab: Vocab, task: LLMTask) -> None:
         """
         Initialize cache with data not available at construction time.
         vocab (Vocab): Vocab object.
-        task (LLMTask): Task.
+        task (ShardingLLMTask): Task.
         """
         self._vocab = vocab
         if isinstance(task, PromptTemplateProvider):
diff --git a/spacy_llm/tasks/__init__.py b/spacy_llm/tasks/__init__.py
index 1b297142..0bfd7930 100644
--- a/spacy_llm/tasks/__init__.py
+++ b/spacy_llm/tasks/__init__.py
@@ -7,6 +7,7 @@
 from .lemma import LemmaTask, make_lemma_task
 from .ner import NERTask, make_ner_task_v3
 from .noop import NoopTask, ShardingNoopTask, make_noop_task, make_noopnoshards_task
+from .raw import RawTask, make_raw_task
 from .rel import RELTask, make_rel_task
 from .sentiment import SentimentTask, make_sentiment_task
 from .spancat import SpanCatTask, make_spancat_task_v3
@@ -16,6 +17,7 @@
 _LATEST_TASKS = (
     "spacy.EntityLinker.v1",
     "spacy.NER.v3",
+    "spacy.Raw.v1",
     "spacy.REL.v1",
     "spacy.Sentiment.v1",
     "spacy.SpanCat.v3",
@@ -43,6 +45,7 @@
     "make_ner_task_v3",
     "make_noop_task",
     "make_noopnoshards_task",
+    "make_raw_task",
     "make_rel_task",
     "make_sentiment_task",
     "make_spancat_task_v3",
@@ -53,6 +56,7 @@
     "LemmaTask",
     "NERTask",
     "NoopTask",
+    "RawTask",
     "RELTask",
     "SentimentTask",
     "ShardingNoopTask",
diff --git a/spacy_llm/tasks/lemma/registry.py b/spacy_llm/tasks/lemma/registry.py
index d4d555d3..4b8e8b45 100644
--- a/spacy_llm/tasks/lemma/registry.py
+++ b/spacy_llm/tasks/lemma/registry.py
@@ -41,7 +41,6 @@ def make_lemma_task(
     prompt_example_type (Optional[Type[FewshotExample]]): Type to use for fewshot examples.
     examples (ExamplesConfigType): Optional callable that reads a file containing task examples for
         few-shot learning. If None is passed, then zero-shot learning will be used.
-    n_token_estimator (Optional[NTokenEstimator]): Estimates number of tokens in a string.
     shard_mapper (Optional[ShardMapper]): Maps docs to shards if they don't fit into the model context.
     shard_reducer (Optional[ShardReducer]): Reduces doc shards back into one doc instance.
     scorer (Optional[Scorer]): Scorer function.
diff --git a/spacy_llm/tasks/lemma/util.py b/spacy_llm/tasks/lemma/util.py
index a77f8507..786a57eb 100644
--- a/spacy_llm/tasks/lemma/util.py
+++ b/spacy_llm/tasks/lemma/util.py
@@ -1,3 +1,4 @@
+import warnings
 from typing import Any, Dict, Iterable, List, Optional
 
 from spacy.scorer import Scorer
@@ -34,4 +35,10 @@ def reduce_shards_to_doc(task: LemmaTask, shards: Iterable[Doc]) -> Doc:
     RETURNS (Doc): Fused doc instance.
     """
     # Lemmas are token-specific, so we can just merge shards.
-    return Doc.from_docs(list(shards), ensure_whitespace=True)
+    with warnings.catch_warnings():
+        warnings.filterwarnings(
+            "ignore",
+            category=UserWarning,
+            message=".*Skipping .* while merging docs.",
+        )
+        return Doc.from_docs(list(shards), ensure_whitespace=True)
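For context: `Doc.from_docs` warns and skips doc-level custom extension values while merging, and shard docs always carry such extensions, hence the filter above. A minimal standalone sketch of the behavior being suppressed (assumes only spaCy; the extension name is illustrative):

```python
import warnings

import spacy
from spacy.tokens import Doc

nlp = spacy.blank("en")
# Doc-level extension, analogous to what spacy-llm tasks set on shards.
Doc.set_extension("llm_reply", default=None)

shards = [nlp("First shard."), nlp("Second shard.")]
for shard in shards:
    shard._.llm_reply = "some reply"

with warnings.catch_warnings():
    # Same pattern as in reduce_shards_to_doc: doc-level extension values
    # cannot be merged, so spaCy emits a UserWarning ("Skipping Doc custom
    # extension 'llm_reply' while merging docs.") and drops them.
    warnings.filterwarnings(
        "ignore", category=UserWarning, message=".*Skipping .* while merging docs."
    )
    merged = Doc.from_docs(shards, ensure_whitespace=True)

print(merged.text)  # First shard. Second shard.
```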
diff --git a/spacy_llm/tasks/raw/__init__.py b/spacy_llm/tasks/raw/__init__.py
new file mode 100644
index 00000000..4e5aee1c
--- /dev/null
+++ b/spacy_llm/tasks/raw/__init__.py
@@ -0,0 +1,5 @@
+from .registry import make_raw_task
+from .task import RawTask
+from .util import RawExample
+
+__all__ = ["make_raw_task", "RawExample", "RawTask"]
diff --git a/spacy_llm/tasks/raw/parser.py b/spacy_llm/tasks/raw/parser.py
new file mode 100644
index 00000000..6e80dabc
--- /dev/null
+++ b/spacy_llm/tasks/raw/parser.py
@@ -0,0 +1,19 @@
+from typing import Iterable, List
+
+from spacy.tokens import Doc
+
+from .task import RawTask
+
+
+def parse_responses_v1(
+    task: RawTask, shards: Iterable[Iterable[Doc]], responses: Iterable[Iterable[str]]
+) -> Iterable[List[str]]:
+    """Parses LLM responses for spacy.Raw.v1. Note that no parsing happens here, as we don't know what the result is
+    expected to look like.
+    task (RawTask): Task instance.
+    shards (Iterable[Iterable[Doc]]): Doc shards.
+    responses (Iterable[Iterable[str]]): LLM responses.
+    RETURNS (Iterable[List[str]]): Reply as string per shard and doc.
+    """
+    for responses_for_doc in responses:
+        yield list(responses_for_doc)
diff --git a/spacy_llm/tasks/raw/registry.py b/spacy_llm/tasks/raw/registry.py
new file mode 100644
index 00000000..91a5b02f
--- /dev/null
+++ b/spacy_llm/tasks/raw/registry.py
@@ -0,0 +1,55 @@
+from typing import Optional, Type
+
+from ...registry import registry
+from ...ty import ExamplesConfigType, FewshotExample, ShardMapper, ShardReducer
+from ...ty import TaskResponseParser
+from ..util.sharding import make_shard_mapper
+from .parser import parse_responses_v1
+from .task import DEFAULT_RAW_TEMPLATE_V1, RawTask
+from .util import RawExample, reduce_shards_to_doc
+
+
+@registry.llm_misc("spacy.RawParser.v1")
+def make_raw_parser() -> TaskResponseParser[RawTask]:
+    return parse_responses_v1
+
+
+@registry.llm_misc("spacy.RawShardReducer.v1")
+def make_shard_reducer() -> ShardReducer:
+    return reduce_shards_to_doc
+
+
+@registry.llm_tasks("spacy.Raw.v1")
+def make_raw_task(
+    template: str = DEFAULT_RAW_TEMPLATE_V1,
+    field: str = "llm_reply",
+    parse_responses: Optional[TaskResponseParser[RawTask]] = None,
+    prompt_example_type: Optional[Type[FewshotExample]] = None,
+    examples: ExamplesConfigType = None,
+    shard_mapper: Optional[ShardMapper] = None,
+    shard_reducer: Optional[ShardReducer] = None,
+):
+    """Raw.v1 task factory.
+
+    template (str): Prompt template passed to the model.
+    field (str): Field to store replies in.
+    parse_responses (Optional[TaskResponseParser]): Callable for parsing LLM responses for this task.
+    prompt_example_type (Optional[Type[FewshotExample]]): Type to use for fewshot examples.
+    examples (ExamplesConfigType): Optional callable that reads a file containing task examples for
+        few-shot learning. If None is passed, then zero-shot learning will be used.
+    shard_mapper (Optional[ShardMapper]): Maps docs to shards if they don't fit into the model context.
+    shard_reducer (Optional[ShardReducer]): Reduces doc shards back into one doc instance.
+    """
+    raw_examples = examples() if callable(examples) else examples
+    example_type = prompt_example_type or RawExample
+    raw_examples = [example_type(**eg) for eg in raw_examples] if raw_examples else None
+
+    return RawTask(
+        template=template,
+        field=field,
+        parse_responses=parse_responses or parse_responses_v1,
+        prompt_example_type=example_type,
+        prompt_examples=raw_examples,
+        shard_mapper=shard_mapper or make_shard_mapper(),
+        shard_reducer=shard_reducer or make_shard_reducer(),
+    )
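Because the parser and shard reducer are resolved through the registry, either can be swapped from config without touching the task code. A hedged sketch of a custom reducer (the `my.*` name and the newline-joining behavior are illustrative, not part of this PR):

```python
from typing import Iterable

from spacy.tokens import Doc

from spacy_llm.registry import registry
from spacy_llm.tasks.raw import RawTask


@registry.llm_misc("my.NewlineRawReducer.v1")
def make_newline_shard_reducer():
    def reduce_shards(task: RawTask, shards: Iterable[Doc]) -> Doc:
        # Like the default reducer, but joins shard replies with newlines.
        # (The UserWarning filter used in raw/util.py is omitted for brevity.)
        shards = list(shards)
        doc = Doc.from_docs(shards, ensure_whitespace=True)
        setattr(doc._, task.field, "\n".join(getattr(s._, task.field) for s in shards))
        return doc

    return reduce_shards


# Referenced from config via:
#   [components.llm.task.shard_reducer]
#   @llm_misc = "my.NewlineRawReducer.v1"
```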
diff --git a/spacy_llm/tasks/raw/task.py b/spacy_llm/tasks/raw/task.py
new file mode 100644
index 00000000..e0d5d3e2
--- /dev/null
+++ b/spacy_llm/tasks/raw/task.py
@@ -0,0 +1,86 @@
+from typing import Callable, Iterable, List, Optional, Type
+
+from spacy import Language
+from spacy.tokens import Doc
+from spacy.training import Example
+
+from ...compat import Self
+from ...ty import FewshotExample, ShardMapper, ShardReducer, TaskResponseParser
+from ..builtin_task import BuiltinTask
+from ..templates import read_template
+
+DEFAULT_RAW_TEMPLATE_V1 = read_template("raw.v1")
+
+
+class RawTask(BuiltinTask):
+    def __init__(
+        self,
+        parse_responses: TaskResponseParser[Self],
+        prompt_example_type: Type[FewshotExample[Self]],
+        prompt_examples: Optional[List[FewshotExample[Self]]],
+        template: str,
+        field: str,
+        shard_mapper: ShardMapper,
+        shard_reducer: ShardReducer[Self],
+    ):
+        """Raw task. Expects a prompt template without instructions for the LLM, i.e. docs have to provide
+        instructions themselves.
+
+        parse_responses (TaskResponseParser[Self]): Callable for parsing LLM responses for this task.
+        prompt_example_type (Type[FewshotExample[Self]]): Type to use for fewshot examples.
+        prompt_examples (Optional[List[FewshotExample[Self]]]): Optional list of few-shot examples to include in prompts.
+        template (str): Prompt template passed to the model.
+        field (str): Field to store responses in.
+        shard_mapper (ShardMapper): Maps docs to shards if they don't fit into the model context.
+        shard_reducer (ShardReducer[Self]): Reduces doc shards back into one doc instance.
+        """
+        super().__init__(
+            parse_responses=parse_responses,
+            prompt_example_type=prompt_example_type,
+            template=template,
+            prompt_examples=prompt_examples,
+            shard_mapper=shard_mapper,
+            shard_reducer=shard_reducer,
+        )
+        self._field = field
+        self._check_doc_extension()
+
+    def parse_responses(
+        self, shards: Iterable[Iterable[Doc]], responses: Iterable[Iterable[str]]
+    ) -> Iterable[Doc]:
+        shards_teed = self._tee_2d_iterable(shards, 2)
+        for shards_for_doc, responses_for_doc in zip(
+            shards_teed[0], self._parse_responses(self, shards_teed[1], responses)
+        ):
+            updated_shards_for_doc: List[Doc] = []
+            for shard, response in zip(shards_for_doc, responses_for_doc):
+                setattr(shard._, self._field, response)
+                updated_shards_for_doc.append(shard)
+
+            yield self._shard_reducer(self, updated_shards_for_doc)  # type: ignore[arg-type]
+
+    def initialize(
+        self,
+        get_examples: Callable[[], Iterable["Example"]],
+        nlp: Language,
+        n_prompt_examples: int = 0,
+    ) -> None:
+        super()._initialize(
+            get_examples=get_examples, nlp=nlp, n_prompt_examples=n_prompt_examples
+        )
+
+    def _check_doc_extension(self):
+        """Add extension if need be."""
+        if not Doc.has_extension(self._field):
+            Doc.set_extension(self._field, default=None)
+
+    @property
+    def _cfg_keys(self) -> List[str]:
+        return ["_template"]
+
+    @property
+    def field(self) -> str:
+        """Return field used to store replies in docs.
+        RETURNS (str): Field used to store replies in docs.
+        """
+        return self._field
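The flow in `parse_responses` above is: write each raw reply to the shard's extension field, then fuse the shards back into one doc via the shard reducer. A small offline sketch of that contract, with hand-made shards and responses instead of an LLM call (assumes this branch is installed):

```python
import spacy

from spacy_llm.tasks import make_raw_task

nlp = spacy.blank("en")
task = make_raw_task(examples=None)  # registers the "llm_reply" Doc extension

doc = nlp.make_doc("What's the capital of France?")
# Outer iterable: one entry per doc; inner iterable: one entry per shard.
parsed = list(task.parse_responses([[doc]], [["Paris."]]))

# The reply lands verbatim in the configured field on the fused doc.
assert parsed[0]._.llm_reply == "Paris."
```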
diff --git a/spacy_llm/tasks/raw/util.py b/spacy_llm/tasks/raw/util.py
new file mode 100644
index 00000000..2ece737f
--- /dev/null
+++ b/spacy_llm/tasks/raw/util.py
@@ -0,0 +1,44 @@
+import warnings
+from typing import Iterable, Optional
+
+from spacy.tokens import Doc
+from spacy.training import Example
+
+from ...compat import Self
+from ...ty import FewshotExample
+from .task import RawTask
+
+
+class RawExample(FewshotExample[RawTask]):
+    text: str
+    reply: str
+
+    @classmethod
+    def generate(cls, example: Example, task: RawTask) -> Optional[Self]:
+        return cls(
+            text=example.reference.text, reply=getattr(example.reference._, task.field)
+        )
+
+
+def reduce_shards_to_doc(task: RawTask, shards: Iterable[Doc]) -> Doc:
+    """Reduces shards to docs for RawTask.
+    task (RawTask): Task.
+    shards (Iterable[Doc]): Shards to reduce to single doc instance.
+    RETURNS (Doc): Fused doc instance.
+    """
+    shards = list(shards)
+
+    with warnings.catch_warnings():
+        warnings.filterwarnings(
+            "ignore",
+            category=UserWarning,
+            message=".*Skipping .* while merging docs.",
+        )
+        doc = Doc.from_docs(shards, ensure_whitespace=True)
+        setattr(
+            doc._,
+            task.field,
+            " ".join([getattr(shard._, task.field) for shard in shards]),
+        )
+
+    return doc
diff --git a/spacy_llm/tasks/templates/raw.v1.jinja b/spacy_llm/tasks/templates/raw.v1.jinja
new file mode 100644
index 00000000..e5cc6f95
--- /dev/null
+++ b/spacy_llm/tasks/templates/raw.v1.jinja
@@ -0,0 +1,17 @@
+{%- if prompt_examples -%}
+Below are some examples (only use these as a guide):
+{# whitespace #}
+{%- for example in prompt_examples -%}
+{# whitespace #}
+Text:
+{{ example.text }}
+Reply:
+{{ example.reply }}
+{# whitespace #}
+{%- endfor -%}
+{# whitespace #}
+{%- endif -%}
+{# whitespace #}
+Text:
+{{ text }}
+Reply:
diff --git a/spacy_llm/tests/sharding/test_sharding.py b/spacy_llm/tests/sharding/test_sharding.py
index 19fc17c4..0abfc6d6 100644
--- a/spacy_llm/tests/sharding/test_sharding.py
+++ b/spacy_llm/tests/sharding/test_sharding.py
@@ -282,3 +282,25 @@ def test_sharding_entity_linker(config):
     assert all([ent.kb_id_ != EntityLinker.NIL for ent in doc.ents])
     assert prompts == ["Alice goes to *Boston* to ", "see the *Boston Celtics* game."]
     assert len(doc.user_data["llm_io"]["llm"]["response"]) == 2
+
+
+@pytest.mark.external
+@pytest.mark.skipif(has_openai_key is False, reason="OpenAI API key not available")
+def test_sharding_raw(config):
+    context_length = 20
+    config["components"]["llm"]["model"]["context_length"] = context_length
+    config["components"]["llm"]["task"] = {"@llm_tasks": "spacy.Raw.v1"}
+    nlp = assemble_from_config(config)
+
+    doc = nlp(_TEXT)
+    marker = "Text:\n"
+    prompts = [
+        pr[pr.rindex(marker) + len(marker) : pr.rindex("\nReply:")]
+        for pr in doc.user_data["llm_io"]["llm"]["prompt"]
+    ]
+    assert hasattr(doc._, "llm_reply") and doc._.llm_reply
+    assert prompts == [
+        "Do one thing every day that scares you. The ",
+        "only thing we have to fear is fear itself.",
+    ]
+    assert len(doc.user_data["llm_io"]["llm"]["response"]) == 2
diff --git a/spacy_llm/tests/tasks/examples/raw.json b/spacy_llm/tests/tasks/examples/raw.json
new file mode 100644
index 00000000..0dc0d8d0
--- /dev/null
+++ b/spacy_llm/tests/tasks/examples/raw.json
@@ -0,0 +1,5 @@
+[
+    {"text": "3 + 5 = x. What's x?", "reply": "8"},
+    {"text": "Write me a limerick.", "reply": "There was an Old Man with a beard, Who said, 'It is just as I feared! Two Owls and a Hen, Four Larks and a Wren, Have all built their nests in my beard!'"},
+    {"text": "Analyse the sentiment of the text 'This is great'.", "reply": "'This is great' expresses a very positive sentiment."}
+]
\ No newline at end of file
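These fixture files feed `spacy.FewShotReader.v1`. As a rough sketch of what the default template then renders for one doc (the file path is illustrative; exact blank lines are governed by the `{# whitespace #}` markers above):

```python
import spacy

from spacy_llm.registry import fewshot_reader
from spacy_llm.tasks import make_raw_task

nlp = spacy.blank("en")
# Point this at one of the raw.json / raw.jsonl / raw.yml files above.
task = make_raw_task(examples=fewshot_reader("spacy_llm/tests/tasks/examples/raw.yml"))

doc = nlp.make_doc("What's 2 + 2?")
prompt = list(task.generate_prompts([doc]))[0][0][0]

print(prompt)
# Below are some examples (only use these as a guide):
#
# Text:
# 3 + 5 = x. What's x?
# Reply:
# 8
# ...
# Text:
# What's 2 + 2?
# Reply:
```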
diff --git a/spacy_llm/tests/tasks/examples/raw.jsonl b/spacy_llm/tests/tasks/examples/raw.jsonl
new file mode 100644
index 00000000..8240c638
--- /dev/null
+++ b/spacy_llm/tests/tasks/examples/raw.jsonl
@@ -0,0 +1,3 @@
+{"text": "3 + 5 = x. What's x?", "reply": "8"}
+{"text": "Write me a limerick.", "reply": "There was an Old Man with a beard, Who said, 'It is just as I feared! Two Owls and a Hen, Four Larks and a Wren, Have all built their nests in my beard!'"}
+{"text": "Analyse the sentiment of the text 'This is great'.", "reply": "'This is great' expresses a very positive sentiment."}
diff --git a/spacy_llm/tests/tasks/examples/raw.yml b/spacy_llm/tests/tasks/examples/raw.yml
new file mode 100644
index 00000000..8855f648
--- /dev/null
+++ b/spacy_llm/tests/tasks/examples/raw.yml
@@ -0,0 +1,8 @@
+- text: "3 + 5 = x. What's x?"
+  reply: "8"
+
+- text: "Write me a limerick."
+  reply: "There was an Old Man with a beard, Who said, 'It is just as I feared! Two Owls and a Hen, Four Larks and a Wren, Have all built their nests in my beard!'"
+
+- text: "Analyse the sentiment of the text 'This is great'."
+  reply: "'This is great' expresses a very positive sentiment."
diff --git a/spacy_llm/tests/tasks/templates/raw.jinja2 b/spacy_llm/tests/tasks/templates/raw.jinja2
new file mode 100644
index 00000000..cde69cea
--- /dev/null
+++ b/spacy_llm/tests/tasks/templates/raw.jinja2
@@ -0,0 +1,2 @@
+This is a test RAW template.
+Here is the text: {{ text }}
\ No newline at end of file
["llm"] + + [components] + [components.llm] + factory = "llm" + + [components.llm.task] + @llm_tasks = "spacy.Raw.v1" + + [components.llm.task.template] + @misc = "spacy.FileReader.v1" + path = {str((Path(__file__).parent / "templates" / "raw.jinja2"))} + + [components.llm.model] + @llm_models = "spacy.GPT-3-5.v3" + """ + + +@pytest.mark.external +@pytest.mark.skipif(has_openai_key is False, reason="OpenAI API key not available") +@pytest.mark.parametrize( + "cfg_string", + [ + "zeroshot_cfg_string", + "fewshot_cfg_string", + "ext_template_cfg_string", + ], +) +def test_raw_config(cfg_string, request): + cfg_string = request.getfixturevalue(cfg_string) + orig_config = Config().from_str(cfg_string) + nlp = spacy.util.load_model_from_config(orig_config, auto_fill=True) + assert nlp.pipe_names == ["llm"] + + # also test nlp config from a dict in add_pipe + component_cfg = dict(orig_config["components"]["llm"]) + component_cfg.pop("factory") + + nlp2 = spacy.blank("en") + nlp2.add_pipe("llm", config=component_cfg) + assert nlp2.pipe_names == ["llm"] + + +@pytest.mark.external +@pytest.mark.skipif(has_openai_key is False, reason="OpenAI API key not available") +@pytest.mark.parametrize( + "cfg_string", + [ + "zeroshot_cfg_string", + "fewshot_cfg_string", + "ext_template_cfg_string", + ], +) +def test_raw_predict(cfg_string, request): + """Use OpenAI to get zero-shot LEMMA results. + Note that this test may fail randomly, as the LLM's output is unguaranteed to be consistent/predictable + """ + cfg = request.getfixturevalue(cfg_string) + orig_config = Config().from_str(cfg) + nlp = spacy.util.load_model_from_config(orig_config, auto_fill=True) + assert nlp("What's the weather like?")._.llm_reply + + +@pytest.mark.external +@pytest.mark.parametrize( + "cfg_string", + [ + "zeroshot_cfg_string", + "fewshot_cfg_string", + ], +) +def test_raw_io(cfg_string, request): + cfg = request.getfixturevalue(cfg_string) + orig_config = Config().from_str(cfg) + nlp = spacy.util.load_model_from_config(orig_config, auto_fill=True) + assert nlp.pipe_names == ["llm"] + # ensure you can save a pipeline to disk and run it after loading + with make_tempdir() as tmpdir: + nlp.to_disk(tmpdir) + nlp2 = spacy.load(tmpdir) + assert nlp2.pipe_names == ["llm"] + assert nlp2("I've watered the plants.")._.llm_reply + + +def test_jinja_template_rendering_without_examples(): + """Test if jinja template renders as we expected + + We apply the .strip() method for each prompt so that we don't have to deal + with annoying newlines and spaces at the edge of the text. + """ + nlp = spacy.blank("en") + text = "How much wood would a woodchuck chuck if a woodchuck could chuck wood?" + doc = nlp.make_doc(text) + + raw_task = make_raw_task(examples=None) + prompt = list(raw_task.generate_prompts([doc]))[0][0][0] + + assert ( + prompt.strip() + == f""" +Text: +{text} +Reply: +""".strip() + ) + + +@pytest.mark.parametrize( + "examples_path", + [ + str(EXAMPLES_DIR / "raw.json"), + str(EXAMPLES_DIR / "raw.yml"), + str(EXAMPLES_DIR / "raw.jsonl"), + ], +) +def test_jinja_template_rendering_with_examples(examples_path): + """Test if jinja2 template renders as expected + + We apply the .strip() method for each prompt so that we don't have to deal + with annoying newlines and spaces at the edge of the text. + """ + nlp = spacy.blank("en") + text = "How much wood would a woodchuck chuck if a woodchuck could chuck wood?" 
+    doc = nlp.make_doc(text)
+
+    raw_task = make_raw_task(examples=fewshot_reader(examples_path))
+    prompt = list(raw_task.generate_prompts([doc]))[0][0][0]
+
+    assert prompt.startswith("Below are some examples (only use these as a guide):")
+    assert prompt.strip().endswith(
+        f"""
+Text:
+{text}
+Reply:
+""".strip()
+    )
+
+
+def test_external_template_actually_loads():
+    template_path = str(TEMPLATES_DIR / "raw.jinja2")
+    template = file_reader(template_path)
+    text = "How much wood would a woodchuck chuck if a woodchuck could chuck wood?"
+    nlp = spacy.blank("en")
+    doc = nlp.make_doc(text)
+
+    raw_task = make_raw_task(examples=None, template=template)
+    prompt = list(raw_task.generate_prompts([doc]))[0][0][0]
+
+    assert (
+        prompt.strip()
+        == f"""
+This is a test RAW template.
+Here is the text: {text}
+""".strip()
+    )
+
+
+@pytest.mark.parametrize("n_prompt_examples", [-1, 0, 1, 2])
+def test_raw_init(noop_config, n_prompt_examples: int):
+    config = Config().from_str(noop_config)
+    with pytest.warns(UserWarning, match="Task supports sharding"):
+        nlp = assemble_from_config(config)
+
+    examples = []
+    text = "How much wood would a woodchuck chuck if a woodchuck could chuck wood?"
+    gold_1 = nlp.make_doc(text)
+    gold_1._.llm_reply = "Plenty"
+    examples.append(Example(nlp.make_doc(text), gold_1))
+
+    text = "Who sells seashells by the seashore?"
+    gold_2 = nlp.make_doc(text)
+    gold_2._.llm_reply = "Shelly"
+    examples.append(Example(nlp.make_doc(text), gold_2))
+
+    _, llm = nlp.pipeline[0]
+    task: RawTask = llm._task
+
+    assert not task._prompt_examples
+
+    nlp.config["initialize"]["components"]["llm"] = {
+        "n_prompt_examples": n_prompt_examples
+    }
+    nlp.initialize(lambda: examples)
+
+    if n_prompt_examples >= 0:
+        assert len(task._prompt_examples) == n_prompt_examples
+    else:
+        assert len(task._prompt_examples) == len(examples)
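For reference, end-to-end usage of the new task mirrors the test configs above. A hedged sketch (the model shown requires a configured OpenAI key; `doc._.llm_reply` holds whatever the model returns, verbatim):

```python
from confection import Config

from spacy_llm.util import assemble_from_config

config = Config().from_str(
    """
    [nlp]
    lang = "en"
    pipeline = ["llm"]

    [components]

    [components.llm]
    factory = "llm"

    [components.llm.task]
    @llm_tasks = "spacy.Raw.v1"
    field = "llm_reply"

    [components.llm.model]
    @llm_models = "spacy.GPT-3-5.v3"
    """
)

nlp = assemble_from_config(config)
# The doc text itself is the instruction; the task adds no instructions of its own.
doc = nlp("Write a one-line haiku about whitespace.")
print(doc._.llm_reply)
```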