aphp · percevalw · Sep 7, 2023 · Sep 7, 2023 · Sep 7, 2023
diff --git a/changelog.md b/changelog.md
@@ -12,6 +12,7 @@
 
 - Updated API to follow EDS-NLP's refactoring
 - Updated `confit` to 0.4.2 (better errors) and `foldedtensor` to 0.3.0 (better multiprocess support)
+- Removed `pipeline.score`. You should use `pipeline.pipe`, a custom scorer and `pipeline.select_pipes` instead.
 - Better test coverage
 
 ### Fixed

diff --git a/...beddings/assets/transformer-windowing.svg → docs/assets/images/transformer-windowing.svg b/...beddings/assets/transformer-windowing.svg → docs/assets/images/transformer-windowing.svg
diff --git a/docs/pipeline.md b/docs/pipeline.md
@@ -57,7 +57,7 @@ model(pdf_bytes)
 model.pipe([pdf_bytes, ...])
 ```
 
-For more information on how to use the pipeline, refer to the [Inference](../inference) page.
+For more information on how to use the pipeline, refer to the [Inference](/inference) page.
 
 ## Hybrid models
 

diff --git a/docs/recipes/training.md b/docs/recipes/training.md
@@ -160,16 +160,32 @@ model to decrease a given loss. The process of training a pipeline with EDS-PDF
         optimizer.step()
     ```
 
-7. Finally, the model is evaluated on the validation dataset at regular intervals and saved at the end of the training. Although you can use `torch.save` to save your model, we provide a safer method to avoid the security pitfalls of pickle models
-    ```python
-    from edspdf import Pipeline
+   7. Finally, the model is evaluated on the validation dataset at regular intervals and saved at the end of the training. To score the model, we only want to run the "classifier" component and not the extractor: otherwise, we would overwrite the annotated text boxes of the documents in the `val_docs` dataset and end up with mismatching text boxes between the gold and predicted documents. Although you can use `torch.save` to save your model, we provide a safer method that avoids the security pitfalls of pickled models:
+       ```python
+       from edspdf import Pipeline
+       from sklearn.metrics import classification_report
+       from copy import deepcopy
 
-    if (step % 100) == 0:
-        print(model.score(val_docs))
 
-    # torch.save(model, "model.pt")
-    model.save("model")
-    ```
+       def score(golds, preds):
+           return classification_report(
+               [b.label for gold in golds for b in gold.text_boxes if b.text != ""],
+               [b.label for pred in preds for b in pred.text_boxes if b.text != ""],
+               output_dict=True,
+               zero_division=0,
+           )
+
+
+       ...
+
+       if (step % 100) == 0:
+           # only run the "classifier" component, so that the text boxes are not overwritten
+           with model.select_pipes(enable=["classifier"]):
+               print(score(val_docs, model.pipe(deepcopy(val_docs))))
+
+       # torch.save(model, "model.pt")
+       model.save("model")
+       ```
 
 ## Adapting a dataset
 
@@ -234,6 +250,7 @@ Let's wrap the training code in a function, and make it callable from the comman
     ```python linenums="1"
     import itertools
     import json
+    from copy import deepcopy
     from pathlib import Path
 
     import torch
@@ -250,6 +267,15 @@ Let's wrap the training code in a function, and make it callable from the comman
     app = Cli(pretty_exceptions_show_locals=False)
 
 
+    def score(golds, preds):
+        return classification_report(
+            [b.label for gold in golds for b in gold.text_boxes if b.text != ""],
+            [b.label for pred in preds for b in pred.text_boxes if b.text != ""],
+            output_dict=True,
+            zero_division=0,
+        )
+
+
     @registry.adapter.register("my-segmentation-adapter")
     def segmentation_adapter(
         path: str,
@@ -390,14 +416,16 @@ Let's wrap the training code in a function, and make it callable from the comman
                 optimizer.step()
 
             if (step % 100) == 0:
-                print(model.score(val_docs))
+                with model.select_pipes(enable=["classifier"]):
+                    print(score(val_docs, model.pipe(deepcopy(val_docs))))
                 model.save("model")
 
         return model
 
 
     if __name__ == "__main__":
         app()
+
     ```
 
 ```bash

diff --git a/edspdf/pipeline.py b/edspdf/pipeline.py
@@ -1,9 +1,7 @@
-import copy
 import functools
 import json
 import os
 import shutil
-import time
 import warnings
 from collections import defaultdict
 from contextlib import contextmanager
@@ -27,7 +25,6 @@
 from confit.errors import ConfitValidationError, patch_errors
 from confit.utils.collections import join_path, split_path
 from confit.utils.xjson import Reference
-from tqdm import tqdm
 
 import edspdf
 
@@ -37,7 +34,6 @@
 from .utils.collections import (
     FrozenList,
     batch_compress_dict,
-    batchify,
     decompress_dict,
     multi_tee,
 )
@@ -122,10 +118,7 @@ def has_pipe(self, name: str) -> bool:
         -------
         bool
         """
-        for n, _ in self.pipeline:
-            if n == name:
-                return True
-        return False
+        return any(n == name for n, _ in self.pipeline)
 
     def create_pipe(
         self,
@@ -635,79 +628,6 @@ def __exit__(ctx_self, type, value, traceback):
 
         return context()
 
-    def score(self, docs: Sequence[PDFDoc], batch_size: int = None) -> Dict[str, Any]:
-        """
-        Scores a pipeline against a sequence of annotated documents.
-
-        This poses a few challenges:
-        - for a NER pipeline, if a component adds new entities instead of replacing
-          all entities altogether, documents need to be stripped of gold entities before
-          being passed to the component to avoid counting them.
-        - on the other hand, if a component uses existing entities to make a decision,
-          e.g. span classification, we must preserve the gold entities in documents
-          before evaluating the component.
-        Therefore, we must be able to define what a "clean" document is for each
-        component.
-        Can we do this automatically? If not, we should at least be able to define
-        it manually for each component.
-
-
-        Parameters
-        ----------
-        docs: Sequence[InputT]
-            The documents to score
-        batch_size: int
-            The batch size to use for scoring
-
-        Returns
-        -------
-        Dict[str, Any]
-            A dictionary containing the metrics of the pipeline, as well as the speed of
-            the pipeline. Each component that has a scorer will also be scored and its
-            metrics will be included in the returned dictionary under a key named after
-            each component.
-        """
-        import torch
-
-        inputs: Sequence[PDFDoc] = copy.deepcopy(docs)
-        golds: Sequence[PDFDoc] = docs
-
-        scored_components = {}
-
-        # Predicting intermediate steps
-        preds = defaultdict(lambda: [])
-        if batch_size is None:
-            batch_size = self.batch_size
-        total_duration = 0
-        with self.train(False), torch.no_grad():  # type: ignore
-            for batch in batchify(
-                tqdm(inputs, "Scoring components"), batch_size=batch_size
-            ):
-                with self.cache():
-                    for name, pipe in self.pipeline[::-1]:
-                        if hasattr(pipe, "clean_gold_for_evaluation"):
-                            batch = [
-                                pipe.clean_gold_for_evaluation(doc) for doc in batch
-                            ]
-                        t0 = time.time()
-                        if hasattr(pipe, "batch_process"):
-                            batch = pipe.batch_process(batch)
-                        else:
-                            batch = [pipe(doc) for doc in batch]
-                        total_duration += time.time() - t0
-
-                        if getattr(pipe, "score", None) is not None:
-                            scored_components[name] = pipe
-                            preds[name].extend(copy.deepcopy(batch))
-
-            metrics: Dict[str, Any] = {
-                "speed": len(inputs) / total_duration,
-            }
-            for name, pipe in scored_components.items():
-                metrics[name] = pipe.score([(p, g) for p, g in zip(preds[name], golds)])
-
-        return metrics
-
     def save(
         self, path: Union[str, Path], *, exclude: Optional[Set[str]] = None
     ) -> None:
@@ -878,7 +798,6 @@ def config(self) -> Config:
         config["pipeline"]["components"] = Reference("components")
         return config.serialize()
 
-    @contextmanager
     def select_pipes(
         self,
         *,
@@ -895,27 +814,47 @@ def select_pipes(
         enable: Optional[Union[str, Iterable[str]]]
             The name of the component to enable, or a list of names.
         """
+
+        class context:
+            def __enter__(self):
+                pass
+
+            def __exit__(ctx_self, type, value, traceback):
+                self._disabled = disabled_before
+
         if enable is None and disable is None:
             raise ValueError("Expected either `enable` or `disable`")
         if isinstance(disable, str):
             disable = [disable]
+        pipe_names = set(self.pipe_names)
         if enable is not None:
             if isinstance(enable, str):
                 enable = [enable]
+            if set(enable) - pipe_names:
+                raise ValueError(
+                    "Enabled pipes {} not found in pipeline.".format(
+                        sorted(set(enable) - pipe_names)
+                    )
+                )
             to_disable = [pipe for pipe in self.pipe_names if pipe not in enable]
             # raise an error if the enable and disable keywords are not consistent
             if disable is not None and disable != to_disable:
                 raise ValueError("Inconsistent values for `enable` and `disable`")
             disable = to_disable
 
-        disabled_before = self._disabled
+        if set(disable) - pipe_names:
+            raise ValueError(
+                "Disabled pipes {} not found in pipeline.".format(
+                    sorted(set(disable) - pipe_names)
+                )
+            )
 
+        disabled_before = self._disabled
         self._disabled = disable
-        yield self
-        self._disabled = disabled_before
+        return context()
 
 
-def load(config: Union[Path, str, Config]):
+def load(config: Union[Path, str, Config]) -> Pipeline:
     error = "The load function expects a Config or a path to a config file"
     if isinstance(config, (Path, str)):
         path = Path(config)

diff --git a/edspdf/pipes/classifiers/trainable.py b/edspdf/pipes/classifiers/trainable.py
@@ -6,24 +6,14 @@
 import torch
 import torch.nn.functional as F
 from foldedtensor import as_folded_tensor
-from sklearn.metrics import classification_report
 from tqdm import tqdm
 
 from edspdf.layers.vocabulary import Vocabulary
 from edspdf.pipeline import Pipeline
 from edspdf.pipes.embeddings import EmbeddingOutput
 from edspdf.registry import registry
 from edspdf.structures import PDFDoc
-from edspdf.trainable_pipe import Scorer, TrainablePipe
-
-
-def classifier_scorer(pairs):
-    return classification_report(
-        [b.label for pred, gold in pairs for b in gold.text_boxes if b.text != ""],
-        [b.label for pred, gold in pairs for b in pred.text_boxes if b.text != ""],
-        output_dict=True,
-        zero_division=0,
-    )
+from edspdf.trainable_pipe import TrainablePipe
 
 
 @registry.factory.register("trainable-classifier")
@@ -96,17 +86,12 @@ class TrainableClassifier(TrainablePipe[Dict[str, Any]]):
         Initial labels of the classifier (will be completed during initialization)
     embedding: TrainablePipe[EmbeddingOutput]
         Embedding module to encode the PDF boxes
-    dropout_p: float
-        Dropout probability used on the output of the box and textual encoders
-    scorer: Scorer
-        Scoring function
     """
 
     def __init__(
         self,
         embedding: TrainablePipe[EmbeddingOutput],
         labels: Sequence[str] = ("pollution",),
-        scorer: Scorer = classifier_scorer,
         pipeline: Pipeline = None,
         name: str = "trainable-classifier",
     ):
@@ -121,8 +106,6 @@ def __init__(
             in_features=self.embedding.output_size,
             out_features=len(self.label_voc),
         )
-        # Scoring function
-        self.score = scorer
 
     def post_init(self, gold_data: Iterable[PDFDoc], exclude: set):
         if self.name in exclude:

diff --git a/edspdf/pipes/embeddings/box_transformer.py b/edspdf/pipes/embeddings/box_transformer.py
@@ -61,8 +61,8 @@ class BoxTransformer(TrainablePipe[EmbeddingOutput]):
         Initializing with a value close to 0 can help the training converge.
     attention_mode: Sequence[RelativeAttentionMode]
         Mode of relative position infused attention layer.
-        See the [relative attention](relative_attention) documentation for more
-        information.
+        See the [relative attention][edspdf.layers.relative_attention.RelativeAttention]
+        documentation for more information.
     n_layers: int
         Number of layers in the Transformer
     """

diff --git a/edspdf/pipes/embeddings/huggingface_embedding.py b/edspdf/pipes/embeddings/huggingface_embedding.py
@@ -42,7 +42,7 @@ class HuggingfaceEmbedding(TrainablePipe[EmbeddingOutput]):
     occurrence that is the closest to the center of its window.
 
     Here is an overview of how this works in a classifier model:
-    ![Transformer windowing](./assets/transformer-windowing.svg)
+    ![Transformer windowing](/assets/images/transformer-windowing.svg)
 
     Examples
     --------
@@ -82,7 +82,8 @@ class HuggingfaceEmbedding(TrainablePipe[EmbeddingOutput]):
     )
     ```
 
-    This model can then be trained following the [training recipe](/recipes/training/).
+    This model can then be trained following the
+    [training recipe](/recipes/training/).
 
     Parameters
     ----------