Last token pooling for Huggingface models like SFR-Embedding-Mistral (#11373)

marib00 · root · marib00 · web-flow · commit 6024956517e6 · 2024-02-26T15:12:31.000+05:30
* Added last token pooling for Huggingface models like Salesforce/SFR-Embedding-Mistral

* fixed whitespace

* Added overloaded method signatures

---------

Co-authored-by: root <root@maid-beast.staff.bournemouth.ac.uk>
Co-authored-by: marib00 <newborn09current@icloud.com>
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-huggingface/llama_index/embeddings/huggingface/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-huggingface/llama_index/embeddings/huggingface/base.py
@@ -160,12 +160,14 @@ def _embed(self, sentences: List[str]) -> List[List[float]]:
 
         model_output = self._model(**encoded_input)
 
+        context_layer: "torch.Tensor" = model_output[0]
         if self.pooling == Pooling.CLS:
-            context_layer: "torch.Tensor" = model_output[0]
             embeddings = self.pooling.cls_pooling(context_layer)
+        elif self.pooling == Pooling.LAST:
+            embeddings = self.pooling.last_pooling(context_layer)
         else:
             embeddings = self._mean_pooling(
-                token_embeddings=model_output[0],
+                token_embeddings=context_layer,
                 attention_mask=encoded_input["attention_mask"],
             )
 
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-huggingface/llama_index/embeddings/huggingface/pooling.py b/llama-index-integrations/embeddings/llama-index-embeddings-huggingface/llama_index/embeddings/huggingface/pooling.py
@@ -12,10 +12,13 @@ class Pooling(str, Enum):
 
     CLS = "cls"
     MEAN = "mean"
+    LAST = "last"  # last token pooling
 
     def __call__(self, array: np.ndarray) -> np.ndarray:
         if self == self.CLS:
             return self.cls_pooling(array)
+        elif self == self.LAST:  # LAST dispatches here; anything else falls through to mean
+            return self.last_pooling(array)
         return self.mean_pooling(array)
 
     @classmethod
@@ -47,3 +50,25 @@ def mean_pooling(cls, array: np.ndarray) -> np.ndarray:
         if len(array.shape) == 2:
             return array.mean(axis=0)
         raise NotImplementedError(f"Unhandled shape {array.shape}.")
+
+    @classmethod
+    @overload
+    def last_pooling(cls, array: np.ndarray) -> np.ndarray:  # typing-only overload stub
+        ...
+
+    @classmethod
+    @overload
+    # TODO: Remove this `type: ignore` after the false positive problem
+    #  is addressed in mypy: https://github.com/python/mypy/issues/15683 .
+    def last_pooling(cls, array: "torch.Tensor") -> "torch.Tensor":  # type: ignore
+        ...
+
+    @classmethod
+    def last_pooling(
+        cls, array: "Union[np.ndarray, torch.Tensor]"
+    ) -> "Union[np.ndarray, torch.Tensor]":
+        if len(array.shape) == 3:  # rank 3: take index -1 on axis 1, keeping axis 0
+            return array[:, -1]  # NOTE(review): no attention mask is consulted; confirm pad side
+        if len(array.shape) == 2:  # rank 2: take the last row
+            return array[-1]
+        raise NotImplementedError(f"Unhandled shape {array.shape}.")  # other ranks unsupported