Bump numpy from 1.19.5 to 1.21.6 (RasaHQ#11078)

* Bump numpy from 1.19.5 to 1.21.6

  Bumps [numpy](https://github.com/numpy/numpy) from 1.19.5 to 1.21.6.
  - [Release notes](https://github.com/numpy/numpy/releases)
  - [Changelog](https://github.com/numpy/numpy/blob/main/doc/HOWTO_RELEASE.rst.txt)
  - [Commits](numpy/numpy@v1.19.5...v1.21.6)

  ---
  updated-dependencies:
  - dependency-name: numpy
    dependency-type: direct:production
    update-type: version-update:semver-minor
  ...

  Signed-off-by: dependabot[bot] <support@github.com>

* fixed mypy errors for numpy 1.21.6 upgrade

* removed duplicate np.array call

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Thomas Werkmeister <thomas@werkmeister.me>
Co-authored-by: melindaloubser1 <melinda.loubser@gmail.com>
taytzehao · Aug 16, 2022 · 4cdceaa · 4cdceaa
1 parent 593d526
commit 4cdceaa
Show file tree

Hide file tree

Showing 21 changed files with 399 additions and 246 deletions.
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -70,7 +70,7 @@ matplotlib = ">=3.1,<3.4"
 attrs = ">=19.3,<22.2"
 jsonpickle = ">=1.3,<2.3"
 redis = ">=3.4,<5.0"
-numpy = ">=1.19.2,<1.20.0"
+numpy = ">=1.19.2,<1.22.0"
 scipy = ">=1.4.1,<1.8.0"
 absl-py = ">=0.9,<1.3"
 apscheduler = ">=3.6,<3.10"

diff --git a/rasa/core/evaluation/marker_stats.py b/rasa/core/evaluation/marker_stats.py
@@ -12,12 +12,15 @@
 
 def compute_statistics(
     values: List[Union[float, int]]
-) -> Dict[Text, Union[int, np.float]]:
+) -> Dict[Text, Union[int, float]]:
     """Computes some statistics over the given numbers."""
     return {
         "count": len(values) if values else 0,
         "mean": np.mean(values) if values else np.nan,
-        "median": np.median(values) if values else np.nan,
+        # [numpy-upgrade] type ignore can be removed after upgrading to numpy 1.23
+        "median": (
+            np.median(values) if values else np.nan  # type: ignore[no-untyped-call]
+        ),
         "min": min(values) if values else np.nan,
         "max": max(values) if values else np.nan,
     }
@@ -250,7 +253,7 @@ def _write_per_session_statistic(
         marker_name: Text,
         statistic_name: Text,
         session_identifiers: List[Tuple[Text, int]],
-        values: List[Union[np.float, int]],
+        values: List[Union[float, int]],
     ) -> None:
         for record_idx, (sender_id, session_idx) in enumerate(session_identifiers):
             MarkerStatistics._write_row(
@@ -268,17 +271,18 @@ def _write_per_session_statistic(
     def _write_row(
         table_writer: WriteRow,
         sender_id: Text,
-        session_idx: Union[int, np.float],
+        session_idx: Union[int, float],
         marker_name: Text,
         statistic_name: Text,
-        statistic_value: Union[int, np.float],
+        statistic_value: Union[int, float],
     ) -> None:
         if isinstance(statistic_value, int):
             value_str = str(statistic_value)
         elif np.isnan(statistic_value):
             value_str = str(np.nan)
         else:
-            value_str = np.round(statistic_value, 3)
+            # [numpy-upgrade] type ignore can be removed after upgrading to numpy 1.23
+            value_str = np.round(statistic_value, 3)  # type: ignore[no-untyped-call]
         table_writer.writerow(
             [
                 str(item)

diff --git a/rasa/core/featurizers/single_state_featurizer.py b/rasa/core/featurizers/single_state_featurizer.py
@@ -142,7 +142,8 @@ def _create_features(
             # its value
             if state_feature in self._default_feature_states[attribute]:
                 features[self._default_feature_states[attribute][state_feature]] = value
-        features = np.expand_dims(features, 0)
+        # [numpy-upgrade] type ignore can be removed after upgrading to numpy 1.23
+        features = np.expand_dims(features, 0)  # type: ignore[no-untyped-call]
 
         if sparse:
             features = scipy.sparse.coo_matrix(features)

diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
@@ -455,10 +455,16 @@ def _assemble_label_data(
             SEQUENCE,
         )
         label_ids = np.arange(domain.num_actions)
+        # [numpy-upgrade] type ignore can be removed after upgrading to numpy 1.23
         label_data.add_features(
             LABEL_KEY,
             LABEL_SUB_KEY,
-            [FeatureArray(np.expand_dims(label_ids, -1), number_of_dimensions=2)],
+            [
+                FeatureArray(
+                    np.expand_dims(label_ids, -1),  # type: ignore[no-untyped-call]
+                    number_of_dimensions=2,
+                )
+            ],
         )
         return label_data
 
@@ -521,8 +527,12 @@ def _create_model_data(
         model_data = RasaModelData(label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY)
 
         if label_ids is not None and encoded_all_labels is not None:
+            # [numpy-upgrade] type ignore can be removed after upgrading to numpy 1.23
             label_ids = np.array(
-                [np.expand_dims(seq_label_ids, -1) for seq_label_ids in label_ids]
+                [
+                    np.expand_dims(seq_label_ids, -1)  # type: ignore[no-untyped-call]
+                    for seq_label_ids in label_ids
+                ]
             )
             model_data.add_features(
                 LABEL_KEY,
@@ -790,10 +800,15 @@ def _pick_confidence(
             logger.debug(f"User intent lead to '{non_e2e_action_name}'.")
             e2e_action_name = domain.action_names_or_texts[np.argmax(confidences[1])]
             logger.debug(f"User text lead to '{e2e_action_name}'.")
+            # [numpy-upgrade] type ignore can be removed after upgrading to numpy 1.23
+            # [numpy-upgrade] type ignore can be removed after upgrading to numpy 1.23
+            # [numpy-upgrade] type ignore can be removed after upgrading to numpy 1.23
             if (
-                np.max(confidences[1]) > self.config[E2E_CONFIDENCE_THRESHOLD]
+                np.max(confidences[1])  # type: ignore[no-untyped-call]
+                > self.config[E2E_CONFIDENCE_THRESHOLD]
                 # TODO maybe compare confidences is better
-                and np.max(similarities[1]) > np.max(similarities[0])
+                and np.max(similarities[1])  # type: ignore[no-untyped-call]
+                > np.max(similarities[0])  # type: ignore[no-untyped-call]
             ):
                 logger.debug(f"TED predicted '{e2e_action_name}' based on user text.")
                 return confidences[1], True
@@ -827,11 +842,19 @@ def predict_action_probabilities(
             tracker, domain, precomputations, rule_only_data=rule_only_data
         )
         model_data = self._create_model_data(tracker_state_features)
-        outputs: Dict[Text, np.ndarray] = self.model.run_inference(model_data)
+        outputs = self.model.run_inference(model_data)
 
-        # take the last prediction in the sequence
-        similarities = outputs["similarities"][:, -1, :]
-        confidences = outputs["scores"][:, -1, :]
+        if isinstance(outputs["similarities"], np.ndarray):
+            # take the last prediction in the sequence
+            similarities = outputs["similarities"][:, -1, :]
+        else:
+            raise TypeError(
+                "model output for `similarities` " "should be a numpy array"
+            )
+        if isinstance(outputs["scores"], np.ndarray):
+            confidences = outputs["scores"][:, -1, :]
+        else:
+            raise TypeError("model output for `scores` should be a numpy array")
         # take correct prediction from batch
         confidence, is_e2e_prediction = self._pick_confidence(
             confidences, similarities, domain

diff --git a/rasa/core/policies/unexpected_intent_policy.py b/rasa/core/policies/unexpected_intent_policy.py
@@ -1,7 +1,7 @@
 import dataclasses
 import logging
 from pathlib import Path
-from typing import Any, List, Optional, Text, Dict, Type
+from typing import Any, List, Optional, Text, Dict, Type, Union
 
 import numpy as np
 import tensorflow as tf
@@ -370,10 +370,16 @@ def _assemble_label_data(
             f"{LABEL}_{INTENT}", SEQUENCE_LENGTH, f"{LABEL}_{INTENT}", SEQUENCE
         )
         label_ids = np.arange(len(domain.intents))
+        # [numpy-upgrade] type ignore can be removed after upgrading to numpy 1.23
         label_data.add_features(
             LABEL_KEY,
             LABEL_SUB_KEY,
-            [FeatureArray(np.expand_dims(label_ids, -1), number_of_dimensions=2)],
+            [
+                FeatureArray(
+                    np.expand_dims(label_ids, -1),  # type: ignore[no-untyped-call]
+                    number_of_dimensions=2,
+                )
+            ],
         )
         return label_data
 
@@ -487,7 +493,7 @@ def run_training(
         self.compute_label_quantiles_post_training(model_data, label_ids)
 
     def _collect_action_metadata(
-        self, domain: Domain, similarities: np.array, query_intent: Text
+        self, domain: Domain, similarities: np.ndarray, query_intent: Text
     ) -> UnexpecTEDIntentPolicyMetadata:
         """Collects metadata to be attached to the predicted action.
 
@@ -604,8 +610,12 @@ def predict_action_probabilities(
         output = self.model.run_inference(model_data)
 
         # take the last prediction in the sequence
-        all_similarities: np.ndarray = output["similarities"]
-        sequence_similarities = all_similarities[:, -1, :]
+        if isinstance(output["similarities"], np.ndarray):
+            sequence_similarities = output["similarities"][:, -1, :]
+        else:
+            raise TypeError(
+                "model output for `similarities` " "should be a numpy array"
+            )
 
         # Check for unlikely intent
         last_user_uttered_event = tracker.get_last_event_for(UserUttered)
@@ -697,7 +707,7 @@ def _should_check_for_intent(self, intent: Text, domain: Domain) -> bool:
         return True
 
     def _check_unlikely_intent(
-        self, domain: Domain, similarities: np.array, query_intent: Text
+        self, domain: Domain, similarities: np.ndarray, query_intent: Text
     ) -> bool:
         """Checks if the query intent is probable according to model's predictions.
 
@@ -774,7 +784,10 @@ def _collect_label_id_grouped_scores(
         Returns:
             Both buckets of similarity scores grouped by each unique label id.
         """
-        unique_label_ids = np.unique(label_ids).tolist()
+        # [numpy-upgrade] type ignore can be removed after upgrading to numpy 1.23
+        unique_label_ids = np.unique(
+            label_ids
+        ).tolist()  # type: ignore[no-untyped-call]
         if LABEL_PAD_ID in unique_label_ids:
             unique_label_ids.remove(LABEL_PAD_ID)
 
@@ -826,8 +839,9 @@ def _compute_label_quantiles(
                 prediction_scores[NEGATIVE_SCORES_KEY],
             )
             minimum_positive_score = min(positive_scores)
+            # [numpy-upgrade] type ignore can be removed after upgrading to numpy 1.23
             if negative_scores:
-                quantile_values = np.quantile(
+                quantile_values = np.quantile(  # type: ignore[no-untyped-call]
                     negative_scores, quantile_indices, interpolation="lower"
                 )
                 label_quantiles[label_id] = [
@@ -981,7 +995,9 @@ def _get_labels_embed(
 
         return labels_embed
 
-    def run_bulk_inference(self, model_data: RasaModelData) -> Dict[Text, np.ndarray]:
+    def run_bulk_inference(
+        self, model_data: RasaModelData
+    ) -> Dict[Text, Union[np.ndarray, Dict[Text, Any]]]:
         """Computes model's predictions for input data.
 
         Args:

diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
@@ -605,10 +605,16 @@ def _compute_default_label_features(
         logger.debug("No label features found. Computing default label features.")
 
         eye_matrix = np.eye(len(labels_example), dtype=np.float32)
+        # [numpy-upgrade] type ignore can be removed after upgrading to numpy 1.23
         # add sequence dimension to one-hot labels
         return [
             FeatureArray(
-                np.array([np.expand_dims(a, 0) for a in eye_matrix]),
+                np.array(
+                    [
+                        np.expand_dims(a, 0)  # type: ignore[no-untyped-call]
+                        for a in eye_matrix
+                    ]
+                ),
                 number_of_dimensions=3,
             )
         ]
@@ -658,12 +664,18 @@ def _create_label_data(
             )
 
         label_ids = np.array([idx for (idx, _) in labels_idx_examples])
+        # [numpy-upgrade] type ignore can be removed after upgrading to numpy 1.23
         # explicitly add last dimension to label_ids
         # to track correctly dynamic sequences
         label_data.add_features(
             LABEL_KEY,
             LABEL_SUB_KEY,
-            [FeatureArray(np.expand_dims(label_ids, -1), number_of_dimensions=2)],
+            [
+                FeatureArray(
+                    np.expand_dims(label_ids, -1),  # type: ignore[no-untyped-call]
+                    number_of_dimensions=2,
+                )
+            ],
         )
 
         label_data.add_lengths(LABEL, SEQUENCE_LENGTH, LABEL, SEQUENCE)
@@ -788,10 +800,16 @@ def _add_label_features(
                     label_ids.append(label_id_dict[example.get(label_attribute)])
             # explicitly add last dimension to label_ids
             # to track correctly dynamic sequences
+            # [numpy-upgrade] type ignore can be removed after upgrading to numpy 1.23
             model_data.add_features(
                 LABEL_KEY,
                 LABEL_SUB_KEY,
-                [FeatureArray(np.expand_dims(label_ids, -1), number_of_dimensions=2)],
+                [
+                    FeatureArray(
+                        np.expand_dims(label_ids, -1),  # type: ignore[no-untyped-call]
+                        number_of_dimensions=2,
+                    )
+                ],
             )
 
         if (
@@ -853,7 +871,15 @@ def preprocess_train_data(self, training_data: TrainingData) -> RasaModelData:
 
     @staticmethod
     def _check_enough_labels(model_data: RasaModelData) -> bool:
-        return len(np.unique(model_data.get(LABEL_KEY, LABEL_SUB_KEY))) >= 2
+        # [numpy-upgrade] type ignore can be removed after upgrading to numpy 1.23
+        return (
+            len(
+                np.unique(  # type: ignore[no-untyped-call]
+                    model_data.get(LABEL_KEY, LABEL_SUB_KEY)
+                )
+            )
+            >= 2
+        )
 
     def train(self, training_data: TrainingData) -> Resource:
         """Train the embedding intent classifier on a data set."""

diff --git a/rasa/nlu/classifiers/sklearn_intent_classifier.py b/rasa/nlu/classifiers/sklearn_intent_classifier.py
@@ -163,7 +163,10 @@ def _get_sentence_features(message: Message) -> np.ndarray:
 
     def _num_cv_splits(self, y: np.ndarray) -> int:
         folds = self.component_config["max_cross_validation_folds"]
-        return max(2, min(folds, np.min(np.bincount(y)) // 5))
+        # [numpy-upgrade] type ignore can be removed after upgrading to numpy 1.23
+        return max(
+            2, min(folds, np.min(np.bincount(y)) // 5)  # type: ignore[no-untyped-call]
+        )
 
     def _create_classifier(
         self, num_threads: int, y: np.ndarray
@@ -260,7 +263,11 @@ def predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
         pred_result = self.predict_prob(X)
         # sort the probabilities retrieving the indices of
         # the elements in sorted order
-        sorted_indices = np.fliplr(np.argsort(pred_result, axis=1))
+
+        # [numpy-upgrade] type ignore can be removed after upgrading to numpy 1.23
+        sorted_indices = np.fliplr(  # type: ignore[no-untyped-call]
+            np.argsort(pred_result, axis=1)
+        )
         return sorted_indices, pred_result[:, sorted_indices]
 
     def persist(self) -> None:

diff --git a/rasa/nlu/featurizers/dense_featurizer/dense_featurizer.py b/rasa/nlu/featurizers/dense_featurizer/dense_featurizer.py
@@ -48,7 +48,10 @@ def aggregate_sequence_features(
         if pooling_operation == MEAN_POOLING:
             return np.mean(dense_sequence_features, axis=0, keepdims=True)
         elif pooling_operation == MAX_POOLING:
-            return np.max(dense_sequence_features, axis=0, keepdims=True)
+            # [numpy-upgrade] type ignore can be removed after upgrading to numpy 1.23
+            return np.max(
+                dense_sequence_features, axis=0, keepdims=True
+            )  # type: ignore[no-untyped-call]
         else:
             raise InvalidConfigException(
                 f"Invalid pooling operation specified. Available operations are "

diff --git a/rasa/nlu/featurizers/dense_featurizer/lm_featurizer.py b/rasa/nlu/featurizers/dense_featurizer/lm_featurizer.py
@@ -358,8 +358,7 @@ def _compute_attention_mask(
             )
             attention_mask.append(padded_sequence)
 
-        attention_mask = np.array(attention_mask).astype(np.float32)
-        return attention_mask
+        return np.array(attention_mask).astype(np.float32)
 
     def _extract_sequence_lengths(
         self, batch_token_ids: List[List[int]]
@@ -542,8 +541,9 @@ def _add_extra_padding(
         reshaped_sequence_embeddings = []
         for index, embedding in enumerate(sequence_embeddings):
             embedding_size = embedding.shape[-1]
+            # [numpy-upgrade] type ignore can be removed after upgrading to numpy 1.23
             if actual_sequence_lengths[index] > self.max_model_sequence_length:
-                embedding = np.concatenate(
+                embedding = np.concatenate(  # type: ignore[no-untyped-call]
                     [
                         embedding,
                         np.zeros(
@@ -654,9 +654,8 @@ def _get_model_features_for_batch(
         sequence_final_embeddings = []
         for embeddings, tokens in zip(sequence_embeddings, batch_tokens):
             sequence_final_embeddings.append(embeddings[: len(tokens)])
-        sequence_final_embeddings = np.array(sequence_final_embeddings)
 
-        return sentence_embeddings, sequence_final_embeddings
+        return sentence_embeddings, np.array(sequence_final_embeddings)
 
     def _get_docs_for_batch(
         self,

diff --git a/rasa/nlu/featurizers/dense_featurizer/mitie_featurizer.py b/rasa/nlu/featurizers/dense_featurizer/mitie_featurizer.py
@@ -159,10 +159,9 @@ def features_for_tokens(
         feature_extractor: "mitie.total_word_feature_extractor",
     ) -> Tuple[np.ndarray, np.ndarray]:
         """Calculates features."""
-        sequence_features = []
-        for token in tokens:
-            sequence_features.append(feature_extractor.get_feature_vector(token.text))
-        sequence_features = np.array(sequence_features)
+        sequence_features = np.array(
+            [feature_extractor.get_feature_vector(token.text) for token in tokens]
+        )
 
         sentence_fetaures = self.aggregate_sequence_features(
             sequence_features, self.pooling_operation