[air] Un-revert "[air] remove unnecessary logs + improve repr for res…
richardliaw authored Jul 25, 2022
1 parent bf1d997 commit aae0aae
Showing 4 changed files with 43 additions and 8 deletions.
2 changes: 1 addition & 1 deletion python/ray/_private/runtime_env/plugin_schema_manager.py
@@ -48,7 +48,7 @@ def _load_default_schemas(cls):
             for f in files:
                 if f.endswith(RAY_RUNTIME_ENV_PLUGIN_SCHEMA_SUFFIX):
                     schema_json_files.append(os.path.join(root, f))
-        logger.info(
+        logger.debug(
             f"Loading the default runtime env schemas: {schema_json_files}."
         )
         cls._load_schemas(schema_json_files)
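Note: with the schema-loading message demoted from INFO to DEBUG, it no longer appears under Ray's default log level. A minimal sketch of how to surface it again, assuming the module logger follows the usual logging.getLogger(__name__) convention under the "ray" logger hierarchy:

    import logging

    # Lower the "ray" parent logger to DEBUG; child module loggers such as
    # ray._private.runtime_env.plugin_schema_manager inherit this effective
    # level, so the schema-loading message becomes visible again.
    logging.getLogger("ray").setLevel(logging.DEBUG)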
13 changes: 13 additions & 0 deletions python/ray/air/result.py
@@ -42,10 +42,23 @@ class Result:
     log_dir: Optional[Path]
     metrics_dataframe: Optional[pd.DataFrame]
     best_checkpoints: Optional[List[Tuple[Checkpoint, Dict[str, Any]]]]
+    _items_to_repr = ["metrics", "error", "log_dir"]
 
     @property
     def config(self) -> Optional[Dict[str, Any]]:
         """The config associated with the result."""
         if not self.metrics:
             return None
         return self.metrics.get("config", None)
+
+    def __repr__(self):
+        from ray.tune.result import AUTO_RESULT_KEYS
+
+        shown_attributes = {k: self.__dict__[k] for k in self._items_to_repr}
+
+        if self.metrics:
+            shown_attributes["metrics"] = {
+                k: v for k, v in self.metrics.items() if k not in AUTO_RESULT_KEYS
+            }
+        kws = [f"{key}={value!r}" for key, value in shown_attributes.items()]
+        return "{}({})".format(type(self).__name__, ", ".join(kws))
20 changes: 13 additions & 7 deletions python/ray/train/gbdt_trainer.py
@@ -18,6 +18,8 @@

 from ray.data.preprocessor import Preprocessor
 
+_WARN_REPARTITION_THRESHOLD = 10 * 1024 ** 3
+
 
 def _convert_scaling_config_to_ray_params(
     scaling_config: ScalingConfig,
@@ -166,13 +168,17 @@ def preprocess_datasets(self) -> None:
         # TODO: Move this logic to the respective libraries
         for dataset_key, dataset in self.datasets.items():
             if dataset.num_blocks() < self._ray_params.num_actors:
-                warnings.warn(
-                    f"Dataset '{dataset_key}' has {dataset.num_blocks()} blocks, "
-                    f"which is less than the `num_workers` "
-                    f"{self._ray_params.num_actors}. "
-                    f"This dataset will be automatically repartitioned to "
-                    f"{self._ray_params.num_actors} blocks."
-                )
+                if dataset.size_bytes() > _WARN_REPARTITION_THRESHOLD:
+                    warnings.warn(
+                        f"Dataset '{dataset_key}' has {dataset.num_blocks()} blocks, "
+                        f"which is less than the `num_workers` "
+                        f"{self._ray_params.num_actors}. "
+                        f"This dataset will be automatically repartitioned to "
+                        f"{self._ray_params.num_actors} blocks. You can disable "
+                        "this error message by partitioning the dataset "
+                        "to have blocks >= number of workers via "
+                        "`dataset.repartition(num_workers)`."
+                    )
                 self.datasets[dataset_key] = dataset.repartition(
                     self._ray_params.num_actors
                 )
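For reference, _WARN_REPARTITION_THRESHOLD is 10 * 1024 ** 3 bytes, i.e. 10 GiB, so the warning now only fires when the automatic repartition touches a large dataset. A minimal sketch of the workaround the new message suggests (worker count and dataset are illustrative):

    import ray

    num_workers = 4  # hypothetical; in practice this matches the trainer's ScalingConfig
    ds = ray.data.range(100_000)

    # Pre-partition so num_blocks() >= number of workers; the trainer then
    # has no reason to warn or repartition implicitly.
    ds = ds.repartition(num_workers)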
16 changes: 16 additions & 0 deletions python/ray/tune/tests/test_result_grid.py
@@ -190,6 +190,22 @@ def f(config):
         result_grid.get_best_result(metric="x", mode="max")
 
 
+def test_result_repr(ray_start_2_cpus):
+    def f(config):
+        from ray.air import session
+
+        session.report({"loss": 1})
+
+    tuner = tune.Tuner(f, param_space={"x": tune.grid_search([1, 2])})
+    result_grid = tuner.fit()
+    result = result_grid[0]
+
+    from ray.tune.result import AUTO_RESULT_KEYS
+
+    representation = result.__repr__()
+    assert not any(key in representation for key in AUTO_RESULT_KEYS)
+
+
 def test_no_metric_mode(ray_start_2_cpus):
     def f(config):
         tune.report(x=1)
