Unify entry points into model code into run_net.py (facebookresearch#148)

Summary:
The new usage is `./tools/run_net.py --mode MODE --cfg CFG_FILE`
Valid choices for MODE are {info, scale, test, time, train}
Note that the info mode is new (and prints the model and complexity)
See GETTING_STARTED for the new usage

Details:
- GETTING_STARTED.md: updated documentation
- core/config.py: removed load_cfg_fom_args (no longer used)
- scaler.py: added documentation that was previously in scale_net.py
- tools/run_net.py encompasses all the individual scripts now
- tools/{scale, test, time, train}_net.py are all obsolete now
- sweep_launch.py, sweep_launch_job.py, sweep/config.py: added MODE

Pull Request resolved: facebookresearch#148

Test Plan:
Tested all the instructions in the getting started docs -
```
./tools/run_net.py --mode info \
    --cfg configs/dds_baselines/regnetx/RegNetX-400MF_dds_8gpu.yaml
```
```
./tools/run_net.py --mode test \
    --cfg configs/dds_baselines/regnetx/RegNetX-400MF_dds_8gpu.yaml \
    TEST.WEIGHTS https://dl.fbaipublicfiles.com/pycls/dds_baselines/160905967/RegNetX-400MF_dds_8gpu.pyth \
    OUT_DIR /tmp
```
```
./tools/run_net.py --mode train \
    --cfg configs/dds_baselines/regnetx/RegNetX-400MF_dds_8gpu.yaml \
    OUT_DIR /tmp
```
```
./tools/run_net.py --mode train \
    --cfg configs/dds_baselines/regnetx/RegNetX-400MF_dds_8gpu.yaml \
    TRAIN.WEIGHTS https://dl.fbaipublicfiles.com/pycls/dds_baselines/160905967/RegNetX-400MF_dds_8gpu.pyth \
    OUT_DIR /tmp
```
```
./tools/run_net.py --mode time \
    --cfg configs/dds_baselines/regnetx/RegNetX-400MF_dds_8gpu.yaml \
    NUM_GPUS 1 \
    TRAIN.BATCH_SIZE 64 \
    TEST.BATCH_SIZE 64 \
    PREC_TIME.WARMUP_ITER 5 \
    PREC_TIME.NUM_ITER 50
```
```
./tools/run_net.py --mode scale \
    --cfg configs/dds_baselines/regnety/RegNetY-4.0GF_dds_8gpu.yaml \
    OUT_DIR ./ \
    CFG_DEST "RegNetY-4.0GF_dds_8gpu_scaled.yaml" \
    MODEL.SCALING_FACTOR 4.0 \
    MODEL.SCALING_TYPE "d1_w8_g8_r1"
```

Also tested a sweep launch -
```
SWEEP_CFG=configs/sweeps/cifar/cifar_optim.yaml
./tools/sweep_setup.py --sweep-cfg $SWEEP_CFG
./tools/sweep_launch.py --sweep-cfg $SWEEP_CFG
```

Reviewed By: pdollar
Differential Revision: D29275940
Pulled By: mannatsingh
fbshipit-source-id: af463d014d259bf8483b981a57a2a85c10209252
wumuyu9 · Jun 22, 2021 · 5b57451 · 5b57451
1 parent f20820e
commit 5b57451
Show file tree

Hide file tree

Showing 11 changed files with 110 additions and 150 deletions.
diff --git a/docs/GETTING_STARTED.md b/docs/GETTING_STARTED.md
@@ -43,6 +43,12 @@ Install dependencies:
 pip install -r requirements.txt
 ```
 
+Set all the files in ./tools to be executable by the user:
+
+```
+chmod 744 ./tools/*.py
+```
+
 Set up modules:
 
 ```
@@ -51,44 +57,45 @@ python setup.py develop --user
 
 Please see [`DATA.md`](DATA.md) for the instructions on setting up datasets.
 
-### Evaluation
+The examples below use a config for RegNetX-400MF on ImageNet with 8 GPUs.
 
-RegNetX-400MF on ImageNet with 8 GPUs:
+### Model Info
 
 ```
-python tools/test_net.py \
+./tools/run_net.py --mode info \
+    --cfg configs/dds_baselines/regnetx/RegNetX-400MF_dds_8gpu.yaml
+```
+
+### Model Evaluation
+
+```
+./tools/run_net.py --mode test \
     --cfg configs/dds_baselines/regnetx/RegNetX-400MF_dds_8gpu.yaml \
     TEST.WEIGHTS https://dl.fbaipublicfiles.com/pycls/dds_baselines/160905967/RegNetX-400MF_dds_8gpu.pyth \
     OUT_DIR /tmp
 ```
 
-### Training
-
-RegNetX-400MF on ImageNet with 8 GPUs:
+### Model Training
 
 ```
-python tools/train_net.py \
+./tools/run_net.py --mode train \
     --cfg configs/dds_baselines/regnetx/RegNetX-400MF_dds_8gpu.yaml \
     OUT_DIR /tmp
 ```
 
-### Finetuning
-
-RegNetX-400MF on ImageNet with 8 GPUs:
+### Model Finetuning
 
 ```
-python tools/train_net.py \
+./tools/run_net.py --mode train \
     --cfg configs/dds_baselines/regnetx/RegNetX-400MF_dds_8gpu.yaml \
     TRAIN.WEIGHTS https://dl.fbaipublicfiles.com/pycls/dds_baselines/160905967/RegNetX-400MF_dds_8gpu.pyth \
     OUT_DIR /tmp
 ```
 
-### Timing
-
-RegNetX-400MF with 1 GPU:
+### Model Timing
 
 ```
-python tools/time_net.py
+./tools/run_net.py --mode time \
     --cfg configs/dds_baselines/regnetx/RegNetX-400MF_dds_8gpu.yaml \
     NUM_GPUS 1 \
     TRAIN.BATCH_SIZE 64 \
@@ -102,7 +109,7 @@ python tools/time_net.py
 Scale a RegNetY-4GF by 4x using fast compound scaling (see https://arxiv.org/abs/2103.06877):
 
 ```
-python tools/scale_net.py \
+./tools/run_net.py --mode scale \
     --cfg configs/dds_baselines/regnety/RegNetY-4.0GF_dds_8gpu.yaml \
     OUT_DIR ./ \
     CFG_DEST "RegNetY-4.0GF_dds_8gpu_scaled.yaml" \

diff --git a/pycls/core/config.py b/pycls/core/config.py
@@ -7,9 +7,7 @@
 
 """Configuration file (powered by YACS)."""
 
-import argparse
 import os
-import sys
 
 from pycls.core.io import cache_url, pathmgr
 from yacs.config import CfgNode
@@ -410,18 +408,3 @@ def load_cfg(cfg_file):
 def reset_cfg():
     """Reset config to initial state."""
     _C.merge_from_other_cfg(_CFG_DEFAULT)
-
-
-def load_cfg_fom_args(description="Config file options."):
-    """Load config from command line arguments and set any specified options."""
-    parser = argparse.ArgumentParser(description=description)
-    help_s = "Config file location"
-    parser.add_argument("--cfg", dest="cfg_file", help=help_s, required=True, type=str)
-    help_s = "See pycls/core/config.py for all options"
-    parser.add_argument("opts", help=help_s, default=None, nargs=argparse.REMAINDER)
-    if len(sys.argv) == 1:
-        parser.print_help()
-        sys.exit(1)
-    args = parser.parse_args()
-    load_cfg(args.cfg_file)
-    _C.merge_from_list(args.opts)
diff --git a/pycls/models/scaler.py b/pycls/models/scaler.py
@@ -50,7 +50,14 @@ def scale_model():
     """
     Scale model blocks by the specified type and amount (note: alters global cfg).
 
+    Scale a model using scaling strategies from "Fast and Accurate Model Scaling".
+    For reference on scaling strategies, see: https://arxiv.org/abs/2103.06877.
+    For example usage, see GETTING_STARTED, MODEL SCALING section.
+
     The actual scaling is specified by MODEL.SCALING_TYPE and MODEL.SCALING_FACTOR.
+    For example, SCALING_TYPE of "d1_w8_g8_r1" is fast compound scaling and is the
+    likely best default option, and SCALING_FACTOR indicates the scaling amount.
+    For further details on controlling the scaling, see comments for scaling_factors().
 
     Note that the scaler must be employed on a standalone config outside of the main
     training loop. This is because it alters the global config, which is typically

diff --git a/pycls/sweep/config.py b/pycls/sweep/config.py
@@ -84,8 +84,8 @@
 # ------------------------------- Sweep launch options ------------------------------- #
 _C.LAUNCH = CfgNode()
 
-# Actual script to run for each job (should be in pycls directory)
-_C.LAUNCH.SCRIPT = "tools/train_net.py"
+# Mode to launch tools/run_net.py script with (train, test, time, etc.)
+_C.LAUNCH.MODE = "train"
 
 # CONDA environment to use for jobs (defaults to current environment)
 _C.LAUNCH.CONDA_ENV = os.environ["CONDA_PREFIX"]
@@ -200,6 +200,10 @@
 SAMPLERS.REGNET_SAMPLER.BOT_MUL = [1.0, 1.0]
 
 
+# --------------------------------- Deprecated keys ---------------------------------- #
+_C.register_deprecated_key("LAUNCH.SCRIPT")
+
+
 # -------------------------------- Utility functions --------------------------------- #
 def load_cfg(sweep_cfg_file):
     """Loads config from specified sweep_cfg_file."""

diff --git a/tools/run_net.py b/tools/run_net.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Execute various operations (train, test, time, etc.) on a classification model."""
+
+import argparse
+import sys
+
+import pycls.core.builders as builders
+import pycls.core.config as config
+import pycls.core.distributed as dist
+import pycls.core.net as net
+import pycls.core.trainer as trainer
+import pycls.models.scaler as scaler
+from pycls.core.config import cfg
+
+
+def parse_args():
+    """Parse command line options (mode and config)."""
+    parser = argparse.ArgumentParser(description="Run a model.")
+    help_s, choices = "Run mode", ["info", "train", "test", "time", "scale"]
+    parser.add_argument("--mode", help=help_s, choices=choices, required=True, type=str)
+    help_s = "Config file location"
+    parser.add_argument("--cfg", help=help_s, required=True, type=str)
+    help_s = "See pycls/core/config.py for all options"
+    parser.add_argument("opts", help=help_s, default=None, nargs=argparse.REMAINDER)
+    if len(sys.argv) == 1:
+        parser.print_help()
+        sys.exit(1)
+    return parser.parse_args()
+
+
+def main():
+    """Execute operation (train, test, time, etc.)."""
+    args = parse_args()
+    mode = args.mode
+    config.load_cfg(args.cfg)
+    cfg.merge_from_list(args.opts)
+    config.assert_and_infer_cfg()
+    cfg.freeze()
+    if mode == "info":
+        print(builders.get_model()())
+        print("complexity:", net.complexity(builders.get_model()))
+    elif mode == "train":
+        dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=trainer.train_model)
+    elif mode == "test":
+        dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=trainer.test_model)
+    elif mode == "time":
+        dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=trainer.time_model)
+    elif mode == "scale":
+        cfg.defrost()
+        cx_orig = net.complexity(builders.get_model())
+        scaler.scale_model()
+        cx_scaled = net.complexity(builders.get_model())
+        cfg_file = config.dump_cfg()
+        print("Scaled config dumped to:", cfg_file)
+        print("Original model complexity:", cx_orig)
+        print("Scaled model complexity:", cx_scaled)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/scale_net.py b/tools/scale_net.py
diff --git a/tools/sweep_launch.py b/tools/sweep_launch.py
@@ -33,6 +33,7 @@
     "  {current_dir}/sweep_launch_job.py"
     "  --conda-env {conda_env}"
     "  --script-path {script_path}"
+    "  --script-mode {script_mode}"
     "  --cfgs-dir {cfgs_dir}"
     "  --pycls-dir {pycls_dir}"
     "  --logs-dir {logs_dir}"
@@ -49,7 +50,7 @@ def sweep_launch():
     cfgs_dir = os.path.join(sweep_dir, "cfgs")
     logs_dir = os.path.join(sweep_dir, "logs")
     sbatch_dir = os.path.join(logs_dir, "sbatch")
-    script_path = os.path.abspath(launch_cfg.SCRIPT)
+    script_path = os.path.abspath("tools/run_net.py")
     assert os.path.exists(sweep_dir), "Sweep dir {} invalid".format(sweep_dir)
     assert os.path.exists(script_path), "Script path {} invalid".format(script_path)
     n_cfgs = len([c for c in os.listdir(cfgs_dir) if c.endswith(".yaml")])
@@ -77,6 +78,7 @@ def sweep_launch():
         current_dir=current_dir,
         conda_env=launch_cfg.CONDA_ENV,
         script_path=script_path,
+        script_mode=launch_cfg.MODE,
         cfgs_dir=cfgs_dir,
         pycls_dir=pycls_copy_dir,
         logs_dir=logs_dir,

diff --git a/tools/sweep_launch_job.py b/tools/sweep_launch_job.py
@@ -54,6 +54,7 @@ def main():
     parser = argparse.ArgumentParser(description=desc)
     parser.add_argument("--conda-env", required=True)
     parser.add_argument("--script-path", required=True)
+    parser.add_argument("--script-mode", required=True)
     parser.add_argument("--cfgs-dir", required=True)
     parser.add_argument("--pycls-dir", required=True)
     parser.add_argument("--logs-dir", required=True)
@@ -98,9 +99,12 @@ def main():
         "  --output {out_dir}/stdout.log"
         "  --error {out_dir}/stderr.log"
         "  python {script}"
+        "  --mode {mode}"
         "  --cfg {cfg}"
         "  OUT_DIR {out_dir}"
-    ).format(out_dir=out_dir, script=args.script_path, cfg=cfg_file)
+    ).format(
+        out_dir=out_dir, script=args.script_path, mode=args.script_mode, cfg=cfg_file
+    )
     prt("Running cmd:\n", cmd_to_run.replace("  ", "\n  "))
     # Run command in background using subprocess and wait so that signals can be caught
     p = subprocess.Popen(cmd_to_run, shell=True)

diff --git a/tools/test_net.py b/tools/test_net.py
diff --git a/tools/time_net.py b/tools/time_net.py
diff --git a/tools/train_net.py b/tools/train_net.py