Simplify --user-dir and require user-dir module name to be globally unique (facebookresearch#2815)

Summary:
This PR reverts recent changes that attempted to make `--user-dir` work with non-unique module names. That approach introduced other issues (e.g., poor compatibility with multiprocessing and Windows), so this returns to the previous, simpler implementation, which requires the user-dir module name to be globally unique.
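Because the directory name now doubles as the Python module name, a generic name like `src` (previously used by several examples) would collide across user dirs; hence the `examples/*/src` → `examples/*/<example>_src` renames below. As a minimal usage sketch of the new contract (the `Namespace` construction and path here are illustrative, not part of this diff):

```python
from argparse import Namespace

from fairseq import utils

# The user dir is imported under its literal directory name
# ("linformer_src" here), so that name must be globally unique
# among importable modules.
args = Namespace(user_dir="examples/linformer/linformer_src")
utils.import_user_module(args)
```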

Pull Request resolved: facebookresearch#2815

Reviewed By: alexeib

Differential Revision: D24611571

Pulled By: myleott

fbshipit-source-id: cecfe28395585ca0401f844f10bd0d49d014c4d8
myleott authored and facebook-github-bot committed Oct 30, 2020
1 parent 6debe29 commit a4356b1
Showing 33 changed files with 37 additions and 34 deletions.
2 changes: 1 addition & 1 deletion examples/latent_depth/README.md
@@ -14,7 +14,7 @@ lang_pairs_str="eng-aze,eng-bel,eng-ces,eng-glg,eng-por,eng-rus,eng-slk,eng-tur"
databin_dir=<path to binarized data>

fairseq-train ${databin_dir} \
- --user-dir examples/latent_depth/src \
+ --user-dir examples/latent_depth/latent_depth_src \
--lang-pairs "${lang_pairs_str}" \
--arch multilingual_transformer_iwslt_de_en \
--task multilingual_translation_latent_depth \
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion examples/linformer/README.md
@@ -6,7 +6,7 @@ This example contains code to train Linformer models as described in our paper
## Training a new Linformer RoBERTa model

You can mostly follow the [RoBERTa pretraining README](/examples/roberta/README.pretraining.md),
- updating your training command with `--user-dir examples/linformer/src --arch linformer_roberta_base`.
+ updating your training command with `--user-dir examples/linformer/linformer_src --arch linformer_roberta_base`.

## Citation

File renamed without changes.
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions examples/pointer_generator/README.xsum.md
@@ -77,7 +77,7 @@ update_freq=4
pointer_layer=-2

CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 fairseq-train bin \
- --user-dir examples/pointer_generator/src \
+ --user-dir examples/pointer_generator/pointer_generator_src \
--max-tokens "$max_tokens" \
--task translation \
--source-lang src --target-lang tgt \
@@ -125,7 +125,7 @@ max_length=60
length_penalty=1.0

fairseq-interactive bin \
- --user-dir examples/pointer_generator/src \
+ --user-dir examples/pointer_generator/pointer_generator_src \
--batch-size "$batch_size" \
--task translation \
--source-lang src --target-lang tgt \
File renamed without changes.
2 changes: 1 addition & 1 deletion examples/rxf/README.md
@@ -38,7 +38,7 @@ CUDA_VISIBLE_DEVICES=0 fairseq-train RTE-bin \
--find-unused-parameters \
--best-checkpoint-metric accuracy --maximize-best-checkpoint-metric \
--noise-type uniform --r3f-lambda 0.7 \
- --user-dir examples/rxf/src
+ --user-dir examples/rxf/rxf_src
```

## Citation
2 changes: 1 addition & 1 deletion examples/rxf/__init__.py
@@ -3,4 +3,4 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

- from . import src # noqa
+ from . import rxf_src # noqa
File renamed without changes.
File renamed without changes.
6 changes: 3 additions & 3 deletions examples/translation_moe/README.md
@@ -18,7 +18,7 @@ The following command will train a `hMoElp` model with `3` experts:
fairseq-train --ddp-backend='no_c10d' \
data-bin/wmt17_en_de \
--max-update 100000 \
- --task translation_moe --user-dir examples/translation_moe/src \
+ --task translation_moe --user-dir examples/translation_moe/translation_moe_src \
--method hMoElp --mean-pool-gating-network \
--num-experts 3 \
--arch transformer_wmt_en_de --share-all-embeddings \
@@ -37,7 +37,7 @@ For example, to generate from expert 0:
fairseq-generate data-bin/wmt17_en_de \
--path checkpoints/checkpoint_best.pt \
--beam 1 --remove-bpe \
- --task translation_moe --user-dir examples/translation_moe/src \
+ --task translation_moe --user-dir examples/translation_moe/translation_moe_src \
--method hMoElp --mean-pool-gating-network \
--num-experts 3 \
--gen-expert 0
@@ -61,7 +61,7 @@ for EXPERT in $(seq 0 2); do \
--beam 1 \
--bpe subword_nmt --bpe-codes $BPE_CODE \
--buffer-size 500 --max-tokens 6000 \
- --task translation_moe --user-dir examples/translation_moe/src \
+ --task translation_moe --user-dir examples/translation_moe/translation_moe_src \
--method hMoElp --mean-pool-gating-network \
--num-experts 3 \
--gen-expert $EXPERT ; \
File renamed without changes.
27 changes: 15 additions & 12 deletions fairseq/utils.py
@@ -450,18 +450,21 @@ def import_user_module(args):
        else:
            raise FileNotFoundError(module_path)

-        # We want to import the module under a unique name so that it doesn't
-        # collide with existing modules. At the same time we don't want to
-        # import the module multiple times. The solution is to create a
-        # temporary directory and symlink the user_dir under a new name, which is
-        # a deterministic hash of the original module_path.
-        with tempfile.TemporaryDirectory() as tmpdirname:
-            unique_mod_name = "fairseq_user_dir_{}".format(hash(module_path) % 100000)
-            os.symlink(module_path, os.path.join(tmpdirname, unique_mod_name))
-
-            sys.path.insert(0, tmpdirname)
-            importlib.import_module(unique_mod_name)
-            sys.path.remove(tmpdirname)
+        # ensure that user modules are only imported once
+        import_user_module.memo = getattr(import_user_module, "memo", set())
+        if module_path not in import_user_module.memo:
+            import_user_module.memo.add(module_path)
+
+            module_parent, module_name = os.path.split(module_path)
+            if module_name not in sys.modules:
+                sys.path.insert(0, module_parent)
+                importlib.import_module(module_name)
+            else:
+                raise ImportError(
+                    "Failed to import --user-dir={} because the corresponding module name "
+                    "({}) is not globally unique. Please rename the directory to "
+                    "something unique and try again.".format(module_path, module_name)
+                )


def softmax(x, dim: int, onnx_trace: bool = False):
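For readers skimming the new logic above, a hedged sketch of how it behaves across repeated calls (hypothetical calls with illustrative paths; not part of this diff or the test suite):

```python
from argparse import Namespace

from fairseq import utils

# First call imports the package "rxf_src" by putting its parent
# directory on sys.path.
utils.import_user_module(Namespace(user_dir="examples/rxf/rxf_src"))

# A second call with the same path is a no-op: the absolute path is
# memoized in import_user_module.memo.
utils.import_user_module(Namespace(user_dir="examples/rxf/rxf_src"))

# A different directory whose basename collides with an already-imported
# module would raise ImportError, since the name is not globally unique:
# utils.import_user_module(Namespace(user_dir="/elsewhere/rxf_src"))
```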
26 changes: 13 additions & 13 deletions tests/test_binaries.py
@@ -320,7 +320,7 @@ def test_multilingual_translation_latent_depth(self):
task="multilingual_translation_latent_depth",
extra_flags=[
"--user-dir",
"examples/latent_depth/src",
"examples/latent_depth/latent_depth_src",
"--encoder-layers",
"2",
"--decoder-layers",
@@ -340,7 +340,7 @@
run_validation=True,
extra_valid_flags=[
"--user-dir",
"examples/latent_depth/src",
"examples/latent_depth/latent_depth_src",
]
+ enc_ll_flag
+ dec_ll_flag,
@@ -349,7 +349,7 @@
data_dir,
extra_flags=[
"--user-dir",
"examples/latent_depth/src",
"examples/latent_depth/latent_depth_src",
"--task",
"multilingual_translation_latent_depth",
"--lang-pairs",
@@ -465,7 +465,7 @@ def test_transformer_pointer_generator(self):
"transformer_pointer_generator",
extra_flags=[
"--user-dir",
"examples/pointer_generator/src",
"examples/pointer_generator/pointer_generator_src",
"--encoder-layers",
"2",
"--decoder-layers",
@@ -482,11 +482,11 @@
"0",
],
run_validation=True,
- extra_valid_flags=["--user-dir", "examples/pointer_generator/src"],
+ extra_valid_flags=["--user-dir", "examples/pointer_generator/pointer_generator_src"],
)
generate_main(
data_dir,
- extra_flags=["--user-dir", "examples/pointer_generator/src"],
+ extra_flags=["--user-dir", "examples/pointer_generator/pointer_generator_src"],
)

def test_lightconv(self):
@@ -700,7 +700,7 @@ def test_mixture_of_experts(self):
"--task",
"translation_moe",
"--user-dir",
"examples/translation_moe/src",
"examples/translation_moe/translation_moe_src",
"--method",
"hMoElp",
"--mean-pool-gating-network",
@@ -722,7 +722,7 @@
"--task",
"translation_moe",
"--user-dir",
"examples/translation_moe/src",
"examples/translation_moe/translation_moe_src",
"--method",
"hMoElp",
"--mean-pool-gating-network",
@@ -1058,7 +1058,7 @@ def test_linformer_roberta_masked_lm(self):
"linformer_roberta_base",
extra_flags=[
"--user-dir",
"examples/linformer/src",
"examples/linformer/linformer_src",
"--encoder-layers",
"2",
],
@@ -1075,7 +1075,7 @@ def test_linformer_roberta_sentence_prediction(self):
data_dir,
"linformer_roberta_base",
num_classes=num_classes,
- extra_flags=["--user-dir", "examples/linformer/src"],
+ extra_flags=["--user-dir", "examples/linformer/linformer_src"],
)

def test_linformer_roberta_regression_single(self):
@@ -1095,7 +1095,7 @@ def test_linformer_roberta_regression_single(self):
extra_flags=[
"--regression-target",
"--user-dir",
"examples/linformer/src",
"examples/linformer/linformer_src",
],
)

@@ -1116,7 +1116,7 @@ def test_linformer_roberta_regression_multiple(self):
extra_flags=[
"--regression-target",
"--user-dir",
"examples/linformer/src",
"examples/linformer/linformer_src",
],
)

@@ -1198,7 +1198,7 @@ def test_r4f_roberta(self):
num_classes=num_classes,
extra_flags=[
"--user-dir",
"examples/rxf/src",
"examples/rxf/rxf_src",
"--criterion",
"sentence_prediction_r3f",
"--spectral-norm-classification-head",
