Commit 29b968d

Fix pre-commit formatting
Signed-off-by: Dashiell Stander <dstander@protonmail.com>
dashstander committed Apr 19, 2023
1 parent 17b84d7 commit 29b968d
Showing 6 changed files with 37 additions and 87 deletions.
9 changes: 5 additions & 4 deletions configs/neox_arguments.md
@@ -111,7 +111,7 @@ Logging Arguments

- **git_hash**: str

- Default = ce9bee3
+ Default = efc1184

current git hash of repository

@@ -926,7 +926,7 @@ Text Generation arguments

- **prompt_end**: str

- Default =
+ Default =


a single prompt's end. Defaults to newline
@@ -968,7 +968,7 @@ Text Generation arguments

- **eval_results_prefix**: str

- Default =
+ Default =

prefix to which to save evaluation results - final fp will be {eval_results_prefix}_eval_results_yy-mm-dd-HH-MM.json

@@ -1686,7 +1686,7 @@ Args for deepspeed config

Default = None





@@ -1988,3 +1988,4 @@ Args for deepspeed runner (deepspeed.launcher.runner).
Default = None

Adds a `--comment` to the DeepSpeed launch command. In DeeperSpeed this is passed on to the SlurmLauncher as well. Sometime necessary for cluster rules, or so I've heard.
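
For illustration, a NeoX YAML config entry such as `"comment": "reservation=mygroup"` (value hypothetical) would then be forwarded to the generated DeepSpeed launch command as a `--comment` argument; the exact flag syntax is DeepSpeed's, not specified here.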

1 change: 1 addition & 0 deletions megatron/data/helpers.cpp
@@ -173,6 +173,7 @@ py::array build_sample_idx_int32(const py::array_t<int32_t>& sizes_,
free_when_done); // numpy array references
}


py::array build_sample_idx_int64(const py::array_t<int32_t>& sizes_,
                                 const py::array_t<int32_t>& doc_idx_,
                                 const int32_t seq_length,
2 changes: 1 addition & 1 deletion megatron/fused_kernels/__init__.py
@@ -38,7 +38,7 @@ def load_fused_kernels():
         print(e)
         print("=" * 100)
         print(
-            f"ERROR: Fused kernels configured but not properly installed. Please run `pip install {str(srcpath)}` to install them"
+            f'ERROR: Fused kernels configured but not properly installed. Please run `pip install {str(srcpath)}` to install them'
         )
         print("=" * 100)
         exit()
28 changes: 4 additions & 24 deletions tools/convert_sequential_to_hf.py
@@ -58,7 +58,7 @@ def load_partitions(input_checkpoint_path, mp_partitions) -> List[torch.Tensor]:


 def get_state(
-    state_dicts: List[torch.Tensor],
+    state_dicts: list[torch.Tensor],

Comment from @briansemrau (Apr 30, 2023):

FYI this is incorrect - it must be uppercase List. Use this instead:

from typing import List
# ...
state_dicts: List[torch.Tensor]

The error is also present in convert_v1.0_to_hf.py.

     key: str,
     layer_idx: int,
 ) -> torch.Tensor:
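
A minimal sketch of the version problem the comment describes, assuming only that the file must still import on Python 3.8 (PEP 585 made the built-in list subscriptable starting with Python 3.9); function names here are illustrative:

from typing import List

import torch


def get_state_ok(state_dicts: List[torch.Tensor], key: str, layer_idx: int) -> torch.Tensor:
    ...  # fine on Python 3.5+


# On Python 3.8 and earlier, defining this function raises
# "TypeError: 'type' object is not subscriptable", because the annotation
# list[torch.Tensor] is evaluated when the def statement runs:
#
# def get_state_broken(state_dicts: list[torch.Tensor], key: str, layer_idx: int) -> torch.Tensor:
#     ...
#
# A `from __future__ import annotations` line would also avoid the error by
# deferring annotation evaluation, but typing.List is the drop-in fix here.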
@@ -155,29 +155,9 @@ def convert(input_checkpoint_path, loaded_config, output_checkpoint_path):

     hf_config = create_config(loaded_config)
 
-    hf_model = GPTNeoXForCausalLM(hf_config)
-
-    # save model in FP16 if Deepspeed fp16 was used in config, else 32 bit
-    fp16 = get_key(loaded_config, "fp16")
-    # save model in fp16/bf16 if Deepspeed fp16 or bf16 mixed precision was used in config, else 32 bit weights
-    fp16 = get_key(loaded_config, "fp16")
-    if fp16:
-        try:
-            # current behavior is to pass "fp16": {"enabled": true}, when using upstream Deepspeed
-            if fp16["enabled"]:
-                hf_model.half()
-                print("Saving weights in fp16 precision...")
-        except:
-            try:
-                # attempt to access bf16 dict in yaml file, if fp16 not enabled
-                bf16 = get_key(loaded_config, "bf16")
-                if bf16:
-                    hf_model.to(dtype=torch.bfloat16)
-                    print("Saving weights in bf16 precision...")
-            except:
-                print(
-                    "Model not trained in fp16 / bf16 mixed precision, saving weights in fp32..."
-                )
+    hf_model = GPTNeoXForCausalLM(
+        hf_config
+    ).half()  # nice-to-have: lazy init weights somehow?
 
     mp_partitions = get_key(loaded_config, "model-parallel-size")
 
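
The block removed above chose a save dtype via nested bare `except:` clauses. For reference, a hedged sketch of the same decision as a flat lookup (not the repository's code): `loaded_config` is assumed to behave like a plain dict, whereas the original reads keys through its `get_key` helper.

import torch


def resolve_save_dtype(loaded_config: dict) -> torch.dtype:
    # Mirrors the removed logic: fp16 when the config passed
    # "fp16": {"enabled": true}, bf16 when a "bf16" section exists,
    # otherwise keep full fp32 weights.
    fp16 = loaded_config.get("fp16") or {}
    if fp16.get("enabled"):
        return torch.float16
    if loaded_config.get("bf16"):
        return torch.bfloat16
    return torch.float32


# Usage sketch: hf_model = hf_model.to(dtype=resolve_save_dtype(loaded_config))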
22 changes: 4 additions & 18 deletions tools/convert_v1.0_to_hf.py
@@ -18,7 +18,6 @@
 import yaml
 import argparse
 from tqdm import tqdm
-from typing import List
 
 import torch
 from transformers import GPTNeoXConfig, GPTNeoXForCausalLM
@@ -42,7 +41,7 @@

 def load_partitions(
     input_checkpoint_path, mp_partitions, layer_idx
-) -> List[torch.Tensor]:
+) -> list[torch.Tensor]:
     """Returns a list containing all weights in a given layer from a model (across MP partitions)"""
 
     loaded_tp_ranks = [
@@ -147,24 +146,11 @@ def convert(input_checkpoint_path, loaded_config, output_checkpoint_path):

     hf_model = GPTNeoXForCausalLM(hf_config)
 
-    # save model in fp16/bf16 if Deepspeed fp16 or bf16 mixed precision was used in config, else 32 bit weights
+    # save model in FP16 if Deepspeed fp16 was used in config, else 32 bit
     fp16 = get_key(loaded_config, "fp16")
     if fp16:
-        try:
-            # this conditional is quite messy because there were a number of ways to specify bf16 or fp16 training
-            # in DeeperSpeed v1.0 .
-            if (fp16.get("fp16", None) or fp16["enabled"]) and not (
-                fp16.get("type", None) == "bfloat16"
-            ):
-                hf_model.half()
-                print("Saving weights in fp16 precision...")
-            elif fp16.get("type", None) == "bfloat16":
-                hf_model.to(dtype=torch.bfloat16)
-                print("Saving weights in bf16 precision...")
-        except:
-            print(
-                "Model not trained in fp16 / bf16 mixed precision, saving weights in fp32..."
-            )
+        if fp16["fp16"]:
+            hf_model.half()
 
     mp_partitions = get_key(loaded_config, "model-parallel-size")
 
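
The "quite messy" conditional deleted above accepted several DeeperSpeed v1.0 spellings of mixed precision. Inferred from its branches (not from v1.0 documentation), the "fp16" config section could take shapes like:

# Illustrative only; real v1.0 configs may differ.
fp16_variants = [
    {"fp16": True},                         # legacy flag -> save weights in fp16
    {"enabled": True},                      # upstream-style flag -> fp16
    {"enabled": True, "type": "bfloat16"},  # bf16 mixed precision -> bf16
]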
62 changes: 22 additions & 40 deletions tools/merge_datasets.py
@@ -2,10 +2,8 @@
 import sys
 import json
 import argparse
-
-sys.path.append(
-    os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
-)
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__),
+                                             os.path.pardir)))
 
 from megatron.data import indexed_dataset

@@ -22,63 +20,47 @@ def main(args):
         if not os.path.isfile(os.path.join(args.input, basename)):
             continue
 
-        ext_pair = ".bin" if ext == ".idx" else ".idx"
-        assert os.path.isfile(
-            os.path.join(args.input, prefix) + ext_pair
-        ), f"ERROR: {ext_pair} file not provided for {os.path.join(args.input, prefix)}"
+        ext_pair = '.bin' if ext == '.idx' else '.idx'
+        assert os.path.isfile(os.path.join(args.input, prefix) + ext_pair), \
+            f'ERROR: {ext_pair} file not provided for {os.path.join(args.input, prefix)}'
 
         prefixes.add(prefix)
 
     builder = None
     for prefix in sorted(prefixes):
         if builder is None:
-            dataset = indexed_dataset.make_dataset(
-                os.path.join(args.input, prefix), "infer"
-            )
+            dataset = indexed_dataset.make_dataset(os.path.join(args.input, prefix), 'infer')
 
             if isinstance(dataset, indexed_dataset.MMapIndexedDataset):
-                builder = indexed_dataset.MMapIndexedDatasetBuilder(
-                    args.output_prefix + ".bin", dtype=dataset._index.dtype
-                )
+                builder = indexed_dataset.MMapIndexedDatasetBuilder(args.output_prefix + '.bin', dtype=dataset._index.dtype)
             else:
-                builder = indexed_dataset.IndexedDatasetBuilder(
-                    args.output_prefix + ".bin"
-                )
+                builder = indexed_dataset.IndexedDatasetBuilder(args.output_prefix + '.bin')
 
             del dataset
 
         builder.merge_file_(os.path.join(args.input, prefix))
 
-    builder.finalize(args.output_prefix + ".idx")
+    builder.finalize(args.output_prefix + '.idx')


if __name__ == "__main__":
if __name__ == '__main__':
parser = argparse.ArgumentParser()

group = parser.add_argument_group(title="input data")
group.add_argument(
"--input",
type=str,
required=True,
help="Path to directory containing all document files to merge",
)

group = parser.add_argument_group(title="output data")
group.add_argument(
"--output-prefix",
type=str,
required=True,
help="Path to binary output file without suffix",
)
group = parser.add_argument_group(title='input data')
group.add_argument('--input', type=str, required=True,
help='Path to directory containing all document files to merge')

group = parser.add_argument_group(title='output data')
group.add_argument('--output-prefix', type=str, required=True,
help='Path to binary output file without suffix')

args = parser.parse_args()

assert os.path.isdir(
args.input
), f"ERROR: {args.input} is not a directory or does not exist"
assert os.path.isdir(args.input), \
f'ERROR: {args.input} is not a directory or does not exist'

assert os.path.isdir(
os.path.dirname(args.output_prefix)
), f"ERROR: {os.path.dirname(args.output_prefix)} is not a directory or does not exist"
assert os.path.isdir(os.path.dirname(args.output_prefix)), \
f'ERROR: {os.path.dirname(args.output_prefix)} is not a directory or does not exist'

main(args)
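
For reference, a typical invocation of the merged script would be `python tools/merge_datasets.py --input ./data/shards --output-prefix ./data/merged` (paths hypothetical): every `.bin`/`.idx` pair found under --input is merged into ./data/merged.bin and ./data/merged.idx.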
