fix: correct several bugs in open_clip (#747)

mindspore-lab · Nov 23, 2023 · 8d3ce84
1 parent 03e7725
commit 8d3ce84
Show file tree

Hide file tree

Showing 6 changed files with 60 additions and 59 deletions.
diff --git a/examples/open_clip/difference.py b/examples/open_clip/difference.py
@@ -3,7 +3,7 @@
 this script will output absolute as well as relative difference of paired files.
 
 Examples:
-python difference.py --ms_path=./ms_data/ --to_path=./ms_data/
+python difference.py --a_path=./results_a/ --b_path=./results_b/
 
 """
 
@@ -17,79 +17,79 @@
 def parse_args(args):
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        "--ms_path",
+        "--a_path",
         type=str,
         default=None,
-        help="A folder path containing at least one .txt file of Mindspore's results",
+        help="A folder path containing at least one .txt file of model results",
     )
     parser.add_argument(
-        "--torch_path",
+        "--b_path",
         type=str,
         default=None,
-        help="A folder path containing at least one .txt file of PyTorch's results",
+        help="Another folder path containing at least one .txt file of model results",
     )
     args = parser.parse_args(args)
     return args
 
 
-def difference(ms_file, torch_file):
-    file = open(ms_file, "r")
-    ms_variable = eval(file.read())
+def difference(a_file, b_file):
+    file = open(a_file, "r")
+    a_variable = eval(file.read())
     file.close()
 
-    file = open(torch_file, "r")
-    torch_variable = eval(file.read())
+    file = open(b_file, "r")
+    b_variable = eval(file.read())
     file.close()
 
-    if not isinstance(torch_variable, np.ndarray):
-        torch_variable = np.array(torch_variable)
-    if not isinstance(ms_variable, np.ndarray):
-        ms_variable = np.array(ms_variable)
+    if not isinstance(b_variable, np.ndarray):
+        b_variable = np.array(b_variable)
+    if not isinstance(a_variable, np.ndarray):
+        a_variable = np.array(a_variable)
 
-    if torch_variable.shape != ms_variable.shape:
+    if b_variable.shape != a_variable.shape:
         raise ValueError(
-            f"{ms_variable} has shape {ms_variable.shape} "
-            f"while {torch_variable} has different shape of {torch_variable.shape}."
+            f"{a_variable} has shape {a_variable.shape} "
+            f"while {b_variable} has different shape of {b_variable.shape}."
         )
 
     # abs diff (mean)
-    abs_mean = abs(ms_variable - torch_variable).mean()
+    abs_mean = abs(a_variable - b_variable).mean()
 
     # abs diff (max)
-    abs_max = abs(ms_variable - torch_variable).max()
+    abs_max = abs(a_variable - b_variable).max()
 
     # relative diff (mean)
-    rel_mean = (abs(ms_variable - torch_variable) / (abs(torch_variable) + 1e-6)).mean()
+    rel_mean = (abs(a_variable - b_variable) / (abs(b_variable) + 1e-6)).mean()
 
     # relative diff (max)
-    rel_max = (abs(ms_variable - torch_variable) / (abs(torch_variable) + 1e-6)).max()
+    rel_max = (abs(a_variable - b_variable) / (abs(b_variable) + 1e-6)).max()
 
     print(
-        f'{os.path.basename(ms_file).replace(".txt",": ")}\n abs_mean: {abs_mean}\n '
+        f'{os.path.basename(a_file).replace(".txt",": ")}\n abs_mean: {abs_mean}\n '
         f"abs_max: {abs_max}\n rel_mean: {rel_mean}\n rel_max: {rel_max}\n\n"
     )
 
 
 def main(args):
     args = parse_args(args)
 
-    ms_files = []
-    torch_files = []
-    for root, dirs, files in os.walk(args.ms_path):
+    a_files = []
+    b_files = []
+    for root, dirs, files in os.walk(args.a_path):
         for file in files:
-            ms_files.append(os.path.join(root, file))
-    ms_files = sorted(ms_files)
-    for root, dirs, files in os.walk(args.torch_path):
+            a_files.append(os.path.join(root, file))
+    a_files = sorted(a_files)
+    for root, dirs, files in os.walk(args.b_path):
         for file in files:
-            torch_files.append(os.path.join(root, file))
-    torch_files = sorted(torch_files)
-
-    if len(ms_files) != len(torch_files):
-        raise ValueError(f"Files in {args.ms_path} are diiferent with those in {args.torch_path}.")
-    for file in range(len(ms_files)):
-        if os.path.basename(ms_files[file]) != os.path.basename(torch_files[file]):
-            raise ValueError(f"Files in {args.ms_path} are diiferent with those in {args.torch_path}.")
-        difference(ms_files[file], torch_files[file])
+            b_files.append(os.path.join(root, file))
+    b_files = sorted(b_files)
+
+    if len(a_files) != len(b_files):
+        raise ValueError(f"Files in {args.a_path} are different from those in {args.b_path}.")
+    for file in range(len(a_files)):
+        if os.path.basename(a_files[file]) != os.path.basename(b_files[file]):
+            raise ValueError(f"Files in {args.a_path} are different from those in {args.b_path}.")
+        difference(a_files[file], b_files[file])
 
 
 if __name__ == "__main__":

diff --git a/examples/open_clip/src/open_clip/model.py b/examples/open_clip/src/open_clip/model.py
@@ -245,7 +245,7 @@ def convert_weights_to_lp(model: nn.Cell):
     """Convert applicable model parameters to fp16"""
 
     def _convert_weights(cell):
-        if isinstance(cell, (nn.Conv1d, nn.Conv2d, nn.Dense, nn.MultiheadAttention, Attention)):
+        if isinstance(cell, (nn.Conv1d, nn.Conv2d, nn.Dense, Attention)):
             cell.to_float(ms.float16)
 
     model.apply(_convert_weights)
@@ -304,8 +304,8 @@ def build_model_from_openai_ckpt(
 
     embed_dim = param_dict["text_projection"].shape[1]
     context_length = param_dict["positional_embedding"].shape[0]
-    vocab_size = param_dict["token_embedding.weight"].shape[0]
-    transformer_width = param_dict["ln_final.weight"].shape[0]
+    vocab_size = param_dict["token_embedding.embedding_table"].shape[0]
+    transformer_width = param_dict["ln_final.gamma"].shape[0]
     transformer_heads = transformer_width // 64
     transformer_layers = len(set(k.split(".")[2] for k in param_dict if k.startswith("transformer.resblocks")))
 

diff --git a/examples/open_clip/src/open_clip/openai.py b/examples/open_clip/src/open_clip/openai.py
@@ -50,11 +50,7 @@ def load_openai_model(
 
     param_dict = ms.load_checkpoint(model_path)
 
-    try:
-        model = build_model_from_openai_ckpt(param_dict)
-    except KeyError:
-        sd = {k[7:]: v for k, v in param_dict["state_dict"].items()}
-        model = build_model_from_openai_ckpt(sd)
+    model = build_model_from_openai_ckpt(param_dict)
 
     # add mean / std attributes for consistency with OpenCLIP models
     model.visual.image_mean = OPENAI_DATASET_MEAN

diff --git a/examples/open_clip/src/open_clip/pretrained.py b/examples/open_clip/src/open_clip/pretrained.py
@@ -1,5 +1,5 @@
 import os
-import urllib
+import urllib.request
 from typing import Dict, Union
 
 from tqdm import tqdm

diff --git a/examples/open_clip/src/open_clip/transformer.py b/examples/open_clip/src/open_clip/transformer.py
@@ -195,7 +195,7 @@ def __init__(
             OrderedDict(
                 [
                     ("c_fc", nn.Dense(d_model, mlp_width, weight_init="HeUniform")),
-                    ("gelu", act_layer),
+                    ("gelu", act_layer()),
                     ("c_proj", nn.Dense(mlp_width, d_model, weight_init="HeUniform")),
                 ]
             )

diff --git a/examples/open_clip/test.py b/examples/open_clip/test.py
@@ -1,6 +1,9 @@
 """
 Generate a folder containing all the main variables' value.
 
+Example:
+python test.py --mode=0 --device_target="Ascend" --model_name="RN50" --pretrained="openai" --quickgelu=True
+
 P.S. This generated folder can be used by difference.py to calculate the difference statistics.
 
 """
@@ -74,23 +77,24 @@ def main(args):
     image_features = model.encode_image(image)
     text_features = model.encode_text(text)
 
-    if not os.path.exists("./" + args.model_name):
-        root = "./" + args.model_name
+    root = "./" + args.model_name + args.pretrained
+    if not os.path.exists(root):
+        root = "./" + args.model_name + args.pretrained
         os.mkdir(root)
 
-    file = open(root + "image.txt", "w+")
-    file.write(str(image.asnumpy().tolist()))
-    file.close()
-
-    file = open(root + "text.txt", "w+")
-    file.write(str(text.asnumpy().tolist()))
-    file.close()
+    # file = open(root + "/image.txt", "w+")
+    # file.write(str(image.asnumpy().tolist()))
+    # file.close()
+    #
+    # file = open(root + "/text.txt", "w+")
+    # file.write(str(text.asnumpy().tolist()))
+    # file.close()
 
-    file = open(root + "image_features.txt", "w+")
+    file = open(root + "/image_features.txt", "w+")
     file.write(str(image_features.asnumpy().tolist()))
     file.close()
 
-    file = open(root + "text_features.txt", "w+")
+    file = open(root + "/text_features.txt", "w+")
     file.write(str(text_features.asnumpy().tolist()))
     file.close()
 
@@ -99,10 +103,11 @@ def main(args):
 
     text_probs = ops.softmax(100.0 * image_features @ text_features.T, axis=-1)
 
-    file = open(root + "text_probs.txt", "w+")
+    file = open(root + "/text_probs.txt", "w+")
     file.write(str(text_probs.asnumpy().tolist()))
     file.close()
 
 
 if __name__ == "__main__":
     main(sys.argv[1:])
+    print("Done!")