Merge pull request #20 from ejhg/ejhg-optimize-load_py

shaltielshmid · web-flow · commit a9463d76ae8a · 2024-08-08T01:42:56.000+03:00
Optimize load_py for memory and speed
diff --git a/TorchSharp.PyBridge/PyBridgeModuleExtensions.cs b/TorchSharp.PyBridge/PyBridgeModuleExtensions.cs
@@ -1,5 +1,3 @@
-using System.Collections;
-using System.IO;
 using System.Text.Json;
 using System.Text.Json.Nodes;
 using TqdmSharp;
@@ -116,35 +114,106 @@ public static Module load_py(this Module module, string location, bool strict =
         /// <remarks>
         /// This method only supports loading the newer format used by `torch.save`, using a zip file. 
         /// The model will be fully loaded and all the validation checks will only run after the state
-        /// dictionary has been fully loaded. 
+        /// dictionary has been fully loaded.
         /// </remarks>
         public static Module load_py(this Module module, System.IO.Stream stream, bool strict = true, IList<string>? skip = null, Dictionary<string, bool>? loadedParameters = null, bool leaveOpen = false) {
-            // Create a dispose score so that we don't keep anyof the loaded tensors past this function
+            // Create a dispose scope so that we don't keep any of the loaded tensors past this function
             using var d = torch.NewDisposeScope();
             using var d2 = torch.no_grad(); // To circumvent a bug introduced in 0.102.0
 
-            // Unpickle the state dictionary into memory
-            var stateHashtable = PyTorchUnpickler.UnpickleStateDict(stream, leaveOpen);
+            // Unpickle the state dictionary into memory.
+            // Keep stream open because tensors will not get deserialized yet.
+            var unpickled = PyTorchUnpickler.UnpickleStateDict(stream, leaveOpen: true, skipTensorRead: true);
 
             // Convert the hashtable to a dictionary of string->tensor
-            var stateDict = new Dictionary<string, torch.Tensor>();
-            foreach (string key in stateHashtable.Keys)
-                stateDict.Add(key, (torch.Tensor)stateHashtable[key]!);
+            var unpickledConstructors = new Dictionary<string, PyTorchUnpickler.TensorConstructorArgs>();
 
-            // Load it in using the builtin function
-            var (_, unexpectedKeys) = module.load_state_dict(stateDict, strict, skip);
+            foreach (string key in unpickled.Keys) {
+                unpickledConstructors.Add(key, (PyTorchUnpickler.TensorConstructorArgs)unpickled[key]!);
+            }
 
-            // Fill in the loadedParameters dictionary, if relevant
-            if (loadedParameters is not null) {
-                foreach (string key in stateDict.Keys)
-                    loadedParameters[key] = true;
-                foreach (string key in unexpectedKeys)
-                    loadedParameters[key] = false;
+            var (_, unexpectedKeys) = load_state_dict(module, unpickledConstructors, strict, skip);
+
+            if (!leaveOpen) {
+                // Close stream now that tensor streams have been read.
+                stream.Close ();
+            }
+
+            if (loadedParameters is null) {
+                return module;
+            }
+
+            // Fill in the loadedParameters dictionary
+            foreach (var key in unpickledConstructors.Keys) {
+                loadedParameters[key] = true;
+            }
+
+            foreach (var key in unexpectedKeys) {
+                loadedParameters[key] = false;
             }
 
             return module;
         }
 
+        /// <summary>
+        /// Mirrors the implementation of module.load_state_dict but performs tensor reading
+        /// with less intermediate memory overhead.
+        /// </summary>
+        static (IList<string> missing_keys, IList<string> unexpected_keys) load_state_dict(
+            Module module,
+            Dictionary<string, PyTorchUnpickler.TensorConstructorArgs> unpickled,
+            bool strict = true,
+            IList<string> skip = null
+        ) {
+            var missingKeys = new List<string>();
+            var unexpectedKeys = new List<string>();
+            skip ??= Array.Empty<string>();
+
+            var state = module.state_dict();
+
+            foreach (string key in unpickled.Keys) {
+                if (!skip.Contains(key) && !state.ContainsKey(key))
+                    unexpectedKeys.Add(key);
+            }
+
+            foreach (string key in state.Keys) {
+                if (!skip.Contains(key) && !unpickled.ContainsKey(key)) {
+                    missingKeys.Add(key);
+                }
+            }
+
+            if (strict && (missingKeys.Count > 0 || unexpectedKeys.Count > 0)) {
+                throw new InvalidOperationException("The loaded state_dict is not identical to the target dictionary.");
+            }
+
+            var inputStreams = unpickled
+                .Where(e => state.ContainsKey(e.Key))
+                // Avoid random stream seeks by reading archive files in the order that they are stored.
+                .OrderBy(e => e.Value.ArchiveIndex)
+                .ToArray();
+
+            foreach (var (key, constructor) in inputStreams) {
+                var target = state[key];
+                target.with_requires_grad(constructor.RequiresGrad);
+
+                if (constructor.DType == state[key].dtype) {
+                    using var stream = constructor.Data;
+                    // Read directly into target tensor.
+                    target
+                        .as_strided(constructor.Shape, constructor.Stride, constructor.StorageOffset)
+                        .ReadBytesFromStream(stream);
+                }
+                else {
+                    // Type conversion with intermediate tensor required.
+                    // This will load onto cpu first before copying to target.
+                    using torch.Tensor temp = constructor.ReadTensorFromStream();
+                    state[key].copy_(temp);
+                }
+            }
+
+            return (missingKeys, unexpectedKeys);
+        }
+
         /// <summary>
         /// Load the parameters and buffers from a file saved using the safetensors format (https://github.com/huggingface/safetensors)
         /// </summary>
diff --git a/TorchSharp.PyBridge/PyTorchUnpickler.cs b/TorchSharp.PyBridge/PyTorchUnpickler.cs
@@ -26,8 +26,12 @@ public static Hashtable UnpickleStateDict(string file) {
         /// </summary>
         /// <param name="stream">Stream of the file to load</param>
         /// <param name="leaveOpen">true to leave the stream open after saving the file</param>
+        /// <param name="skipTensorRead">true to return descriptor objects and streams instead of tensors so that the tensor data can be read later; requires <paramref name="leaveOpen"/> to be true</param>
         /// <returns>The loaded state_dict</returns>
-        public static Hashtable UnpickleStateDict(Stream stream, bool leaveOpen = false) {
+        public static Hashtable UnpickleStateDict(Stream stream, bool leaveOpen = false, bool skipTensorRead = false) {
+            if (skipTensorRead && !leaveOpen)
+                throw new ArgumentException("leaveOpen must be true when skipTensorRead is true");
+
             // Make sure it's a zip file
             // If it's not, then it was saved using legacy torch save and we don't support it (yet, at least)
             // Check the local file signature
@@ -45,7 +49,7 @@ public static Hashtable UnpickleStateDict(Stream stream, bool leaveOpen = false)
 
             // Create our unpickler with the archive, so it can pull all the relevant files
             // using the persistentId
-            var unpickler = new CustomUnpickler(archive);
+            var unpickler = new CustomUnpickler(archive, skipTensorRead);
             // The unpickle returns a hash mapping ["key"] to the tensor
             return (Hashtable)unpickler.load(pklEntry.Open());
         }
@@ -61,8 +65,11 @@ public static Hashtable UnpickleStateDict(Stream stream, bool leaveOpen = false)
         class CustomUnpickler : Unpickler {
             readonly ZipArchive _archive;
 
-            public CustomUnpickler(ZipArchive archive) {
+            readonly bool _skipTensorRead;
+
+            public CustomUnpickler(ZipArchive archive, bool skipTensorRead) {
                 _archive = archive;
+                _skipTensorRead = skipTensorRead;
             }
 
             protected override object persistentLoad(object pid) {
@@ -79,20 +86,24 @@ protected override object persistentLoad(object pid) {
                 string storageType = ((ClassDictConstructor)opid[1]).name;
                 // Tuple Item2: key (filename in the archive)
                 string archiveKey = (string)opid[2];
-                // Tuple Item3: location (cpu/gpu), but we always load onto CPU. 
+                // Tuple Item3: location (cpu/gpu), but we always load onto CPU.
                 // Tuple Item4: numElems (the number of elements in the tensor)
-                
+
                 // Convert the storage name into the relevant scalar type (e.g., LongStorage => torch.long)
                 // and then check how many bytes each element is
                 var dtype = GetScalarTypeFromStorageName(storageType);
-                
+
                 // Retrieve the entry from the archive
-                var entry = _archive.Entries.First(f => f.FullName.EndsWith($"data/{archiveKey}"));
-                
+                var entry = _archive.Entries
+                    .Select((archiveEntry, index) => (archiveEntry, index))
+                    .First(e => e.archiveEntry.FullName.EndsWith($"data/{archiveKey}"));
+
                 // Send this back, so our TensorObjectConstructor can create our torch.tensor from the object.
-                return new TensorObject() {
-                    data = entry!.Open(),
-                    dtype = dtype
+                return new TensorStream {
+                    ArchiveIndex = entry!.index,
+                    ArchiveEntry = entry!.archiveEntry,
+                    DType = dtype,
+                    SkipTensorRead = _skipTensorRead,
                 };
             }
 
@@ -118,7 +129,7 @@ static torch.ScalarType GetScalarTypeFromStorageName(string storage) {
         /// <summary>
         /// The unpickler implementation requires a __setstate__ function for unpickling an ordered dict, due
         /// to the way it was saved. This class is just a regular Hashtable with an implementation for the
-        /// __setstate__. 
+        /// __setstate__.
         /// </summary>
         class OrderedDict : Hashtable {
             public void __setstate__(Hashtable arg) {
@@ -145,27 +156,29 @@ public object construct(object[] args) {
         /// </summary>
         class TensorObjectConstructor : IObjectConstructor {
             public object construct(object[] args) {
-                // Arg 0: (byte[] data, ScalarType dtype) // returned from our custom pickler
-                var arg0 = (TensorObject)args[0];
-                // Arg 1: storage_offset
-                int storageOffset = (int)args[1];
-                // Arg 2: tensor_shape
-                var shape = ((object[])args[2]).Select(i => (long)(int)i).ToArray();
-                // Arg 3: stride 
-                var stride = ((object[])args[3]).Select(i => (long)(int)i).ToArray();
-                // Arg 4: requires_grad
-                var requiresGrad = (bool)args[4];
+                // Arg 0: the TensorStream returned from our custom unpickler's persistentLoad
+                var tensorStream = (TensorStream)args[0];
+
+                var constructor = new TensorConstructorArgs {
+                    ArchiveIndex = tensorStream.ArchiveIndex,
+                    Data = tensorStream.ArchiveEntry!.Open(),
+                    DType = tensorStream.DType,
+                    // Arg 1: storage_offset
+                    StorageOffset = (int)args[1],
+                    // Arg 2: tensor_shape
+                    Shape = ((object[])args[2]).Select(i => (long)(int)i).ToArray(),
+                    // Arg 3: stride
+                    Stride = ((object[])args[3]).Select(i => (long)(int)i).ToArray(),
+                    // Arg 4: requires_grad
+                    RequiresGrad = (bool)args[4],
+                };
+
                 // Arg 5: backward_hooks, we don't support adding them in and it's not recommended
                 // in PyTorch to serialize them.
 
-                // If there is no shape, then the shape is just 1
-                // Since we have two operations here - we want to make sure to dispose the temporary.
-                torch.Tensor t = torch.WrappedTensorDisposeScope(() => 
-                                    torch.empty(shape, arg0.dtype).as_strided(shape, stride, storageOffset));
-
-                t.ReadBytesFromStream(arg0.data);
-                arg0.data.Close();
-                return t;
+                return tensorStream.SkipTensorRead
+                    ? constructor
+                    : constructor.ReadTensorFromStream();
             }
         }
 
@@ -182,15 +195,43 @@ public object construct(object[] args) {
             }
         }
 
+        internal record TensorConstructorArgs
+        {
+            public int ArchiveIndex { get; init; }
+
+            public Stream Data { get; init; }
+
+            public torch.ScalarType DType { get; init; }
+
+            public int StorageOffset { get; init; }
+
+            public long[] Shape { get; init; }
+
+            public long[] Stride { get; init; }
+
+            public bool RequiresGrad { get; init; }
+
+            public torch.Tensor ReadTensorFromStream() {
+                var temp = torch
+                    .empty(Shape, DType, device: torch.CPU)
+                    .as_strided(Shape, Stride, StorageOffset);
+                temp.ReadBytesFromStream(Data);
+                Data.Close();
+
+                return temp;
+            }
+        }
 
         /// <summary>
         /// When the unpickler first loads in the tensor, it only has access to metadata about the storage
         /// of the tensor, but not the info about stride/shape etc. That part is done in the TensorReconstructor.
         /// Therefore, this class is a simple wrapper for the bytes + dtype of the storage.
         /// </summary>
-        class TensorObject {
-            public Stream data { get; set; }
-            public torch.ScalarType dtype { get; set; }
+        class TensorStream {
+            public int ArchiveIndex { get; init; }
+            public ZipArchiveEntry ArchiveEntry { get; init; }
+            public torch.ScalarType DType { get; init; }
+            public bool SkipTensorRead { get; init; }
         }
     }
-}
+}