2 changes: 1 addition & 1 deletion torchvision/csrc/io/decoder/decoder.cpp
@@ -416,7 +416,7 @@ bool Decoder::init(
// variable `streams_` and make sure it is in range for decoding
bool Decoder::openStreams(std::vector<DecoderMetadata>* metadata) {
for (unsigned int i = 0; i < inputCtx_->nb_streams; i++) {
// - find the corespondent format at params_.formats set
// - find the correspondent format at params_.formats set
MediaFormat format;
#if LIBAVUTIL_VERSION_MAJOR < 56 // Before FFMPEG 4.0
const auto media = inputCtx_->streams[i]->codec->codec_type;
2 changes: 1 addition & 1 deletion torchvision/csrc/io/decoder/sync_decoder_test.cpp
@@ -96,7 +96,7 @@ size_t measurePerformanceUs(
fclose(f);

for (size_t i = 0; i < rounds; ++i) {
// randomy select clip
// randomly select clip
size_t rOffset = std::rand();
size_t fOffset = rOffset % item.durationPts;
size_t clipFrames = num + (num - 1) * stride;
13 changes: 7 additions & 6 deletions torchvision/csrc/io/decoder/util_test.cpp
@@ -22,13 +22,14 @@ TEST(Util, TestSetFormatDimensions) {
{0, 0, 172, 128, 100, 344, 0, 344, 100},// #8
{0, 0, 128, 172, 100, 344, 0, 100, 344} // #8
};
// clang-format onn
// clang-format on

for (const auto& tc : test_cases) {
size_t destW = 0;
size_t destH = 0;
ffmpeg::Util::setFormatDimensions(destW, destH, tc[0], tc[1], tc[2], tc[3], tc[4], tc[5], tc[6]);
CHECK(destW == tc[7]);
CHECK(destH == tc[8]);
size_t destW = 0;
size_t destH = 0;
ffmpeg::Util::setFormatDimensions(
destW, destH, tc[0], tc[1], tc[2], tc[3], tc[4], tc[5], tc[6]);
CHECK(destW == tc[7]);
CHECK(destH == tc[8]);
}
}
4 changes: 2 additions & 2 deletions torchvision/csrc/io/image/cpu/decode_gif.cpp
@@ -55,7 +55,7 @@ torch::Tensor decode_gif(const torch::Tensor& encoded_data) {
// InternalRead() and just set the `buf` pointer to the tensor data directly.
// That might even save allocation of those buffers.
// If we do that, we'd have to make sure the buffers are never written to by
// GIFLIB, otherwise we'd be overridding the tensor data.
// GIFLIB, otherwise we'd be overriding the tensor data.
reader_helper_t reader_helper;
reader_helper.encoded_data = encoded_data.data_ptr<uint8_t>();
reader_helper.encoded_data_size = encoded_data.numel();
@@ -85,7 +85,7 @@ torch::Tensor decode_gif(const torch::Tensor& encoded_data) {

// The GIFLIB docs say that the canvas's height and width are potentially
// ignored by modern viewers, so to be on the safe side we set the output
// height to max(canvas_heigh, first_image_height). Same for width.
// height to max(canvas_height, first_image_height). Same for width.
// https://giflib.sourceforge.net/whatsinagif/bits_and_bytes.html
auto out_h =
std::max(gifFile->SHeight, gifFile->SavedImages[0].ImageDesc.Height);
2 changes: 1 addition & 1 deletion torchvision/models/detection/faster_rcnn.py
@@ -77,7 +77,7 @@ class FasterRCNN(GeneralizedRCNN):
we attempt to preserve the aspect ratio and scale the shorter edge
to ``min_size``. If the resulting longer edge exceeds ``max_size``,
then downscale so that the longer edge does not exceed ``max_size``.
This may result in the shorter edge beeing lower than ``min_size``.
This may result in the shorter edge being lower than ``min_size``.
max_size (int): See ``min_size``.
image_mean (Tuple[float, float, float]): mean values used for input normalization.
They are generally the mean values of the dataset on which the backbone has been trained
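The min_size/max_size resize rule described in the FasterRCNN docstring above can be made concrete with a short sketch. It is only an illustration of the scale computation, not torchvision's actual transform implementation; the helper name resize_scale and its arguments are hypothetical.

# Illustrative sketch of the resize rule in the docstring above; the helper
# name and arguments are hypothetical and not part of torchvision.
def resize_scale(height, width, min_size, max_size):
    short_edge, long_edge = min(height, width), max(height, width)
    scale = min_size / short_edge      # scale the shorter edge to min_size
    if long_edge * scale > max_size:   # longer edge would exceed max_size,
        scale = max_size / long_edge   # so cap the scale instead
    return scale

# Example: a 400x1000 image with min_size=800, max_size=1333.
# Scaling the shorter edge to 800 would make the longer edge 2000 > 1333,
# so the scale is capped at 1333/1000 and the shorter edge ends up at
# about 533, i.e. below min_size, as the docstring warns.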
2 changes: 1 addition & 1 deletion torchvision/models/detection/retinanet.py
@@ -356,7 +356,7 @@ class RetinaNet(nn.Module):
we attempt to preserve the aspect ratio and scale the shorter edge
to ``min_size``. If the resulting longer edge exceeds ``max_size``,
then downscale so that the longer edge does not exceed ``max_size``.
This may result in the shorter edge beeing lower than ``min_size``.
This may result in the shorter edge being lower than ``min_size``.
max_size (int): See ``min_size``.
image_mean (Tuple[float, float, float]): mean values used for input normalization.
They are generally the mean values of the dataset on which the backbone has been trained
4 changes: 2 additions & 2 deletions torchvision/utils.py
@@ -180,7 +180,7 @@ def _Image_fromarray(
) -> Image.Image:
"""
A wrapper around PIL.Image.fromarray to mitigate the deprecation of the
mode paramter. See:
mode parameter. See:
https://pillow.readthedocs.io/en/stable/releasenotes/11.3.0.html#image-fromarray-mode-parameter
"""

@@ -204,7 +204,7 @@ def _Image_fromarray(
# new behavior: PIL will infer the image mode from the data passed
# in. That is, the type and shape determines the mode.
#
# old behiavor: The mode will change how PIL reads the image,
# old behavior: The mode will change how PIL reads the image,
# regardless of the data. That is, it will make the
# data work with the mode.
#
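The new-versus-old behavior that the comment above describes can be illustrated with a minimal sketch using only the public PIL.Image.fromarray API; the array shape and values are made up for illustration.

# Minimal sketch of the behavior difference discussed in the comment above.
import numpy as np
from PIL import Image

arr = np.zeros((4, 4), dtype=np.uint8)

# New behavior: the mode is inferred from dtype and shape, so a 2-D uint8
# array is read as an "L" (grayscale) image.
img = Image.fromarray(arr)
print(img.mode)  # "L"

# Old behavior (deprecated `mode` parameter): PIL would reinterpret the same
# data according to the requested mode instead of inferring it, e.g.:
# img = Image.fromarray(arr, mode="F")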