Commit 189b5d0

Fix typos in src and tests
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
1 parent 7cf1f5c commit 189b5d0

44 files changed, 116 additions and 98 deletions


src/transformers/generation/continuous_batching/cache.py

Lines changed: 2 additions & 2 deletions
@@ -79,7 +79,7 @@ class PagedAttentionCache:
     layer group, and the shape of the cache tensor is `[num_blocks * block_size, num_heads, head_size]`.
 
     Grouping layers into groups is useful because when we allocate one block to a group N, the block allocated is the
-    same for all layers in group N, equivalently it is allocated accross all cache tensors. This allows us to
+    same for all layers in group N, equivalently it is allocated across all cache tensors. This allows us to
     efficiently allocate and free blocks, and to efficiently read and write key and value states.
 
     For instance, imagine we have 8 blocks of cache and a model with two layer groups: a full-attention group with 3
@@ -349,7 +349,7 @@ class PagedAttentionMemoryHandler:
     The memory footprint consists of three main components:
     - Cache memory: the space needed to store the cache tensors:
         2 * layer_group_size * [num_pages, page_size] * cache_dtype
-    - Activation memory: the space temporarly taken by the largest activation during the model forward pass:
+    - Activation memory: the space temporarily taken by the largest activation during the model forward pass:
        peak_activation_per_token * max_tokens_per_batch * activation_dtype_size
     - Static tensors: the space taken by the input/output buffers and metadata tensors for batch processing, sum of:
         - inputs_ids + outputs_ids + position_ids + logits_indices: 4 * max_tokens_per_batch * int32_size
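
The corrected docstring spells out a three-part memory budget (cache, activation, static tensors). The sketch below only tallies those three terms as arithmetic; the function name is made up and the per-token width of a page (heads, head dim) is assumed to be folded into `page_size`, so this is an illustration rather than the `PagedAttentionMemoryHandler` logic.

```python
# Hedged sketch of the memory budget described in the docstring above.
# Illustrative only; assumes the per-token width of a page is folded into page_size.
def estimate_memory_bytes(
    num_pages: int,
    page_size: int,
    layer_group_size: int,
    cache_dtype_size: int,
    peak_activation_per_token: int,
    max_tokens_per_batch: int,
    activation_dtype_size: int,
    int32_size: int = 4,
) -> int:
    # Cache memory: key and value tensors (factor 2) for every layer in the group.
    cache_memory = 2 * layer_group_size * num_pages * page_size * cache_dtype_size
    # Activation memory: space temporarily taken by the largest activation in the forward pass.
    activation_memory = peak_activation_per_token * max_tokens_per_batch * activation_dtype_size
    # Static tensors: inputs_ids + outputs_ids + position_ids + logits_indices.
    static_memory = 4 * max_tokens_per_batch * int32_size
    return cache_memory + activation_memory + static_memory
```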

src/transformers/models/big_bird/modeling_big_bird.py

Lines changed: 4 additions & 4 deletions
@@ -1272,14 +1272,14 @@ def _get_single_block_row_attention(
         if block_id == to_end_block_id - 2:
             illegal_blocks.append(1)
 
-        selected_random_blokcs = []
+        selected_random_blocks = []
 
         for i in range(to_end_block_id - to_start_block_id):
             if perm_block[i] not in illegal_blocks:
-                selected_random_blokcs.append(perm_block[i])
-            if len(selected_random_blokcs) == num_rand_blocks:
+                selected_random_blocks.append(perm_block[i])
+            if len(selected_random_blocks) == num_rand_blocks:
                 break
-        return np.array(selected_random_blokcs, dtype=np.int32)
+        return np.array(selected_random_blocks, dtype=np.int32)
 
 
 # Copied from transformers.models.bert.modeling_bert.BertSelfOutput with Bert->BigBird
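
For context, the renamed `selected_random_blocks` list implements a simple rejection loop: walk a random permutation of candidate block ids and keep the first `num_rand_blocks` that are not in the illegal set. A self-contained sketch of just that selection step, with made-up example inputs:

```python
import numpy as np

# Sketch of the selection loop shown in the diff above.
def pick_random_blocks(perm_block, illegal_blocks, num_rand_blocks):
    selected_random_blocks = []
    for block in perm_block:
        if block not in illegal_blocks:
            selected_random_blocks.append(block)
        if len(selected_random_blocks) == num_rand_blocks:
            break
    return np.array(selected_random_blocks, dtype=np.int32)

# Example with made-up values: 8 candidate blocks, blocks 0 and 7 excluded, keep 3.
print(pick_random_blocks(np.random.permutation(8), {0, 7}, 3))
```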

src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py

Lines changed: 4 additions & 4 deletions
@@ -1088,14 +1088,14 @@ def _get_single_block_row_attention(
         if block_id == to_end_block_id - 2:
             illegal_blocks.append(1)
 
-        selected_random_blokcs = []
+        selected_random_blocks = []
 
         for i in range(to_end_block_id - to_start_block_id):
             if perm_block[i] not in illegal_blocks:
-                selected_random_blokcs.append(perm_block[i])
-            if len(selected_random_blokcs) == num_rand_blocks:
+                selected_random_blocks.append(perm_block[i])
+            if len(selected_random_blocks) == num_rand_blocks:
                 break
-        return np.array(selected_random_blokcs, dtype=np.int32)
+        return np.array(selected_random_blocks, dtype=np.int32)
 
 
 class BigBirdPegasusEncoderAttention(nn.Module):

src/transformers/models/cpmant/modeling_cpmant.py

Lines changed: 6 additions & 6 deletions
@@ -351,7 +351,7 @@ def forward(
         output_hidden_states: Optional[bool] = None,
         past_key_values: Optional[Cache] = None,
         use_cache: Optional[bool] = None,
-        cache_postion: Optional[torch.Tensor] = None,
+        cache_position: Optional[torch.Tensor] = None,
     ):
         """
         Args:
@@ -492,16 +492,16 @@ def _position_bucket(self, relative_position, num_buckets=32, max_distance=128):
         relative_position = torch.abs(relative_position)
         max_exact = num_buckets // 2
         is_small = relative_position < max_exact
-        relative_postion_if_large = max_exact + (
+        relative_position_if_large = max_exact + (
             torch.log(relative_position.float() / max_exact)
             / math.log(max_distance / max_exact)
             * (num_buckets - max_exact)
         ).to(torch.int32)
-        relative_postion_if_large = torch.min(
-            relative_postion_if_large,
-            torch.full_like(relative_postion_if_large, num_buckets - 1),
+        relative_position_if_large = torch.min(
+            relative_position_if_large,
+            torch.full_like(relative_position_if_large, num_buckets - 1),
         )
-        relative_buckets += torch.where(is_small, relative_position.to(torch.int32), relative_postion_if_large)
+        relative_buckets += torch.where(is_small, relative_position.to(torch.int32), relative_position_if_large)
         return relative_buckets
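
The renamed `relative_position_if_large` is part of a T5-style bucketing scheme: distances below `num_buckets // 2` map one-to-one onto the first buckets, larger distances are spread logarithmically over the remaining buckets and clamped to the last one. A simplified, self-contained sketch of that mapping (the real method also adds a bidirectional offset to `relative_buckets`, omitted here):

```python
import math
import torch

# Simplified sketch of the log-spaced position bucketing shown above;
# the bidirectional offset of the actual method is omitted.
def position_bucket(relative_position, num_buckets=32, max_distance=128):
    relative_position = torch.abs(relative_position)
    max_exact = num_buckets // 2
    is_small = relative_position < max_exact
    relative_position_if_large = max_exact + (
        torch.log(relative_position.float() / max_exact)
        / math.log(max_distance / max_exact)
        * (num_buckets - max_exact)
    ).to(torch.int32)
    relative_position_if_large = torch.min(
        relative_position_if_large, torch.full_like(relative_position_if_large, num_buckets - 1)
    )
    return torch.where(is_small, relative_position.to(torch.int32), relative_position_if_large)

print(position_bucket(torch.tensor([1, 10, 100])))  # tensor([ 1, 10, 30], dtype=torch.int32)
```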

src/transformers/models/gemma3/convert_gemma3_weights.py

Lines changed: 3 additions & 3 deletions
@@ -439,9 +439,9 @@ def convert_transformer_weights(
     decoder_block_start = path.find(_TRANSFORMER_DECODER_BLOCK)
     decoder_block_offset = decoder_block_start + _TRANSFORMER_DECODER_BLOCK_LEN
     decoder_block_path = path[decoder_block_offset:]
-    next_path_seperator_idx = decoder_block_path.find("/")
-    layer_idx = decoder_block_path[:next_path_seperator_idx]
-    decoder_block_path = decoder_block_path[next_path_seperator_idx:]
+    next_path_separator_idx = decoder_block_path.find("/")
+    layer_idx = decoder_block_path[:next_path_separator_idx]
+    decoder_block_path = decoder_block_path[next_path_separator_idx:]
 
     base_path = f"language_model.model.layers.{layer_idx}"
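
The renamed `next_path_separator_idx` marks the first "/" after the decoder-block prefix: the component before it is the layer index, the remainder is kept for further mapping. A toy illustration with a made-up checkpoint path:

```python
# Made-up path, only to illustrate the split performed above.
decoder_block_path = "12/attn/q_proj/w"
next_path_separator_idx = decoder_block_path.find("/")
layer_idx = decoder_block_path[:next_path_separator_idx]            # "12"
decoder_block_path = decoder_block_path[next_path_separator_idx:]   # "/attn/q_proj/w"
print(f"language_model.model.layers.{layer_idx}")                   # language_model.model.layers.12
```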

src/transformers/models/git/modeling_git.py

Lines changed: 2 additions & 2 deletions
@@ -954,7 +954,7 @@ def __init__(self, config):
         self.visual_projection = GitProjection(config)
 
         if config.num_image_with_embedding is not None:
-            self.img_temperal_embedding = nn.ParameterList(
+            self.img_temporal_embedding = nn.ParameterList(
                 nn.Parameter(torch.zeros(1, 1, config.vision_config.hidden_size))
                 for _ in range(config.num_image_with_embedding)
             )
@@ -1119,7 +1119,7 @@ def forward(
                 visual_features_frame = self.image_encoder(
                     pixel_values[:, frame_idx, :, :], interpolate_pos_encoding=interpolate_pos_encoding
                 ).last_hidden_state
-                visual_features_frame += self.img_temperal_embedding[frame_idx]
+                visual_features_frame += self.img_temporal_embedding[frame_idx]
                 visual_features.append(visual_features_frame)
 
             # finally, concatenate all features along sequence dimension
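
The renamed `img_temporal_embedding` follows a simple pattern: one learned embedding per video frame is added to that frame's encoded features, and the frames are then concatenated along the sequence dimension. A minimal sketch with made-up shapes, where random tensors stand in for the image encoder output:

```python
import torch
from torch import nn

# Made-up sizes; the randn tensors stand in for per-frame image-encoder outputs.
num_frames, batch_size, seq_len, hidden_size = 4, 2, 10, 16
img_temporal_embedding = nn.ParameterList(
    nn.Parameter(torch.zeros(1, 1, hidden_size)) for _ in range(num_frames)
)

visual_features = []
for frame_idx in range(num_frames):
    visual_features_frame = torch.randn(batch_size, seq_len, hidden_size)
    visual_features_frame = visual_features_frame + img_temporal_embedding[frame_idx]
    visual_features.append(visual_features_frame)

# concatenate all frame features along the sequence dimension
visual_features = torch.cat(visual_features, dim=1)
print(visual_features.shape)  # torch.Size([2, 40, 16])
```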

src/transformers/models/groupvit/modeling_groupvit.py

Lines changed: 2 additions & 2 deletions
@@ -74,7 +74,7 @@ def gumbel_softmax(logits: torch.Tensor, tau: float = 1, hard: bool = False, dim
         y_hard = torch.zeros_like(logits, memory_format=torch.legacy_contiguous_format).scatter_(dim, index, 1.0)
         ret = y_hard - y_soft.detach() + y_soft
     else:
-        # Reparametrization trick.
+        # Reparameterization trick.
         ret = y_soft
     return ret
 
@@ -662,7 +662,7 @@ def forward(
         attn_weights = nn.functional.softmax(attn_weights, dim=-1)
 
         if output_attentions:
-            # this operation is a bit akward, but it's required to
+            # this operation is a bit awkward, but it's required to
             # make sure that attn_weights keeps its gradient.
             # In order to do so, attn_weights have to reshaped
             # twice and have to be reused in the following
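
The corrected comments sit around the straight-through Gumbel-softmax pattern: the forward pass returns hard one-hot samples, while gradients flow through the soft, reparameterized probabilities. A standalone sketch of that pattern (a simplified stand-in, not the GroupViT function itself):

```python
import torch
import torch.nn.functional as F

# Straight-through Gumbel-softmax sketch: hard samples forward, soft gradients backward.
def gumbel_softmax_sketch(logits, tau=1.0, hard=False, dim=-1):
    gumbels = -torch.empty_like(logits).exponential_().log()  # Gumbel(0, 1) noise
    y_soft = F.softmax((logits + gumbels) / tau, dim=dim)
    if hard:
        index = y_soft.argmax(dim, keepdim=True)
        y_hard = torch.zeros_like(logits).scatter_(dim, index, 1.0)
        return y_hard - y_soft.detach() + y_soft  # straight-through estimator
    return y_soft  # reparameterization trick: the sample stays differentiable in logits

logits = torch.randn(2, 5, requires_grad=True)
sample = gumbel_softmax_sketch(logits, hard=True)
sample.sum().backward()  # gradients reach `logits` even though the sample is one-hot
```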

src/transformers/models/imagegpt/image_processing_imagegpt.py

Lines changed: 1 addition & 1 deletion
@@ -247,7 +247,7 @@ def preprocess(
         )
 
         # Here, normalize() is using a constant factor to divide pixel values.
-        # hence, the method does not need iamge_mean and image_std.
+        # hence, the method does not need image_mean and image_std.
         validate_preprocess_arguments(
             do_resize=do_resize,
             size=size,

src/transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@
 # Similar to transformers.models.pix2struct.image_processing_pix2struct.torch_extract_patches but dealing with a batch of images directly.
 def torch_extract_patches(image_tensor, patch_height, patch_width):
     """
-    Utiliy function to extract patches from a given tensor representing a batch of images. Returns a tensor of shape
+    Utility function to extract patches from a given tensor representing a batch of images. Returns a tensor of shape
     (batch_size, `rows`, `columns`, `num_channels` x `patch_height` x `patch_width`).
 
     Args:
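
The docstring above describes the output layout: one row of patches per `patch_height` step, one column per `patch_width` step, each patch flattened to `num_channels` x `patch_height` x `patch_width`. A hedged sketch of one way to obtain that layout with `torch.nn.functional.unfold` (an illustration, not necessarily the processor's own implementation):

```python
import torch
import torch.nn.functional as F

# Illustrative batched patch extraction with the output layout described above.
def extract_patches_sketch(images, patch_height, patch_width):
    batch_size, num_channels, height, width = images.shape  # (B, C, H, W)
    rows, columns = height // patch_height, width // patch_width
    # unfold yields (B, C * patch_height * patch_width, rows * columns)
    patches = F.unfold(images, kernel_size=(patch_height, patch_width), stride=(patch_height, patch_width))
    patches = patches.transpose(1, 2)  # (B, rows * columns, C * ph * pw)
    return patches.reshape(batch_size, rows, columns, num_channels * patch_height * patch_width)

x = torch.randn(2, 3, 32, 64)
print(extract_patches_sketch(x, 16, 16).shape)  # torch.Size([2, 2, 4, 768])
```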

src/transformers/models/kyutai_speech_to_text/feature_extraction_kyutai_speech_to_text.py

Lines changed: 1 addition & 1 deletion
@@ -204,7 +204,7 @@ def __call__(
         if padding:
             padded_inputs["padding_mask"] = padded_inputs.pop("attention_mask")
 
-        # now let's padd left and right
+        # now let's pad left and right
        pad_left = int(self.audio_silence_prefix_seconds * self.sampling_rate)
        pad_right = int((self.audio_delay_seconds + 1.0) * self.sampling_rate)
        padded_inputs["input_values"] = np.pad(
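
The corrected comment refers to the padding applied to the waveform: a silence prefix is prepended and a delay tail appended, both expressed in samples at the feature extractor's sampling rate. A small sketch of that step with made-up parameter values:

```python
import numpy as np

# Made-up values, only to illustrate the left/right padding described above.
sampling_rate = 24000
audio_silence_prefix_seconds = 1.0
audio_delay_seconds = 2.0

waveform = np.random.randn(1, 24000).astype(np.float32)  # (batch, samples)
pad_left = int(audio_silence_prefix_seconds * sampling_rate)
pad_right = int((audio_delay_seconds + 1.0) * sampling_rate)
padded = np.pad(waveform, ((0, 0), (pad_left, pad_right)))  # zeros on both sides
print(padded.shape)  # (1, 120000)
```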
