diff --git a/scripts/export_onnx_model.py b/scripts/export_onnx_model.py
index 0095d53e4..a109722a7 100644
--- a/scripts/export_onnx_model.py
+++ b/scripts/export_onnx_model.py
@@ -144,7 +144,7 @@ def run_export(
         warnings.filterwarnings("ignore", category=torch.jit.TracerWarning)
         warnings.filterwarnings("ignore", category=UserWarning)
         with open(output, "wb") as f:
-            print(f"Exporing onnx model to {output}...")
+            print(f"Exporting onnx model to {output}...")
             torch.onnx.export(
                 onnx_model,
                 tuple(dummy_inputs.values()),
diff --git a/segment_anything/automatic_mask_generator.py b/segment_anything/automatic_mask_generator.py
index 23264971b..da944ed3c 100644
--- a/segment_anything/automatic_mask_generator.py
+++ b/segment_anything/automatic_mask_generator.py
@@ -73,10 +73,10 @@ def __init__(
             calculated the stability score.
           box_nms_thresh (float): The box IoU cutoff used by non-maximal
             suppression to filter duplicate masks.
-          crops_n_layers (int): If >0, mask prediction will be run again on
+          crop_n_layers (int): If >0, mask prediction will be run again on
             crops of the image. Sets the number of layers to run, where each
             layer has 2**i_layer number of image crops.
-          crops_nms_thresh (float): The box IoU cutoff used by non-maximal
+          crop_nms_thresh (float): The box IoU cutoff used by non-maximal
             suppression to filter duplicate masks between different crops.
           crop_overlap_ratio (float): Sets the degree to which crops overlap.
             In the first crop layer, crops will overlap by this fraction of
diff --git a/segment_anything/modeling/image_encoder.py b/segment_anything/modeling/image_encoder.py
index a6ad9ad29..3e7737356 100644
--- a/segment_anything/modeling/image_encoder.py
+++ b/segment_anything/modeling/image_encoder.py
@@ -198,7 +198,7 @@ def __init__(
         Args:
             dim (int): Number of input channels.
             num_heads (int): Number of attention heads.
-            qkv_bias (bool: If True, add a learnable bias to query, key, value.
+            qkv_bias (bool): If True, add a learnable bias to query, key, value.
             rel_pos (bool): If True, add relative positional embeddings to the attention map.
             rel_pos_zero_init (bool): If True, zero initialize relative positional parameters.
             input_size (int or None): Input resolution for calculating the relative positional
@@ -270,7 +270,7 @@ def window_unpartition(
     """
     Window unpartition into original sequences and removing padding.
     Args:
-        x (tensor): input tokens with [B * num_windows, window_size, window_size, C].
+        windows (tensor): input tokens with [B * num_windows, window_size, window_size, C].
         window_size (int): window size.
         pad_hw (Tuple): padded height and width (Hp, Wp).
         hw (Tuple): original height and width (H, W) before padding.
diff --git a/segment_anything/modeling/sam.py b/segment_anything/modeling/sam.py
index 303bc2f40..8074cff6b 100644
--- a/segment_anything/modeling/sam.py
+++ b/segment_anything/modeling/sam.py
@@ -85,8 +85,8 @@ def forward(
           (list(dict)): A list over input images, where each element is
             as dictionary with the following keys.
               'masks': (torch.Tensor) Batched binary mask predictions,
-                with shape BxCxHxW, where B is the number of input promts,
-                C is determiend by multimask_output, and (H, W) is the
+                with shape BxCxHxW, where B is the number of input prompts,
+                C is determined by multimask_output, and (H, W) is the
                 original size of the image.
               'iou_predictions': (torch.Tensor) The model's predictions
                 of mask quality, in shape BxC.
diff --git a/segment_anything/modeling/transformer.py b/segment_anything/modeling/transformer.py
index f1a2812f6..28fafea52 100644
--- a/segment_anything/modeling/transformer.py
+++ b/segment_anything/modeling/transformer.py
@@ -96,7 +96,7 @@ def forward(
                 key_pe=image_pe,
             )
 
-        # Apply the final attenion layer from the points to the image
+        # Apply the final attention layer from the points to the image
         q = queries + point_embedding
         k = keys + image_pe
         attn_out = self.final_attn_token_to_image(q=q, k=k, v=keys)
diff --git a/segment_anything/predictor.py b/segment_anything/predictor.py
index 57c089d1f..91b2ed3ea 100644
--- a/segment_anything/predictor.py
+++ b/segment_anything/predictor.py
@@ -186,7 +186,7 @@ def predict_torch(
           point_labels (torch.Tensor or None): A BxN array of labels for the
             point prompts. 1 indicates a foreground point and 0 indicates a
             background point.
-          box (np.ndarray or None): A Bx4 array given a box prompt to the
+          boxes (np.ndarray or None): A Bx4 array given a box prompt to the
             model, in XYXY format.
          mask_input (np.ndarray): A low resolution mask input to the model, typically
            coming from a previous prediction iteration. Has form Bx1xHxW, where
diff --git a/segment_anything/utils/amg.py b/segment_anything/utils/amg.py
index 3a137778e..be064071e 100644
--- a/segment_anything/utils/amg.py
+++ b/segment_anything/utils/amg.py
@@ -162,7 +162,7 @@ def calculate_stability_score(
         the predicted mask logits at high and low values.
     """
     # One mask is always contained inside the other.
-    # Save memory by preventing unnecesary cast to torch.int64
+    # Save memory by preventing unnecessary cast to torch.int64
     intersections = (
         (masks > (mask_threshold + threshold_offset))
         .sum(-1, dtype=torch.int16)
diff --git a/segment_anything/utils/transforms.py b/segment_anything/utils/transforms.py
index 3ad346661..97a682a28 100644
--- a/segment_anything/utils/transforms.py
+++ b/segment_anything/utils/transforms.py
@@ -15,7 +15,7 @@
 
 class ResizeLongestSide:
     """
-    Resizes images to longest side 'target_length', as well as provides
+    Resizes images to the longest side 'target_length', as well as provides
     methods for resizing coordinates and boxes. Provides methods for
     transforming both numpy array and batched torch tensors.
     """
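
Two reviewer notes on the hunks above; neither is part of the patch itself.

First, the comment corrected in the calculate_stability_score hunk documents a real memory optimization: summing a boolean tensor without an explicit dtype promotes the result to torch.int64, while passing dtype=torch.int16 for the innermost sum (and int32 for the outer one) keeps the intermediates small. Below is a minimal standalone sketch of the idea. The intersections expression is taken from the lines shown in the hunk; the unions computation and final division are an assumption based on the function's docstring ("the IoU between the binary masks obtained by thresholding the predicted mask logits at high and low values"), and the mask shapes and threshold values are invented for illustration.

import torch

# Hypothetical inputs: 2 mask logit maps of 256x256 and invented thresholds.
masks = torch.randn(2, 256, 256) * 10.0
mask_threshold, threshold_offset = 0.0, 1.0

# The high-threshold mask is always contained in the low-threshold mask,
# so intersection = area of the high mask and union = area of the low mask.
# Summing the bool tensors with explicit int16/int32 dtypes avoids the
# default promotion to torch.int64 and keeps the intermediates small.
intersections = (
    (masks > (mask_threshold + threshold_offset))
    .sum(-1, dtype=torch.int16)
    .sum(-1, dtype=torch.int32)
)
unions = (
    (masks > (mask_threshold - threshold_offset))
    .sum(-1, dtype=torch.int16)
    .sum(-1, dtype=torch.int32)
)
print(intersections / unions)  # per-mask stability score (IoU), shape (2,)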
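
Second, a short usage sketch to make the reworded ResizeLongestSide docstring concrete. This assumes the class's public numpy-side methods in segment_anything/utils/transforms.py (apply_image, apply_coords, apply_boxes, with torch counterparts for batched tensors); the image size and prompt coordinates are invented for the example.

import numpy as np
from segment_anything.utils.transforms import ResizeLongestSide

# SAM's image encoder expects the longest image side to be 1024 px.
transform = ResizeLongestSide(target_length=1024)

image = np.zeros((600, 800, 3), dtype=np.uint8)  # invented HxWxC image
resized = transform.apply_image(image)
print(resized.shape)  # (768, 1024, 3): both sides scaled by 1024/800

# Point and box prompts are rescaled using the ORIGINAL (H, W) size.
point = np.array([[400.0, 300.0]])                # (x, y) in the 800x600 image
print(transform.apply_coords(point, (600, 800)))  # [[512. 384.]]
box = np.array([[100.0, 100.0, 700.0, 500.0]])    # XYXY
print(transform.apply_boxes(box, (600, 800)))     # [[128. 128. 896. 640.]]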