From ff2d4f288cd48c32c016ee758341651969ed68aa Mon Sep 17 00:00:00 2001
From: Arthur <48595927+ArthurZucker@users.noreply.github.com>
Date: Thu, 25 May 2023 16:06:14 +0200
Subject: [PATCH] [LongFormer] code nits, removed unused parameters (#23749)

* remove unused parameters

* remove unused parameters in config
---
 .../models/longformer/configuration_longformer.py         | 8 --------
 src/transformers/models/longformer/modeling_longformer.py | 2 --
 2 files changed, 10 deletions(-)

diff --git a/src/transformers/models/longformer/configuration_longformer.py b/src/transformers/models/longformer/configuration_longformer.py
index 3f3e2da7e830e8..1542c497989ff0 100644
--- a/src/transformers/models/longformer/configuration_longformer.py
+++ b/src/transformers/models/longformer/configuration_longformer.py
@@ -86,12 +86,6 @@ class LongformerConfig(PretrainedConfig):
             The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
         layer_norm_eps (`float`, *optional*, defaults to 1e-12):
             The epsilon used by the layer normalization layers.
-        position_embedding_type (`str`, *optional*, defaults to `"absolute"`):
-            Type of position embedding. Choose one of `"absolute"`, `"relative_key"`, `"relative_key_query"`. For
-            positional embeddings use `"absolute"`. For more information on `"relative_key"`, please refer to
-            [Self-Attention with Relative Position Representations (Shaw et al.)](https://arxiv.org/abs/1803.02155).
-            For more information on `"relative_key_query"`, please refer to *Method 4* in [Improve Transformer Models
-            with Better Relative Position Embeddings (Huang et al.)](https://arxiv.org/abs/2009.13658).
         attention_window (`int` or `List[int]`, *optional*, defaults to 512):
             Size of an attention window around each token. If an `int`, use the same size for all layers. To specify a
             different window size for each layer, use a `List[int]` where `len(attention_window) == num_hidden_layers`.
@@ -131,7 +125,6 @@ def __init__(
         type_vocab_size: int = 2,
         initializer_range: float = 0.02,
         layer_norm_eps: float = 1e-12,
-        position_embedding_type: str = "absolute",
         onnx_export: bool = False,
         **kwargs,
     ):
@@ -154,7 +147,6 @@ def __init__(
         self.type_vocab_size = type_vocab_size
         self.initializer_range = initializer_range
         self.layer_norm_eps = layer_norm_eps
-        self.position_embedding_type = position_embedding_type
         self.onnx_export = onnx_export
 
 
diff --git a/src/transformers/models/longformer/modeling_longformer.py b/src/transformers/models/longformer/modeling_longformer.py
index 9768641afe451c..cd975380be553b 100755
--- a/src/transformers/models/longformer/modeling_longformer.py
+++ b/src/transformers/models/longformer/modeling_longformer.py
@@ -445,8 +445,6 @@ def __init__(self, config):
         self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
         self.dropout = nn.Dropout(config.hidden_dropout_prob)
 
-        self.position_embedding_type = getattr(config, "position_embedding_type", "absolute")
-
         self.padding_idx = config.pad_token_id
         self.position_embeddings = nn.Embedding(
             config.max_position_embeddings, config.hidden_size, padding_idx=self.padding_idx
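
Note (not part of the patch): the removed `position_embedding_type` option was never read by the Longformer code paths; the embeddings are always absolute and padding-aware, with position ids derived from the input ids so that padding tokens keep the padding position. Below is a minimal illustrative sketch of that derivation; the function name is mine, not the library's API, and it assumes the RoBERTa-style `pad_token_id = 1` used by the released Longformer checkpoints.

# Sketch only: an absolute, padding-aware position-id scheme of the kind
# Longformer's embeddings rely on. Names are illustrative, not the library's API.
import torch


def absolute_position_ids(input_ids: torch.Tensor, padding_idx: int) -> torch.Tensor:
    # Non-padding tokens get increasing positions starting at padding_idx + 1;
    # padding tokens stay at padding_idx.
    mask = input_ids.ne(padding_idx).int()
    return (torch.cumsum(mask, dim=1) * mask + padding_idx).long()


if __name__ == "__main__":
    pad = 1  # assumption: RoBERTa-style pad token id used by Longformer checkpoints
    ids = torch.tensor([[0, 1234, 56, pad, pad]])
    print(absolute_position_ids(ids, pad))  # -> tensor([[2, 3, 4, 1, 1]])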