Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions examples/seq2seq/test_finetune_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def _compute_metrics(pred):
per_device_train_batch_size=batch_size,
per_device_eval_batch_size=batch_size,
predict_with_generate=True,
evaluate_during_training=True,
evaluation_strategy="steps",
do_train=True,
do_eval=True,
warmup_steps=0,
Expand Down Expand Up @@ -179,7 +179,7 @@ def run_trainer(self, eval_steps: int, max_len: str, model_name: str, num_train_
--per_device_eval_batch_size 4
--learning_rate 3e-3
--warmup_steps 8
--evaluate_during_training
--evaluation_strategy steps
--predict_with_generate
--logging_steps 0
--save_steps {str(eval_steps)}
Expand Down
4 changes: 2 additions & 2 deletions examples/token-classification/run_ner_old.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def compute_metrics(p: EvalPrediction) -> Dict:
trainer.save_model()
# For convenience, we also re-save the tokenizer to the same directory,
# so that you can share your model easily on huggingface.co/models =)
if trainer.is_world_master():
if trainer.is_world_process_zero():
tokenizer.save_pretrained(training_args.output_dir)

# Evaluation
Expand All @@ -265,7 +265,7 @@ def compute_metrics(p: EvalPrediction) -> Dict:
result = trainer.evaluate()

output_eval_file = os.path.join(training_args.output_dir, "eval_results.txt")
if trainer.is_world_master():
if trainer.is_world_process_zero():
with open(output_eval_file, "w") as writer:
logger.info("***** Eval results *****")
for key, value in result.items():
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/data/processors/squad.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,11 @@ def squad_convert_example_to_features(
# in the way they compute mask of added tokens.
tokenizer_type = type(tokenizer).__name__.replace("Tokenizer", "").lower()
sequence_added_tokens = (
tokenizer.max_len - tokenizer.max_len_single_sentence + 1
tokenizer.model_max_length - tokenizer.max_len_single_sentence + 1
if tokenizer_type in MULTI_SEP_TOKENS_TOKENIZERS_SET
else tokenizer.max_len - tokenizer.max_len_single_sentence
else tokenizer.model_max_length - tokenizer.max_len_single_sentence
)
sequence_pair_added_tokens = tokenizer.max_len - tokenizer.max_len_sentences_pair
sequence_pair_added_tokens = tokenizer.model_max_length - tokenizer.max_len_sentences_pair

span_doc_tokens = all_doc_tokens
while len(spans) * doc_stride < len(all_doc_tokens):
Expand Down
22 changes: 0 additions & 22 deletions src/transformers/models/albert/modeling_albert.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

import math
import os
import warnings
from dataclasses import dataclass
from typing import Optional, Tuple

Expand Down Expand Up @@ -742,7 +741,6 @@ def forward(
output_attentions=None,
output_hidden_states=None,
return_dict=None,
**kwargs,
):
r"""
labels (``torch.LongTensor`` of shape ``(batch_size, sequence_length)``, `optional`):
Expand All @@ -753,8 +751,6 @@ def forward(
Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
(see :obj:`input_ids` docstring) Indices should be in ``[0, 1]``. ``0`` indicates original order (sequence
A, then sequence B), ``1`` indicates switched order (sequence B, then sequence A).
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
Used to hide legacy arguments that have been deprecated.

Returns:

Expand All @@ -773,14 +769,6 @@ def forward(
>>> sop_logits = outputs.sop_logits

"""

if "masked_lm_labels" in kwargs:
warnings.warn(
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
FutureWarning,
)
labels = kwargs.pop("masked_lm_labels")
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
return_dict = return_dict if return_dict is not None else self.config.use_return_dict

outputs = self.albert(
Expand Down Expand Up @@ -898,23 +886,13 @@ def forward(
output_attentions=None,
output_hidden_states=None,
return_dict=None,
**kwargs
):
r"""
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
(masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
Used to hide legacy arguments that have been deprecated.
"""
if "masked_lm_labels" in kwargs:
warnings.warn(
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
FutureWarning,
)
labels = kwargs.pop("masked_lm_labels")
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
return_dict = return_dict if return_dict is not None else self.config.use_return_dict

outputs = self.albert(
Expand Down
40 changes: 0 additions & 40 deletions src/transformers/models/bart/modeling_bart.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
"""PyTorch BART model, ported from the fairseq repo."""
import math
import random
import warnings
from typing import Dict, List, Optional, Tuple

import numpy as np
Expand Down Expand Up @@ -529,7 +528,6 @@ def forward(
output_attentions=False,
output_hidden_states=False,
return_dict=True,
**unused,
):
"""
Includes several features from "Jointly Learning to Align and Translate with Transformer Models" (Garg et al.,
Expand All @@ -551,18 +549,6 @@ def forward(
- hidden states
- attentions
"""
if "decoder_cached_states" in unused:
warnings.warn(
"The `decoder_cached_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
FutureWarning,
)
past_key_values = unused.pop("decoder_cached_states")
if "decoder_past_key_values" in unused:
warnings.warn(
"The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
FutureWarning,
)
past_key_values = unused.pop("decoder_past_key_values")

# check attention mask and invert
if encoder_padding_mask is not None:
Expand Down Expand Up @@ -873,14 +859,7 @@ def forward(
output_attentions=None,
output_hidden_states=None,
return_dict=None,
**kwargs,
):
if "decoder_past_key_values" in kwargs:
warnings.warn(
"The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
FutureWarning,
)
past_key_values = kwargs.pop("decoder_past_key_values")

if decoder_input_ids is None:
use_cache = False
Expand Down Expand Up @@ -1006,7 +985,6 @@ def forward(
output_attentions=None,
output_hidden_states=None,
return_dict=None,
**unused,
):
r"""
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
Expand Down Expand Up @@ -1034,24 +1012,6 @@ def forward(
>>> tokenizer.decode(predictions).split()
>>> # ['good', 'great', 'all', 'really', 'very']
"""
if "lm_labels" in unused:
warnings.warn(
"The `lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
FutureWarning,
)
labels = unused.pop("lm_labels")
if "decoder_cached_states" in unused:
warnings.warn(
"The `decoder_cached_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
FutureWarning,
)
past_key_values = unused.pop("decoder_cached_states")
if "decoder_past_key_values" in unused:
warnings.warn(
"The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
FutureWarning,
)
past_key_values = unused.pop("decoder_past_key_values")
return_dict = return_dict if return_dict is not None else self.config.use_return_dict

if labels is not None:
Expand Down
19 changes: 0 additions & 19 deletions src/transformers/models/bert/modeling_bert.py
Original file line number Diff line number Diff line change
Expand Up @@ -896,7 +896,6 @@ def forward(
output_attentions=None,
output_hidden_states=None,
return_dict=None,
**kwargs
):
r"""
labels (:obj:`torch.LongTensor` of shape ``(batch_size, sequence_length)``, `optional`):
Expand Down Expand Up @@ -928,13 +927,6 @@ def forward(
>>> prediction_logits = outputs.prediction_logits
>>> seq_relationship_logits = outputs.seq_relationship_logits
"""
if "masked_lm_labels" in kwargs:
warnings.warn(
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
FutureWarning,
)
labels = kwargs.pop("masked_lm_labels")
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
return_dict = return_dict if return_dict is not None else self.config.use_return_dict

outputs = self.bert(
Expand Down Expand Up @@ -1136,24 +1128,13 @@ def forward(
output_attentions=None,
output_hidden_states=None,
return_dict=None,
**kwargs
):
r"""
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
(masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
Used to hide legacy arguments that have been deprecated.
"""
if "masked_lm_labels" in kwargs:
warnings.warn(
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
FutureWarning,
)
labels = kwargs.pop("masked_lm_labels")
assert "lm_labels" not in kwargs, "Use `BertWithLMHead` for autoregressive language modeling task."
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."

return_dict = return_dict if return_dict is not None else self.config.use_return_dict

Expand Down
19 changes: 0 additions & 19 deletions src/transformers/models/ctrl/modeling_ctrl.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@
# limitations under the License.
""" PyTorch CTRL model."""


import warnings

import numpy as np
import torch
import torch.nn as nn
Expand Down Expand Up @@ -369,15 +366,7 @@ def forward(
output_attentions=None,
output_hidden_states=None,
return_dict=None,
**kwargs,
):
if "past" in kwargs:
warnings.warn(
"The `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
FutureWarning,
)
past_key_values = kwargs.pop("past")
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."

output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
use_cache = use_cache if use_cache is not None else self.config.use_cache
Expand Down Expand Up @@ -542,21 +531,13 @@ def forward(
output_attentions=None,
output_hidden_states=None,
return_dict=None,
**kwargs,
):
r"""
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. you can set
``labels = input_ids`` Indices are selected in ``[-100, 0, ..., config.vocab_size]`` All labels set to
``-100`` are ignored (masked), the loss is only computed for labels in ``[0, ..., config.vocab_size]``
"""
if "past" in kwargs:
warnings.warn(
"The `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
FutureWarning,
)
past_key_values = kwargs.pop("past")
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
return_dict = return_dict if return_dict is not None else self.config.use_return_dict

transformer_outputs = self.transformer(
Expand Down
11 changes: 0 additions & 11 deletions src/transformers/models/distilbert/modeling_distilbert.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

import copy
import math
import warnings

import numpy as np
import torch
Expand Down Expand Up @@ -526,23 +525,13 @@ def forward(
output_attentions=None,
output_hidden_states=None,
return_dict=None,
**kwargs
):
r"""
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
(masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``.
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
Used to hide legacy arguments that have been deprecated.
"""
if "masked_lm_labels" in kwargs:
warnings.warn(
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
FutureWarning,
)
labels = kwargs.pop("masked_lm_labels")
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
return_dict = return_dict if return_dict is not None else self.config.use_return_dict

dlbrt_output = self.distilbert(
Expand Down
11 changes: 0 additions & 11 deletions src/transformers/models/electra/modeling_electra.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

import math
import os
import warnings
from dataclasses import dataclass
from typing import Optional, Tuple

Expand Down Expand Up @@ -1000,23 +999,13 @@ def forward(
output_attentions=None,
output_hidden_states=None,
return_dict=None,
**kwargs
):
r"""
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
(masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
Used to hide legacy arguments that have been deprecated.
"""
if "masked_lm_labels" in kwargs:
warnings.warn(
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
FutureWarning,
)
labels = kwargs.pop("masked_lm_labels")
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
return_dict = return_dict if return_dict is not None else self.config.use_return_dict

generator_hidden_states = self.electra(
Expand Down
Loading