Currently, the model config is logged twice during startup:
- via `AutoConfig.from_pretrained`
- via `AutoTokenizer.from_pretrained` -> `AutoConfig.from_pretrained`
Should there be a state variable that prevents the logging of the same config twice?
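
Purely as an illustration of what such a guard could look like (this is a rough sketch, not actual `transformers` code; the helper name and the module-level set are invented), the idea would be to remember which serialized configs have already been printed in the current process:

```python
import logging

logger = logging.getLogger(__name__)

# Hypothetical module-level cache of configs that were already logged
# (`_logged_configs` is an invented name, used only for this sketch).
_logged_configs = set()


def log_config_once(config):
    """Emit the 'Model config ...' INFO line only once per identical config."""
    key = config.to_json_string()  # stable string representation of the config
    if key not in _logged_configs:
        _logged_configs.add(key)
        logger.info("Model config %s", config)
```

Both code paths would then go through such a helper instead of logging unconditionally, so the second `AutoConfig.from_pretrained` call would stay silent.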
This happens, for example, with all example scripts.

Example log when running `run_clm.py`:
File "examples/language-modeling/run_clm.py", line 444, in <module>
main()
File "examples/language-modeling/run_clm.py", line 275, in main
config = AutoConfig.from_pretrained(model_args.model_name_or_path, **config_kwargs)
File "/mnt/nvme1/code/huggingface/transformers-gpt-neo-nan/src/transformers/models/auto/configuration_auto.py", line 401, in from_pretrained
return config_class.from_dict(config_dict, **kwargs)
File "/mnt/nvme1/code/huggingface/transformers-gpt-neo-nan/src/transformers/configuration_utils.py", line 526, in from_dict
traceback.print_stack()
[INFO|configuration_utils.py:527] 2021-04-06 21:16:04,999 >> Model config GPT2Config {
"_num_labels": 1,
"activation_function": "gelu_new",
"architectures": [
"GPT2LMHeadModel"
],
"attn_pdrop": 0.1,
"bos_token_id": 50256,
"embd_pdrop": 0.1,
"eos_token_id": 50256,
"gradient_checkpointing": false,
"id2label": {
"0": "LABEL_0"
},
"initializer_range": 0.02,
"label2id": {
"LABEL_0": 0
},
"layer_norm_epsilon": 1e-05,
"model_type": "gpt2",
"n_ctx": 1024,
"n_embd": 768,
"n_head": 12,
"n_inner": null,
"n_layer": 6,
"n_positions": 1024,
"resid_pdrop": 0.1,
"summary_activation": null,
"summary_first_dropout": 0.1,
"summary_proj_to_labels": true,
"summary_type": "cls_index",
"summary_use_proj": true,
"task_specific_params": {
"text-generation": {
"do_sample": true,
"max_length": 50
}
},
"transformers_version": "4.5.0",
"use_cache": true,
"vocab_size": 50257
}
[INFO|configuration_utils.py:490] 2021-04-06 21:16:05,277 >> loading configuration file https://huggingface.co/distilgpt2/resolve/main/config.json from cache at /home/stas/.cache/huggingface/transformers/f985248d2791fcff97732e4ee263617adec1edb5429a2b8421734c6d14e39bee.422318838d1ec4e061efb4ea29671cb2a044e244dc69229682bebd7cacc81631
File "examples/language-modeling/run_clm.py", line 444, in <module>
main()
File "examples/language-modeling/run_clm.py", line 289, in main
tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs)
File "/mnt/nvme1/code/huggingface/transformers-gpt-neo-nan/src/transformers/models/auto/tokenization_auto.py", line 390, in from_pretrained
config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
File "/mnt/nvme1/code/huggingface/transformers-gpt-neo-nan/src/transformers/models/auto/configuration_auto.py", line 401, in from_pretrained
return config_class.from_dict(config_dict, **kwargs)
File "/mnt/nvme1/code/huggingface/transformers-gpt-neo-nan/src/transformers/configuration_utils.py", line 526, in from_dict
traceback.print_stack()
[INFO|configuration_utils.py:527] 2021-04-06 21:16:05,279 >> Model config GPT2Config {
"_num_labels": 1,
"activation_function": "gelu_new",
"architectures": [
"GPT2LMHeadModel"
],
"attn_pdrop": 0.1,
"bos_token_id": 50256,
"embd_pdrop": 0.1,
"eos_token_id": 50256,
"gradient_checkpointing": false,
"id2label": {
"0": "LABEL_0"
},
"initializer_range": 0.02,
"label2id": {
"LABEL_0": 0
},
"layer_norm_epsilon": 1e-05,
"model_type": "gpt2",
"n_ctx": 1024,
"n_embd": 768,
"n_head": 12,
"n_inner": null,
"n_layer": 6,
"n_positions": 1024,
"resid_pdrop": 0.1,
"summary_activation": null,
"summary_first_dropout": 0.1,
"summary_proj_to_labels": true,
"summary_type": "cls_index",
"summary_use_proj": true,
"task_specific_params": {
"text-generation": {
"do_sample": true,
"max_length": 50
}
},
"transformers_version": "4.5.0",
"use_cache": true,
"vocab_size": 50257
}
To get the traceback I just added:

```python
import traceback
traceback.print_stack()
```
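
For reference, the duplicate logging can also be reproduced outside the example scripts with just the two `Auto*` calls (a minimal sketch, assuming `distilgpt2` as in the log above, with INFO verbosity enabled so the messages are visible):

```python
from transformers import AutoConfig, AutoTokenizer
from transformers.utils import logging as hf_logging

hf_logging.set_verbosity_info()  # the "Model config ..." messages are INFO-level

# First "Model config GPT2Config {...}" message
config = AutoConfig.from_pretrained("distilgpt2")

# AutoTokenizer.from_pretrained resolves the tokenizer class by calling
# AutoConfig.from_pretrained again, which prints the same config a second time.
tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
```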