# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" ALBERT model configuration """

from .configuration_utils import PretrainedConfig

ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
    'albert-base-v1': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-config.json",
    'albert-large-v1': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-config.json",
    'albert-xlarge-v1': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-config.json",
    'albert-xxlarge-v1': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-config.json",
    'albert-base-v2': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-v2-config.json",
    'albert-large-v2': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-v2-config.json",
    'albert-xlarge-v2': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-v2-config.json",
    'albert-xxlarge-v2': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-v2-config.json",
}
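# Usage note (a sketch relying on behavior inherited from `PretrainedConfig`, not on
# anything defined in this file): the shortcut names above can typically be passed to
# `AlbertConfig.from_pretrained`, which resolves them through this map, e.g.
# `config = AlbertConfig.from_pretrained('albert-base-v2')`.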


class AlbertConfig(PretrainedConfig):
    """Configuration for `AlbertModel`.

    The default settings match the configuration of model `albert_xxlarge`.
    """

    pretrained_config_archive_map = ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP

    def __init__(self,
                 vocab_size_or_config_json_file=30000,
                 embedding_size=128,
                 hidden_size=4096,
                 num_hidden_layers=12,
                 num_hidden_groups=1,
                 num_attention_heads=64,
                 intermediate_size=16384,
                 inner_group_num=1,
                 hidden_act="gelu_new",
                 hidden_dropout_prob=0,
                 attention_probs_dropout_prob=0,
                 max_position_embeddings=512,
                 type_vocab_size=2,
                 initializer_range=0.02,
                 layer_norm_eps=1e-12,
                 **kwargs):
        """Constructs AlbertConfig.

        Args:
            vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `AlbertModel`.
            embedding_size: Size of the vocabulary embeddings.
            hidden_size: Size of the encoder layers and the pooler layer.
            num_hidden_layers: Number of hidden layers in the Transformer encoder.
            num_hidden_groups: Number of groups for the hidden layers; parameters in
                the same group are shared.
            num_attention_heads: Number of attention heads for each attention layer in
                the Transformer encoder.
            intermediate_size: The size of the "intermediate" (i.e., feed-forward)
                layer in the Transformer encoder.
            inner_group_num: int, number of inner repetitions of attention and ffn.
            hidden_act: The non-linear activation function (function or string) in the
                encoder and pooler.
            hidden_dropout_prob: The dropout probability for all fully connected
                layers in the embeddings, encoder, and pooler.
            attention_probs_dropout_prob: The dropout ratio for the attention
                probabilities.
            max_position_embeddings: The maximum sequence length that this model might
                ever be used with. Typically set this to something large just in case
                (e.g., 512 or 1024 or 2048).
            type_vocab_size: The vocabulary size of the `token_type_ids` passed into
                `AlbertModel`.
            initializer_range: The stdev of the truncated_normal_initializer for
                initializing all weight matrices.
            layer_norm_eps: The epsilon used by the layer normalization layers.
        """
        super(AlbertConfig, self).__init__(**kwargs)

        self.vocab_size = vocab_size_or_config_json_file
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_hidden_groups = num_hidden_groups
        self.num_attention_heads = num_attention_heads
        self.inner_group_num = inner_group_num
        self.hidden_act = hidden_act
        self.intermediate_size = intermediate_size
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.max_position_embeddings = max_position_embeddings
        self.type_vocab_size = type_vocab_size
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps
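

# Minimal usage sketch (an illustration, not part of the original file): build a config
# with the albert_xxlarge-style defaults above, then a smaller variant with a few fields
# overridden. The override values are assumptions chosen for illustration, not taken from
# any released checkpoint. Because of the relative import at the top, run this as a
# module from within its package (e.g. `python -m <package>.configuration_albert`).
if __name__ == "__main__":
    # Default configuration: hidden_size=4096, num_attention_heads=64, etc.
    config = AlbertConfig()
    print(config.hidden_size, config.num_attention_heads)  # 4096 64

    # Override a few fields for a smaller, base-sized variant (illustrative values).
    small_config = AlbertConfig(hidden_size=768,
                                num_attention_heads=12,
                                intermediate_size=3072)
    print(small_config.hidden_size)  # 768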