Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions paddleformers/transformers/ernie4_5/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ def apply_fused_rope(query_states, key_states, rope_theta):
class Ernie4_5RotaryEmbedding(nn.Layer):
def __init__(self, config):
super().__init__()
self.max_seq_len_cached = config.max_position_embeddings
self.original_max_seq_len = config.max_position_embeddings
self.config = config
self.head_dim = config.head_dim
self.base = config.rope_theta
Expand Down
2 changes: 2 additions & 0 deletions paddleformers/transformers/ernie4_5_moe/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ def mtp_hidden_states_set_zero(hidden_states, inbatch_pack_offset):
class Ernie4_5_MoeRotaryEmbedding(nn.Layer):
def __init__(self, config):
super().__init__()
self.max_seq_len_cached = config.max_position_embeddings
self.original_max_seq_len = config.max_position_embeddings
self.config = config
self.head_dim = config.head_dim
self.base = config.rope_theta
Expand Down
2 changes: 2 additions & 0 deletions paddleformers/transformers/gemma3_text/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ def forward(self, x):
class Gemma3RotaryEmbedding(nn.Layer):
def __init__(self, config):
super().__init__()
self.max_seq_len_cached = config.max_position_embeddings
self.original_max_seq_len = config.max_position_embeddings
self.config = config
base = config.rope_theta
partial_rotary_factor = config.partial_rotary_factor if hasattr(config, "partial_rotary_factor") else 1.0
Expand Down
2 changes: 2 additions & 0 deletions paddleformers/transformers/glm4_moe/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -1201,6 +1201,8 @@ def _gen_inv_aoa_config(cls, config: Glm4MoeConfig):
class Glm4MoeRotaryEmbedding(nn.Layer):
def __init__(self, config: Glm4MoeConfig, device=None):
super().__init__()
self.max_seq_len_cached = config.max_position_embeddings
self.original_max_seq_len = config.max_position_embeddings
self.config = config
base = config.rope_theta
partial_rotary_factor = config.partial_rotary_factor if hasattr(config, "partial_rotary_factor") else 1.0
Expand Down
2 changes: 2 additions & 0 deletions paddleformers/transformers/llama/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,8 @@ def _compute_llama3_parameters(config):
class LlamaRotaryEmbedding(nn.Layer):
def __init__(self, config):
super().__init__()
self.max_seq_len_cached = config.max_position_embeddings
self.original_max_seq_len = config.max_position_embeddings
self.config = config
self.head_dim = getattr(config, "head_dim", config.hidden_size // config.num_attention_heads)

Expand Down
2 changes: 2 additions & 0 deletions paddleformers/transformers/phi3/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,8 @@ def forward(
class Phi3RotaryEmbedding(nn.Layer):
def __init__(self, config: Phi3Config, device=None):
super().__init__()
self.max_seq_len_cached = config.max_position_embeddings
self.original_max_seq_len = config.max_position_embeddings
self.config = config
base = config.rope_theta
partial_rotary_factor = config.partial_rotary_factor if hasattr(config, "partial_rotary_factor") else 1.0
Expand Down
2 changes: 2 additions & 0 deletions paddleformers/transformers/qwen2/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,8 @@ def _gen_inv_aoa_config(cls, config: Qwen2Config):
class Qwen2RotaryEmbedding(nn.Layer):
def __init__(self, config: Qwen2Config):
super().__init__()
self.max_seq_len_cached = config.max_position_embeddings
self.original_max_seq_len = config.max_position_embeddings
self.config = config
base = config.rope_theta
partial_rotary_factor = config.partial_rotary_factor if hasattr(config, "partial_rotary_factor") else 1.0
Expand Down
2 changes: 2 additions & 0 deletions paddleformers/transformers/qwen2_5_vl/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,8 @@ class Qwen2_5_VLRotaryEmbedding(nn.Layer):

def __init__(self, config: Qwen2_5_VLTextConfig):
super().__init__()
self.max_seq_len_cached = config.max_position_embeddings
self.original_max_seq_len = config.max_position_embeddings
self.config = config
base = config.rope_theta
partial_rotary_factor = config.partial_rotary_factor if hasattr(config, "partial_rotary_factor") else 1.0
Expand Down
2 changes: 2 additions & 0 deletions paddleformers/transformers/qwen2_moe/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,8 @@ def forward(
class Qwen2MoeRotaryEmbedding(nn.Layer):
def __init__(self, config: Qwen2MoeConfig):
super().__init__()
self.max_seq_len_cached = config.max_position_embeddings
self.original_max_seq_len = config.max_position_embeddings
self.config = config
base = config.rope_theta
partial_rotary_factor = config.partial_rotary_factor if hasattr(config, "partial_rotary_factor") else 1.0
Expand Down
2 changes: 2 additions & 0 deletions paddleformers/transformers/qwen3/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,8 @@ def _gen_inv_aoa_config(cls, config: Qwen3Config):
class Qwen3RotaryEmbedding(nn.Layer):
def __init__(self, config: Qwen3Config):
super().__init__()
self.max_seq_len_cached = config.max_position_embeddings
self.original_max_seq_len = config.max_position_embeddings
self.config = config
base = config.rope_theta
partial_rotary_factor = config.partial_rotary_factor if hasattr(config, "partial_rotary_factor") else 1.0
Expand Down
2 changes: 2 additions & 0 deletions paddleformers/transformers/qwen3_moe/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,8 @@ def forward(
class Qwen3MoeRotaryEmbedding(nn.Layer):
def __init__(self, config: Qwen3MoeConfig):
super().__init__()
self.max_seq_len_cached = config.max_position_embeddings
self.original_max_seq_len = config.max_position_embeddings
self.config = config
base = config.rope_theta
partial_rotary_factor = config.partial_rotary_factor if hasattr(config, "partial_rotary_factor") else 1.0
Expand Down