fix: prefer original layernorm names for 180B (#2365)
drbh authored Aug 6, 2024
1 parent a64d407 commit 133015f
Showing 1 changed file with 10 additions and 1 deletion.
@@ -382,8 +382,13 @@ def __init__(

         prefix = f"{prefix}.h.{layer_id}"

+        # NOTE: Falcon 180B uses the ln_attn prefix
+        ln_prefix = "input_layernorm"
+        if config.num_hidden_layers == 80:
+            ln_prefix = "ln_attn"
+
         self.input_layernorm = FastLayerNorm.load(
-            prefix=f"{prefix}.input_layernorm",
+            prefix=f"{prefix}.{ln_prefix}",
             weights=weights,
             eps=config.layer_norm_epsilon,
         )
@@ -477,6 +482,10 @@ def __init__(self, config, prefix: str, weights):
         # in the case no number of layer norms is provided, we default to 1
         self.num_ln = getattr(config, "num_ln_in_parallel_attn", 1)

+        # Falcon 180B uses the ln_attn prefix and has 2 layer norms
+        if config.num_hidden_layers == 80:
+            self.num_ln = 2
+
         if self.num_ln == 1:
             self.input_ln = FastLayerNorm.load(
                 prefix=f"{prefix}.input_layernorm",
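To make the dispatch introduced by this commit easy to see in isolation, here is a minimal standalone sketch. The Config dataclass and the attn_ln_name / num_layernorms helpers are hypothetical stand-ins for illustration only; in the repository the logic runs inline in the module constructors shown in the diff, around the FastLayerNorm.load calls.

from dataclasses import dataclass


@dataclass
class Config:
    # Hypothetical stand-in for the model config consulted by the loader.
    num_hidden_layers: int
    num_ln_in_parallel_attn: int = 1


def attn_ln_name(config: Config) -> str:
    # Falcon 180B checkpoints (80 hidden layers) keep the original
    # "ln_attn" weight name; smaller Falcon checkpoints use
    # "input_layernorm".
    if config.num_hidden_layers == 80:
        return "ln_attn"
    return "input_layernorm"


def num_layernorms(config: Config) -> int:
    # Default to 1 when the config does not specify how many layer
    # norms the parallel-attention block has; Falcon 180B has 2.
    if config.num_hidden_layers == 80:
        return 2
    return config.num_ln_in_parallel_attn


# Smaller Falcon-style config: one layer norm, renamed weights.
small = Config(num_hidden_layers=32)
assert attn_ln_name(small) == "input_layernorm"
assert num_layernorms(small) == 1

# Falcon 180B: two layer norms under the original names.
big = Config(num_hidden_layers=80)
assert attn_ln_name(big) == "ln_attn"
assert num_layernorms(big) == 2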
