-
-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathexample.py
More file actions
95 lines (80 loc) · 2.99 KB
/
example.py
File metadata and controls
95 lines (80 loc) · 2.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import torch
from lfm2.main import create_lfm2_model
def forward_pass_example():
    """Walk through five ways of calling the LFM2 model's forward pass.

    Builds the "350M" model with ``create_lfm2_model``, puts it in
    evaluation mode, draws one random batch of token ids, and then calls
    the model under ``torch.no_grad()`` in each configuration below,
    printing a summary of what comes back:

    1. Token ids only.
    2. Token ids plus an attention mask whose last four columns are zero.
    3. ``use_cache=True``, listing the per-layer ``past_key_values`` shapes.
    4. ``output_attentions=True`` together with ``output_hidden_states=True``.
    5. Explicit ``position_ids`` that count down instead of up.

    Nothing is returned; every result is reported on stdout.
    """

    def announce(heading):
        # Section banner: the heading followed by a 50-character rule.
        print(heading)
        print("-" * 50)

    # Build the demonstration model and switch it to evaluation mode.
    model = create_lfm2_model("350M", verbose=True)
    model.eval()

    # One synthetic batch of random token ids is reused by every scenario.
    n_rows, n_tokens = 2, 32
    token_ids = torch.randint(
        0, model.config.vocab_size, (n_rows, n_tokens)
    )

    # --- Scenario 1: token ids only -------------------------------------
    announce("\n1. Basic Forward Pass")
    with torch.no_grad():
        outputs = model(token_ids)
        print(f"Output logits shape: {outputs['logits'].shape}")
        print("Expected shape: [batch_size, seq_length, vocab_size]")
        print(f"Actual: {list(outputs['logits'].shape)}")

    # --- Scenario 2: attention mask -------------------------------------
    announce("\n2. Forward Pass with Attention Mask")
    # Start from an all-ones mask, then zero the final four columns.
    pad_mask = torch.ones(n_rows, n_tokens)
    pad_mask[:, -4:] = 0
    with torch.no_grad():
        outputs = model(input_ids=token_ids, attention_mask=pad_mask)
        print(f"Output with masked attention shape: {outputs['logits'].shape}")

    # --- Scenario 3: caching --------------------------------------------
    announce("\n3. Forward Pass with Caching")
    with torch.no_grad():
        outputs = model(input_ids=token_ids, use_cache=True)
        print("Cache sizes for each layer:")
        # Each cache entry is indexed as [0] -> key, [1] -> value.
        for idx, past_kv in enumerate(outputs["past_key_values"]):
            print(f"Layer {idx}: Key shape: {past_kv[0].shape}, Value shape: {past_kv[1].shape}")

    # --- Scenario 4: hidden states and attention patterns ---------------
    announce("\n4. Forward Pass with All Outputs")
    with torch.no_grad():
        outputs = model(
            input_ids=token_ids,
            output_attentions=True,
            output_hidden_states=True,
        )
        print("Available outputs:", outputs.keys())
        print(f"Number of hidden states: {len(outputs['hidden_states'])}")
        print(f"Number of attention patterns: {len(outputs['attentions'])}")

    # --- Scenario 5: custom position ids --------------------------------
    announce("\n5. Forward Pass with Custom Position IDs")
    # Positions run n_tokens-1 down to 0; the single row is then viewed
    # once per batch row via expand().
    countdown_positions = (
        torch.arange(n_tokens - 1, -1, -1).unsqueeze(0).expand(n_rows, -1)
    )
    with torch.no_grad():
        outputs = model(
            input_ids=token_ids, position_ids=countdown_positions
        )
        print(f"Output with custom positions shape: {outputs['logits'].shape}")
# Script entry point: run the demonstration only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    forward_pass_example()