Skip to content

Commit 12f83ab

Browse files
Martin Yuan and facebook-github-bot
Martin Yuan
authored and committed
Enable SDPA without kv cache
Summary: The SDPA custom op has been decoupled from the KV cache by kimishpatel. Update the llama definition so that the SDPA op is applied both with and without the KV cache. Differential Revision: D70593177
1 parent 5dd96c3 commit 12f83ab

File tree

1 file changed

+9
-25
lines changed

1 file changed

+9
-25
lines changed

examples/models/llama/attention.py

+9-25
Original file line numberDiff line numberDiff line change
@@ -209,13 +209,13 @@ def __init__(self, args: ModelArgs, layer_id: int, rope: Rope):
209209
self.head_dim,
210210
args.enable_dynamic_shape,
211211
)
212-
self.SDPA = SDPA(
213-
dim=self.n_local_heads * self.head_dim,
214-
head_dim=self.head_dim,
215-
n_rep=self.n_rep,
216-
max_context_len=self.max_context_len,
217-
enable_dynamic_shape=args.enable_dynamic_shape,
218-
)
212+
self.SDPA = SDPA(
213+
dim=self.n_local_heads * self.head_dim,
214+
head_dim=self.head_dim,
215+
n_rep=self.n_rep,
216+
max_context_len=self.max_context_len,
217+
enable_dynamic_shape=args.enable_dynamic_shape,
218+
)
219219

220220
def forward(
221221
self,
@@ -244,21 +244,5 @@ def forward(
244244
if self.use_kv_cache:
245245
assert input_pos is not None
246246
k, v = self.kv_cache.update(input_pos, k, v)
247-
output = self.SDPA(input_pos, q, k, v, bsz, seqlen, self.mask)
248-
return self.wo(output), None
249-
250-
# grouped multiquery attention: expand out keys and values
251-
k = k.repeat_interleave(self.n_rep, dim=1)
252-
v = v.repeat_interleave(self.n_rep, dim=1)
253-
254-
assert hasattr(self, "mask")
255-
256-
mask = self.mask[:seqlen, :seqlen]
257-
258-
output = F.scaled_dot_product_attention(q, k, v, attn_mask=mask, dropout_p=0.0)
259-
260-
output = output.transpose(1, 2).contiguous().view(bsz, seqlen, -1)
261-
262-
output = self.wo(output)
263-
264-
return output, None
247+
output = self.SDPA(input_pos, q, k, v, bsz, seqlen, self.mask)
248+
return self.wo(output), None

0 commit comments

Comments
 (0)