Skip to content

Commit

Permalink
v1.3.1
Browse files Browse the repository at this point in the history
  • Loading branch information
OlivierDehaene committed Dec 11, 2023
1 parent d0841cc commit ec6d459
Show file tree
Hide file tree
Showing 7 changed files with 17 additions and 11 deletions.
8 changes: 4 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ members = [
]

[workspace.package]
-version = "1.3.0"
+version = "1.3.1"
edition = "2021"
authors = ["Olivier Dehaene"]
homepage = "https://github.com/huggingface/text-generation-inference"
Expand Down
2 changes: 1 addition & 1 deletion docs/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"name": "Apache 2.0",
"url": "https://www.apache.org/licenses/LICENSE-2.0"
},
-"version": "1.3.0"
+"version": "1.3.1"
},
"paths": {
"/": {
Expand Down
2 changes: 1 addition & 1 deletion integration-tests/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "text-generation-integration-tests"
-version = "1.3.0"
+version = "1.3.1"
description = "Text Generation Inference integration tests"
authors = ["Nicolas Patry <nicolas@huggingface.co>"]

Expand Down
2 changes: 1 addition & 1 deletion server/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "text-generation-server"
-version = "1.3.0"
+version = "1.3.1"
description = "Text Generation Inference Python gRPC Server"
authors = ["Olivier Dehaene <olivier@huggingface.co>"]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -391,14 +391,15 @@ def forward(
slots: torch.Tensor,
input_lengths: torch.Tensor,
max_s: int,
+true_max_s: int,
prefill_cache_indices: Optional[torch.Tensor],
) -> torch.Tensor:
hidden_states = self.embed_tokens(input_ids)

# Get rotary cos and sin for this forward
# Avoid to index in each layer
cos, sin = self.layers[0].self_attn.rotary_emb.get_cos_sin(
-position_ids, max_s, hidden_states.dtype
+position_ids, true_max_s, hidden_states.dtype
)

residual = None
Expand Down Expand Up @@ -449,6 +450,7 @@ def forward(
prefill_cache_indices: Optional[torch.Tensor],
lm_head_indices: Optional[torch.Tensor] = None,
) -> torch.Tensor:
+true_max_s = max_s
if prefill_cache_indices is not None:
# Slots also need to be sliced as it has the same size as the whole kv tensor
slots = slots[prefill_cache_indices]
Expand All @@ -467,6 +469,7 @@ def forward(
slots,
input_lengths,
max_s,
+true_max_s,
prefill_cache_indices,
)
if lm_head_indices is not None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ def topology(self, x: torch.Tensor, padded_bins: torch.Tensor):
self.offsets_block_rows = block_rows
offsets = self.offsets
else:
-offsets = self.offsets[:block_rows]
+offsets = self.offsets[: block_rows + 1]

# Indices for the sparse matrix. The indices for
# the intermediate matrix are dynamic depending
Expand Down Expand Up @@ -632,14 +632,15 @@ def forward(
slots: torch.Tensor,
input_lengths: torch.Tensor,
max_s: int,
+true_max_s: int,
prefill_cache_indices: Optional[torch.Tensor],
) -> torch.Tensor:
hidden_states = self.embed_tokens(input_ids)

# Get rotary cos and sin for this forward
# Avoid to index in each layer
cos, sin = self.layers[0].self_attn.rotary_emb.get_cos_sin(
-position_ids, max_s, hidden_states.dtype
+position_ids, true_max_s, hidden_states.dtype
)

residual = None
Expand Down Expand Up @@ -690,6 +691,7 @@ def forward(
prefill_cache_indices: Optional[torch.Tensor],
lm_head_indices: Optional[torch.Tensor] = None,
) -> torch.Tensor:
+true_max_s = max_s
if prefill_cache_indices is not None:
# Slots also need to be sliced as it has the same size as the whole kv tensor
slots = slots[prefill_cache_indices]
Expand All @@ -708,6 +710,7 @@ def forward(
slots,
input_lengths,
max_s,
+true_max_s,
prefill_cache_indices,
)
if lm_head_indices is not None:
Expand Down

0 comments on commit ec6d459

Please sign in to comment.