Skip to content

Commit

Permalink
Add support for GPTQ-quantized MoE models using MoE Marlin (huggingface#2557)
Browse files Browse the repository at this point in the history

This change adds support for MoE models that use GPTQ quantization.
Currently only models with the following properties are supported:

- No `desc_act` with tensor parallelism, unless `group_size=-1`.
- No asymmetric quantization.
- No AWQ.
  • Loading branch information
danieldk authored and yuanwu2017 committed Oct 25, 2024
1 parent ff905ae commit 288bcb0
Show file tree
Hide file tree
Showing 9 changed files with 866 additions and 26 deletions.
15 changes: 8 additions & 7 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
inputs.nixpkgs.follows = "tgi-nix/nixpkgs";
};
nix-filter.url = "github:numtide/nix-filter";
tgi-nix.url = "github:huggingface/text-generation-inference-nix";
tgi-nix.url = "github:danieldk/tgi-nix/moe-kernels-0.4.0";
nixpkgs.follows = "tgi-nix/nixpkgs";
flake-utils.url = "github:numtide/flake-utils";
rust-overlay = {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 3735,
"logprob": -11.0078125,
"text": "Test"
},
{
"id": 2159,
"logprob": -13.59375,
"text": "request"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -1.7089844,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.68847656,
"special": false,
"text": "\n"
},
{
"id": 28771,
"logprob": -1.9394531,
"special": false,
"text": "#"
},
{
"id": 3735,
"logprob": -2.8808594,
"special": false,
"text": " Test"
},
{
"id": 2159,
"logprob": -0.37280273,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.26098633,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.0017137527,
"special": false,
"text": "\n"
},
{
"id": 1064,
"logprob": -2.2695312,
"special": false,
"text": "##"
},
{
"id": 3735,
"logprob": -1.9238281,
"special": false,
"text": " Test"
},
{
"id": 2159,
"logprob": -0.48828125,
"special": false,
"text": " request"
}
],
"top_tokens": null
},
"generated_text": "\n\n# Test request\n\n## Test request"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 3735,
"logprob": -11.0078125,
"text": "Test"
},
{
"id": 2159,
"logprob": -13.59375,
"text": "request"
}
],
"seed": 0,
"tokens": [
{
"id": 13,
"logprob": -0.34838867,
"special": false,
"text": "\n"
},
{
"id": 13940,
"logprob": -0.38916016,
"special": false,
"text": "``"
},
{
"id": 28832,
"logprob": 0.0,
"special": false,
"text": "`"
},
{
"id": 3371,
"logprob": -1.2529297,
"special": false,
"text": "json"
},
{
"id": 13,
"logprob": 0.0,
"special": false,
"text": "\n"
},
{
"id": 28751,
"logprob": 0.0,
"special": false,
"text": "{"
},
{
"id": 13,
"logprob": 0.0,
"special": false,
"text": "\n"
},
{
"id": 2287,
"logprob": 0.0,
"special": false,
"text": " "
},
{
"id": 345,
"logprob": 0.0,
"special": false,
"text": " \""
},
{
"id": 3134,
"logprob": -0.640625,
"special": false,
"text": "request"
}
],
"top_tokens": null
},
"generated_text": "Test request\n```json\n{\n \"request"
}
Loading

0 comments on commit 288bcb0

Please sign in to comment.