Skip to content

Commit

Permalink
Add support for GPTQ-quantized MoE models using MoE Marlin (#2557)
Browse files Browse the repository at this point in the history
This change add support for MoE models that use GPTQ quantization.
Currently only models with the following properties are supported:

- No `desc_act` with tensor parallelism, unless `group_size=-1`.
- No asymmetric quantization.
- No AWQ.
  • Loading branch information
danieldk authored Sep 30, 2024
1 parent f9e561e commit 90a1d04
Show file tree
Hide file tree
Showing 10 changed files with 870 additions and 30 deletions.
15 changes: 8 additions & 7 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
inputs.nixpkgs.follows = "tgi-nix/nixpkgs";
};
nix-filter.url = "github:numtide/nix-filter";
tgi-nix.url = "github:huggingface/text-generation-inference-nix";
tgi-nix.url = "github:danieldk/tgi-nix/moe-kernels-0.4.0";
nixpkgs.follows = "tgi-nix/nixpkgs";
flake-utils.url = "github:numtide/flake-utils";
rust-overlay = {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 3735,
"logprob": -11.0078125,
"text": "Test"
},
{
"id": 2159,
"logprob": -13.59375,
"text": "request"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -1.7089844,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.68847656,
"special": false,
"text": "\n"
},
{
"id": 28771,
"logprob": -1.9394531,
"special": false,
"text": "#"
},
{
"id": 3735,
"logprob": -2.8808594,
"special": false,
"text": " Test"
},
{
"id": 2159,
"logprob": -0.37280273,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.26098633,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.0017137527,
"special": false,
"text": "\n"
},
{
"id": 1064,
"logprob": -2.2695312,
"special": false,
"text": "##"
},
{
"id": 3735,
"logprob": -1.9238281,
"special": false,
"text": " Test"
},
{
"id": 2159,
"logprob": -0.48828125,
"special": false,
"text": " request"
}
],
"top_tokens": null
},
"generated_text": "\n\n# Test request\n\n## Test request"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 3735,
"logprob": -11.0078125,
"text": "Test"
},
{
"id": 2159,
"logprob": -13.59375,
"text": "request"
}
],
"seed": 0,
"tokens": [
{
"id": 13,
"logprob": -0.34838867,
"special": false,
"text": "\n"
},
{
"id": 13940,
"logprob": -0.38916016,
"special": false,
"text": "``"
},
{
"id": 28832,
"logprob": 0.0,
"special": false,
"text": "`"
},
{
"id": 3371,
"logprob": -1.2529297,
"special": false,
"text": "json"
},
{
"id": 13,
"logprob": 0.0,
"special": false,
"text": "\n"
},
{
"id": 28751,
"logprob": 0.0,
"special": false,
"text": "{"
},
{
"id": 13,
"logprob": 0.0,
"special": false,
"text": "\n"
},
{
"id": 2287,
"logprob": 0.0,
"special": false,
"text": " "
},
{
"id": 345,
"logprob": 0.0,
"special": false,
"text": " \""
},
{
"id": 3134,
"logprob": -0.640625,
"special": false,
"text": "request"
}
],
"top_tokens": null
},
"generated_text": "Test request\n```json\n{\n \"request"
}
Loading

0 comments on commit 90a1d04

Please sign in to comment.