Skip to content

Commit

Permalink
Add support for exl2 quantization
Browse files Browse the repository at this point in the history
Mostly straightforward. Changes to existing code:

* Wrap quantizer parameters in a small wrapper to avoid passing
  around untyped tuples and needing to repack them as a dict.
* Move scratch space computation to warmup, because we need the
  maximum input sequence length to avoid allocating huge
  scratch buffers that OOM.
  • Loading branch information
danieldk committed May 28, 2024
1 parent 54e3e2f commit 17511ed
Show file tree
Hide file tree
Showing 21 changed files with 1,613 additions and 140 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 2,
"logprob": null,
"text": "<bos>"
},
{
"id": 2015,
"logprob": -672.5,
"text": "Test"
},
{
"id": 3853,
"logprob": -16.65625,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 604,
"logprob": -1.0458984,
"special": false,
"text": " for"
},
{
"id": 10665,
"logprob": -0.7919922,
"special": false,
"text": " API"
},
{
"id": 37875,
"logprob": -0.76171875,
"special": false,
"text": " endpoint"
},
{
"id": 235292,
"logprob": -1.0478516,
"special": false,
"text": ":"
},
{
"id": 109,
"logprob": -0.81152344,
"special": false,
"text": "\n\n"
},
{
"id": 688,
"logprob": -0.0390625,
"special": false,
"text": "**"
},
{
"id": 35447,
"logprob": -0.2232666,
"special": false,
"text": "Endpoint"
},
{
"id": 66058,
"logprob": -0.00017559528,
"special": false,
"text": ":**"
},
{
"id": 102001,
"logprob": -0.11376953,
"special": false,
"text": " `/"
},
{
"id": 3564,
"logprob": -0.25610352,
"special": false,
"text": "api"
}
],
"top_tokens": null
},
"generated_text": " for API endpoint:\n\n**Endpoint:** `/api"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 2,
"logprob": null,
"text": "<bos>"
},
{
"id": 2015,
"logprob": -672.0,
"text": "Test"
},
{
"id": 3853,
"logprob": -16.609375,
"text": " request"
}
],
"seed": 0,
"tokens": [
{
"id": 604,
"logprob": 0.0,
"special": false,
"text": " for"
},
{
"id": 10665,
"logprob": -0.16027832,
"special": false,
"text": " API"
},
{
"id": 53785,
"logprob": 0.0,
"special": false,
"text": " Gateway"
},
{
"id": 37875,
"logprob": -0.6328125,
"special": false,
"text": " endpoint"
},
{
"id": 235292,
"logprob": -0.20141602,
"special": false,
"text": ":"
},
{
"id": 109,
"logprob": 0.0,
"special": false,
"text": "\n\n"
},
{
"id": 688,
"logprob": -0.4741211,
"special": false,
"text": "**"
},
{
"id": 6217,
"logprob": -0.43286133,
"special": false,
"text": "URL"
},
{
"id": 66058,
"logprob": 0.0,
"special": false,
"text": ":**"
},
{
"id": 102001,
"logprob": 0.0,
"special": false,
"text": " `/"
}
],
"top_tokens": null
},
"generated_text": "Test request for API Gateway endpoint:\n\n**URL:** `/"
}
Loading

0 comments on commit 17511ed

Please sign in to comment.