forked from vllm-project/vllm
-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
0439542
commit c677099
Showing
5 changed files
with
36 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
5 changes: 3 additions & 2 deletions
5
vllm/model_executor/layers/quantization/compressed_tensors/schemes/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,11 @@ | ||
 from .compressed_tensors_scheme import CompressedTensorsScheme # noqa: F401 | ||
 from .compressed_tensors_unquantized import ( # noqa: F401 | ||
 CompressedTensorsUnquantized) | ||
-from .compressed_tensors_w4a16 import CompressedTensorsW4A16 # noqa: F401 | ||
 from .compressed_tensors_w4a16_24 import ( # noqa: F401 | ||
-CompressedTensorsW4A16Sparse24) | ||
+W4A16SPARSE24_SUPPORTED_BITS, CompressedTensorsW4A16Sparse24) | ||
 from .compressed_tensors_w8a8_dynamictoken import ( # noqa: F401, E501 | ||
 CompressedTensorsW8A8DynamicToken) | ||
 from .compressed_tensors_w8a8_statictensor import ( # noqa: F401, E501 | ||
 CompressedTensorsW8A8StaticTensor) | ||
+from .compressed_tensors_wNa16 import WNA16_SUPPORTED_BITS # noqa: F401 | ||
+from .compressed_tensors_wNa16 import CompressedTensorsWNA16 # noqa: F401 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters