python: add check-requirements.sh and GitHub workflow #4585

Merged: 14 commits, Dec 29, 2023
3 changes: 2 additions & 1 deletion .devops/full-cuda.Dockerfile
@@ -14,7 +14,8 @@ ARG CUDA_DOCKER_ARCH=all
RUN apt-get update && \
    apt-get install -y build-essential python3 python3-pip git

-COPY requirements.txt requirements.txt
+COPY requirements.txt requirements.txt
+COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt
3 changes: 2 additions & 1 deletion .devops/full-rocm.Dockerfile
@@ -23,7 +23,8 @@ ARG ROCM_DOCKER_ARCH=\
    gfx1101 \
    gfx1102

-COPY requirements.txt requirements.txt
+COPY requirements.txt requirements.txt
+COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt
3 changes: 2 additions & 1 deletion .devops/full.Dockerfile
@@ -5,7 +5,8 @@ FROM ubuntu:$UBUNTU_VERSION as build
RUN apt-get update && \
    apt-get install -y build-essential python3 python3-pip git

-COPY requirements.txt requirements.txt
+COPY requirements.txt requirements.txt
+COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt
3 changes: 2 additions & 1 deletion .devops/main-rocm.Dockerfile
@@ -23,7 +23,8 @@ ARG ROCM_DOCKER_ARCH=\
    gfx1101 \
    gfx1102

-COPY requirements.txt requirements.txt
+COPY requirements.txt requirements.txt
+COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt
29 changes: 29 additions & 0 deletions .github/workflows/python-check-requirements.yml
@@ -0,0 +1,29 @@
name: Python check requirements.txt

on:
  push:
    paths:
      - 'scripts/check-requirements.sh'
      - 'convert*.py'
      - 'requirements.txt'
      - 'requirements/*.txt'
  pull_request:
    paths:
      - 'scripts/check-requirements.sh'
      - 'convert*.py'
      - 'requirements.txt'
      - 'requirements/*.txt'

jobs:
  python-check-requirements:
    runs-on: ubuntu-latest
    name: check-requirements
    steps:
      - name: Check out source repository
        uses: actions/checkout@v3
      - name: Set up Python environment
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"
      - name: Run check-requirements.sh script
        run: bash scripts/check-requirements.sh nocleanup
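
The check-requirements.sh script itself is not part of this excerpt, but the workflow above shows its contract: it is invoked with an optional nocleanup argument and is expected to fail if any convert script's requirements are broken. As a rough sketch of that kind of check (the file layout and helper name here are assumptions, not the PR's actual implementation), each requirements file can be installed into a throwaway virtual environment:

# Hypothetical sketch only: approximates the kind of per-file check a script
# like scripts/check-requirements.sh could perform. File layout and helper
# name are assumptions, not taken from the PR.
import subprocess
import sys
import tempfile
import venv
from pathlib import Path

def check_requirements_file(req_file: Path) -> bool:
    """Try to pip-install a single requirements file into a fresh venv."""
    with tempfile.TemporaryDirectory() as tmp:
        env_dir = Path(tmp) / "venv"
        venv.EnvBuilder(with_pip=True).create(env_dir)
        pip = env_dir / "bin" / "pip"  # POSIX layout; the workflow runs on ubuntu-latest
        result = subprocess.run(
            [str(pip), "install", "-r", str(req_file)],
            capture_output=True,
        )
        return result.returncode == 0

if __name__ == "__main__":
    failed = [f for f in Path("requirements").glob("*.txt") if not check_requirements_file(f)]
    sys.exit(1 if failed else 0)

The real script presumably also exercises the convert-*.py entry points against each environment; the sketch only covers installability.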
95 changes: 50 additions & 45 deletions convert-hf-to-gguf.py
@@ -238,7 +238,7 @@ def _set_vocab_gpt2(self):
        tokens: list[bytearray] = []
        toktypes: list[int] = []

-        from transformers import AutoTokenizer  # type: ignore[attr-defined]
+        from transformers import AutoTokenizer
        tokenizer = AutoTokenizer.from_pretrained(dir_model)
        vocab_size = hparams.get("vocab_size", len(tokenizer.vocab))
        assert max(tokenizer.vocab.values()) < vocab_size
@@ -852,7 +852,7 @@ def set_gguf_parameters(self):
        hparams = self.hparams
        block_count = hparams["num_hidden_layers"]

-        self.gguf_writer.add_name(dir_model.name)
+        self.gguf_writer.add_name(self.dir_model.name)
        self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
        self.gguf_writer.add_embedding_length(hparams["hidden_size"])
        self.gguf_writer.add_block_count(block_count)
@@ -898,7 +898,7 @@ def set_vocab(self):
        tokens: list[bytearray] = []
        toktypes: list[int] = []

-        from transformers import AutoTokenizer  # type: ignore[attr-defined]
+        from transformers import AutoTokenizer
        tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
        vocab_size = hparams["vocab_size"]
        assert max(tokenizer.get_vocab().values()) < vocab_size
@@ -1119,57 +1119,62 @@ def parse_args() -> argparse.Namespace:
    return parser.parse_args()


-args = parse_args()
+def main() -> None:
+    args = parse_args()

-dir_model = args.model
+    dir_model = args.model

-if args.awq_path:
-    sys.path.insert(1, str(Path(__file__).parent / 'awq-py'))
-    from awq.apply_awq import add_scale_weights
-    tmp_model_path = args.model / "weighted_model"
-    dir_model = tmp_model_path
-    if tmp_model_path.is_dir():
-        print(f"{tmp_model_path} exists as a weighted model.")
-    else:
-        tmp_model_path.mkdir(parents=True, exist_ok=True)
-        print("Saving new weighted model ...")
-        add_scale_weights(str(args.model), str(args.awq_path), str(tmp_model_path))
-        print(f"Saved weighted model at {tmp_model_path}.")
+    if args.awq_path:
+        sys.path.insert(1, str(Path(__file__).parent / 'awq-py'))
+        from awq.apply_awq import add_scale_weights
+        tmp_model_path = args.model / "weighted_model"
+        dir_model = tmp_model_path
+        if tmp_model_path.is_dir():
+            print(f"{tmp_model_path} exists as a weighted model.")
+        else:
+            tmp_model_path.mkdir(parents=True, exist_ok=True)
+            print("Saving new weighted model ...")
+            add_scale_weights(str(args.model), str(args.awq_path), str(tmp_model_path))
+            print(f"Saved weighted model at {tmp_model_path}.")

-if not dir_model.is_dir():
-    print(f'Error: {args.model} is not a directory', file=sys.stderr)
-    sys.exit(1)
+    if not dir_model.is_dir():
+        print(f'Error: {args.model} is not a directory', file=sys.stderr)
+        sys.exit(1)

-ftype_map = {
-    "f32": gguf.GGMLQuantizationType.F32,
-    "f16": gguf.GGMLQuantizationType.F16,
-}
+    ftype_map = {
+        "f32": gguf.GGMLQuantizationType.F32,
+        "f16": gguf.GGMLQuantizationType.F16,
+    }

-if args.outfile is not None:
-    fname_out = args.outfile
-else:
-    # output in the same directory as the model by default
-    fname_out = dir_model / f'ggml-model-{args.outtype}.gguf'
+    if args.outfile is not None:
+        fname_out = args.outfile
+    else:
+        # output in the same directory as the model by default
+        fname_out = dir_model / f'ggml-model-{args.outtype}.gguf'

-print(f"Loading model: {dir_model.name}")
+    print(f"Loading model: {dir_model.name}")

-hparams = Model.load_hparams(dir_model)
+    hparams = Model.load_hparams(dir_model)

-with torch.inference_mode():
-    model_class = Model.from_model_architecture(hparams["architectures"][0])
-    model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian)
+    with torch.inference_mode():
+        model_class = Model.from_model_architecture(hparams["architectures"][0])
+        model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian)

-    print("Set model parameters")
-    model_instance.set_gguf_parameters()
+        print("Set model parameters")
+        model_instance.set_gguf_parameters()

-    print("Set model tokenizer")
-    model_instance.set_vocab()
+        print("Set model tokenizer")
+        model_instance.set_vocab()

-    if args.vocab_only:
-        print(f"Exporting model vocab to '{fname_out}'")
-        model_instance.write_vocab()
-    else:
-        print(f"Exporting model to '{fname_out}'")
-        model_instance.write()
+        if args.vocab_only:
+            print(f"Exporting model vocab to '{fname_out}'")
+            model_instance.write_vocab()
+        else:
+            print(f"Exporting model to '{fname_out}'")
+            model_instance.write()

-    print(f"Model successfully exported to '{fname_out}'")
+        print(f"Model successfully exported to '{fname_out}'")
+
+
+if __name__ == '__main__':
+    main()
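
The main() refactor above means that importing convert-hf-to-gguf.py no longer starts a conversion; everything below parse_args() now runs only under the __main__ guard, which a dependency check can take advantage of. A minimal sketch of what that enables, loading the hyphenated file as a module without side effects (the helper name is illustrative, not from the PR):

# Illustrative sketch: load convert-hf-to-gguf.py as a module without running a
# conversion. With the main() refactor above, exec_module() only defines the
# functions and classes; the conversion itself runs only under __main__.
import importlib.util
from pathlib import Path

def load_convert_script(path: str):
    # The hyphen in the filename rules out a plain "import", so build the
    # module spec from the file location instead.
    spec = importlib.util.spec_from_file_location(Path(path).stem.replace("-", "_"), path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)  # imports torch/gguf/etc., but does not convert
    return module

convert = load_convert_script("convert-hf-to-gguf.py")
print(hasattr(convert, "main"))  # True after this PR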
183 changes: 92 additions & 91 deletions convert-lora-to-ggml.py
@@ -47,95 +47,96 @@ def write_tensor_header(fout: BinaryIO, name: str, shape: Sequence[int], data_ty
    fout.seek((fout.tell() + 31) & -32)


-if len(sys.argv) < 2:
-    print(f"Usage: python {sys.argv[0]} <path> [arch]")
-    print(
-        "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
-    )
-    print(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)")
-    sys.exit(1)
-
-input_json = os.path.join(sys.argv[1], "adapter_config.json")
-input_model = os.path.join(sys.argv[1], "adapter_model.bin")
-output_path = os.path.join(sys.argv[1], "ggml-adapter-model.bin")
-
-model = torch.load(input_model, map_location="cpu")
-arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama"
-
-if arch_name not in gguf.MODEL_ARCH_NAMES.values():
-    print(f"Error: unsupported architecture {arch_name}")
-    sys.exit(1)
-
-arch = list(gguf.MODEL_ARCH_NAMES.keys())[list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)]
-name_map = gguf.TensorNameMap(arch, 200) # 200 layers ought to be enough for anyone
-
-with open(input_json, "r") as f:
-    params = json.load(f)
-
-if params["peft_type"] != "LORA":
-    print(f"Error: unsupported adapter type {params['peft_type']}, expected LORA")
-    sys.exit(1)
-
-if params["fan_in_fan_out"] is True:
-    print("Error: param fan_in_fan_out is not supported")
-    sys.exit(1)
-
-if params["bias"] is not None and params["bias"] != "none":
-    print("Error: param bias is not supported")
-    sys.exit(1)
-
-# TODO: these seem to be layers that have been trained but without lora.
-# doesn't seem widely used but eventually should be supported
-if params["modules_to_save"] is not None and len(params["modules_to_save"]) > 0:
-    print("Error: param modules_to_save is not supported")
-    sys.exit(1)
-
-with open(output_path, "wb") as fout:
-    fout.truncate()
-
-    write_file_header(fout, params)
-    for k, v in model.items():
-        orig_k = k
-        if k.endswith(".default.weight"):
-            k = k.replace(".default.weight", ".weight")
-        if k in ["llama_proj.weight", "llama_proj.bias"]:
-            continue
-        if k.endswith("lora_A.weight"):
-            if v.dtype != torch.float16 and v.dtype != torch.float32:
-                v = v.float()
-            v = v.T
-        else:
-            v = v.float()
-
-        t = v.detach().numpy()
-
-        prefix = "base_model.model."
-        if k.startswith(prefix):
-            k = k[len(prefix) :]
-
-        lora_suffixes = (".lora_A.weight", ".lora_B.weight")
-        if k.endswith(lora_suffixes):
-            suffix = k[-len(lora_suffixes[0]):]
-            k = k[: -len(lora_suffixes[0])]
-        else:
-            print(f"Error: unrecognized tensor name {orig_k}")
-            sys.exit(1)
-
-        tname = name_map.get_name(k)
-        if tname is None:
-            print(f"Error: could not map tensor name {orig_k}")
-            print(" Note: the arch parameter must be specified if the model is not llama")
-            sys.exit(1)
-
-        if suffix == ".lora_A.weight":
-            tname += ".weight.loraA"
-        elif suffix == ".lora_B.weight":
-            tname += ".weight.loraB"
-        else:
-            assert False
-
-        print(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB")
-        write_tensor_header(fout, tname, t.shape, t.dtype)
-        t.tofile(fout)
-
-    print(f"Converted {input_json} and {input_model} to {output_path}")
+if __name__ == '__main__':
+    if len(sys.argv) < 2:
+        print(f"Usage: python {sys.argv[0]} <path> [arch]")
+        print(
+            "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
+        )
+        print(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)")
+        sys.exit(1)
+
+    input_json = os.path.join(sys.argv[1], "adapter_config.json")
+    input_model = os.path.join(sys.argv[1], "adapter_model.bin")
+    output_path = os.path.join(sys.argv[1], "ggml-adapter-model.bin")
+
+    model = torch.load(input_model, map_location="cpu")
+    arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama"
+
+    if arch_name not in gguf.MODEL_ARCH_NAMES.values():
+        print(f"Error: unsupported architecture {arch_name}")
+        sys.exit(1)
+
+    arch = list(gguf.MODEL_ARCH_NAMES.keys())[list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)]
+    name_map = gguf.TensorNameMap(arch, 200) # 200 layers ought to be enough for anyone
+
+    with open(input_json, "r") as f:
+        params = json.load(f)
+
+    if params["peft_type"] != "LORA":
+        print(f"Error: unsupported adapter type {params['peft_type']}, expected LORA")
+        sys.exit(1)
+
+    if params["fan_in_fan_out"] is True:
+        print("Error: param fan_in_fan_out is not supported")
+        sys.exit(1)
+
+    if params["bias"] is not None and params["bias"] != "none":
+        print("Error: param bias is not supported")
+        sys.exit(1)
+
+    # TODO: these seem to be layers that have been trained but without lora.
+    # doesn't seem widely used but eventually should be supported
+    if params["modules_to_save"] is not None and len(params["modules_to_save"]) > 0:
+        print("Error: param modules_to_save is not supported")
+        sys.exit(1)
+
+    with open(output_path, "wb") as fout:
+        fout.truncate()
+
+        write_file_header(fout, params)
+        for k, v in model.items():
+            orig_k = k
+            if k.endswith(".default.weight"):
+                k = k.replace(".default.weight", ".weight")
+            if k in ["llama_proj.weight", "llama_proj.bias"]:
+                continue
+            if k.endswith("lora_A.weight"):
+                if v.dtype != torch.float16 and v.dtype != torch.float32:
+                    v = v.float()
+                v = v.T
+            else:
+                v = v.float()
+
+            t = v.detach().numpy()
+
+            prefix = "base_model.model."
+            if k.startswith(prefix):
+                k = k[len(prefix) :]
+
+            lora_suffixes = (".lora_A.weight", ".lora_B.weight")
+            if k.endswith(lora_suffixes):
+                suffix = k[-len(lora_suffixes[0]):]
+                k = k[: -len(lora_suffixes[0])]
+            else:
+                print(f"Error: unrecognized tensor name {orig_k}")
+                sys.exit(1)
+
+            tname = name_map.get_name(k)
+            if tname is None:
+                print(f"Error: could not map tensor name {orig_k}")
+                print(" Note: the arch parameter must be specified if the model is not llama")
+                sys.exit(1)
+
+            if suffix == ".lora_A.weight":
+                tname += ".weight.loraA"
+            elif suffix == ".lora_B.weight":
+                tname += ".weight.loraB"
+            else:
+                assert False
+
+            print(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB")
+            write_tensor_header(fout, tname, t.shape, t.dtype)
+            t.tofile(fout)
+
+        print(f"Converted {input_json} and {input_model} to {output_path}")
1 change: 1 addition & 0 deletions convert-persimmon-to-gguf.py
100644 → 100755
@@ -1,3 +1,4 @@
+#!/usr/bin/env python3
import torch
import os
from pprint import pprint
3 changes: 0 additions & 3 deletions requirements-hf-to-gguf.txt

This file was deleted.
