[Feat] Multi model support #931

Merged · 53 commits · Dec 22, 2023

Commits (53)
3ec00d0
Update Llama class to handle chat_format & caching
D4ve-R Nov 21, 2023
6e68a4b
Add settings.py
D4ve-R Nov 21, 2023
e63cffb
Add util.py & update __main__.py
D4ve-R Nov 21, 2023
55e33ab
multimodel
D4ve-R Nov 21, 2023
39a07d6
Merge branch 'multimodel'
D4ve-R Nov 21, 2023
5ab0010
update settings.py
D4ve-R Nov 21, 2023
45bfa07
cleanup
D4ve-R Nov 21, 2023
76c0168
delete util.py
D4ve-R Nov 21, 2023
97a6a21
Fix /v1/models endpoint
D4ve-R Nov 21, 2023
fb2a1e7
MultiLlama now iterable, app check-alive on "/"
D4ve-R Nov 21, 2023
3f150ac
instant model init if file is given
D4ve-R Nov 21, 2023
e71946c
backward compatibility
D4ve-R Nov 21, 2023
55a9767
revert model param mandatory
D4ve-R Nov 22, 2023
bb1857a
Merge branch 'main' of https://github.com/abetlen/llama-cpp-python
D4ve-R Nov 22, 2023
3c4b526
fix error
D4ve-R Nov 22, 2023
10a2d32
handle individual model config json
D4ve-R Nov 22, 2023
ee71f20
refactor
D4ve-R Nov 22, 2023
ea0fcca
revert chathandler/clip_model changes
D4ve-R Nov 22, 2023
6f5e60a
handle chat_handler in MultiLlama()
D4ve-R Nov 22, 2023
d9d696d
split settings into server/llama
D4ve-R Nov 23, 2023
e71fc92
reduce global vars
D4ve-R Nov 23, 2023
522f0bd
Update LlamaProxy to handle config files
D4ve-R Nov 23, 2023
a67e779
Merge branch 'main' of https://github.com/abetlen/llama-cpp-python
D4ve-R Nov 23, 2023
6e0ab3e
Add free method to LlamaProxy
D4ve-R Nov 24, 2023
9ed047c
Merge branch 'main' of https://github.com/abetlen/llama-cpp-python
D4ve-R Nov 25, 2023
ec9a9db
update arg parsers & install server alias
D4ve-R Nov 25, 2023
fd731d7
refactor cache settings
D4ve-R Nov 29, 2023
288fa85
change server executable name
D4ve-R Nov 29, 2023
b64742b
better var name
D4ve-R Nov 29, 2023
bc5cf51
whitespace
D4ve-R Nov 29, 2023
02a83f1
Merge branch 'abetlen:main' into main
D4ve-R Nov 29, 2023
b780c6e
Merge branch 'main' of github.com:D4ve-R/llama-cpp-python
D4ve-R Nov 29, 2023
5fd9892
Revert "whitespace"
D4ve-R Nov 29, 2023
7b1c17b
remove exe_name
D4ve-R Dec 1, 2023
a94b0de
Merge branch 'main' into D4ve-R/main
abetlen Dec 21, 2023
ec8265a
Merge branch 'main' of github.com:abetlen/llama_cpp_python into D4ve-…
abetlen Dec 21, 2023
ba36629
Fix merge bugs
abetlen Dec 21, 2023
315a82f
Fix type annotations
abetlen Dec 21, 2023
c5051be
Fix type annotations
abetlen Dec 21, 2023
7a3e11a
Fix uvicorn app factory
abetlen Dec 21, 2023
4f99ac6
Fix settings
abetlen Dec 21, 2023
3f2e6c1
Refactor server
abetlen Dec 22, 2023
3472b6f
Remove formatting fix
abetlen Dec 22, 2023
310e2e6
Format
abetlen Dec 22, 2023
5c9c35e
Use default model if not found in model settings
abetlen Dec 22, 2023
950f721
Fix
abetlen Dec 22, 2023
3d6c479
Merge branch 'main' into D4ve-R/main
abetlen Dec 22, 2023
8347a78
Cleanup
abetlen Dec 22, 2023
02ab0e2
Fix
abetlen Dec 22, 2023
fd1bf64
Fix
abetlen Dec 22, 2023
ecd8434
Remove unused CommandLineSettings
abetlen Dec 22, 2023
5286146
Cleanup
abetlen Dec 22, 2023
1b322b4
Support default name for copilot-codex models
abetlen Dec 22, 2023
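
Taken together, these commits let a single server process host several models: the commit list references a LlamaProxy/MultiLlama wrapper that loads each configured model, a fixed /v1/models endpoint that lists them, and per-request selection by model name. Below is a hedged client-side sketch of that flow, assuming the server keeps its OpenAI-compatible routes and that "mistral-instruct" stands in for whatever alias you actually configure:

```python
# Minimal client sketch using only the Python standard library.
# Assumes a local server on port 8000 with OpenAI-compatible routes;
# the alias "mistral-instruct" is a hypothetical placeholder.
import json
import urllib.request

BASE = "http://localhost:8000"

# List the models the server has loaded (the /v1/models endpoint fixed in 97a6a21).
with urllib.request.urlopen(f"{BASE}/v1/models") as resp:
    listing = json.load(resp)
print([m["id"] for m in listing["data"]])

# Pick one of the loaded models per request via the "model" field.
payload = {
    "model": "mistral-instruct",
    "messages": [{"role": "user", "content": "Say hello."}],
}
req = urllib.request.Request(
    f"{BASE}/v1/chat/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    reply = json.load(resp)
print(reply["choices"][0]["message"]["content"])
```
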
llama_cpp/server/__main__.py: 58 additions & 71 deletions
````diff
@@ -9,7 +9,7 @@
 
 Then run:
 ```
-uvicorn llama_cpp.server.app:app --reload
+uvicorn llama_cpp.server.app:create_app --reload
 ```
 
 or
@@ -21,81 +21,68 @@
 Then visit http://localhost:8000/docs to see the interactive API docs.
 
 """
+from __future__ import annotations
+
 import os
 import sys
 import argparse
-from typing import List, Literal, Union
 
 import uvicorn
 
-from llama_cpp.server.app import create_app, Settings
-
-def get_base_type(annotation):
-    if getattr(annotation, '__origin__', None) is Literal:
-        return type(annotation.__args__[0])
-    elif getattr(annotation, '__origin__', None) is Union:
-        non_optional_args = [arg for arg in annotation.__args__ if arg is not type(None)]
-        if non_optional_args:
-            return get_base_type(non_optional_args[0])
-    elif getattr(annotation, '__origin__', None) is list or getattr(annotation, '__origin__', None) is List:
-        return get_base_type(annotation.__args__[0])
-    else:
-        return annotation
-
-def contains_list_type(annotation) -> bool:
-    origin = getattr(annotation, '__origin__', None)
-
-    if origin is list or origin is List:
-        return True
-    elif origin in (Literal, Union):
-        return any(contains_list_type(arg) for arg in annotation.__args__)
-    else:
-        return False
-
-def parse_bool_arg(arg):
-    if isinstance(arg, bytes):
-        arg = arg.decode('utf-8')
-
-    true_values = {'1', 'on', 't', 'true', 'y', 'yes'}
-    false_values = {'0', 'off', 'f', 'false', 'n', 'no'}
-
-    arg_str = str(arg).lower().strip()
-
-    if arg_str in true_values:
-        return True
-    elif arg_str in false_values:
-        return False
-    else:
-        raise ValueError(f'Invalid boolean argument: {arg}')
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    for name, field in Settings.model_fields.items():
-        description = field.description
-        if field.default is not None and description is not None:
-            description += f" (default: {field.default})"
-        base_type = get_base_type(field.annotation) if field.annotation is not None else str
-        list_type = contains_list_type(field.annotation)
-        if base_type is not bool:
-            parser.add_argument(
-                f"--{name}",
-                dest=name,
-                nargs="*" if list_type else None,
-                type=base_type,
-                help=description,
-            )
-        if base_type is bool:
-            parser.add_argument(
-                f"--{name}",
-                dest=name,
-                type=parse_bool_arg,
-                help=f"{description}",
-            )
-
+from llama_cpp.server.app import create_app
+from llama_cpp.server.settings import (
+    Settings,
+    ServerSettings,
+    ModelSettings,
+    ConfigFileSettings,
+)
+from llama_cpp.server.cli import add_args_from_model, parse_model_from_args
+
+
+def main():
+    description = "🦙 Llama.cpp python server. Host your own LLMs!🚀"
+    parser = argparse.ArgumentParser(description=description)
+
+    add_args_from_model(parser, Settings)
+    parser.add_argument(
+        "--config_file",
+        type=str,
+        help="Path to a config file to load.",
+    )
+    server_settings: ServerSettings | None = None
+    model_settings: list[ModelSettings] = []
     args = parser.parse_args()
-    settings = Settings(**{k: v for k, v in vars(args).items() if v is not None})
-    app = create_app(settings=settings)
 
+    try:
+        # Load server settings from config_file if provided
+        config_file = os.environ.get("CONFIG_FILE", args.config_file)
+        if config_file:
+            if not os.path.exists(config_file):
+                raise ValueError(f"Config file {config_file} not found!")
+            with open(config_file, "rb") as f:
+                config_file_settings = ConfigFileSettings.model_validate_json(f.read())
+                server_settings = ServerSettings.model_validate(config_file_settings)
+                model_settings = config_file_settings.models
+        else:
+            server_settings = parse_model_from_args(ServerSettings, args)
+            model_settings = [parse_model_from_args(ModelSettings, args)]
+    except Exception as e:
+        print(e, file=sys.stderr)
+        parser.print_help()
+        sys.exit(1)
+    assert server_settings is not None
+    assert model_settings is not None
+    app = create_app(
+        server_settings=server_settings,
+        model_settings=model_settings,
+    )
     uvicorn.run(
-        app, host=os.getenv("HOST", settings.host), port=int(os.getenv("PORT", settings.port)),
-        ssl_keyfile=settings.ssl_keyfile, ssl_certfile=settings.ssl_certfile
+        app,
+        host=os.getenv("HOST", server_settings.host),
+        port=int(os.getenv("PORT", server_settings.port)),
+        ssl_keyfile=server_settings.ssl_keyfile,
+        ssl_certfile=server_settings.ssl_certfile,
+    )
+
+
+if __name__ == "__main__":
+    main()
````
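
For reference, here is a sketch of the kind of config file the new --config_file flag (or the CONFIG_FILE environment variable) consumes. The top-level shape follows ConfigFileSettings as used in main() above: server-level fields plus a models list. The per-model field names shown (model, model_alias, chat_format, n_ctx) are assumptions inferred from this PR's commit messages, not a confirmed schema; llama_cpp/server/settings.py is the authoritative source.

```python
# Write a hypothetical multi-model config file for the server.
# Per-model field names below are assumptions; check ModelSettings.
import json

config = {
    "host": "0.0.0.0",  # ServerSettings field (referenced in main() above)
    "port": 8000,       # ServerSettings field
    "models": [
        {
            "model": "models/mistral-7b-instruct.Q4_K_M.gguf",  # assumed field name
            "model_alias": "mistral-instruct",                  # assumed field name
            "chat_format": "mistral-instruct",                  # assumed field name
            "n_ctx": 4096,                                      # assumed field name
        },
        {
            "model": "models/llama-2-7b.Q4_K_M.gguf",
            "model_alias": "llama-2",
        },
    ],
}

with open("config.json", "w") as f:
    json.dump(config, f, indent=2)

# Start the server with either entry point handled by main():
#   python -m llama_cpp.server --config_file config.json
#   CONFIG_FILE=config.json python -m llama_cpp.server
```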