From d530c431da69c3f08bd152707c78d0cdb872e003 Mon Sep 17 00:00:00 2001 From: Isaac Ong Date: Sun, 7 Jul 2024 02:31:27 -0700 Subject: [PATCH] Simplify flags --- README.md | 2 +- routellm/openai_server.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 6f6f633..7af53bc 100644 --- a/README.md +++ b/README.md @@ -99,7 +99,7 @@ python -m examples.router_chat --router mf --threshold 0.11593 In the above examples, GPT-4 and Mixtral 8x7B are used as the model pair, but you can modify this using the `strong-model` and `weak-model` arguments. -We leverage [LiteLLM](https://github.com/BerriAI/litellm) to support chat completions from a wide-range of open-source and closed models. In general, you need a setup an API key and point to the provider with the appropriate model name. Alternatively, you can also use **any OpenAI-compatible endpoint** by prefixing the model name with `openai/` using the `--alt-base-url` and `--alt-api-key` flags to point to the server. +We leverage [LiteLLM](https://github.com/BerriAI/litellm) to support chat completions from a wide range of open-source and closed models. In general, you need to set up an API key and point to the provider with the appropriate model name. Alternatively, you can also use **any OpenAI-compatible endpoint** by prefixing the model name with `openai/` and setting the `--base-url` and `--api-key` flags. Note that regardless of the model pair used, an `OPENAI_API_KEY` will be required to generate embeddings for both the `mf` and `sw_ranking` routers. 
diff --git a/routellm/openai_server.py b/routellm/openai_server.py index 012348f..f2761f0 100644 --- a/routellm/openai_server.py +++ b/routellm/openai_server.py @@ -39,8 +39,8 @@ async def lifespan(app): routers=args.routers, config=yaml.safe_load(open(args.config, "r")) if args.config else None, routed_pair=routed_pair, - alt_base_url=args.alt_base_url, - alt_api_key=args.alt_api_key, + api_base=args.base_url, + api_key=args.api_key, progress_bar=True, ) yield @@ -159,14 +159,14 @@ async def create_chat_completion(request: ChatCompletionRequest): choices=list(ROUTER_CLS.keys()), ) parser.add_argument( - "--alt-base-url", - help="The base URL used for LLM requests", + "--base-url", + help="The base URL used for all LLM requests", type=str, default=None, ) parser.add_argument( - "--alt-api-key", - help="The API key used for LLM requests", + "--api-key", + help="The API key used for all LLM requests", type=str, default=None, )