Skip to content

Commit

Permalink
[Frontend]-config-cli-args (vllm-project#7737)
Browse files Browse the repository at this point in the history
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
Co-authored-by: Kaunil Dhruv <kaunil_dhruv@intuit.com>
  • Loading branch information
3 people committed Aug 30, 2024
1 parent 98cef6a commit 058344f
Show file tree
Hide file tree
Showing 7 changed files with 185 additions and 1 deletion.
3 changes: 2 additions & 1 deletion docs/requirements-docs.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,6 @@ pydantic >= 2.8
torch
py-cpuinfo
transformers
mistral_common >= 1.3.4
openai # Required by docs/source/serving/openai_compatible_server.md's vllm.entrypoints.openai.cli_args
mistral_common >= 1.3.4
openai # Required by docs/source/serving/openai_compatible_server.md's vllm.entrypoints.openai.cli_args
26 changes: 26 additions & 0 deletions docs/source/serving/openai_compatible_server.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,32 @@ directory [here](https://github.com/vllm-project/vllm/tree/main/examples/)
:prog: vllm serve
```

### Config file

The `serve` module can also accept arguments from a config file in
`yaml` format. The arguments in the yaml must be specified using the
long form of the argument outlined [here](https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#command-line-arguments-for-the-server):

For example:

```yaml
# config.yaml

host: "127.0.0.1"
port: 6379
uvicorn-log-level: "info"
```
```bash
$ vllm serve SOME_MODEL --config config.yaml
```
---
**NOTE**
In case an argument is supplied using command line and the config file, the value from the commandline will take precedence.
The order of priorities is `command line > config file values > defaults`.

---

## Tool calling in the chat completion API
vLLM supports only named function calling in the chat completion API. The `tool_choice` options `auto` and `required` are **not yet supported** but on the roadmap.

Expand Down
1 change: 1 addition & 0 deletions requirements-common.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ soundfile # Required for audio processing
gguf == 0.9.1
importlib_metadata
mistral_common >= 1.3.4
pyyaml
2 changes: 2 additions & 0 deletions tests/data/test_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
port: 12312
tensor_parallel_size: 2
44 changes: 44 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,16 @@ def parser():
return parser


@pytest.fixture
def parser_with_config():
parser = FlexibleArgumentParser()
parser.add_argument('serve')
parser.add_argument('--config', type=str)
parser.add_argument('--port', type=int)
parser.add_argument('--tensor-parallel-size', type=int)
return parser


def test_underscore_to_dash(parser):
args = parser.parse_args(['--image_input_type', 'pixel_values'])
assert args.image_input_type == 'pixel_values'
Expand Down Expand Up @@ -176,3 +186,37 @@ def test_missing_required_argument(parser):
parser.add_argument('--required-arg', required=True)
with pytest.raises(SystemExit):
parser.parse_args([])


def test_cli_override_to_config(parser_with_config):
args = parser_with_config.parse_args([
'serve', '--config', './data/test_config.yaml',
'--tensor-parallel-size', '3'
])
assert args.tensor_parallel_size == 3
args = parser_with_config.parse_args([
'serve', '--tensor-parallel-size', '3', '--config',
'./data/test_config.yaml'
])
assert args.tensor_parallel_size == 3


def test_config_args(parser_with_config):
args = parser_with_config.parse_args(
['serve', '--config', './data/test_config.yaml'])
assert args.tensor_parallel_size == 2


def test_config_file(parser_with_config):
with pytest.raises(FileNotFoundError):
parser_with_config.parse_args(['serve', '--config', 'test_config.yml'])

with pytest.raises(ValueError):
parser_with_config.parse_args(
['serve', '--config', './data/test_config.json'])

with pytest.raises(ValueError):
parser_with_config.parse_args([
'serve', '--tensor-parallel-size', '3', '--config', '--batch-size',
'32'
])
9 changes: 9 additions & 0 deletions vllm/scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,15 @@ def main():
serve_parser.add_argument("model_tag",
type=str,
help="The model tag to serve")
serve_parser.add_argument(
"--config",
type=str,
default='',
required=False,
help="Read CLI options from a config file."
"Must be a YAML with the following options:"
"https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#command-line-arguments-for-the-server"
)
serve_parser = make_arg_parser(serve_parser)
serve_parser.set_defaults(dispatch_function=serve)

Expand Down
101 changes: 101 additions & 0 deletions vllm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import psutil
import torch
import torch.types
import yaml
from packaging.version import Version
from typing_extensions import ParamSpec, TypeIs, assert_never

Expand Down Expand Up @@ -1093,6 +1094,9 @@ def parse_args(self, args=None, namespace=None):
if args is None:
args = sys.argv[1:]

if '--config' in args:
args = FlexibleArgumentParser._pull_args_from_config(args)

# Convert underscores to dashes and vice versa in argument names
processed_args = []
for arg in args:
Expand All @@ -1109,6 +1113,103 @@ def parse_args(self, args=None, namespace=None):

return super().parse_args(processed_args, namespace)

@staticmethod
def _pull_args_from_config(args: List[str]) -> List[str]:
"""Method to pull arguments specified in the config file
into the command-line args variable.
The arguments in config file will be inserted between
the argument list.
example:
```yaml
port: 12323
tensor-parallel-size: 4
```
```python
$: vllm {serve,chat,complete} "facebook/opt-12B" \
--config config.yaml -tp 2
$: args = [
"serve,chat,complete",
"facebook/opt-12B",
'--config', 'config.yaml',
'-tp', '2'
]
$: args = [
"serve,chat,complete",
"facebook/opt-12B",
'--port', '12323',
'--tensor-parallel-size', '4',
'-tp', '2'
]
```
Please note how the config args are inserted after the sub command.
this way the order of priorities is maintained when these are args
parsed by super().
"""
assert args.count(
'--config') <= 1, "More than one config file specified!"

index = args.index('--config')
if index == len(args) - 1:
raise ValueError("No config file specified! \
Please check your command-line arguments.")

file_path = args[index + 1]

config_args = FlexibleArgumentParser._load_config_file(file_path)

# 0th index is for {serve,chat,complete}
# followed by config args
# followed by rest of cli args.
# maintaining this order will enforce the precedence
# of cli > config > defaults
args = [args[0]] + config_args + args[1:index] + args[index + 2:]

return args

@staticmethod
def _load_config_file(file_path: str) -> List[str]:
"""Loads a yaml file and returns the key value pairs as a
flattened list with argparse like pattern
```yaml
port: 12323
tensor-parallel-size: 4
```
returns:
processed_args: list[str] = [
'--port': '12323',
'--tensor-parallel-size': '4'
]
"""

extension: str = file_path.split('.')[-1]
if extension not in ('yaml', 'yml'):
raise ValueError(
"Config file must be of a yaml/yml type.\
%s supplied", extension)

# only expecting a flat dictionary of atomic types
processed_args: List[str] = []

config: Dict[str, Union[int, str]] = {}
try:
with open(file_path, 'r') as config_file:
config = yaml.safe_load(config_file)
except Exception as ex:
logger.error(
"Unable to read the config file at %s. \
Make sure path is correct", file_path)
raise ex

for key, value in config.items():
processed_args.append('--' + key)
processed_args.append(str(value))

return processed_args


async def _run_task_with_lock(task: Callable, lock: asyncio.Lock, *args,
**kwargs):
Expand Down

0 comments on commit 058344f

Please sign in to comment.