
Commit: Add githash to nm-vllm (#299)
Add git hash information to nm-vllm:

```
>>> import vllm
>>> vllm.githash()
'106796861914146372aba9386aeff9361edfb34d'
```

---------

Co-authored-by: dhuangnm <dhuang@MacBook-Pro-2.local>
dhuangnm and dhuangnm authored Jun 19, 2024
1 parent 8ad8c8a commit d8da97b
Showing 6 changed files with 46 additions and 3 deletions.
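
For context before the per-file diffs, a hedged sketch of how the new vllm.githash() helper might be used to record build provenance. The run_metadata dict and its keys are illustrative only, not part of this commit, and the snippet assumes an nm-vllm build that already includes this change:

```
import vllm

# Capture the exact nm-vllm source revision alongside other run metadata so
# results can be traced back to the build that produced them.
run_metadata = {
    "vllm_version": vllm.__version__,   # "0.5.0" at the time of this commit
    "vllm_git_hash": vllm.githash(),    # full 40-character commit SHA
}
print(run_metadata)
```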
10 changes: 7 additions & 3 deletions CMakeLists.txt
@@ -8,6 +8,7 @@ message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
message(STATUS "Target device: ${VLLM_TARGET_DEVICE}")

include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)
include(${CMAKE_CURRENT_LIST_DIR}/cmake/dep.cmake)

#
# Supported python versions. These versions will be searched in order, the
@@ -206,7 +207,8 @@ define_gpu_extension_target(
ARCHITECTURES ${VLLM_GPU_ARCHES}
INCLUDE_DIRECTORIES ${CUTLASS_INCLUDE_DIR};${CUTLASS_TOOLS_UTIL_INCLUDE_DIR}
USE_SABI 3
-WITH_SOABI)
+WITH_SOABI
+LIBRARIES cmake_git_version_tracking)

#
# _moe_C extension
@@ -224,7 +226,8 @@ define_gpu_extension_target(
COMPILE_FLAGS ${VLLM_GPU_FLAGS}
ARCHITECTURES ${VLLM_GPU_ARCHES}
USE_SABI 3
-WITH_SOABI)
+WITH_SOABI
+LIBRARIES cmake_git_version_tracking)

#
# _punica_C extension
@@ -276,7 +279,8 @@ if (VLLM_PUNICA_GPU_ARCHES)
COMPILE_FLAGS ${VLLM_PUNICA_GPU_FLAGS}
ARCHITECTURES ${VLLM_PUNICA_GPU_ARCHES}
USE_SABI 3
-WITH_SOABI)
+WITH_SOABI
+LIBRARIES cmake_git_version_tracking)
else()
message(WARNING "Unable to create _punica_C target because none of the "
"requested architectures (${VLLM_GPU_ARCHES}) are supported, i.e. >= 8.0")
6 changes: 6 additions & 0 deletions cmake/dep.cmake
@@ -0,0 +1,6 @@
include(FetchContent)
FetchContent_Declare(cmake_git_version_tracking
GIT_REPOSITORY https://github.com/andrew-hardin/cmake-git-version-tracking.git
GIT_TAG 6c0cb87edd029ddfb403a8e24577c144a03605a6
)
FetchContent_MakeAvailable(cmake_git_version_tracking)
13 changes: 13 additions & 0 deletions collect_env.py
@@ -15,13 +15,16 @@
try:
    import torch
    TORCH_AVAILABLE = True
    installed_path = os.path.dirname(torch.__file__)
    sys.path.insert(0, os.path.dirname(installed_path))
except (ImportError, NameError, AttributeError, OSError):
    TORCH_AVAILABLE = False

# System Environment Information
SystemEnv = namedtuple(
    'SystemEnv',
    [
        'vllm_git_hash',
        'torch_version',
        'is_debug_build',
        'cuda_compiled_version',
@@ -140,6 +143,14 @@ def get_conda_packages(run_lambda, patterns=None):
for name in patterns))


def get_vllm_git_hash():
    try:
        import vllm
        return vllm.githash()
    except ImportError:
        return None


def get_gcc_version(run_lambda):
    return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)')

@@ -538,6 +549,7 @@ def get_version_or_na(cfg, prefix):
    gpu_topo = get_gpu_topo(run_lambda)

    return SystemEnv(
        vllm_git_hash=get_vllm_git_hash(),
        torch_version=version_str,
        is_debug_build=debug_mode_str,
        python_version='{} ({}-bit runtime)'.format(
@@ -614,6 +626,7 @@ def get_version_or_na(cfg, prefix):
ROCM Version: {rocm_version}
Neuron SDK Version: {neuron_sdk_version}
vLLM Version: {vllm_version}
vLLM Git Hash: {vllm_git_hash}
vLLM Build Flags:
{vllm_build_flags}
GPU Topology:
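
With this change the environment report gains a "vLLM Git Hash" line. A minimal sketch of calling the new helper directly, grounded in the function added above; it returns None when vllm cannot be imported:

```
from collect_env import get_vllm_git_hash

# Prints the nm-vllm commit SHA, or None if the vllm package is not importable.
print(get_vllm_git_hash())
```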
7 changes: 7 additions & 0 deletions csrc/cpu/torch_bindings.cpp
@@ -2,11 +2,18 @@
#include "ops.h"
#include "registration.h"

#include <git.h>
#include <torch/library.h>

std::string githash() { return std::string{git::CommitSHA1()}; }

TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
  // vLLM custom ops

  // Show vllm git hash
  ops.def("githash", &githash);
  ops.impl("githash", torch::kCPU, &githash);

  // Attention ops
  // Compute the attention between an input query and the cached keys/values
  // using PagedAttention.
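
The ops.def/ops.impl pair registers githash under the extension's namespace, so it becomes reachable through torch.ops. A hedged sketch of calling the op directly, assuming a build that contains this commit and that the compiled _C extension has been loaded (importing vllm does this):

```
import torch
import vllm  # loads the compiled _C extension, which registers its custom ops

# githash takes no tensor arguments, so it can be called like any other custom op.
print(torch.ops._C.githash())
```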
7 changes: 7 additions & 0 deletions csrc/torch_bindings.cpp
@@ -3,8 +3,11 @@
#include "ops.h"
#include "registration.h"

#include <git.h>
#include <torch/library.h>

std::string githash() { return std::string{git::CommitSHA1()}; }

// Note on op signatures:
// The X_meta signatures are for the meta functions corresponding to op X.
// They must be kept in sync with the signature for X. Generally, only
@@ -18,6 +21,10 @@
TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
  // vLLM custom ops

  // Show vllm git hash
  ops.def("githash", &githash);
  ops.impl("githash", torch::kCUDA, &githash);

  // Attention ops
  // Compute the attention between an input query and the cached
  // keys/values using PagedAttention.
6 changes: 6 additions & 0 deletions vllm/__init__.py
@@ -16,6 +16,7 @@
__version__ = "0.5.0"

__all__ = [
    "githash",
    "LLM",
    "ModelRegistry",
    "PromptStrictInputs",
@@ -33,3 +34,8 @@
"initialize_ray_cluster",
"PoolingParams",
]


def githash():
    import torch
    return torch.ops._C.githash()
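
Because both vllm.githash and the underlying op exist only in builds that include this change, callers targeting mixed environments may want to fail soft. A hedged sketch; the safe_githash helper and the "unknown" fallback are illustrative, not part of this commit:

```
import vllm

def safe_githash():
    # Older nm-vllm builds expose neither vllm.githash nor the _C githash op;
    # fall back instead of raising.
    try:
        return vllm.githash()
    except (AttributeError, RuntimeError):
        return "unknown"

print(safe_githash())
```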

4 comments on commit d8da97b

@github-actions

bigger_is_better

Benchmark suite Current: d8da97b Previous: 8ad8c8a Ratio
{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.8.17 (default, Jun 7 2023, 12:29:56) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 2.4225222844514933 prompts/s
{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.8.17 (default, Jun 7 2023, 12:29:56) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 930.2485572293734 tokens/s

This comment was automatically generated by a workflow using github-action-benchmark.

@github-actions

bigger_is_better

Benchmark suite Current: d8da97b Previous: 8ad8c8a Ratio
{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 2.432386252736635 prompts/s
{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 934.0363210508677 tokens/s

This comment was automatically generated by a workflow using github-action-benchmark.

@github-actions

bigger_is_better

Benchmark suite Current: d8da97b Previous: 8ad8c8a Ratio
{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.11.4 (main, Jun 7 2023, 11:01:02) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 2.445825995665408 prompts/s
{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.11.4 (main, Jun 7 2023, 11:01:02) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 939.1971823355167 tokens/s

This comment was automatically generated by a workflow using github-action-benchmark.

@github-actions

bigger_is_better

Benchmark suite Current: d8da97b Previous: 8ad8c8a Ratio
{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.9.17 (main, Jun 7 2023, 12:34:12) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 2.4505808553524178 prompts/s 2.4329362842575506 prompts/s 0.99
{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.9.17 (main, Jun 7 2023, 12:34:12) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 941.0230484553284 tokens/s 934.2475331548994 tokens/s 0.99

This comment was automatically generated by a workflow using github-action-benchmark.
