Skip to content
This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Add githash to nm-vllm #299

Merged
merged 14 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
message(STATUS "Target device: ${VLLM_TARGET_DEVICE}")

include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)
include(${CMAKE_CURRENT_LIST_DIR}/cmake/dep.cmake)

#
# Supported python versions. These versions will be searched in order, the
Expand Down Expand Up @@ -215,7 +216,8 @@ define_gpu_extension_target(
COMPILE_FLAGS ${VLLM_GPU_FLAGS}
ARCHITECTURES ${VLLM_GPU_ARCHES}
INCLUDE_DIRECTORIES ${CUTLASS_INCLUDE_DIR};${CUTLASS_TOOLS_UTIL_INCLUDE_DIR}
WITH_SOABI)
WITH_SOABI
LIBRARIES cmake_git_version_tracking)

#
# _moe_C extension
Expand All @@ -232,7 +234,8 @@ define_gpu_extension_target(
SOURCES ${VLLM_MOE_EXT_SRC}
COMPILE_FLAGS ${VLLM_GPU_FLAGS}
ARCHITECTURES ${VLLM_GPU_ARCHES}
WITH_SOABI)
WITH_SOABI
LIBRARIES cmake_git_version_tracking)

#
# _punica_C extension
Expand Down Expand Up @@ -283,7 +286,8 @@ if (VLLM_PUNICA_GPU_ARCHES)
SOURCES ${VLLM_PUNICA_EXT_SRC}
COMPILE_FLAGS ${VLLM_PUNICA_GPU_FLAGS}
ARCHITECTURES ${VLLM_PUNICA_GPU_ARCHES}
WITH_SOABI)
WITH_SOABI
LIBRARIES cmake_git_version_tracking)
else()
message(WARNING "Unable to create _punica_C target because none of the "
"requested architectures (${VLLM_GPU_ARCHES}) are supported, i.e. >= 8.0")
Expand Down
6 changes: 6 additions & 0 deletions cmake/dep.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Third-party dependency: cmake-git-version-tracking.
#
# Declares the dependency with FetchContent and makes it available to the
# rest of the build. The name `cmake_git_version_tracking` is what the
# define_gpu_extension_target() calls in CMakeLists.txt pass under LIBRARIES,
# and the C++ binding sources include <git.h> and call git::CommitSHA1().
# NOTE(review): FetchContent populates at configure time, so a fresh
# configure presumably needs network access to the repository below - confirm
# this is acceptable for offline builds.
include(FetchContent)
FetchContent_Declare(cmake_git_version_tracking
GIT_REPOSITORY https://github.com/andrew-hardin/cmake-git-version-tracking.git
# Pinned to a full 40-character revision id rather than a branch name.
GIT_TAG 6c0cb87edd029ddfb403a8e24577c144a03605a6
)
FetchContent_MakeAvailable(cmake_git_version_tracking)
8 changes: 8 additions & 0 deletions collect_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
SystemEnv = namedtuple(
'SystemEnv',
[
'vllm_git_hash',
'torch_version',
'is_debug_build',
'cuda_compiled_version',
Expand Down Expand Up @@ -138,6 +139,11 @@ def get_conda_packages(run_lambda, patterns=None):
for name in patterns))


def get_vllm_git_hash():
    """Return the git hash reported by the installed vllm package.

    The value comes from ``vllm.githash()``. Environment collection is a
    diagnostic aid, so it must keep working when vllm is missing or broken:
    any failure to import vllm or to call ``githash()`` yields ``'N/A'``
    instead of aborting the whole report.

    Returns:
        Whatever ``vllm.githash()`` returns, or the string ``'N/A'`` when the
        hash cannot be obtained.
    """
    try:
        # Imported lazily so that merely loading this script does not require
        # vllm to be importable.
        import vllm
        return vllm.githash()
    except Exception:
        # Deliberately broad: a missing package, an extension that fails to
        # load, or a vllm build without githash() should all degrade to a
        # placeholder. NOTE(review): 'N/A' is presumed to match the
        # placeholder used by get_version_or_na - confirm.
        return 'N/A'


def get_gcc_version(run_lambda):
return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)')

Expand Down Expand Up @@ -536,6 +542,7 @@ def get_version_or_na(cfg, prefix):
gpu_topo = get_gpu_topo(run_lambda)

return SystemEnv(
vllm_git_hash=get_vllm_git_hash(),
torch_version=version_str,
is_debug_build=debug_mode_str,
python_version='{} ({}-bit runtime)'.format(
Expand Down Expand Up @@ -583,6 +590,7 @@ def get_version_or_na(cfg, prefix):
CMake version: {cmake_version}
Libc version: {libc_version}

vllm git hash: {vllm_git_hash}
Python version: {python_version}
Python platform: {python_platform}
Is CUDA available: {is_cuda_available}
Expand Down
6 changes: 6 additions & 0 deletions csrc/cpu/pybind.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,17 @@
#include "cuda_utils.h"
#include "ops.h"
#include <torch/extension.h>
#include <git.h>

// Returns the value of git::CommitSHA1() (declared in <git.h>) as a
// std::string. Exposed to Python as `githash` on the `ops` submodule below.
// NOTE(review): presumably the commit the extension was built from, as
// supplied by cmake-git-version-tracking - confirm.
std::string githash() { return std::string{git::CommitSHA1()}; }

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
// vLLM custom ops
pybind11::module ops = m.def_submodule("ops", "vLLM custom operators");
dhuangnm marked this conversation as resolved.
Show resolved Hide resolved

// githash
ops.def("githash", &githash, "Show nm-vllm git hash.");

// Attention ops
ops.def(
"paged_attention_v1",
Expand Down
5 changes: 4 additions & 1 deletion csrc/punica/punica_pybind.cpp
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
#include <torch/extension.h>

#include <git.h>
#include "punica_ops.h"

//====== pybind ======

#define DEFINE_pybind(name) m.def(#name, &name, #name);

// Returns the value of git::CommitSHA1() (declared in <git.h>) as a
// std::string. Registered on the module as `githash` in PYBIND11_MODULE below.
// NOTE(review): presumably the commit the extension was built from, as
// supplied by cmake-git-version-tracking - confirm.
std::string githash() { return std::string{git::CommitSHA1()}; }
dhuangnm marked this conversation as resolved.
Show resolved Hide resolved

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("githash", &githash, "nm-vllm git hash");
m.def("dispatch_bgmv", &dispatch_bgmv, "dispatch_bgmv");
m.def("dispatch_bgmv_low_level", &dispatch_bgmv_low_level,
"dispatch_bgmv_low_level");
Expand Down
6 changes: 6 additions & 0 deletions csrc/pybind.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,17 @@
#include "cuda_utils.h"
#include "ops.h"
#include <torch/extension.h>
#include <git.h>

// Returns the value of git::CommitSHA1() (declared in <git.h>) as a
// std::string. Exposed to Python as `githash` on the `ops` submodule below.
// NOTE(review): presumably the commit the extension was built from, as
// supplied by cmake-git-version-tracking - confirm.
std::string githash() { return std::string{git::CommitSHA1()}; }

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
// vLLM custom ops
pybind11::module ops = m.def_submodule("ops", "vLLM custom operators");

// githash
ops.def("githash", &githash, "Show nm-vllm git hash.");

// Attention ops
ops.def(
"paged_attention_v1",
Expand Down
6 changes: 6 additions & 0 deletions vllm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
__version__ = "0.5.0"

__all__ = [
"githash",
"LLM",
"ModelRegistry",
"SamplingParams",
Expand All @@ -29,3 +30,8 @@
"initialize_ray_cluster",
"PoolingParams",
]


def githash():
    """Return the git hash reported by the compiled ``vllm._C`` extension.

    The extension is imported inside the function, so the import only runs
    when ``githash()`` is called. The result is whatever
    ``vllm._C.ops.githash()`` returns.

    Raises:
        ImportError: if ``vllm._C`` (or its ``ops`` attribute) cannot be
            imported.
    """
    from vllm._C import ops as vllm_ops
    return vllm_ops.githash()
Loading