neuralmagic · dhuangnm · Jun 19, 2024 · Jun 10, 2024 · Jun 10, 2024 · Jun 11, 2024
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -8,6 +8,7 @@ message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
 message(STATUS "Target device: ${VLLM_TARGET_DEVICE}")
 
 include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)
+include(${CMAKE_CURRENT_LIST_DIR}/cmake/dep.cmake)
 
 #
 # Supported python versions.  These versions will be searched in order, the
@@ -215,7 +216,8 @@ define_gpu_extension_target(
   COMPILE_FLAGS ${VLLM_GPU_FLAGS}
   ARCHITECTURES ${VLLM_GPU_ARCHES}
   INCLUDE_DIRECTORIES ${CUTLASS_INCLUDE_DIR};${CUTLASS_TOOLS_UTIL_INCLUDE_DIR}
-  WITH_SOABI)
+  WITH_SOABI
+  LIBRARIES cmake_git_version_tracking)
 
 #
 # _moe_C extension
@@ -232,7 +234,8 @@ define_gpu_extension_target(
   SOURCES ${VLLM_MOE_EXT_SRC}
   COMPILE_FLAGS ${VLLM_GPU_FLAGS}
   ARCHITECTURES ${VLLM_GPU_ARCHES}
-  WITH_SOABI)
+  WITH_SOABI
+  LIBRARIES cmake_git_version_tracking)
 
 #
 # _punica_C extension
@@ -283,7 +286,8 @@ if (VLLM_PUNICA_GPU_ARCHES)
     SOURCES ${VLLM_PUNICA_EXT_SRC}
     COMPILE_FLAGS ${VLLM_PUNICA_GPU_FLAGS}
     ARCHITECTURES ${VLLM_PUNICA_GPU_ARCHES}
-    WITH_SOABI)
+    WITH_SOABI
+    LIBRARIES cmake_git_version_tracking)
 else()
   message(WARNING "Unable to create _punica_C target because none of the "
     "requested architectures (${VLLM_GPU_ARCHES}) are supported, i.e. >= 8.0")

diff --git a/cmake/dep.cmake b/cmake/dep.cmake
@@ -0,0 +1,14 @@
+#
+# Third-party dependencies fetched at configure time.
+#
+include(FetchContent)
+
+# cmake-git-version-tracking: supplies the `cmake_git_version_tracking`
+# library that the extension targets in CMakeLists.txt link against.
+# GIT_TAG is a full commit hash so the fetched revision is fixed.
+FetchContent_Declare(
+  cmake_git_version_tracking
+  GIT_REPOSITORY https://github.com/andrew-hardin/cmake-git-version-tracking.git
+  GIT_TAG        6c0cb87edd029ddfb403a8e24577c144a03605a6
+)
+FetchContent_MakeAvailable(cmake_git_version_tracking)
diff --git a/collect_env.py b/collect_env.py
@@ -15,13 +15,16 @@
 try:
     import torch
     TORCH_AVAILABLE = True
+    installed_path = os.path.dirname(torch.__file__)
+    sys.path.insert(0, os.path.dirname(installed_path))
 except (ImportError, NameError, AttributeError, OSError):
     TORCH_AVAILABLE = False
 
 # System Environment Information
 SystemEnv = namedtuple(
     'SystemEnv',
     [
+        'vllm_git_hash',
         'torch_version',
         'is_debug_build',
         'cuda_compiled_version',
@@ -138,6 +141,20 @@ def get_conda_packages(run_lambda, patterns=None):
                                                          for name in patterns))
 
 
+def get_vllm_git_hash():
+    """Return the git hash reported by the installed vLLM, or 'N/A'.
+
+    Collection is best-effort: like the guarded torch import in this script,
+    a vLLM that cannot be imported, fails to load, or does not expose
+    ``githash`` is reported as 'N/A' instead of aborting the report.
+    """
+    try:
+        import vllm
+        return vllm.githash()
+    except (ImportError, AttributeError, OSError):
+        return 'N/A'
+
+
 def get_gcc_version(run_lambda):
     return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)')
 
@@ -536,6 +547,7 @@ def get_version_or_na(cfg, prefix):
     gpu_topo = get_gpu_topo(run_lambda)
 
     return SystemEnv(
+        vllm_git_hash=get_vllm_git_hash(),
         torch_version=version_str,
         is_debug_build=debug_mode_str,
         python_version='{} ({}-bit runtime)'.format(
@@ -607,6 +619,7 @@ def get_version_or_na(cfg, prefix):
 ROCM Version: {rocm_version}
 Neuron SDK Version: {neuron_sdk_version}
 vLLM Version: {vllm_version}
+vLLM Git Hash: {vllm_git_hash}
 vLLM Build Flags:
 {vllm_build_flags}
 GPU Topology:

diff --git a/csrc/cpu/pybind.cpp b/csrc/cpu/pybind.cpp
@@ -2,11 +2,22 @@
 #include "cuda_utils.h"
 #include "ops.h"
 #include <torch/extension.h>
+#include <git.h>
+
+// Returns the value of git::CommitSHA1() (declared in <git.h>) as a
+// std::string so it can be handed to Python through pybind11.
+std::string githash() {
+  const auto commit_sha = git::CommitSHA1();
+  return std::string{commit_sha};
+}
 
 PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
   // vLLM custom ops
   pybind11::module ops = m.def_submodule("ops", "vLLM custom operators");
 
+  // Version info: expose githash() as ops.githash.
+  ops.def("githash", &githash, "Show nm-vllm git hash.");
+
   // Attention ops
   ops.def(
     "paged_attention_v1",

diff --git a/csrc/pybind.cpp b/csrc/pybind.cpp
@@ -2,11 +2,22 @@
 #include "cuda_utils.h"
 #include "ops.h"
 #include <torch/extension.h>
+#include <git.h>
+
+// Returns the value of git::CommitSHA1() (declared in <git.h>) as a
+// std::string so it can be handed to Python through pybind11.
+std::string githash() {
+  const auto commit_sha = git::CommitSHA1();
+  return std::string{commit_sha};
+}
 
 PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
   // vLLM custom ops
   pybind11::module ops = m.def_submodule("ops", "vLLM custom operators");
 
+  // Version info: expose githash() as ops.githash.
+  ops.def("githash", &githash, "Show nm-vllm git hash.");
+
   // Attention ops
   ops.def(
     "paged_attention_v1",

diff --git a/vllm/__init__.py b/vllm/__init__.py
@@ -15,6 +15,7 @@
 __version__ = "0.5.0"
 
 __all__ = [
+    "githash",
     "LLM",
     "ModelRegistry",
     "SamplingParams",
@@ -29,3 +30,13 @@
     "initialize_ray_cluster",
     "PoolingParams",
 ]
+
+
+def githash():
+    """Return the git hash string provided by ``vllm._C.ops.githash()``.
+
+    The import is deferred to call time, so a missing ``vllm._C`` raises
+    ``ImportError`` from this call rather than where this function is defined.
+    """
+    from vllm._C import ops
+    return ops.githash()