This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit: revert githash commit
dhuangnm authored and dhuangnm committed Jun 21, 2024
1 parent 39e484e commit a512d63
Showing 6 changed files with 3 additions and 46 deletions.
10 changes: 3 additions & 7 deletions CMakeLists.txt
@@ -8,7 +8,6 @@ message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
 message(STATUS "Target device: ${VLLM_TARGET_DEVICE}")
 
 include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/cmake/dep.cmake)
 
 #
 # Supported python versions. These versions will be searched in order, the
@@ -207,8 +206,7 @@ define_gpu_extension_target(
   ARCHITECTURES ${VLLM_GPU_ARCHES}
   INCLUDE_DIRECTORIES ${CUTLASS_INCLUDE_DIR};${CUTLASS_TOOLS_UTIL_INCLUDE_DIR}
   USE_SABI 3
-  WITH_SOABI
-  LIBRARIES cmake_git_version_tracking)
+  WITH_SOABI)
 
 #
 # _moe_C extension
@@ -226,8 +224,7 @@ define_gpu_extension_target(
   COMPILE_FLAGS ${VLLM_GPU_FLAGS}
   ARCHITECTURES ${VLLM_GPU_ARCHES}
   USE_SABI 3
-  WITH_SOABI
-  LIBRARIES cmake_git_version_tracking)
+  WITH_SOABI)
 
 #
 # _punica_C extension
@@ -279,8 +276,7 @@ if (VLLM_PUNICA_GPU_ARCHES)
   COMPILE_FLAGS ${VLLM_PUNICA_GPU_FLAGS}
   ARCHITECTURES ${VLLM_PUNICA_GPU_ARCHES}
   USE_SABI 3
-  WITH_SOABI
-  LIBRARIES cmake_git_version_tracking)
+  WITH_SOABI)
 else()
   message(WARNING "Unable to create _punica_C target because none of the "
     "requested architectures (${VLLM_GPU_ARCHES}) are supported, i.e. >= 8.0")
6 changes: 0 additions & 6 deletions cmake/dep.cmake

This file was deleted.

13 changes: 0 additions & 13 deletions collect_env.py
@@ -15,16 +15,13 @@
 try:
     import torch
     TORCH_AVAILABLE = True
-    installed_path = os.path.dirname(torch.__file__)
-    sys.path.insert(0, os.path.dirname(installed_path))
 except (ImportError, NameError, AttributeError, OSError):
     TORCH_AVAILABLE = False
 
 # System Environment Information
 SystemEnv = namedtuple(
     'SystemEnv',
     [
-        'vllm_git_hash',
         'torch_version',
         'is_debug_build',
         'cuda_compiled_version',
@@ -143,14 +140,6 @@ def get_conda_packages(run_lambda, patterns=None):
                       for name in patterns))
 
 
-def get_vllm_git_hash():
-    try:
-        import vllm
-        return vllm.githash()
-    except ImportError:
-        return None
-
-
 def get_gcc_version(run_lambda):
     return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)')
 
@@ -549,7 +538,6 @@ def get_version_or_na(cfg, prefix):
     gpu_topo = get_gpu_topo(run_lambda)
 
     return SystemEnv(
-        vllm_git_hash=get_vllm_git_hash(),
         torch_version=version_str,
         is_debug_build=debug_mode_str,
         python_version='{} ({}-bit runtime)'.format(
@@ -626,7 +614,6 @@ def get_version_or_na(cfg, prefix):
 ROCM Version: {rocm_version}
 Neuron SDK Version: {neuron_sdk_version}
 vLLM Version: {vllm_version}
-vLLM Git Hash: {vllm_git_hash}
 vLLM Build Flags:
 {vllm_build_flags}
 GPU Topology:
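
Taken together, the collect_env.py deletions remove one reporting field: get_vllm_git_hash() probed vllm.githash(), the result was stored in the SystemEnv namedtuple, and the output template printed it as "vLLM Git Hash: ...". A minimal sketch of that flow, using a trimmed-down SystemEnv and a hypothetical REPORT_FMT name (the real template string lives elsewhere in collect_env.py):

from collections import namedtuple

# Trimmed-down stand-in for collect_env.py's SystemEnv (only two of its fields).
SystemEnv = namedtuple('SystemEnv', ['vllm_git_hash', 'torch_version'])

def get_vllm_git_hash():
    # Mirrors the deleted helper; AttributeError is also caught here because
    # vllm.githash() no longer exists after this revert.
    try:
        import vllm
        return vllm.githash()
    except (ImportError, AttributeError):
        return None

# REPORT_FMT is a hypothetical name for illustration; the deleted line
# "vLLM Git Hash: {vllm_git_hash}" sat inside the real output template.
REPORT_FMT = "vLLM Git Hash: {vllm_git_hash}\nPyTorch Version: {torch_version}"

env = SystemEnv(vllm_git_hash=get_vllm_git_hash(), torch_version='2.3.0+cu121')
print(REPORT_FMT.format(**env._asdict()))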
7 changes: 0 additions & 7 deletions csrc/cpu/torch_bindings.cpp
@@ -2,18 +2,11 @@
 #include "ops.h"
 #include "registration.h"
 
-#include <git.h>
 #include <torch/library.h>
 
-std::string githash() { return std::string{git::CommitSHA1()}; }
-
 TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
   // vLLM custom ops
 
-  // Show vllm git hash
-  ops.def("githash", &githash);
-  ops.impl("githash", torch::kCPU, &githash);
-
   // Attention ops
   // Compute the attention between an input query and the cached keys/values
   // using PagedAttention.
7 changes: 0 additions & 7 deletions csrc/torch_bindings.cpp
@@ -3,11 +3,8 @@
 #include "ops.h"
 #include "registration.h"
 
-#include <git.h>
 #include <torch/library.h>
 
-std::string githash() { return std::string{git::CommitSHA1()}; }
-
 // Note on op signatures:
 // The X_meta signatures are for the meta functions corresponding to op X.
 // They must be kept in sync with the signature for X. Generally, only
@@ -21,10 +18,6 @@ std::string githash() { return std::string{git::CommitSHA1()}; }
 TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
   // vLLM custom ops
 
-  // Show vllm git hash
-  ops.def("githash", &githash);
-  ops.impl("githash", torch::kCUDA, &githash);
-
   // Attention ops
   // Compute the attention between an input query and the cached
   // keys/values using PagedAttention.
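
The two torch_bindings.cpp hunks drop the same registration from the CPU and CUDA builds of the _C extension: githash() was defined once and registered via ops.def/ops.impl, which made it callable from Python through the torch.ops namespace. A hedged sketch of that Python-side call path, assuming that importing vllm loads the compiled _C extension and that the op resolves under torch.ops._C:

import torch
import vllm  # noqa: F401  -- side effect (assumed): loads the compiled _C extension

try:
    # Dispatches to whichever implementation was registered for the current
    # backend (torch::kCPU or torch::kCUDA in the deleted hunks).
    print("vLLM built from commit:", torch.ops._C.githash())
except (AttributeError, RuntimeError):
    # Expected after this revert: the githash op is no longer registered.
    print("githash op not available in this build")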
6 changes: 0 additions & 6 deletions vllm/__init__.py
@@ -15,14 +15,8 @@
 from .version import __version__
 
 
-def githash():
-    import torch
-    return torch.ops._C.githash()
-
-
 __all__ = [
     "__version__",
-    "githash",
     "LLM",
     "ModelRegistry",
     "PromptStrictInputs",
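
With the wrapper and its __all__ entry gone, vllm.githash() disappears from the public API; before this revert (at parent commit 39e484e) callers could simply do import vllm; vllm.githash(). A small, hedged sketch of a version-tolerant lookup that works on either side of this commit:

import vllm

# getattr with a fallback avoids an AttributeError on builds (like this one)
# where githash() was removed.
git_hash = getattr(vllm, "githash", lambda: None)()
print(git_hash or "githash() not available in this vllm build")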

4 comments on commit a512d63

@github-actions

bigger_is_better

Benchmark suite Current: a512d63 Previous: abc0ceb Ratio
{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.8.17 (default, Jun 7 2023, 12:29:56) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 2.4976551730438463 prompts/s
{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.8.17 (default, Jun 7 2023, 12:29:56) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 959.099586448837 tokens/s

This comment was automatically generated by a workflow using github-action-benchmark.

@github-actions

bigger_is_better

Benchmark suite Current: a512d63 Previous: abc0ceb Ratio
{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 2.4839670949913555 prompts/s
{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 953.8433644766804 tokens/s

This comment was automatically generated by a workflow using github-action-benchmark.

@github-actions

bigger_is_better

Benchmark suite Current: a512d63 Previous: abc0ceb Ratio
{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.11.4 (main, Jun 7 2023, 11:01:02) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 2.5347108274896324 prompts/s
{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.11.4 (main, Jun 7 2023, 11:01:02) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 973.3289577560188 tokens/s

This comment was automatically generated by a workflow using github-action-benchmark.

@github-actions

bigger_is_better

Benchmark suite Current: a512d63 Previous: abc0ceb Ratio
{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.9.17 (main, Jun 7 2023, 12:34:12) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 2.496178631285675 prompts/s 2.4860718116442153 prompts/s 1.00
{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.9.17 (main, Jun 7 2023, 12:34:12) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 958.5325944136991 tokens/s 954.6515756713787 tokens/s 1.00

This comment was automatically generated by a workflow using github-action-benchmark.
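
Only this last comment has a Previous value to compare against, which is why it is the only one showing a Ratio. A hedged sketch (an illustration, not github-action-benchmark's actual implementation) of how such a bigger-is-better ratio can be recomputed from the two request_throughput measurements quoted above:

def throughput_ratio(previous: float, current: float) -> float:
    """For a bigger-is-better metric, a ratio above 1.0 means the previous
    commit was faster, i.e. a possible regression on the current commit."""
    return previous / current

# request_throughput values from the comment above (prompts/s).
prev, curr = 2.4860718116442153, 2.496178631285675
print(f"Ratio: {throughput_ratio(prev, curr):.2f}")  # Ratio: 1.00 -> effectively unchanged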
