- 
          
- 
                Notifications
    You must be signed in to change notification settings 
- Fork 10.9k
Open
Labels
installation: Installation problems
Description
Your current environment
The output of `python collect_env.py`
The server is hanging right now.
How you are installing vllm
`pip install -e .` inside the source code directory.
The error log is below. It says flashinfer failed to find the C++ math.h, but I am pretty sure the file is there (I checked with ls).
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] 
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] The above exception was the direct cause of the following exception:
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] 
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] Traceback (most recent call last):
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]   File "/mnt/nvme1n1/wayne/vast/vast/vllm/vllm/v1/executor/multiproc_executor.py", line 589, in worker_busy_loop
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]     output = func(*args, **kwargs)
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]              ^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]   File "/mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]     return func(*args, **kwargs)
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]            ^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]   File "/mnt/nvme1n1/wayne/vast/vast/vllm/vllm/v1/worker/gpu_worker.py", line 244, in determine_available_memory
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]     self.model_runner.profile_run()
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]   File "/mnt/nvme1n1/wayne/vast/vast/vllm/vllm/v1/worker/gpu_model_runner.py", line 2511, in profile_run
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]     output = self._dummy_sampler_run(last_hidden_states)
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]   File "/mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]     return func(*args, **kwargs)
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]            ^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]   File "/mnt/nvme1n1/wayne/vast/vast/vllm/vllm/v1/worker/gpu_model_runner.py", line 2336, in _dummy_sampler_run
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]     raise e
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]   File "/mnt/nvme1n1/wayne/vast/vast/vllm/vllm/v1/worker/gpu_model_runner.py", line 2326, in _dummy_sampler_run
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]     sampler_output = self.sampler(logits=logits,
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]   File "/mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]     return self._call_impl(*args, **kwargs)
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]   File "/mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]     return forward_call(*args, **kwargs)
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]   File "/mnt/nvme1n1/wayne/vast/vast/vllm/vllm/v1/sample/sampler.py", line 68, in forward
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]     sampled = self.sample(logits, sampling_metadata)
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]   File "/mnt/nvme1n1/wayne/vast/vast/vllm/vllm/v1/sample/sampler.py", line 135, in sample
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]     random_sampled = self.topk_topp_sampler(
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]                      ^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]   File "/mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]     return self._call_impl(*args, **kwargs)
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]   File "/mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]     return forward_call(*args, **kwargs)
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]   File "/mnt/nvme1n1/wayne/vast/vast/vllm/vllm/v1/sample/ops/topk_topp_sampler.py", line 108, in forward_cuda
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]     return flashinfer_sample(logits.contiguous(), k, p, generators)
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]   File "/mnt/nvme1n1/wayne/vast/vast/vllm/vllm/v1/sample/ops/topk_topp_sampler.py", line 294, in flashinfer_sample
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]     next_token_ids = flashinfer.sampling.top_k_top_p_sampling_from_logits(
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]   File "/mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/sampling.py", line 983, in top_k_top_p_sampling_from_logits
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]     masked_logits = top_k_mask_logits(logits, top_k)
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]   File "/mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/sampling.py", line 1303, in top_k_mask_logits
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]     return get_sampling_module().top_k_mask_logits(
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]            ^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]   File "/mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/sampling.py", line 47, in get_sampling_module
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]     module = gen_sampling_module().build_and_load()
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]   File "/mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/jit/core.py", line 123, in build_and_load
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]     self.build(verbose)
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]   File "/mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/jit/core.py", line 115, in build
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]     run_ninja(jit_env.FLASHINFER_JIT_DIR, self.ninja_path, verbose)
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]   File "/mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/jit/cpp_ext.py", line 211, in run_ninja
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]     raise RuntimeError(msg) from e
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] RuntimeError: Ninja build failed. Ninja output:
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] ninja: Entering directory `/root/.cache/flashinfer/89/cached_ops'
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] [1/4] /usr/bin/nvcc --generate-dependencies-with-compile --dependency-output sampling/flashinfer_sampling_ops.cuda.o.d -DTORCH_EXTENSION_NAME=sampling -DTORCH_API_INCLUDE_EXTENSION_H -DPy_LIMITED_API=0x03090000 -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1016\" -D_GLIBCXX_USE_CXX11_ABI=1 -isystem /usr/include/python3.12 -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/torch/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/torch/include/torch/csrc/api/include -isystem /usr/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/csrc -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/cutlass/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/cutlass/tools/util/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/spdlog/include --compiler-options=-fPIC --expt-relaxed-constexpr -gencode=arch=compute_89,code=sm_89 -O3 -std=c++17 --threads=32 -use_fast_math -DFLASHINFER_ENABLE_F16 -DFLASHINFER_ENABLE_BF16 -DFLASHINFER_ENABLE_FP8_E4M3 -DFLASHINFER_ENABLE_FP8_E5M2 -DNDEBUG -c /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/csrc/flashinfer_sampling_ops.cu -o sampling/flashinfer_sampling_ops.cuda.o 
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] FAILED: sampling/flashinfer_sampling_ops.cuda.o 
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] /usr/bin/nvcc --generate-dependencies-with-compile --dependency-output sampling/flashinfer_sampling_ops.cuda.o.d -DTORCH_EXTENSION_NAME=sampling -DTORCH_API_INCLUDE_EXTENSION_H -DPy_LIMITED_API=0x03090000 -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1016\" -D_GLIBCXX_USE_CXX11_ABI=1 -isystem /usr/include/python3.12 -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/torch/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/torch/include/torch/csrc/api/include -isystem /usr/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/csrc -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/cutlass/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/cutlass/tools/util/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/spdlog/include --compiler-options=-fPIC --expt-relaxed-constexpr -gencode=arch=compute_89,code=sm_89 -O3 -std=c++17 --threads=32 -use_fast_math -DFLASHINFER_ENABLE_F16 -DFLASHINFER_ENABLE_BF16 -DFLASHINFER_ENABLE_FP8_E4M3 -DFLASHINFER_ENABLE_FP8_E5M2 -DNDEBUG -c /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/csrc/flashinfer_sampling_ops.cu -o sampling/flashinfer_sampling_ops.cuda.o 
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] In file included from /usr/include/crt/math_functions.h:10551,
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]                  from /usr/include/crt/common_functions.h:303,
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]                  from /usr/include/cuda_runtime.h:118,
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]                  from <command-line>:
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] /usr/include/c++/12/cmath:45:15: fatal error: math.h: No such file or directory
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]    45 | #include_next <math.h>
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]       |               ^~~~~~~~
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] compilation terminated.
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] In file included from /usr/include/crt/math_functions.h:10551,
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]                  from /usr/include/crt/common_functions.h:303,
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]                  from /usr/include/cuda_runtime.h:118,
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]                  from <command-line>:
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] /usr/include/c++/12/cmath:45:15: fatal error: math.h: No such file or directory
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]    45 | #include_next <math.h>
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]       |               ^~~~~~~~
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] compilation terminated.
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] fatal   : Could not open input file /tmp/tmpxft_000119da_00000000-7_flashinfer_sampling_ops.cpp1.ii
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] [2/4] /usr/bin/nvcc --generate-dependencies-with-compile --dependency-output sampling/renorm.cuda.o.d -DTORCH_EXTENSION_NAME=sampling -DTORCH_API_INCLUDE_EXTENSION_H -DPy_LIMITED_API=0x03090000 -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1016\" -D_GLIBCXX_USE_CXX11_ABI=1 -isystem /usr/include/python3.12 -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/torch/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/torch/include/torch/csrc/api/include -isystem /usr/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/csrc -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/cutlass/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/cutlass/tools/util/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/spdlog/include --compiler-options=-fPIC --expt-relaxed-constexpr -gencode=arch=compute_89,code=sm_89 -O3 -std=c++17 --threads=32 -use_fast_math -DFLASHINFER_ENABLE_F16 -DFLASHINFER_ENABLE_BF16 -DFLASHINFER_ENABLE_FP8_E4M3 -DFLASHINFER_ENABLE_FP8_E5M2 -DNDEBUG -c /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/csrc/renorm.cu -o sampling/renorm.cuda.o 
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] FAILED: sampling/renorm.cuda.o 
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] /usr/bin/nvcc --generate-dependencies-with-compile --dependency-output sampling/renorm.cuda.o.d -DTORCH_EXTENSION_NAME=sampling -DTORCH_API_INCLUDE_EXTENSION_H -DPy_LIMITED_API=0x03090000 -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1016\" -D_GLIBCXX_USE_CXX11_ABI=1 -isystem /usr/include/python3.12 -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/torch/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/torch/include/torch/csrc/api/include -isystem /usr/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/csrc -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/cutlass/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/cutlass/tools/util/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/spdlog/include --compiler-options=-fPIC --expt-relaxed-constexpr -gencode=arch=compute_89,code=sm_89 -O3 -std=c++17 --threads=32 -use_fast_math -DFLASHINFER_ENABLE_F16 -DFLASHINFER_ENABLE_BF16 -DFLASHINFER_ENABLE_FP8_E4M3 -DFLASHINFER_ENABLE_FP8_E5M2 -DNDEBUG -c /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/csrc/renorm.cu -o sampling/renorm.cuda.o 
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] In file included from /usr/include/crt/math_functions.h:10551,
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]                  from /usr/include/crt/common_functions.h:303,
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]                  from /usr/include/cuda_runtime.h:118,
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]                  from <command-line>:
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] /usr/include/c++/12/cmath:45:15: fatal error: math.h: No such file or directory
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]    45 | #include_next <math.h>
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]       |               ^~~~~~~~
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] compilation terminated.
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] In file included from /usr/include/crt/math_functions.h:10551,
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]                  from /usr/include/crt/common_functions.h:303,
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]                  from /usr/include/cuda_runtime.h:118,
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]                  from <command-line>:
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] /usr/include/c++/12/cmath:45:15: fatal error: math.h: No such file or directory
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]    45 | #include_next <math.h>
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]       |               ^~~~~~~~
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] compilation terminated.
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] fatal   : Could not open input file /tmp/tmpxft_000119d9_00000000-7_renorm.cpp1.ii
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] [3/4] /usr/bin/nvcc --generate-dependencies-with-compile --dependency-output sampling/sampling.cuda.o.d -DTORCH_EXTENSION_NAME=sampling -DTORCH_API_INCLUDE_EXTENSION_H -DPy_LIMITED_API=0x03090000 -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1016\" -D_GLIBCXX_USE_CXX11_ABI=1 -isystem /usr/include/python3.12 -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/torch/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/torch/include/torch/csrc/api/include -isystem /usr/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/csrc -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/cutlass/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/cutlass/tools/util/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/spdlog/include --compiler-options=-fPIC --expt-relaxed-constexpr -gencode=arch=compute_89,code=sm_89 -O3 -std=c++17 --threads=32 -use_fast_math -DFLASHINFER_ENABLE_F16 -DFLASHINFER_ENABLE_BF16 -DFLASHINFER_ENABLE_FP8_E4M3 -DFLASHINFER_ENABLE_FP8_E5M2 -DNDEBUG -c /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/csrc/sampling.cu -o sampling/sampling.cuda.o 
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] FAILED: sampling/sampling.cuda.o 
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] /usr/bin/nvcc --generate-dependencies-with-compile --dependency-output sampling/sampling.cuda.o.d -DTORCH_EXTENSION_NAME=sampling -DTORCH_API_INCLUDE_EXTENSION_H -DPy_LIMITED_API=0x03090000 -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1016\" -D_GLIBCXX_USE_CXX11_ABI=1 -isystem /usr/include/python3.12 -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/torch/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/torch/include/torch/csrc/api/include -isystem /usr/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/csrc -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/cutlass/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/cutlass/tools/util/include -isystem /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/spdlog/include --compiler-options=-fPIC --expt-relaxed-constexpr -gencode=arch=compute_89,code=sm_89 -O3 -std=c++17 --threads=32 -use_fast_math -DFLASHINFER_ENABLE_F16 -DFLASHINFER_ENABLE_BF16 -DFLASHINFER_ENABLE_FP8_E4M3 -DFLASHINFER_ENABLE_FP8_E5M2 -DNDEBUG -c /mnt/nvme1n1/wayne/vast/vast/lib/python3.12/site-packages/flashinfer/data/csrc/sampling.cu -o sampling/sampling.cuda.o 
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] In file included from /usr/include/crt/math_functions.h:10551,
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]                  from /usr/include/crt/common_functions.h:303,
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]                  from /usr/include/cuda_runtime.h:118,
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]                  from <command-line>:
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] /usr/include/c++/12/cmath:45:15: fatal error: math.h: No such file or directory
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]    45 | #include_next <math.h>
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]       |               ^~~~~~~~
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] compilation terminated.
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] In file included from /usr/include/crt/math_functions.h:10551,
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]                  from /usr/include/crt/common_functions.h:303,
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]                  from /usr/include/cuda_runtime.h:118,
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]                  from <command-line>:
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] /usr/include/c++/12/cmath:45:15: fatal error: math.h: No such file or directory
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]    45 | #include_next <math.h>
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594]       |               ^~~~~~~~
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] compilation terminated.
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] fatal   : Could not open input file /tmp/tmpxft_000119d8_00000000-7_sampling.cpp1.ii
(VllmWorker rank=0 pid=71558) ERROR 07-31 08:44:05 [multiproc_executor.py:594] ninja: build stopped: subcommand failed.
Can anyone shed some light on this?
Before submitting a new issue...
- Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the documentation page, which can answer lots of frequently asked questions.
felipehertzer
Metadata
Metadata
Assignees
Labels
installation: Installation problems