Skip to content

ROCM 6.4.2 unable to build #140

@Kashouryo

Description

@Kashouryo

Describe the Issue
On Linux Mint Xia, after following #130, I was still unable to build with ROCm 6.4.2 due to various build errors, such as:

error: declaration of 'abort' has a different language linkage
error: redefinition of '__assert_fail'
error: redefinition of '__assertfail'
error: no template named 'conditional' in namespace '__hip_internal'; did you mean 'rocwmma::conditional'?
error: unknown type name '__hip_bfloat16_raw'
error: unknown type name '__hip_bfloat162_raw'; did you mean '__hip_bfloat162'?
no viable conversion from '__hip_bfloat162' to 'float2' (aka 'HIP_vector_type<float, 2>')
etc...

Additional Information:
Environment variables (from `printenv`) that might be useful:

ROCM_PATH=/opt/rocm-6.4.2
HSA_OVERRIDE_GFX_VERSION=11.0.0
PATH=/opt/rocm-6.4.2/bin:/home/ksr/miniconda3/bin:/home/ksr/miniconda3/condabin:/home/ksr/.deno/bin:/home/moene/.cargo/bin:/home/ksr/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/home/ksr/.dotnet/tools

build log:

(base) ksr@ksr-desktop:~/Software/koboldcpp-rocm$ make LLAMA_HIPBLAS=1 -j24
I koboldcpp build info: 
I UNAME_S:  Linux
I UNAME_P:  x86_64
I UNAME_M:  x86_64
I UNAME_O:  GNU/Linux
I CFLAGS:   -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./common -I./vendor -I./vendor/stb -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/ttscpp/include -I./otherarch/ttscpp/src -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_REPACK -DNDEBUG -s -DGGML_USE_LLAMAFILE -pthread -Wno-deprecated -Wno-deprecated-declarations -Wno-unused-variable -pthread -march=native -mtune=native
I CXXFLAGS: -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./common -I./vendor -I./vendor/stb -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/ttscpp/include -I./otherarch/ttscpp/src -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_REPACK -DNDEBUG -s -DGGML_USE_LLAMAFILE -pthread -Wno-multichar -Wno-write-strings -Wno-deprecated -Wno-deprecated-declarations -Wno-unused-variable -pthread
I LDFLAGS:  -ldl
I CC:       cc (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0
I CXX:      g++ (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0
I HIP CC:       AMD clang version 19.0.0git (https://github.com/RadeonOpenCompute/llvm-project roc-6.4.2 25224 d366fa84f3fdcbd4b10847ebd5db572ae12a34fb)
I HIP CXX:      AMD clang version 19.0.0git (https://github.com/RadeonOpenCompute/llvm-project roc-6.4.2 25224 d366fa84f3fdcbd4b10847ebd5db572ae12a34fb)

g++ -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./common -I./vendor -I./vendor/stb -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/ttscpp/include -I./otherarch/ttscpp/src -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_REPACK -DNDEBUG -s -DGGML_USE_LLAMAFILE -pthread -Wno-multichar -Wno-write-strings -Wno-deprecated -Wno-deprecated-declarations -Wno-unused-variable -pthread ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o ggml_v3.o ggml_v2.o ggml_v1.o expose.o gpttype_adapter.o sdcpp_default.o whispercpp_default.o tts_default.o embeddings_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o ggml-alloc.o ggml-cpu-traits.o ggml-quants.o ggml-cpu-quants.o kcpp-quantmapper.o kcpp-repackmapper.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm.o common.o sampling.o kcpputils.o mtmdaudio.o -shared -o koboldcpp_default.so -ldl
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/fattn-wmma-f16.o ggml/src/ggml-cuda/fattn-wmma-f16.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.o ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.o ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.o ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.o ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.o ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.o ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.o ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.o ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-mxfp4.o ggml/src/ggml-cuda/template-instances/mmq-instance-mxfp4.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.o ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.o ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.o ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.o ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu
In file included from ggml/src/ggml-cuda/fattn-wmma-f16.cu:18:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/rocwmma.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors.hpp:93:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/coop_io_config.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/register_layout_transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms_impl.hpp:31:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../vector_util.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../types.hpp:87:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../float8.hpp:118:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/hip_fp8.h:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:76:
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/hip_assert.h:27:6: error: declaration of 'abort' has a different language linkage
   27 | void abort() {
      |      ^
/usr/include/hip/amd_detail/amd_device_functions.h:805:6: note: previous definition is here
  805 | void abort() {
      |      ^
In file included from ggml/src/ggml-cuda/fattn-wmma-f16.cu:18:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/rocwmma.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors.hpp:93:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/coop_io_config.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/register_layout_transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms_impl.hpp:31:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../vector_util.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../types.hpp:87:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../float8.hpp:118:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/hip_fp8.h:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:76:
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/hip_assert.h:44:6: error: redefinition of '__assert_fail'
   44 | void __assert_fail(const char *assertion,
      |      ^
/usr/include/hip/amd_detail/amd_device_functions.h:822:6: note: previous definition is here
  822 | void __assert_fail(const char *assertion,
      |      ^
In file included from ggml/src/ggml-cuda/fattn-wmma-f16.cu:18:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/rocwmma.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors.hpp:93:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/coop_io_config.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/register_layout_transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms_impl.hpp:31:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../vector_util.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../types.hpp:87:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../float8.hpp:118:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/hip_fp8.h:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:76:
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/hip_assert.h:86:6: error: redefinition of '__assertfail'
   86 | void __assertfail()
      |      ^
/usr/include/hip/amd_detail/amd_device_functions.h:864:6: note: previous definition is here
  864 | void __assertfail()
      |      ^
In file included from ggml/src/ggml-cuda/fattn-wmma-f16.cu:18:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/rocwmma.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors.hpp:93:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/coop_io_config.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/register_layout_transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms_impl.hpp:31:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../vector_util.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../types.hpp:87:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../float8.hpp:118:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/hip_fp8.h:30:
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:481:12: error: no template named 'conditional' in namespace '__hip_internal'; did you mean 'rocwmma::conditional'?
  481 |   typename __hip_internal::conditional<
      |            ^~~~~~~~~~~~~~~~
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../utility/type_traits.hpp:117:16: note: 'rocwmma::conditional' declared here
  117 |     using std::conditional;
      |                ^
In file included from ggml/src/ggml-cuda/fattn-wmma-f16.cu:18:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/rocwmma.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors.hpp:93:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/coop_io_config.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/register_layout_transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms_impl.hpp:31:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../vector_util.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../types.hpp:87:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../float8.hpp:118:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/hip_fp8.h:30:
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:483:16: error: no template named 'conditional' in namespace '__hip_internal'; did you mean 'rocwmma::conditional'?
  483 |       typename __hip_internal::conditional<sizeof(T) == 4, unsigned int,
      |                ^~~~~~~~~~~~~~~~
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../utility/type_traits.hpp:117:16: note: 'rocwmma::conditional' declared here
  117 |     using std::conditional;
      |                ^
In file included from ggml/src/ggml-cuda/fattn-wmma-f16.cu:18:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/rocwmma.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors.hpp:93:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/coop_io_config.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/register_layout_transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms_impl.hpp:31:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../vector_util.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../types.hpp:87:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../float8.hpp:118:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/hip_fp8.h:30:
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:797:36: error: unknown type name '__hip_bfloat16_raw'
  797 | __hip_cvt_bfloat16raw_to_fp8(const __hip_bfloat16_raw hr, const __hip_saturation_t sat,
      |                                    ^
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:804:9: error: no viable conversion from '__hip_bfloat16' to 'float'
  804 |   float fval = __hip_bfloat16(hr);
      |         ^      ~~~~~~~~~~~~~~~~~~
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:823:39: error: unknown type name '__hip_bfloat162_raw'; did you mean '__hip_bfloat162'?
  823 | __hip_cvt_bfloat16raw2_to_fp8x2(const __hip_bfloat162_raw hr, const __hip_saturation_t sat,
      |                                       ^
/usr/include/hip/amd_detail/amd_hip_bf16.h:113:8: note: '__hip_bfloat162' declared here
  113 | struct __hip_bfloat162 {
      |        ^
In file included from ggml/src/ggml-cuda/fattn-wmma-f16.cu:18:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/rocwmma.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors.hpp:93:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/coop_io_config.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/register_layout_transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms_impl.hpp:31:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../vector_util.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../types.hpp:87:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../float8.hpp:118:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/hip_fp8.h:30:
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:830:10: error: no viable conversion from '__hip_bfloat162' to 'float2' (aka 'HIP_vector_type<float, 2>')
  830 |   float2 f2 = __hip_bfloat162(hr);
      |          ^    ~~~~~~~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_vector_types.h:470:9: note: candidate constructor not viable: no known conversion from '__hip_bfloat162' to 'const HIP_vector_type<float, 2> &' for 1st argument
  470 |         HIP_vector_type(const HIP_vector_type&) = default;
      |         ^               ~~~~~~~~~~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_vector_types.h:473:9: note: candidate constructor not viable: no known conversion from '__hip_bfloat162' to 'HIP_vector_type<float, 2> &&' for 1st argument
  473 |         HIP_vector_type(HIP_vector_type&&) = default;
      |         ^               ~~~~~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_vector_types.h:465:9: note: candidate template ignored: requirement 'sizeof...(Us) == 2U' was not satisfied [with Us = <__hip_bfloat162>]
  465 |         HIP_vector_type(Us... xs) noexcept
      |         ^
/usr/include/hip/amd_detail/amd_hip_vector_types.h:456:9: note: explicit constructor is not a candidate
  456 |         HIP_vector_type(U x_) noexcept
      |         ^
In file included from ggml/src/ggml-cuda/fattn-wmma-f16.cu:18:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/rocwmma.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors.hpp:93:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/coop_io_config.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/register_layout_transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms_impl.hpp:31:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../vector_util.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../types.hpp:87:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../float8.hpp:118:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/hip_fp8.h:30:
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.o ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:1032:36: error: cannot convert 'const __hip_bfloat16' to 'float' without a conversion operator
 1032 |       : __x(__hip_cvt_float_to_fp8(static_cast<float>(f), __default_saturation,
      |                                    ^~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:1069:12: error: no matching conversion for functional-style cast from 'float' to '__hip_bfloat16'
 1069 |     return __hip_bfloat16(f);
      |            ^~~~~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit copy constructor) not viable: no known conversion from 'float' to 'const __hip_bfloat16' for 1st argument
  108 | struct __hip_bfloat16 {
      |        ^~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit move constructor) not viable: no known conversion from 'float' to '__hip_bfloat16' for 1st argument
  108 | struct __hip_bfloat16 {
      |        ^~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit default constructor) not viable: requires 0 arguments, but 1 was provided
In file included from ggml/src/ggml-cuda/fattn-wmma-f16.cu:18:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/rocwmma.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors.hpp:93:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/coop_io_config.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/register_layout_transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms_impl.hpp:31:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../vector_util.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../types.hpp:87:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../float8.hpp:118:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/hip_fp8.h:30:
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:1590:36: error: cannot convert 'const __hip_bfloat16' to 'float' without a conversion operator
 1590 |       : __x(__hip_cvt_float_to_fp8(static_cast<float>(f), __default_saturation,
      |                                    ^~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:1640:12: error: no matching conversion for functional-style cast from 'float' to '__hip_bfloat16'
 1640 |     return __hip_bfloat16(f);
      |            ^~~~~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit copy constructor) not viable: no known conversion from 'float' to 'const __hip_bfloat16' for 1st argument
  108 | struct __hip_bfloat16 {
      |        ^~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit move constructor) not viable: no known conversion from 'float' to '__hip_bfloat16' for 1st argument
  108 | struct __hip_bfloat16 {
      |        ^~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit default constructor) not viable: requires 0 arguments, but 1 was provided
In file included from ggml/src/ggml-cuda/fattn-wmma-f16.cu:18:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/rocwmma.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors.hpp:93:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/coop_io_config.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/register_layout_transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms_impl.hpp:31:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../vector_util.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../types.hpp:87:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../float8.hpp:118:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/hip_fp8.h:30:
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:2147:36: error: cannot convert 'const __hip_bfloat16' to 'float' without a conversion operator
 2147 |       : __x(__hip_cvt_float_to_fp8(static_cast<float>(f), __default_saturation,
      |                                    ^~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:2185:12: error: no matching conversion for functional-style cast from 'float' to '__hip_bfloat16'
 2185 |     return __hip_bfloat16(f);
      |            ^~~~~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit copy constructor) not viable: no known conversion from 'float' to 'const __hip_bfloat16' for 1st argument
  108 | struct __hip_bfloat16 {
      |        ^~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit move constructor) not viable: no known conversion from 'float' to '__hip_bfloat16' for 1st argument
  108 | struct __hip_bfloat16 {
      |        ^~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit default constructor) not viable: requires 0 arguments, but 1 was provided
In file included from ggml/src/ggml-cuda/fattn-wmma-f16.cu:18:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/rocwmma.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors.hpp:93:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/coop_io_config.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/register_layout_transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms_impl.hpp:31:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../vector_util.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../types.hpp:87:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../float8.hpp:118:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/hip_fp8.h:30:
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:2709:36: error: cannot convert 'const __hip_bfloat16' to 'float' without a conversion operator
 2709 |       : __x(__hip_cvt_float_to_fp8(static_cast<float>(f), __default_saturation,
      |                                    ^~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:2760:12: error: no matching conversion for functional-style cast from 'float' to '__hip_bfloat16'
 2760 |     return __hip_bfloat16(f);
      |            ^~~~~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit copy constructor) not viable: no known conversion from 'float' to 'const __hip_bfloat16' for 1st argument
  108 | struct __hip_bfloat16 {
      |        ^~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit move constructor) not viable: no known conversion from 'float' to '__hip_bfloat16' for 1st argument
  108 | struct __hip_bfloat16 {
      |        ^~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit default constructor) not viable: requires 0 arguments, but 1 was provided
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.o ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.o ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.o ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.o ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.o ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.o ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.o ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.o ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.o ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu
ggml/src/ggml-cuda/fattn-wmma-f16.cu:418:45: error: call to '__hadd' is ambiguous
  418 |                 KQ_rowsum_h2[j0/nwarps].x = __hadd(KQ_rowsum_h2[j0/nwarps].x, val);
      |                                             ^~~~~~
ggml/src/ggml-cuda/fattn-wmma-f16.cu:526:24: note: in instantiation of function template specialization 'flash_attn_ext_f16<64, 16, 4, 64, float, false>' requested here
  526 |         fattn_kernel = flash_attn_ext_f16<
      |                        ^
ggml/src/ggml-cuda/fattn-wmma-f16.cu:548:21: note: in instantiation of function template specialization 'ggml_cuda_flash_attn_ext_wmma_f16_case<64, 16, float>' requested here
  548 |                     ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, float>(ctx, dst);
      |                     ^
/usr/include/hip/amd_detail/amd_device_functions.h:257:39: note: candidate function
  257 | __device__ static inline unsigned int __hadd(int x, int y) {
      |                                       ^
/usr/include/hip/amd_detail/amd_hip_fp16.h:1368:20: note: candidate function
 1368 |             __half __hadd(__half x, __half y)
      |                    ^
ggml/src/ggml-cuda/fattn-wmma-f16.cu:418:45: error: call to '__hadd' is ambiguous
  418 |                 KQ_rowsum_h2[j0/nwarps].x = __hadd(KQ_rowsum_h2[j0/nwarps].x, val);
      |                                             ^~~~~~
ggml/src/ggml-cuda/fattn-wmma-f16.cu:530:24: note: in instantiation of function template specialization 'flash_attn_ext_f16<64, 16, 4, 64, float, true>' requested here
  530 |         fattn_kernel = flash_attn_ext_f16<
      |                        ^
ggml/src/ggml-cuda/fattn-wmma-f16.cu:548:21: note: in instantiation of function template specialization 'ggml_cuda_flash_attn_ext_wmma_f16_case<64, 16, float>' requested here
  548 |                     ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, float>(ctx, dst);
      |                     ^
/usr/include/hip/amd_detail/amd_device_functions.h:257:39: note: candidate function
  257 | __device__ static inline unsigned int __hadd(int x, int y) {
      |                                       ^
/usr/include/hip/amd_detail/amd_hip_fp16.h:1368:20: note: candidate function
 1368 |             __half __hadd(__half x, __half y)
      |                    ^
fatal error: too many errors emitted, stopping now [-ferror-limit=]
20 errors generated when compiling for gfx1100.
make: *** [Makefile:303: ggml/src/ggml-cuda/fattn-wmma-f16.o] Error 1
make: *** Waiting for unfinished jobs....

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions