Unable to build with ROCm 6.4.2

**Describe the Issue**
On Linux Mint Xia, even after following #130, I was still unable to build with ROCm 6.4.2 because of various compile errors.
For example:
```
error: declaration of 'abort' has a different language linkage
error: redefinition of '__assert_fail'
error: redefinition of '__assertfail'
error: no template named 'conditional' in namespace '__hip_internal'; did you mean 'rocwmma::conditional'?
error: unknown type name '__hip_bfloat16_raw'
error: unknown type name '__hip_bfloat162_raw'; did you mean '__hip_bfloat162'?
no viable conversion from '__hip_bfloat162' to 'float2' (aka 'HIP_vector_type<float, 2>')
etc...
```
**Additional Information:**
Environment variables (from `printenv`) that might be useful:
```
ROCM_PATH=/opt/rocm-6.4.2
HSA_OVERRIDE_GFX_VERSION=11.0.0
PATH=/opt/rocm-6.4.2/bin:/home/ksr/miniconda3/bin:/home/ksr/miniconda3/condabin:/home/ksr/.deno/bin:/home/moene/.cargo/bin:/home/ksr/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/home/ksr/.dotnet/tools
```


Build log:

```
(base) ksr@ksr-desktop:~/Software/koboldcpp-rocm$ make LLAMA_HIPBLAS=1 -j24
I koboldcpp build info: 
I UNAME_S:  Linux
I UNAME_P:  x86_64
I UNAME_M:  x86_64
I UNAME_O:  GNU/Linux
I CFLAGS:   -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./common -I./vendor -I./vendor/stb -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/ttscpp/include -I./otherarch/ttscpp/src -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_REPACK -DNDEBUG -s -DGGML_USE_LLAMAFILE -pthread -Wno-deprecated -Wno-deprecated-declarations -Wno-unused-variable -pthread -march=native -mtune=native
I CXXFLAGS: -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./common -I./vendor -I./vendor/stb -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/ttscpp/include -I./otherarch/ttscpp/src -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_REPACK -DNDEBUG -s -DGGML_USE_LLAMAFILE -pthread -Wno-multichar -Wno-write-strings -Wno-deprecated -Wno-deprecated-declarations -Wno-unused-variable -pthread
I LDFLAGS:  -ldl
I CC:       cc (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0
I CXX:      g++ (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0
I HIP CC:       AMD clang version 19.0.0git (https://github.com/RadeonOpenCompute/llvm-project roc-6.4.2 25224 d366fa84f3fdcbd4b10847ebd5db572ae12a34fb)
I HIP CXX:      AMD clang version 19.0.0git (https://github.com/RadeonOpenCompute/llvm-project roc-6.4.2 25224 d366fa84f3fdcbd4b10847ebd5db572ae12a34fb)

g++ -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./common -I./vendor -I./vendor/stb -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/ttscpp/include -I./otherarch/ttscpp/src -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_REPACK -DNDEBUG -s -DGGML_USE_LLAMAFILE -pthread -Wno-multichar -Wno-write-strings -Wno-deprecated -Wno-deprecated-declarations -Wno-unused-variable -pthread ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o ggml_v3.o ggml_v2.o ggml_v1.o expose.o gpttype_adapter.o sdcpp_default.o whispercpp_default.o tts_default.o embeddings_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o ggml-alloc.o ggml-cpu-traits.o ggml-quants.o ggml-cpu-quants.o kcpp-quantmapper.o kcpp-repackmapper.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm.o common.o sampling.o kcpputils.o mtmdaudio.o -shared -o koboldcpp_default.so -ldl
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/fattn-wmma-f16.o ggml/src/ggml-cuda/fattn-wmma-f16.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.o ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.o ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.o ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.o ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.o ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.o ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.o ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.o ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-mxfp4.o ggml/src/ggml-cuda/template-instances/mmq-instance-mxfp4.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.o ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.o ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.o ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.o ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu
In file included from ggml/src/ggml-cuda/fattn-wmma-f16.cu:18:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/rocwmma.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors.hpp:93:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/coop_io_config.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/register_layout_transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms_impl.hpp:31:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../vector_util.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../types.hpp:87:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../float8.hpp:118:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/hip_fp8.h:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:76:
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/hip_assert.h:27:6: error: declaration of 'abort' has a different language linkage
   27 | void abort() {
      |      ^
/usr/include/hip/amd_detail/amd_device_functions.h:805:6: note: previous definition is here
  805 | void abort() {
      |      ^
In file included from ggml/src/ggml-cuda/fattn-wmma-f16.cu:18:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/rocwmma.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors.hpp:93:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/coop_io_config.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/register_layout_transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms_impl.hpp:31:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../vector_util.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../types.hpp:87:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../float8.hpp:118:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/hip_fp8.h:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:76:
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/hip_assert.h:44:6: error: redefinition of '__assert_fail'
   44 | void __assert_fail(const char *assertion,
      |      ^
/usr/include/hip/amd_detail/amd_device_functions.h:822:6: note: previous definition is here
  822 | void __assert_fail(const char *assertion,
      |      ^
In file included from ggml/src/ggml-cuda/fattn-wmma-f16.cu:18:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/rocwmma.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors.hpp:93:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/coop_io_config.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/register_layout_transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms_impl.hpp:31:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../vector_util.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../types.hpp:87:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../float8.hpp:118:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/hip_fp8.h:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:76:
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/hip_assert.h:86:6: error: redefinition of '__assertfail'
   86 | void __assertfail()
      |      ^
/usr/include/hip/amd_detail/amd_device_functions.h:864:6: note: previous definition is here
  864 | void __assertfail()
      |      ^
In file included from ggml/src/ggml-cuda/fattn-wmma-f16.cu:18:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/rocwmma.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors.hpp:93:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/coop_io_config.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/register_layout_transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms_impl.hpp:31:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../vector_util.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../types.hpp:87:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../float8.hpp:118:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/hip_fp8.h:30:
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:481:12: error: no template named 'conditional' in namespace '__hip_internal'; did you mean 'rocwmma::conditional'?
  481 |   typename __hip_internal::conditional<
      |            ^~~~~~~~~~~~~~~~
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../utility/type_traits.hpp:117:16: note: 'rocwmma::conditional' declared here
  117 |     using std::conditional;
      |                ^
In file included from ggml/src/ggml-cuda/fattn-wmma-f16.cu:18:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/rocwmma.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors.hpp:93:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/coop_io_config.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/register_layout_transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms_impl.hpp:31:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../vector_util.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../types.hpp:87:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../float8.hpp:118:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/hip_fp8.h:30:
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:483:16: error: no template named 'conditional' in namespace '__hip_internal'; did you mean 'rocwmma::conditional'?
  483 |       typename __hip_internal::conditional<sizeof(T) == 4, unsigned int,
      |                ^~~~~~~~~~~~~~~~
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../utility/type_traits.hpp:117:16: note: 'rocwmma::conditional' declared here
  117 |     using std::conditional;
      |                ^
In file included from ggml/src/ggml-cuda/fattn-wmma-f16.cu:18:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/rocwmma.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors.hpp:93:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/coop_io_config.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/register_layout_transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms_impl.hpp:31:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../vector_util.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../types.hpp:87:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../float8.hpp:118:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/hip_fp8.h:30:
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:797:36: error: unknown type name '__hip_bfloat16_raw'
  797 | __hip_cvt_bfloat16raw_to_fp8(const __hip_bfloat16_raw hr, const __hip_saturation_t sat,
      |                                    ^
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:804:9: error: no viable conversion from '__hip_bfloat16' to 'float'
  804 |   float fval = __hip_bfloat16(hr);
      |         ^      ~~~~~~~~~~~~~~~~~~
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:823:39: error: unknown type name '__hip_bfloat162_raw'; did you mean '__hip_bfloat162'?
  823 | __hip_cvt_bfloat16raw2_to_fp8x2(const __hip_bfloat162_raw hr, const __hip_saturation_t sat,
      |                                       ^
/usr/include/hip/amd_detail/amd_hip_bf16.h:113:8: note: '__hip_bfloat162' declared here
  113 | struct __hip_bfloat162 {
      |        ^
In file included from ggml/src/ggml-cuda/fattn-wmma-f16.cu:18:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/rocwmma.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors.hpp:93:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/coop_io_config.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/register_layout_transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms_impl.hpp:31:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../vector_util.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../types.hpp:87:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../float8.hpp:118:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/hip_fp8.h:30:
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:830:10: error: no viable conversion from '__hip_bfloat162' to 'float2' (aka 'HIP_vector_type<float, 2>')
  830 |   float2 f2 = __hip_bfloat162(hr);
      |          ^    ~~~~~~~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_vector_types.h:470:9: note: candidate constructor not viable: no known conversion from '__hip_bfloat162' to 'const HIP_vector_type<float, 2> &' for 1st argument
  470 |         HIP_vector_type(const HIP_vector_type&) = default;
      |         ^               ~~~~~~~~~~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_vector_types.h:473:9: note: candidate constructor not viable: no known conversion from '__hip_bfloat162' to 'HIP_vector_type<float, 2> &&' for 1st argument
  473 |         HIP_vector_type(HIP_vector_type&&) = default;
      |         ^               ~~~~~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_vector_types.h:465:9: note: candidate template ignored: requirement 'sizeof...(Us) == 2U' was not satisfied [with Us = <__hip_bfloat162>]
  465 |         HIP_vector_type(Us... xs) noexcept
      |         ^
/usr/include/hip/amd_detail/amd_hip_vector_types.h:456:9: note: explicit constructor is not a candidate
  456 |         HIP_vector_type(U x_) noexcept
      |         ^
In file included from ggml/src/ggml-cuda/fattn-wmma-f16.cu:18:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/rocwmma.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors.hpp:93:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/coop_io_config.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/register_layout_transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms_impl.hpp:31:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../vector_util.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../types.hpp:87:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../float8.hpp:118:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/hip_fp8.h:30:
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:1032:36: error: cannot convert 'const __hip_bfloat16' to 'float' without a conversion operator
 1032 |       : __x(__hip_cvt_float_to_fp8(static_cast<float>(f), __default_saturation,
      |                                    ^~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.o ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:1069:12: error: no matching conversion for functional-style cast from 'float' to '__hip_bfloat16'
 1069 |     return __hip_bfloat16(f);
      |            ^~~~~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit copy constructor) not viable: no known conversion from 'float' to 'const __hip_bfloat16' for 1st argument
  108 | struct __hip_bfloat16 {
      |        ^~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit move constructor) not viable: no known conversion from 'float' to '__hip_bfloat16' for 1st argument
  108 | struct __hip_bfloat16 {
      |        ^~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit default constructor) not viable: requires 0 arguments, but 1 was provided
In file included from ggml/src/ggml-cuda/fattn-wmma-f16.cu:18:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/rocwmma.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors.hpp:93:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/coop_io_config.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/register_layout_transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms_impl.hpp:31:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../vector_util.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../types.hpp:87:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../float8.hpp:118:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/hip_fp8.h:30:
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:1590:36: error: cannot convert 'const __hip_bfloat16' to 'float' without a conversion operator
 1590 |       : __x(__hip_cvt_float_to_fp8(static_cast<float>(f), __default_saturation,
      |                                    ^~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:1640:12: error: no matching conversion for functional-style cast from 'float' to '__hip_bfloat16'
 1640 |     return __hip_bfloat16(f);
      |            ^~~~~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit copy constructor) not viable: no known conversion from 'float' to 'const __hip_bfloat16' for 1st argument
  108 | struct __hip_bfloat16 {
      |        ^~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit move constructor) not viable: no known conversion from 'float' to '__hip_bfloat16' for 1st argument
  108 | struct __hip_bfloat16 {
      |        ^~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit default constructor) not viable: requires 0 arguments, but 1 was provided
In file included from ggml/src/ggml-cuda/fattn-wmma-f16.cu:18:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/rocwmma.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors.hpp:93:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/coop_io_config.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/register_layout_transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms_impl.hpp:31:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../vector_util.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../types.hpp:87:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../float8.hpp:118:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/hip_fp8.h:30:
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:2147:36: error: cannot convert 'const __hip_bfloat16' to 'float' without a conversion operator
 2147 |       : __x(__hip_cvt_float_to_fp8(static_cast<float>(f), __default_saturation,
      |                                    ^~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:2185:12: error: no matching conversion for functional-style cast from 'float' to '__hip_bfloat16'
 2185 |     return __hip_bfloat16(f);
      |            ^~~~~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit copy constructor) not viable: no known conversion from 'float' to 'const __hip_bfloat16' for 1st argument
  108 | struct __hip_bfloat16 {
      |        ^~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit move constructor) not viable: no known conversion from 'float' to '__hip_bfloat16' for 1st argument
  108 | struct __hip_bfloat16 {
      |        ^~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit default constructor) not viable: requires 0 arguments, but 1 was provided
In file included from ggml/src/ggml-cuda/fattn-wmma-f16.cu:18:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/rocwmma.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors.hpp:93:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/accessors_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/coop_io_config.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/register_layout_transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms.hpp:29:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../transforms_impl.hpp:31:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../dpp_impl.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../vector_util.hpp:30:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../types.hpp:87:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/rocwmma/internal/./layout/../float8.hpp:118:
In file included from /opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/hip_fp8.h:30:
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:2709:36: error: cannot convert 'const __hip_bfloat16' to 'float' without a conversion operator
 2709 |       : __x(__hip_cvt_float_to_fp8(static_cast<float>(f), __default_saturation,
      |                                    ^~~~~~~~~~~~~~~~~~~~~
/opt/rocm-6.4.2/lib/llvm/bin/../../../include/hip/amd_detail/amd_hip_fp8.h:2760:12: error: no matching conversion for functional-style cast from 'float' to '__hip_bfloat16'
 2760 |     return __hip_bfloat16(f);
      |            ^~~~~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit copy constructor) not viable: no known conversion from 'float' to 'const __hip_bfloat16' for 1st argument
  108 | struct __hip_bfloat16 {
      |        ^~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit move constructor) not viable: no known conversion from 'float' to '__hip_bfloat16' for 1st argument
  108 | struct __hip_bfloat16 {
      |        ^~~~~~~~~~~~~~
/usr/include/hip/amd_detail/amd_hip_bf16.h:108:8: note: candidate constructor (the implicit default constructor) not viable: requires 0 arguments, but 1 was provided
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.o ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.o ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.o ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.o ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.o ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.o ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.o ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.o ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu
/opt/rocm-6.4.2/llvm/bin/clang++ -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only  -DNDEBUG -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE -DGGML_HIP_ROCWMMA_FATTN -I/opt/rocm/include/rocwmma/ -DGGML_USE_HIPBLAS -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA -DSD_USE_CUBLAS  -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I/opt/rocm-6.4.2/include -I/include  --offload-arch=gfx1100 -DGGML_CUDA_DMMV_X=32  -DGGML_CUDA_MMV_Y=2 -DK_QUANTS_PER_ITERATION=2 -x hip -c -o ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.o ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu
ggml/src/ggml-cuda/fattn-wmma-f16.cu:418:45: error: call to '__hadd' is ambiguous
  418 |                 KQ_rowsum_h2[j0/nwarps].x = __hadd(KQ_rowsum_h2[j0/nwarps].x, val);
      |                                             ^~~~~~
ggml/src/ggml-cuda/fattn-wmma-f16.cu:526:24: note: in instantiation of function template specialization 'flash_attn_ext_f16<64, 16, 4, 64, float, false>' requested here
  526 |         fattn_kernel = flash_attn_ext_f16<
      |                        ^
ggml/src/ggml-cuda/fattn-wmma-f16.cu:548:21: note: in instantiation of function template specialization 'ggml_cuda_flash_attn_ext_wmma_f16_case<64, 16, float>' requested here
  548 |                     ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, float>(ctx, dst);
      |                     ^
/usr/include/hip/amd_detail/amd_device_functions.h:257:39: note: candidate function
  257 | __device__ static inline unsigned int __hadd(int x, int y) {
      |                                       ^
/usr/include/hip/amd_detail/amd_hip_fp16.h:1368:20: note: candidate function
 1368 |             __half __hadd(__half x, __half y)
      |                    ^
ggml/src/ggml-cuda/fattn-wmma-f16.cu:418:45: error: call to '__hadd' is ambiguous
  418 |                 KQ_rowsum_h2[j0/nwarps].x = __hadd(KQ_rowsum_h2[j0/nwarps].x, val);
      |                                             ^~~~~~
ggml/src/ggml-cuda/fattn-wmma-f16.cu:530:24: note: in instantiation of function template specialization 'flash_attn_ext_f16<64, 16, 4, 64, float, true>' requested here
  530 |         fattn_kernel = flash_attn_ext_f16<
      |                        ^
ggml/src/ggml-cuda/fattn-wmma-f16.cu:548:21: note: in instantiation of function template specialization 'ggml_cuda_flash_attn_ext_wmma_f16_case<64, 16, float>' requested here
  548 |                     ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, float>(ctx, dst);
      |                     ^
/usr/include/hip/amd_detail/amd_device_functions.h:257:39: note: candidate function
  257 | __device__ static inline unsigned int __hadd(int x, int y) {
      |                                       ^
/usr/include/hip/amd_detail/amd_hip_fp16.h:1368:20: note: candidate function
 1368 |             __half __hadd(__half x, __half y)
      |                    ^
fatal error: too many errors emitted, stopping now [-ferror-limit=]
20 errors generated when compiling for gfx1100.
make: *** [Makefile:303: ggml/src/ggml-cuda/fattn-wmma-f16.o] Error 1
make: *** Waiting for unfinished jobs....
```


Provide feedback

Saved searches

Use saved searches to filter your results more quickly

ROCM 6.4.2 unable to build #140

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

ROCM 6.4.2 unable to build #140

Description

Metadata

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Issue actions