diff --git a/CMakeLists.txt b/CMakeLists.txt index a72f42c1fb651..57ae4c2df7cda 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -182,10 +182,15 @@ if (LLAMA_METAL) # note: this is the only way I found to disable fast-math in Metal. it's ugly, but at least it works # disabling fast math is needed in order to pass tests/test-backend-ops # note: adding -fno-inline fixes the tests when using MTL_SHADER_VALIDATION=1 + set(XC_FLAGS -fno-fast-math -fno-inline -g) + if (LLAMA_QKK_64) + set(XC_FLAGS ${XC_FLAGS} -DQK_K=64) + endif() + add_custom_command( OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml.metallib - COMMAND xcrun -sdk macosx metal -fno-fast-math -fno-inline -g -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air - COMMAND xcrun -sdk macosx metallib ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml.metallib + COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air + COMMAND xcrun -sdk macosx metallib ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml.metallib DEPENDS ggml-metal.metal COMMENT "Compiling Metal kernels" ) diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index b115299c0ce30..eff063b2d6dfe 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -15,19 +15,18 @@ #include #include - static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) { size_t size = ggml_nelements(tensor); std::vector data(size); #if 0 - std::default_random_engine generator(rd()); + static std::default_random_engine generator(1234); std::uniform_real_distribution distribution(min, max); for (size_t i = 0; i < size; i++) { data[i] = distribution(generator); } -#endif +#else auto init_thread = [&](size_t start, size_t end) { std::random_device rd; std::default_random_engine generator(rd()); @@ -49,6 +48,7 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m for (auto & t : threads) { t.join(); } +#endif if (tensor->type == GGML_TYPE_F32 || tensor->type == GGML_TYPE_I32) { ggml_backend_tensor_set(tensor, data.data(), 0, size * sizeof(float)); @@ -437,7 +437,7 @@ struct test_case { double err = nmse(f1.data(), f2.data(), f1.size()); if (err > ud->max_err) { printf("[%s] NMSE = %f ", ggml_op_desc(t1), err); - //for (int i = 0; i < f1.size(); i++) { + //for (int i = 0; i < (int) f1.size(); i++) { // printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]); //} //printf("\n");