files : reorganize + update CMake
ggml-ci
ggerganov committed Jun 25, 2024
1 parent 46b9bee commit 8be8952
Showing 39 changed files with 13,429 additions and 14,588 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -11,6 +11,7 @@ CMakeSettings.json
 .vscode/
 .clangd
 
+.venv/
 .exrc
 .cache
 .DS_Store
332 changes: 173 additions & 159 deletions CMakeLists.txt

Large diffs are not rendered by default.

100 changes: 100 additions & 0 deletions cmake/FindSIMD.cmake
@@ -0,0 +1,100 @@
include(CheckCSourceRuns)

set(AVX_CODE "
    #include <immintrin.h>
    int main()
    {
        __m256 a;
        a = _mm256_set1_ps(0);
        return 0;
    }
")

set(AVX512_CODE "
    #include <immintrin.h>
    int main()
    {
        __m512i a = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0);
        __m512i b = a;
        __mmask64 equality_mask = _mm512_cmp_epi8_mask(a, b, _MM_CMPINT_EQ);
        return 0;
    }
")

set(AVX2_CODE "
    #include <immintrin.h>
    int main()
    {
        __m256i a = {0};
        a = _mm256_abs_epi16(a);
        __m256i x;
        _mm256_extract_epi64(x, 0); // we rely on this in our AVX2 code
        return 0;
    }
")

set(FMA_CODE "
    #include <immintrin.h>
    int main()
    {
        __m256 acc = _mm256_setzero_ps();
        const __m256 d = _mm256_setzero_ps();
        const __m256 p = _mm256_setzero_ps();
        acc = _mm256_fmadd_ps( d, p, acc );
        return 0;
    }
")

macro(check_sse type flags)
    set(__FLAG_I 1)
    set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
    foreach (__FLAG ${flags})
        if (NOT ${type}_FOUND)
            set(CMAKE_REQUIRED_FLAGS ${__FLAG})
            check_c_source_runs("${${type}_CODE}" HAS_${type}_${__FLAG_I})
            if (HAS_${type}_${__FLAG_I})
                set(${type}_FOUND TRUE CACHE BOOL "${type} support")
                set(${type}_FLAGS "${__FLAG}" CACHE STRING "${type} flags")
            endif()
            math(EXPR __FLAG_I "${__FLAG_I}+1")
        endif()
    endforeach()
    set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})

    if (NOT ${type}_FOUND)
        set(${type}_FOUND FALSE CACHE BOOL "${type} support")
        set(${type}_FLAGS "" CACHE STRING "${type} flags")
    endif()

    mark_as_advanced(${type}_FOUND ${type}_FLAGS)
endmacro()

# flags are for MSVC only!
check_sse("AVX" " ;/arch:AVX")
if (NOT ${AVX_FOUND})
set(GGML_AVX OFF)
else()
set(GGML_AVX ON)
endif()

check_sse("AVX2" " ;/arch:AVX2")
check_sse("FMA" " ;/arch:AVX2")
if ((NOT ${AVX2_FOUND}) OR (NOT ${FMA_FOUND}))
set(GGML_AVX2 OFF)
else()
set(GGML_AVX2 ON)
endif()

check_sse("AVX512" " ;/arch:AVX512")
if (NOT ${AVX512_FOUND})
set(GGML_AVX512 OFF)
else()
set(GGML_AVX512 ON)
endif()
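
For context, this new module uses the standard compile-and-run detection pattern: check_c_source_runs builds each snippet above under a candidate flag, and the check_sse macro caches <type>_FOUND / <type>_FLAGS for the first flag that works (as the comment notes, the flag lists are MSVC-only; with other compilers only the empty first entry can succeed, and only if the default flags already enable the instruction set). Below is a minimal, hypothetical sketch of how a consuming CMakeLists.txt could use the module; the project and target names are invented for illustration and are not part of this commit.

# Hypothetical usage sketch (not part of this commit): detect SIMD support
# with cmake/FindSIMD.cmake and forward the discovered flags to a target.
cmake_minimum_required(VERSION 3.14)
project(simd_demo C)

# Make cmake/FindSIMD.cmake visible to include()
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
include(FindSIMD) # runs the checks and sets GGML_AVX / GGML_AVX2 / GGML_AVX512

add_executable(simd_demo main.c)

# Apply the strongest level detected at configure time.
if (GGML_AVX512)
    target_compile_options(simd_demo PRIVATE ${AVX512_FLAGS})
elseif (GGML_AVX2)
    target_compile_options(simd_demo PRIVATE ${AVX2_FLAGS} ${FMA_FLAGS})
elseif (GGML_AVX)
    target_compile_options(simd_demo PRIVATE ${AVX_FLAGS})
endif()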
35 changes: 2 additions & 33 deletions include/ggml.h
@@ -591,11 +591,7 @@ extern "C" {
         struct ggml_tensor * grad;
         struct ggml_tensor * src[GGML_MAX_SRC];
 
-        // performance
-        int perf_runs;
-        int64_t perf_cycles;
-        int64_t perf_time_us;
-
+        // source tensor and offset for views
         struct ggml_tensor * view_src;
         size_t view_offs;
 
@@ -605,7 +601,7 @@ extern "C" {
 
         void * extra; // extra things e.g. for ggml-cuda.cu
 
-        char padding[8];
+        // char padding[4];
     };
 
     static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
@@ -652,11 +648,6 @@ extern "C" {
         struct ggml_hash_set visited_hash_table;
 
         enum ggml_cgraph_eval_order order;
-
-        // performance
-        int perf_runs;
-        int64_t perf_cycles;
-        int64_t perf_time_us;
     };
 
     // scratch buffer
@@ -673,28 +664,6 @@ extern "C" {
         bool no_alloc; // don't allocate memory for the tensor data
     };
 
-
-    // compute types
-
-    // NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
-    // This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
-    enum ggml_task_type {
-        GGML_TASK_TYPE_INIT = 0,
-        GGML_TASK_TYPE_COMPUTE,
-        GGML_TASK_TYPE_FINALIZE,
-    };
-
-    struct ggml_compute_params {
-        enum ggml_task_type type;
-
-        // ith = thread index, nth = number of threads
-        int ith, nth;
-
-        // work buffer for all threads
-        size_t wsize;
-        void * wdata;
-    };
-
     // numa strategies
     enum ggml_numa_strategy {
         GGML_NUMA_STRATEGY_DISABLED = 0,
76 changes: 0 additions & 76 deletions include/ggml/ggml-alloc.h

This file was deleted.

Diffs for the remaining changed files are not rendered.
