Merge branch 'master' into fix_tf_invalid_output_delete

DvirDukhan · web-flow · commit f3e815ed1c2f · 2021-10-13T13:10:18.000+03:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -160,6 +160,7 @@ ENDIF()
 IF(BUILD_ORT)
     FIND_LIBRARY(ORT_LIBRARIES NAMES onnxruntime
             PATHS ${depsAbs}/onnxruntime/lib)
+    ADD_SUBDIRECTORY(src/backends/onnx_allocator)
     MESSAGE(STATUS "Found ONNXRuntime Libraries: \"${ORT_LIBRARIES}\")")
     IF (NOT ORT_LIBRARIES)
         MESSAGE(FATAL_ERROR "Could not find ONNXRuntime")
@@ -293,6 +294,7 @@ ENDIF()
 
 IF(BUILD_ORT)
     ADD_LIBRARY(redisai_onnxruntime SHARED $<TARGET_OBJECTS:redisai_onnxruntime_obj>)
+    TARGET_LINK_LIBRARIES(redisai_onnxruntime onnx_allocator ${ORT_LIBRARIES})
     TARGET_LINK_LIBRARIES(redisai_onnxruntime ${ORT_LIBRARIES})
     SET_TARGET_PROPERTIES(redisai_onnxruntime PROPERTIES PREFIX "")
     SET_TARGET_PROPERTIES(redisai_onnxruntime PROPERTIES SUFFIX ".so")
diff --git a/src/backends/backends.c b/src/backends/backends.c
@@ -50,6 +50,8 @@ int RAI_ExportFunc(const char *func_name, void **targetFuncPtr) {
         *targetFuncPtr = Config_GetModelExecutionTimeout;
     } else if (strcmp("GetThreadsCount", func_name) == 0) {
         *targetFuncPtr = BGWorker_GetThreadsCount;
+    } else if (strcmp("GetBackendMemoryLimit", func_name) == 0) {
+        *targetFuncPtr = Config_GetBackendMemoryLimit;
 
         // Export RedisAI low level API functions.
     } else if (strcmp("RedisAI_InitError", func_name) == 0) {
diff --git a/src/backends/backends_api.h b/src/backends/backends_api.h
@@ -37,12 +37,20 @@ BACKENDS_API uintptr_t (*RedisAI_GetThreadsCount)(void);
 BACKENDS_API long long (*RedisAI_GetNumThreadsPerQueue)(void);
 
 /**
- * @return The maximal number of milliseconds that a model run session should run
+ * @return The maximum number of milliseconds that a model run session should run
  * before it is terminated forcefully (load time config).
- * Currently supported only fo onnxruntime backend.
+ * Currently supported only for onnxruntime backend.
  */
 BACKENDS_API long long (*RedisAI_GetModelExecutionTimeout)(void);
 
+/**
+ * @return The maximum amount of memory (in MB) that a backend can consume
+ * for creating and running inference sessions. When the memory limit is exceeded, the
+ * operation is not permitted and an error is returned. A value of 0 means that no
+ * limit is enforced.
+ * Currently supported only for the onnxruntime backend.
+ */
+BACKENDS_API long long (*RedisAI_GetMemoryLimit)(void);
+
 /**
  * The following functions are part of RedisAI low level API (the full low level
  * API is defined in redisai.h). For every function below named "RedisAI_X", its
diff --git a/src/backends/onnx_allocator/CMakeLists.txt b/src/backends/onnx_allocator/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_library(onnx_allocator STATIC onnx_allocator.cpp)  # custom ONNX Runtime allocator, built as a static library
+target_link_libraries(onnx_allocator "${ONNX_LIBRARIES}")  # NOTE(review): the visible top-level CMakeLists defines ORT_LIBRARIES, not ONNX_LIBRARIES - confirm this variable is set somewhere
+set_property(TARGET onnx_allocator PROPERTY CXX_STANDARD 14)  # compile this target as C++14
diff --git a/src/backends/onnx_allocator/onnx_allocator.cpp b/src/backends/onnx_allocator/onnx_allocator.cpp
@@ -0,0 +1,114 @@
+#include "onnx_allocator.h"
+#include "../onnxruntime.h"
+#include "onnxruntime_cxx_api.h"
+#include <atomic>
+
+struct RAIOrtAllocator : OrtAllocator {  // OrtAllocator backed by RedisModule_Alloc/RedisModule_Free, with usage accounting
+    RAIOrtAllocator();   // fills in the OrtAllocator callbacks and records this object as the instance
+    ~RAIOrtAllocator();  // releases cpu_memory_info
+    RAIOrtAllocator(const RAIOrtAllocator&) = delete;  // non-copyable
+    RAIOrtAllocator& operator=(const RAIOrtAllocator&) = delete;
+
+    void* Alloc(size_t size);  // returns a 64-byte aligned buffer; throws Ort::Exception if memory_limit would be exceeded
+    void Free(void* p);        // no-op for nullptr; otherwise releases a pointer previously returned by Alloc
+    const OrtMemoryInfo* Info() const;  // returns cpu_memory_info
+    unsigned long long NumAllocatorAccess() const;  // number of successful Alloc calls plus non-null Free calls so far
+    unsigned long long MemoryInUse() const;         // bytes currently allocated, as reported by RedisModule_MallocSize
+    void SetMemoryLimit(unsigned long long max_memory);  // max_memory is in MB (1 MB = 1,000,000 bytes); 0 disables the limit
+    static RAIOrtAllocator *GetInstance();  // last constructed allocator, or nullptr if none was constructed yet
+
+private:
+    std::atomic<unsigned long long> memory_inuse{0};          // updated by Alloc (add) and Free (subtract)
+    std::atomic<unsigned long long> num_allocator_access{0};  // incremented by Alloc and Free
+    unsigned long long memory_limit = 0;  // in bytes; 0 means that no limit is enforced
+    OrtMemoryInfo* cpu_memory_info;  // created in the constructor, released in the destructor
+    static RAIOrtAllocator* allocator_instance;  // set to `this` by the constructor
+};
+
+RAIOrtAllocator* RAIOrtAllocator::allocator_instance = nullptr;
+
+RAIOrtAllocator::RAIOrtAllocator() {
+    // Populate the C callback table inherited from OrtAllocator. Every callback receives the
+    // OrtAllocator pointer, downcasts it to this class and forwards to the matching method.
+    OrtAllocator::version = ORT_API_VERSION;
+    OrtAllocator::Alloc = [](OrtAllocator* self, size_t nbytes) -> void* {
+        return static_cast<RAIOrtAllocator*>(self)->Alloc(nbytes);
+    };
+    OrtAllocator::Free = [](OrtAllocator* self, void* ptr) -> void {
+        static_cast<RAIOrtAllocator*>(self)->Free(ptr);
+    };
+    OrtAllocator::Info = [](const OrtAllocator* self) -> const OrtMemoryInfo* {
+        return static_cast<const RAIOrtAllocator*>(self)->Info();
+    };
+    // Create the CPU memory descriptor handed out by Info(); a failure here is thrown.
+    Ort::ThrowOnError(Ort::GetApi().CreateCpuMemoryInfo(OrtDeviceAllocator, OrtMemTypeDefault, &cpu_memory_info));
+    // Only a fully constructed allocator becomes the one returned by GetInstance().
+    allocator_instance = this;
+}
+
+RAIOrtAllocator::~RAIOrtAllocator() {
+    // Give back the OrtMemoryInfo obtained from CreateCpuMemoryInfo in the constructor.
+    const OrtApi &ort_api = Ort::GetApi();
+    ort_api.ReleaseMemoryInfo(cpu_memory_info);
+}
+
+// Allocate `size` bytes through the Redis allocator and return a 64-byte aligned address.
+// The real size of the underlying block (as reported by RedisModule_MallocSize) is added to
+// the bytes-in-use counter. Throws Ort::Exception, after freeing the block, if a memory
+// limit is set and this allocation would push the counter above it.
+void* RAIOrtAllocator::Alloc(size_t size) {
+    // Over-allocate by 63 bytes so that a 64-byte aligned address always fits in the block,
+    // plus one pointer-sized slot (right below the returned address) that stores the
+    // address RedisModule_Alloc returned, so Free() can recover it.
+    const size_t offset = 63 + sizeof(void *);
+    void *allocated_address = (void *)RedisModule_Alloc(size + offset);
+    size_t allocated_size = RedisModule_MallocSize(allocated_address);
+    // Check the limit and account for the new block in one atomic step. A separate
+    // load-then-add would let several threads pass the check with the same stale value
+    // and exceed the limit together. On contention compare_exchange_weak refreshes
+    // `in_use` and the limit is re-checked against the up-to-date value.
+    unsigned long long in_use = memory_inuse.load();
+    do {
+        if (memory_limit && in_use + allocated_size > memory_limit) {
+            RedisModule_Free(allocated_address);
+            throw Ort::Exception("Onnxruntime memory limit exceeded, memory allocation failed.", ORT_RUNTIME_EXCEPTION);
+        }
+    } while (!memory_inuse.compare_exchange_weak(in_use, in_use + allocated_size));
+    num_allocator_access.fetch_add(1);
+    // Round (allocated_address + offset) down to a multiple of 64. The result lies inside
+    // the block and leaves at least sizeof(void *) bytes in front of it.
+    void **aligned_address = (void **)(((size_t)(allocated_address) + offset) & ~(size_t)63);
+    // Store the original address right before the aligned one (read back in Free()).
+    aligned_address[-1] = allocated_address;
+    return aligned_address;
+}
+
+void RAIOrtAllocator::Free(void* p) {
+    if (!p) {
+        return;
+    }
+    // The slot just below the aligned pointer holds the address that RedisModule_Alloc
+    // originally returned; that is the address RedisModule_Free has to receive.
+    void **origin_slot = static_cast<void **>(p) - 1;
+    void *raw_block = *origin_slot;
+    // Book-keeping first, while the block is still valid: drop its real size from the
+    // bytes-in-use total and count this call as one more allocator access.
+    const size_t raw_block_size = RedisModule_MallocSize(raw_block);
+    memory_inuse.fetch_sub(raw_block_size);
+    num_allocator_access.fetch_add(1);
+    RedisModule_Free(raw_block);
+}
+
+const OrtMemoryInfo* RAIOrtAllocator::Info() const {
+    // The CPU memory descriptor that was created once, in the constructor.
+    return this->cpu_memory_info;
+}
+
+unsigned long long RAIOrtAllocator::NumAllocatorAccess() const {
+    // Atomic snapshot of the counter that Alloc() and Free() increment.
+    const unsigned long long accesses = num_allocator_access.load();
+    return accesses;
+}
+
+unsigned long long RAIOrtAllocator::MemoryInUse() const {
+    // Atomic snapshot of the number of bytes currently held through this allocator.
+    const unsigned long long bytes_in_use = memory_inuse.load();
+    return bytes_in_use;
+}
+
+void RAIOrtAllocator::SetMemoryLimit(unsigned long long max_memory) {
+    // The limit arrives in MB and is kept internally in bytes (1 MB counted as 10^6 bytes).
+    constexpr unsigned long long bytes_per_mb = 1000000;
+    memory_limit = bytes_per_mb * max_memory;
+}
+
+RAIOrtAllocator *RAIOrtAllocator::GetInstance() {
+    // nullptr until a RAIOrtAllocator has been constructed; afterwards the last one built.
+    return allocator_instance;
+}
+
+OrtAllocator *CreateCustomAllocator(unsigned long long max_memory) {
+    // Heap-allocate the allocator (its constructor also records it as the instance that
+    // GetInstance() returns), apply the limit given in MB, and hand it out through its
+    // OrtAllocator base.
+    RAIOrtAllocator *custom_allocator = new RAIOrtAllocator();
+    custom_allocator->SetMemoryLimit(max_memory);
+    return custom_allocator;
+}
+
+// Return the number of bytes currently allocated through the custom ONNX allocator.
+unsigned long long RAI_GetMemoryInfoORT() {
+    // The allocator only exists once CreateCustomAllocator() has been called. Before that
+    // nothing has been allocated through it, so report 0 instead of dereferencing nullptr.
+    const RAIOrtAllocator *allocator = RAIOrtAllocator::GetInstance();
+    return allocator ? allocator->MemoryInUse() : 0;
+}
+
+// Return how many times the custom ONNX allocator has been accessed (Alloc and Free calls).
+unsigned long long RAI_GetMemoryAccessORT() {
+    // The allocator only exists once CreateCustomAllocator() has been called. Before that
+    // it has never been accessed, so report 0 instead of dereferencing nullptr.
+    const RAIOrtAllocator *allocator = RAIOrtAllocator::GetInstance();
+    return allocator ? allocator->NumAllocatorAccess() : 0;
+}
diff --git a/src/backends/onnx_allocator/onnx_allocator.h b/src/backends/onnx_allocator/onnx_allocator.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include "onnxruntime_c_api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Create the custom allocator that serves onnxruntime allocations from the Redis
+ * allocator and keeps track of the memory in use.
+ * @param max_memory - memory limit in MB; if zero, no memory limit is enforced.
+ * @return The newly created allocator.
+ */
+OrtAllocator *CreateCustomAllocator(unsigned long long max_memory);
+
+/**
+ * @return The number of bytes currently allocated through the custom allocator.
+ */
+unsigned long long RAI_GetMemoryInfoORT(void);
+
+/**
+ * @return The number of accesses (allocations and frees) made to the custom allocator.
+ */
+unsigned long long RAI_GetMemoryAccessORT(void);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/backends/onnxruntime.c b/src/backends/onnxruntime.c
@@ -6,6 +6,7 @@
 #include "util/arr.h"
 #include "backends/onnxruntime.h"
 #include "redis_ai_objects/tensor.h"
+#include "onnx_allocator/onnx_allocator.h"
 
 #include "onnxruntime_c_api.h"
 #include "backends_api.h"
@@ -21,63 +22,7 @@ OrtEnv *env = NULL;
 // For model that run on GPU, onnx will not use the custom allocator (redis allocator), but
 // the onnx allocator for GPU. But for the auxiliary allocations of the input and output names,
 // we will use the custom global allocator for models that run on GPU as well.
-OrtMemoryInfo *mem_info = NULL;
 OrtAllocator *global_allocator = NULL;
-unsigned long long OnnxMemory = 0;
-unsigned long long OnnxMemoryAccessCounter = 0;
-
-const OrtMemoryInfo *AllocatorInfo(const OrtAllocator *allocator) {
-    (void)allocator;
-    const OrtApi *ort = OrtGetApiBase()->GetApi(1);
-    if (mem_info != NULL) {
-        return mem_info;
-    }
-    if (ort->CreateCpuMemoryInfo(OrtDeviceAllocator, OrtMemTypeDefault, &mem_info) != NULL) {
-        return NULL;
-    }
-    return mem_info;
-}
-
-// Allocate address with 64-byte alignment to cope with onnx optimizations.
-void *AllocatorAlloc(OrtAllocator *ptr, size_t size) {
-
-    (void)ptr;
-    // Allocate an additional 63 bytes to ensure that we can return an address which is
-    // 64-byte aligned, and an additional space in the size of a pointer to store
-    // the address that RedisModule_Alloc returns.
-    int offset = 63 + sizeof(void *);
-    void *allocated_address = (void *)RedisModule_Alloc(size + offset);
-    size_t allocated_size = RedisModule_MallocSize(allocated_address);
-    // Update the total number of bytes that onnx is using and the number of accesses
-    // that onnx made to the allocator.
-    atomic_fetch_add(&OnnxMemory, allocated_size);
-    atomic_fetch_add(&OnnxMemoryAccessCounter, 1);
-    // This operation guarantees that p2 is the closest 64-aligned address to (p1+size_t).
-    void **aligned_address = (void **)(((size_t)(allocated_address) + offset) & (~63));
-    // This stores the address p1 right before p2 (so we can retrieve it when we free).
-    aligned_address[-1] = allocated_address;
-    return aligned_address;
-}
-
-void AllocatorFree(OrtAllocator *ptr, void *aligned_address) {
-    (void)ptr;
-    if (aligned_address == NULL) {
-        return;
-    }
-    // Retrieve the address that we originally received from RedisModule_Alloc
-    // (this is the address that we need to sent to RedisModule_Free).
-    void *allocated_address = ((void **)aligned_address)[-1];
-    size_t allocated_size = RedisModule_MallocSize(allocated_address);
-    // Update the total number of bytes that onnx is using and the number of accesses
-    // that onnx made to the allocator.
-    atomic_fetch_sub(&OnnxMemory, allocated_size);
-    atomic_fetch_add(&OnnxMemoryAccessCounter, 1);
-    return RedisModule_Free(allocated_address);
-}
-
-unsigned long long RAI_GetMemoryInfoORT() { return OnnxMemory; }
-
-unsigned long long RAI_GetMemoryAccessORT() { return OnnxMemoryAccessCounter; }
 
 int RAI_InitBackendORT(int (*get_api_fn)(const char *, void **)) {
     // Export redis callbacks.
@@ -95,6 +40,7 @@ int RAI_InitBackendORT(int (*get_api_fn)(const char *, void **)) {
     get_api_fn("GetThreadId", ((void **)&RedisAI_GetThreadId));
     get_api_fn("GetNumThreadsPerQueue", ((void **)&RedisAI_GetNumThreadsPerQueue));
     get_api_fn("GetModelExecutionTimeout", ((void **)&RedisAI_GetModelExecutionTimeout));
+    get_api_fn("GetBackendMemoryLimit", ((void **)&RedisAI_GetMemoryLimit));
     get_api_fn("GetThreadsCount", ((void **)&RedisAI_GetThreadsCount));
 
     // Create a global array of onnx runSessions, with an entry for every working thread.
@@ -389,8 +335,9 @@ RAI_Model *RAI_ModelCreateORT(RAI_Backend backend, const char *devicestr, RAI_Mo
     // allocating buffers when creating and running models that run on CPU, and for allocations of
     // models inputs and outputs names (for both models that run on CPU and GPU)
     if (env == NULL) {
-        ONNX_VALIDATE_STATUS(ort->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "test", &env))
-        ONNX_VALIDATE_STATUS(ort->GetAllocatorWithDefaultOptions(&global_allocator));
+        ONNX_VALIDATE_STATUS(ort->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "RedisAI", &env))
+        global_allocator = CreateCustomAllocator(RedisAI_GetMemoryLimit());
+        ONNX_VALIDATE_STATUS(ort->RegisterAllocator(env, global_allocator))
     }
 
     ONNX_VALIDATE_STATUS(ort->CreateSessionOptions(&session_options))
diff --git a/src/backends/onnxruntime.h b/src/backends/onnxruntime.h
@@ -5,10 +5,6 @@
 #include "redis_ai_objects/model.h"
 #include "execution/execution_contexts/execution_ctx.h"
 
-unsigned long long RAI_GetMemoryInfoORT(void);
-
-unsigned long long RAI_GetMemoryAccessORT(void);
-
 int RAI_InitBackendORT(int (*get_api_fn)(const char *, void **));
 
 RAI_Model *RAI_ModelCreateORT(RAI_Backend backend, const char *devicestr, RAI_ModelOpts opts,
diff --git a/src/config/config.c b/src/config/config.c
@@ -16,6 +16,8 @@ long long ThreadPoolSizePerQueue = 1;     //  Number of working threads for devi
 
 long long ModelExecutionTimeout = 5000; //  The maximum time in milliseconds
                                         //  before killing onnx run session.
+long long BackendMemoryLimit = 0;       //  The maximum amount of memory in MB that a backend
+                                        //  is allowed to consume (0 means no limit is enforced).
 
 static int _Config_LoadTimeParamParse(RedisModuleCtx *ctx, const char *key, const char *val,
                                       RedisModuleString *rsval) {
@@ -56,6 +58,11 @@ static int _Config_LoadTimeParamParse(RedisModuleCtx *ctx, const char *key, cons
         if (ret == REDISMODULE_OK) {
             RedisModule_Log(ctx, "notice", "%s: %s", REDISAI_INFOMSG_MODEL_EXECUTION_TIMEOUT, val);
         }
+    } else if (strcasecmp((key), "BACKEND_MEMORY_LIMIT") == 0) {
+        ret = Config_SetBackendMemoryLimit(rsval);
+        if (ret == REDISMODULE_OK) {
+            RedisModule_Log(ctx, "notice", "%s: %s", REDISAI_INFOMSG_BACKEND_MEMORY_LIMIT, val);
+        }
     } else if (strcasecmp((key), "BACKENDSPATH") == 0) {
         // already taken care of
     } else {
@@ -74,6 +81,8 @@ long long Config_GetNumThreadsPerQueue() { return ThreadPoolSizePerQueue; }
 
 long long Config_GetModelExecutionTimeout() { return ModelExecutionTimeout; }
 
+// Accessor for the BACKEND_MEMORY_LIMIT load-time setting (value is in MB).
+long long Config_GetBackendMemoryLimit() {
+    const long long limit_mb = BackendMemoryLimit;
+    return limit_mb;
+}
+
 char *Config_GetBackendsPath() { return BackendsPath; }
 
 int Config_LoadBackend(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
@@ -160,6 +169,16 @@ int Config_SetModelExecutionTimeout(RedisModuleString *timeout) {
     return REDISMODULE_OK;
 }
 
+// Parse and store the BACKEND_MEMORY_LIMIT load-time parameter (value in MB).
+// Zero is accepted and means that no memory limit is enforced. A negative or
+// non-numeric value is rejected with REDISMODULE_ERR and leaves the setting unchanged.
+int Config_SetBackendMemoryLimit(RedisModuleString *memory_limit) {
+    long long val;
+    int result = RedisModule_StringToLongLong(memory_limit, &val);
+    // Only negative values are invalid: 0 is the documented "no limit" setting.
+    if (result != REDISMODULE_OK || val < 0) {
+        return REDISMODULE_ERR;
+    }
+    BackendMemoryLimit = val;
+    return REDISMODULE_OK;
+}
+
 int Config_SetLoadTimeParams(RedisModuleCtx *ctx, RedisModuleString *const *argv, int argc) {
     if (argc > 0 && argc % 2 != 0) {
         RedisModule_Log(ctx, "warning",
diff --git a/src/config/config.h b/src/config/config.h
@@ -26,6 +26,7 @@ typedef enum { RAI_DEVICE_CPU = 0, RAI_DEVICE_GPU = 1 } RAI_Device;
 #define REDISAI_INFOMSG_INTER_OP_PARALLELISM    "Setting INTER_OP_PARALLELISM parameter to"
 #define REDISAI_INFOMSG_MODEL_CHUNK_SIZE        "Setting MODEL_CHUNK_SIZE parameter to"
 #define REDISAI_INFOMSG_MODEL_EXECUTION_TIMEOUT "Setting MODEL_EXECUTION_TIMEOUT parameter to"
+#define REDISAI_INFOMSG_BACKEND_MEMORY_LIMIT    "Setting BACKEND_MEMORY_LIMIT parameter to"
 
 /**
  * Get number of threads used for parallelism between independent operations, by
@@ -56,6 +57,13 @@ long long Config_GetNumThreadsPerQueue(void);
  */
 long long Config_GetModelExecutionTimeout(void);
 
+/**
+ * @return The memory limit (in MB) for a backend. This is the maximum amount of memory
+ * that can be consumed by the backend for creating and running sessions; a value of 0
+ * means that no limit is enforced.
+ * Currently supported only for the onnxruntime backend.
+ */
+long long Config_GetBackendMemoryLimit(void);
+
 /**
  * @return Returns the backends path string.
  */
@@ -113,11 +121,19 @@ int Config_SetModelChunkSize(RedisModuleString *chunk_size_string);
 
 /**
  * Set the maximum time in ms that onnx backend allow running a model.
- * @param onnx_max_runtime - string containing the max runtime (in ms)
+ * @param timeout - string containing the max runtime (in ms)
  * @return REDISMODULE_OK on success, or REDISMODULE_ERR  if failed
  */
 int Config_SetModelExecutionTimeout(RedisModuleString *timeout);
 
+/**
+ * Set the memory limit in MB for backend allocations.
+ * @param memory_limit - string containing the maximum memory consumption (in MB) allowed
+ * for a backend. If the value is zero, no memory limit is enforced.
+ * @return REDISMODULE_OK on success, or REDISMODULE_ERR if failed
+ */
+int Config_SetBackendMemoryLimit(RedisModuleString *memory_limit);
+
 /**
  * Load time configuration parser
  * @param ctx Context in which Redis modules operate
diff --git a/src/redisai.c b/src/redisai.c
@@ -1167,6 +1167,7 @@ void RAI_moduleInfoFunc(RedisModuleInfoCtx *ctx, int for_crash_report) {
                                      Config_GetBackendsIntraOpParallelism());
     RedisModule_InfoAddFieldLongLong(ctx, "model_execution_timeout",
                                      Config_GetModelExecutionTimeout());
+    RedisModule_InfoAddFieldLongLong(ctx, "backend_memory_limit", Config_GetBackendMemoryLimit());
     _moduleInfo_getBackendsInfo(ctx);
 
     struct rusage self_ru, c_ru;
diff --git a/src/util/arr.h b/src/util/arr.h
diff --git a/tests/flow/test_data/inception-v2-9.onnx b/tests/flow/test_data/inception-v2-9.onnx
diff --git a/tests/flow/tests_common.py b/tests/flow/tests_common.py
diff --git a/tests/flow/tests_onnx.py b/tests/flow/tests_onnx.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+add_library(onnx_allocator STATIC onnx_allocator.cpp)`
	`2`	`+target_link_libraries(onnx_allocator "${ONNX_LIBRARIES}")`
	`3`	`+set_property(TARGET onnx_allocator PROPERTY CXX_STANDARD 14)`