forked from triton-inference-server/server
-
Notifications
You must be signed in to change notification settings - Fork 0
/
shared_memory_manager.h
199 lines (180 loc) · 8.69 KB
/
shared_memory_manager.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
// Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include "triton/core/tritonserver.h"
#define TRITONJSON_STATUSTYPE TRITONSERVER_Error*
#define TRITONJSON_STATUSRETURN(M) \
return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, (M).c_str())
#define TRITONJSON_STATUSSUCCESS nullptr
#include "triton/common/triton_json.h"
#ifdef TRITON_ENABLE_GPU
#include <cuda.h>
#include <cuda_runtime_api.h>
#endif // TRITON_ENABLE_GPU
namespace triton { namespace server {
/// Keeps track of the system (CPU) and CUDA (GPU) shared memory blocks that
/// have been registered, keyed by block name. Access to the underlying map is
/// guarded by an internal mutex.
class SharedMemoryManager {
 public:
  SharedMemoryManager() = default;
  ~SharedMemoryManager();

  // The std::mutex member already makes this class implicitly non-copyable.
  // Since a destructor is user-declared, state the copy operations explicitly
  // so the intent is visible and misuse produces a clear diagnostic.
  SharedMemoryManager(const SharedMemoryManager&) = delete;
  SharedMemoryManager& operator=(const SharedMemoryManager&) = delete;

  /// A struct that records the shared memory regions registered by the shared
  /// memory manager.
  ///
  /// NOTE(review): CUDASharedMemoryInfo derives from this struct and the
  /// manager's map stores std::shared_ptr<SharedMemoryInfo>. This struct has no
  /// virtual destructor, so a derived object must never be destroyed through a
  /// raw SharedMemoryInfo*.
  struct SharedMemoryInfo {
    SharedMemoryInfo(
        const std::string& name, const std::string& shm_key,
        const size_t offset, const size_t byte_size, int shm_fd,
        void* mapped_addr, const TRITONSERVER_MemoryType kind,
        const int64_t device_id)
        : name_(name), shm_key_(shm_key), offset_(offset),
          byte_size_(byte_size), shm_fd_(shm_fd), mapped_addr_(mapped_addr),
          kind_(kind), device_id_(device_id)
    {
    }

    // The name the block was registered under.
    std::string name_;
    // The name of the shared memory object containing the block.
    std::string shm_key_;
    // The offset within the shared memory object to the start of the block.
    size_t offset_;
    // The size, in bytes, of the block.
    size_t byte_size_;
    // Presumably the file descriptor of the opened shared memory object --
    // TODO confirm against the implementation file.
    int shm_fd_;
    // Presumably the address at which the block is mapped into this process
    // -- TODO confirm against the implementation file.
    void* mapped_addr_;
    // The memory type of the block.
    TRITONSERVER_MemoryType kind_;
    // The device id associated with the block.
    int64_t device_id_;
  };

#ifdef TRITON_ENABLE_GPU
  /// A SharedMemoryInfo that additionally stores a copy of the CUDA IPC
  /// memory handle of the block.
  struct CUDASharedMemoryInfo : SharedMemoryInfo {
    /// \param cuda_ipc_handle The handle to copy into this record. It is
    /// dereferenced unconditionally, so it must not be null.
    CUDASharedMemoryInfo(
        const std::string& name, const std::string& shm_key,
        const size_t offset, const size_t byte_size, int shm_fd,
        void* mapped_addr, const TRITONSERVER_MemoryType kind,
        const int64_t device_id, const cudaIpcMemHandle_t* cuda_ipc_handle)
        : SharedMemoryInfo(
              name, shm_key, offset, byte_size, shm_fd, mapped_addr, kind,
              device_id),
          cuda_ipc_handle_(*cuda_ipc_handle)
    {
    }

    // A by-value copy of the handle passed to the constructor.
    cudaIpcMemHandle_t cuda_ipc_handle_;
  };
#endif

  /// Add a shared memory block representing shared memory in system
  /// (CPU) memory to the manager. Return TRITONSERVER_ERROR_ALREADY_EXISTS
  /// if a shared memory block of the same name already exists in the manager.
  /// \param name The name of the memory block.
  /// \param shm_key The name of the posix shared memory object
  /// containing the block of memory.
  /// \param offset The offset within the shared memory object to the
  /// start of the block.
  /// \param byte_size The size, in bytes, of the block.
  /// \return a TRITONSERVER_Error indicating success or failure.
  TRITONSERVER_Error* RegisterSystemSharedMemory(
      const std::string& name, const std::string& shm_key, const size_t offset,
      const size_t byte_size);

#ifdef TRITON_ENABLE_GPU
  /// Add a shared memory block representing shared memory in CUDA
  /// (GPU) memory to the manager. Return TRITONSERVER_ERROR_ALREADY_EXISTS
  /// if a shared memory block of the same name already exists in the manager.
  /// \param name The name of the memory block.
  /// \param cuda_shm_handle The unique memory handle to the cuda shared
  /// memory block.
  /// \param byte_size The size, in bytes, of the block.
  /// \param device_id The GPU number the shared memory region is in.
  /// \return a TRITONSERVER_Error indicating success or failure.
  TRITONSERVER_Error* RegisterCUDASharedMemory(
      const std::string& name, const cudaIpcMemHandle_t* cuda_shm_handle,
      const size_t byte_size, const int device_id);
#endif  // TRITON_ENABLE_GPU

  /// Get the access information for the shared memory block
  /// with the specified name. Return TRITONSERVER_ERROR_NOT_FOUND
  /// if the named block doesn't exist.
  /// \param name The name of the shared memory block to get.
  /// \param offset The offset in the block.
  /// \param byte_size The byte size to request for the shm region.
  /// \param shm_mapped_addr Returns the pointer to the shared
  /// memory block with the specified name and offset.
  /// \param memory_type Returns the type of the memory.
  /// \param device_id Returns the device id associated with the
  /// memory block.
  /// \param shm_info Returns a read-only shared pointer to the shared memory
  /// block's information. Holding this pointer increases the usage count of
  /// the block, preventing unregistration while the reference is held. The
  /// reference must be cleared or set to nullptr when no longer needed, to
  /// decrease the count and allow unregistration.
  /// \return a TRITONSERVER_Error indicating success or failure.
  TRITONSERVER_Error* GetMemoryInfo(
      const std::string& name, size_t offset, size_t byte_size,
      void** shm_mapped_addr, TRITONSERVER_MemoryType* memory_type,
      int64_t* device_id, std::shared_ptr<const SharedMemoryInfo>* shm_info);

#ifdef TRITON_ENABLE_GPU
  /// Get the CUDA memory handle associated with the block name.
  /// Return TRITONSERVER_ERROR_NOT_FOUND if the named block doesn't exist.
  /// \param name The name of the shared memory block to get.
  /// \param cuda_mem_handle Returns the cuda memory handle associated with
  /// the memory block.
  /// \return a TRITONSERVER_Error indicating success or failure.
  TRITONSERVER_Error* GetCUDAHandle(
      const std::string& name, cudaIpcMemHandle_t** cuda_mem_handle);
#endif

  /// Populates the status of active system/CUDA shared memory regions
  /// in the status JSON. If 'name' is empty then return status of all
  /// active system/CUDA shared memory regions as specified by 'memory_type'.
  /// \param name The name of the shared memory block to get the status of.
  /// \param memory_type The type of memory to get the status of.
  /// \param shm_status Returns status of active shared memory blocks in JSON.
  /// \return a TRITONSERVER_Error indicating success or failure.
  TRITONSERVER_Error* GetStatus(
      const std::string& name, TRITONSERVER_MemoryType memory_type,
      triton::common::TritonJson::Value* shm_status);

  /// Removes the named shared memory block of the specified type from
  /// the manager. Any future attempt to get the details of this block
  /// will result in an error until another block with the same name is
  /// added to the manager.
  /// \param name The name of the shared memory block to remove.
  /// \param memory_type The type of memory to unregister.
  /// \return a TRITONSERVER_Error indicating success or failure.
  TRITONSERVER_Error* Unregister(
      const std::string& name, TRITONSERVER_MemoryType memory_type);

  /// Unregister all shared memory blocks of specified type from the manager.
  /// \param memory_type The type of memory to unregister.
  /// \return a TRITONSERVER_Error indicating success or failure.
  TRITONSERVER_Error* UnregisterAll(TRITONSERVER_MemoryType memory_type);

 private:
  /// A helper function to remove the named shared memory blocks of
  /// specified type.
  /// \param name The name of the shared memory block to remove.
  /// \param memory_type The type of memory to unregister.
  /// \return a TRITONSERVER_Error indicating success or failure.
  // NOTE(review): presumably expects the caller to already hold mu_ --
  // confirm against the implementation file.
  TRITONSERVER_Error* UnregisterHelper(
      const std::string& name, TRITONSERVER_MemoryType memory_type);

  using SharedMemoryStateMap =
      std::map<std::string, std::shared_ptr<SharedMemoryInfo>>;

  // A map between the name and the details of the associated
  // shared memory block
  SharedMemoryStateMap shared_memory_map_;

  // A mutex to protect the concurrent access to shared_memory_map_
  std::mutex mu_;
};
}} // namespace triton::server