|
33 | 33 | #include "exception/InvalidArgumentException.h" |
34 | 34 | #include "utils/ColumnSizeCSVReader.h" |
35 | 35 | #include <map> |
36 | | - |
| 36 | +#include "physical/BufferPool/Bitmap.h" |
| 37 | +#include "physical/BufferPool/BufferPoolEntry.h" |
| 38 | +#include <mutex> |
| 39 | +#include <thread> |
| 40 | +#include <cstdio> |
37 | 41 | // When allocating the buffer pool, we use the column sizes of the first pxl file. Because |
38 | 42 | // the remaining pxl files may be larger than the first one, we allocate some extra |
39 | 43 | // space (10MB) for each column. |
40 | 44 | // TODO: how to evaluate the maximal pool size |
41 | | -#define EXTRA_POOL_SIZE 3*1024*1024 |
| 45 | +#define EXTRA_POOL_SIZE (10*1024*1024) // extra bytes per column; parenthesized so the macro expands as a single value inside larger expressions (e.g. x / EXTRA_POOL_SIZE)
42 | 46 |
|
43 | 47 | class DirectUringRandomAccessFile; |
44 | 48 | // Buffer pool used only through static members (the default constructor is private). NOTE(review): apart from bufferPoolMutex, the state declared below is thread_local, i.e. one copy per thread rather than shared — confirm this is intended |
45 | 49 | class BufferPool |
46 | 50 | { |
47 | 51 | public: |
| 52 | + // Nested class that manages a buffer pool entry together with its attributes (ring index, current size, offset, state); new instances start in State::InitizaledNotAllocated |
| 53 | + class BufferPoolManagedEntry { |
| 54 | + public: |
| 55 | + enum class State{ |
| 56 | + InitizaledNotAllocated, |
| 57 | + AllocatedAndInUse, |
| 58 | + UselessButNotFree |
| 59 | + }; |
| 60 | + private: |
| 61 | + std::shared_ptr<BufferPoolEntry> bufferPoolEntry; // smart pointer to the buffer pool entry |
| 62 | + int ring_index; // ring buffer index |
| 63 | + size_t current_size; // size currently in use |
| 64 | + int offset; // offset. NOTE(review): declared int, but the constructor initialises it from an off_t parameter — possible narrowing, confirm |
| 65 | + State state; |
| 66 | + |
| 67 | + |
| 68 | + |
| 69 | + public: |
| 70 | + |
| 71 | + BufferPoolManagedEntry(std::shared_ptr<BufferPoolEntry> entry, |
| 72 | + int ringIdx, |
| 73 | + size_t currSize, |
| 74 | + off_t off) |
| 75 | + : bufferPoolEntry(std::move(entry)), |
| 76 | + ring_index(ringIdx), |
| 77 | + current_size(currSize), |
| 78 | + offset(off) , |
| 79 | + state(State::InitizaledNotAllocated){} |
| 80 | + |
| 81 | + std::shared_ptr<BufferPoolEntry> getBufferPoolEntry() const { |
| 82 | + return bufferPoolEntry; |
| 83 | + } |
| 84 | + |
| 85 | + int getRingIndex() const { |
| 86 | + return ring_index; |
| 87 | + } |
| 88 | + |
| 89 | + void setRingIndex(int index) { |
| 90 | + ring_index = index; |
| 91 | + } |
| 92 | + |
| 93 | + size_t getCurrentSize() const { |
| 94 | + return current_size; |
| 95 | + } |
| 96 | + |
| 97 | + void setCurrentSize(size_t size) { |
| 98 | + current_size = size; |
| 99 | + } |
| 100 | + |
| 101 | + int getOffset() const { |
| 102 | + return offset; |
| 103 | + } |
| 104 | + |
| 105 | + void setOffset(int off) { |
| 106 | + offset = off; |
| 107 | + } |
| 108 | + |
| 109 | + // Get the current state |
| 110 | + State getStatus() const { |
| 111 | + return state; |
| 112 | + } |
| 113 | + |
| 114 | + // Set the state |
| 115 | + void setStatus(State newStatus) { |
| 116 | + state = newStatus; |
| 117 | + } |
| 118 | + }; |
| 119 | + |
48 | 120 | static void |
49 | 121 | Initialize(std::vector <uint32_t> colIds, std::vector <uint64_t> bytes, std::vector <std::string> columnNames); |
50 | 122 |
|
51 | | - static std::shared_ptr <ByteBuffer> GetBuffer(uint32_t colId); |
| 123 | + static void |
| 124 | + InitializeBuffers(); |
| 125 | + |
| 126 | + static std::shared_ptr <ByteBuffer> GetBuffer(uint32_t colId,uint64_t byte,std::string columnName); |
52 | 127 |
|
53 | | - static int64_t GetBufferId(uint32_t index); |
| 128 | + static int64_t GetBufferId(); |
54 | 129 |
|
55 | 130 | static void Switch(); |
56 | 131 |
|
57 | 132 | static void Reset(); |
58 | 133 |
|
| 134 | + static std::shared_ptr<BufferPoolEntry> AddNewBuffer(size_t size); |
| 135 | + |
| 136 | + static int getRingIndex(uint32_t colId); |
| 137 | + |
| 138 | + static std::shared_ptr<ByteBuffer> AllocateNewBuffer(std::shared_ptr<BufferPoolManagedEntry> currentBufferManagedEntry, uint32_t colId,uint64_t byte,std::string columnName); |
| 139 | + |
| 140 | + static std::shared_ptr<ByteBuffer> ReusePreviousBuffer(std::shared_ptr<BufferPoolManagedEntry> currentBufferManagedEntry,uint32_t colId,uint64_t byte,std::string columnName); |
| 141 | + |
| 142 | + static void PrintStats() { |
| 143 | + // Identify the calling thread; its hashed id prefixes every line printed below |
| 144 | + std::thread::id tid = std::this_thread::get_id(); |
| 145 | + |
| 146 | + printf("线程 %zu -> 全局缓冲区使用: %ld / %ld\n", |
| 147 | + std::hash<std::thread::id>{}(tid), // converted to an integer for readability |
| 148 | + global_used_size, global_free_size); |
| 149 | + |
| 150 | + // Per-thread statistics for buffer slots 0 and 1 |
| 151 | + printf("线程 %zu -> Buffer0使用: %zu, 缓冲区数量: %d\n", |
| 152 | + std::hash<std::thread::id>{}(tid), |
| 153 | + thread_local_used_size[0], thread_local_buffer_count[0]); |
| 154 | + |
| 155 | + printf("线程 %zu -> Buffer1使用: %zu, 缓冲区数量: %d\n", |
| 156 | + std::hash<std::thread::id>{}(tid), |
| 157 | + thread_local_used_size[1], thread_local_buffer_count[1]); |
| 158 | + } |
59 | 159 | private: |
60 | 160 | BufferPool() = default; |
| 161 | + // global (a plain static, not thread_local) |
| 162 | + static std::mutex bufferPoolMutex; |
61 | 163 |
|
| 164 | + // thread local. NOTE(review): global_used_size / global_free_size are declared thread_local despite the "global" prefix, and PrintStats reports them under a "global" label — confirm intended |
| 165 | + static thread_local bool isInitialized; |
| 166 | + static thread_local std::vector <std::shared_ptr<BufferPoolEntry>> registeredBuffers[2]; |
| 167 | + static thread_local long global_used_size; |
| 168 | + static thread_local long global_free_size; |
| 169 | + static thread_local std::shared_ptr <DirectIoLib> directIoLib; |
| 170 | + static thread_local int nextRingIndex; |
| 171 | + static thread_local std::shared_ptr<BufferPoolEntry> nextEmptyBufferPoolEntry[2]; |
62 | 172 | static thread_local int colCount; |
63 | | - static thread_local std::map<uint32_t, uint64_t> |
64 | | - nrBytes; |
65 | | - static thread_local bool isInitialized; |
66 | | - static thread_local std::map<uint32_t, std::shared_ptr < ByteBuffer>> |
67 | | - buffers[2]; |
68 | | - static std::shared_ptr <DirectIoLib> directIoLib; |
| 173 | + |
69 | 174 | static thread_local int currBufferIdx; |
70 | 175 | static thread_local int nextBufferIdx; |
| 176 | + static thread_local std::map <uint32_t, std::shared_ptr<ByteBuffer>> buffersAllocated[2]; |
71 | 177 | friend class DirectUringRandomAccessFile; |
| 178 | + |
| 179 | + static thread_local std::unordered_map<uint32_t, std::shared_ptr<BufferPoolManagedEntry>> ringBufferMap[2]; |
| 180 | + |
| 181 | + |
| 182 | + |
| 183 | + static thread_local size_t thread_local_used_size[2]; // size already used by the thread |
| 184 | + static thread_local int thread_local_buffer_count[2]; // number of buffers held by the thread |
72 | 185 | }; |
73 | 186 | #endif // DUCKDB_BUFFERPOOL_H |
0 commit comments