forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathStorageImpl.h
330 lines (284 loc) · 9.63 KB
/
StorageImpl.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
#pragma once
#include <c10/core/Allocator.h>
#include <c10/core/Device.h>
#include <c10/core/DeviceType.h>
#include <c10/core/SymInt.h>
#include <c10/core/impl/COW.h>
#include <c10/core/impl/COWDeleter.h>
#include <c10/core/impl/PyObjectSlot.h>
#include <c10/macros/Export.h>
#include <c10/util/Exception.h>
#include <c10/util/UniqueVoidPtr.h>
#include <c10/util/intrusive_ptr.h>
#include <cstddef>
#include <utility>
namespace c10 {
C10_API void throwNullDataPtrError();
C10_API void warnDeprecatedDataPtr();
// A storage represents the underlying backing data buffer for a
// tensor. This concept was inherited from the original Torch7
// codebase; we'd kind of like to get rid of the concept
// (see https://github.com/pytorch/pytorch/issues/14797) but
// it's hard work and no one has gotten around to doing it.
//
// NB: storage is supposed to uniquely own a data pointer; e.g.,
// two non-null data pointers alias if and only if they are from
// the same storage. Technically you can violate this invariant
// (e.g., you can create a non-owning StorageImpl with at::from_blob)
// but a lot of things won't work correctly, including:
//
// - An ordinary deleter on such a storage is wrong, because normal deleters
// assume unique ownership, but if you have two storages at the same data,
// that implies there is some sort of shared ownership. So your deleter would
// have to actually be internally doing some sort of refcount thing
// - Deepcopy in Python side relies on storage equality and not data pointer
// equality; so if there are two separate storages pointing to the same data,
// the data will actually get duplicated in that case (one data ptr before,
// two data ptrs after)
// - Version counts won't work correctly, because we do all VC tracking at the
// level of storages (unless you explicitly disconnect the VC with detach);
// mutation because data pointers are the same are totally untracked
struct C10_API StorageImpl : public c10::intrusive_ptr_target {
 public:
  // Tag type used to disambiguate the "size is in bytes" constructors.
  struct use_byte_size_t {};

  // Constructs a storage that takes ownership of an existing DataPtr.
  //
  // size_bytes: size of the buffer; may be a symbolic integer.
  // data_ptr:   the (possibly null) owned data pointer; moved in.
  // allocator:  used for later reallocation; must be non-null if `resizable`.
  // resizable:  whether the storage may later be resized via its allocator.
  StorageImpl(
      use_byte_size_t /*use_byte_size*/,
      SymInt size_bytes,
      at::DataPtr data_ptr,
      at::Allocator* allocator,
      bool resizable)
      : data_ptr_(std::move(data_ptr)),
        size_bytes_(std::move(size_bytes)),
        // Cache whether the SymInt is heap-allocated so nbytes() can check it
        // cheaply.
        size_bytes_is_heap_allocated_(size_bytes_.is_heap_allocated()),
        resizable_(resizable),
        received_cuda_(false),
        allocator_(allocator) {
    if (resizable) {
      TORCH_INTERNAL_ASSERT(
          allocator_, "For resizable storage, allocator must be provided");
    }
    // Must run after data_ptr_ is set: recomputes the cached fast-path flag.
    refresh_has_data_ptr_check();
  }

  // Constructs a storage that allocates its own buffer from `allocator`.
  // NOTE: `allocator` is dereferenced unconditionally here, so it must be
  // non-null even when `resizable` is false. For a heap-allocated (symbolic)
  // size, a 0-byte allocation is made instead of a concrete one.
  StorageImpl(
      use_byte_size_t /*use_byte_size*/,
      const SymInt& size_bytes,
      at::Allocator* allocator,
      bool resizable)
      : StorageImpl(
            use_byte_size_t(),
            size_bytes,
            size_bytes.is_heap_allocated()
                ? allocator->allocate(0)
                : allocator->allocate(size_bytes.as_int_unchecked()),
            allocator,
            resizable) {}

  // StorageImpl uniquely owns its buffer; copying/moving is forbidden.
  StorageImpl& operator=(StorageImpl&& other) = delete;
  StorageImpl& operator=(const StorageImpl&) = delete;
  StorageImpl() = delete;
  StorageImpl(StorageImpl&& other) = delete;
  StorageImpl(const StorageImpl&) = delete;
  ~StorageImpl() override = default;

  // Releases the buffer and marks the storage as empty (size 0, concrete).
  void reset() {
    data_ptr_.clear();
    size_bytes_ = 0;
    size_bytes_is_heap_allocated_ = false;
  }

  // Destructor doesn't call release_resources because it's
  // unnecessary; don't forget to change that if needed!
  void release_resources() override {
    data_ptr_.clear();
  }

  // Concrete byte size. Only valid when the size is not a heap-allocated
  // SymInt; use sym_nbytes() in the symbolic case.
  size_t nbytes() const {
    // OK to do this instead of maybe_as_int as nbytes is guaranteed positive
    TORCH_CHECK(!size_bytes_is_heap_allocated_);
    return size_bytes_.as_int_unchecked();
  }

  // Byte size as a SymInt; always valid.
  SymInt sym_nbytes() const {
    return size_bytes_;
  }

  // TODO: remove later
  // Overwrites the byte size with a concrete value.
  void set_nbytes(size_t size_bytes) {
    size_bytes_ = static_cast<int64_t>(size_bytes);
    size_bytes_is_heap_allocated_ = false;
  }

  // Overwrites the byte size with a (possibly symbolic) value.
  // NOTE(review): unlike the size_t overload, this does not update
  // size_bytes_is_heap_allocated_ — presumably callers pass a matching kind;
  // confirm before relying on nbytes() afterwards.
  void set_nbytes(c10::SymInt size_bytes) {
    size_bytes_ = std::move(size_bytes);
  }

  bool resizable() const {
    return resizable_;
  }

  // Const access to the data pointer; performs no checks.
  const at::DataPtr& data_ptr() const {
    return data_ptr_;
  }

  // Mutable access to the data pointer. May throw or warn if configured via
  // set_throw_on_mutable_data_ptr()/set_warn_deprecated_on_mutable_data_ptr(),
  // and materializes a copy-on-write buffer before handing out mutable access.
  at::DataPtr& mutable_data_ptr() {
    // All special handling is gated on one cached flag to keep the common
    // path cheap (see has_data_ptr_check_).
    if (C10_UNLIKELY(has_data_ptr_check_)) {
      if (throw_on_mutable_data_ptr_) {
        throwNullDataPtrError();
      }
      if (warn_deprecated_on_mutable_data_ptr_) {
        warnDeprecatedDataPtr();
      }
      maybe_materialize_cow();
    }
    return data_ptr_;
  }

  // Returns the data_ptr. Bypasses all checks.
  at::DataPtr& _mutable_data_ptr_no_checks() {
    return data_ptr_;
  }

  // Returns the previous data_ptr
  at::DataPtr set_data_ptr(at::DataPtr&& data_ptr) {
    // We need to materialize the old COW DataPtr because it is
    // being returned as mutable.
    maybe_materialize_cow();
    return set_data_ptr_no_materialize_cow(std::move(data_ptr));
  }

  // Replaces the data pointer, discarding the old one (no swap/return).
  void set_data_ptr_noswap(at::DataPtr&& data_ptr) {
    data_ptr_ = std::move(data_ptr);
    // data_ptr_ changed, so the cached COW/check flag must be recomputed.
    refresh_has_data_ptr_check();
  }

  // Read-only raw pointer to the buffer; performs no checks.
  const void* data() const {
    return data_ptr_.get();
  }

  // Mutable raw pointer to the buffer; same check/COW-materialization
  // behavior as mutable_data_ptr().
  void* mutable_data() {
    if (C10_UNLIKELY(has_data_ptr_check_)) {
      if (throw_on_mutable_data_ptr_) {
        throwNullDataPtrError();
      }
      if (warn_deprecated_on_mutable_data_ptr_) {
        warnDeprecatedDataPtr();
      }
      maybe_materialize_cow();
    }
    return data_ptr_.mutable_get();
  }

  // Device type of the underlying DataPtr (e.g. CPU, CUDA).
  at::DeviceType device_type() const {
    return data_ptr_.device().type();
  }

  at::Allocator* allocator() {
    return allocator_;
  }

  const at::Allocator* allocator() const {
    return allocator_;
  }

  // You generally shouldn't use this method, but it is occasionally
  // useful if you want to override how a tensor will be reallocated,
  // after it was already allocated (and its initial allocator was
  // set)
  void set_allocator(at::Allocator* allocator) {
    allocator_ = allocator;
  }

  Device device() const {
    return data_ptr_.device();
  }

  void set_resizable(bool resizable) {
    if (resizable) {
      // We need an allocator to be resizable
      AT_ASSERT(allocator_);
    }
    resizable_ = resizable;
  }

  /**
   * Can only be called when use_count is 1
   *
   * Points this storage at an externally owned buffer `src` of
   * `size_bytes` bytes, with optional deleter `d`. The device is taken
   * from the current data_ptr_.
   */
  void UniqueStorageShareExternalPointer(
      void* src,
      size_t size_bytes,
      DeleterFnPtr d = nullptr) {
    UniqueStorageShareExternalPointer(
        at::DataPtr(src, src, d, data_ptr_.device()), size_bytes);
  }

  /**
   * Can only be called when use_count is 1
   *
   * Adopts `data_ptr` as the new buffer. The storage becomes
   * non-resizable and drops its allocator, since it no longer owns an
   * allocation it knows how to regrow.
   */
  void UniqueStorageShareExternalPointer(
      at::DataPtr&& data_ptr,
      size_t size_bytes) {
    data_ptr_ = std::move(data_ptr);
    size_bytes_ = static_cast<int64_t>(size_bytes);
    size_bytes_is_heap_allocated_ = false;
    allocator_ = nullptr;
    resizable_ = false;
    // NOTE(review): unlike set_data_ptr_noswap, this does not call
    // refresh_has_data_ptr_check() after replacing data_ptr_ — presumably an
    // external pointer is never COW; confirm upstream.
  }

  // This method can be used only after storage construction and cannot be used
  // to modify storage status
  void set_received_cuda(bool received_cuda) {
    received_cuda_ = received_cuda;
  }

  bool received_cuda() {
    return received_cuda_;
  }

  // Slot holding the Python object associated with this storage, if any.
  impl::PyObjectSlot* pyobj_slot() {
    return &pyobj_slot_;
  }

  const impl::PyObjectSlot* pyobj_slot() const {
    return &pyobj_slot_;
  }

  // After this call, mutable_data_ptr()/mutable_data() will throw.
  void set_throw_on_mutable_data_ptr() {
    throw_on_mutable_data_ptr_ = true;
    refresh_has_data_ptr_check();
  }

  // After this call, mutable_data_ptr()/mutable_data() will emit a
  // deprecation warning.
  void set_warn_deprecated_on_mutable_data_ptr() {
    warn_deprecated_on_mutable_data_ptr_ = true;
    refresh_has_data_ptr_check();
  }

 protected:
  // materialize_cow_storage needs to call set_data_ptr_no_materlize_cow
  friend void c10::impl::cow::materialize_cow_storage(StorageImpl& storage);

  // Returns the previous data_ptr. If the old data_ptr was COW,
  // this avoids materializing it
  at::DataPtr set_data_ptr_no_materialize_cow(at::DataPtr&& data_ptr) {
    at::DataPtr old_data_ptr(std::move(data_ptr_));
    data_ptr_ = std::move(data_ptr);
    refresh_has_data_ptr_check();
    return old_data_ptr;
  }

 private:
  // Recomputes the single cached flag that gates all special handling in
  // mutable_data()/mutable_data_ptr(). Must be called whenever data_ptr_ or
  // either of the throw/warn flags changes.
  void refresh_has_data_ptr_check() {
    has_data_ptr_check_ = is_cow() || throw_on_mutable_data_ptr_ ||
        warn_deprecated_on_mutable_data_ptr_;
  }

  // True if the current data pointer is a copy-on-write pointer.
  inline bool is_cow() const {
    return c10::impl::cow::is_cow_data_ptr(data_ptr_);
  }

  // Triggers a copy if this is a copy-on-write tensor.
  void maybe_materialize_cow() {
    if (is_cow()) {
      impl::cow::materialize_cow_storage(*this);
    }
  }

  DataPtr data_ptr_;
  SymInt size_bytes_;
  // Cached result of size_bytes_.is_heap_allocated(); guards nbytes().
  bool size_bytes_is_heap_allocated_;
  bool resizable_;
  // Identifies that Storage was received from another process and doesn't have
  // local to process cuda memory allocation
  bool received_cuda_;
  // All special checks in data/data_ptr calls are guarded behind this single
  // boolean. This is for performance: .data/.data_ptr calls are commonly in the
  // hot-path.
  bool has_data_ptr_check_ = false;
  // If we should throw when mutable_data_ptr() or mutable_data() is called.
  bool throw_on_mutable_data_ptr_ = false;
  // If we warn when mutable_data_ptr() or mutable_data() is called.
  bool warn_deprecated_on_mutable_data_ptr_ = false;
  Allocator* allocator_;
  impl::PyObjectSlot pyobj_slot_;
};
// Declare StorageImpl create function pointer types.
// Signature mirrors the DataPtr-taking StorageImpl constructor, so a
// registered helper can stand in for direct construction.
using StorageImplCreateHelper = intrusive_ptr<StorageImpl> (*)(
    StorageImpl::use_byte_size_t,
    SymInt size_bytes,
    DataPtr data_ptr,
    Allocator* allocator,
    bool resizable);

// Registers a custom StorageImpl factory for device type `t`.
// NOTE(review): registration/lookup semantics (overwrite? thread-safety?) are
// defined in the .cpp, not visible here — confirm there.
C10_API void SetStorageImplCreate(DeviceType t, StorageImplCreateHelper fptr);

// Returns the factory registered for device type `t` (presumably nullptr if
// none was registered — confirm in the implementation).
C10_API StorageImplCreateHelper GetStorageImplCreate(DeviceType t);

// Creates a StorageImpl, presumably dispatching to a per-device registered
// factory (via the helpers above) when one exists, else constructing a plain
// StorageImpl. `device_opt` selects the device used for dispatch.
C10_API c10::intrusive_ptr<c10::StorageImpl> make_storage_impl(
    c10::StorageImpl::use_byte_size_t use_byte_size,
    c10::SymInt size_bytes,
    c10::DataPtr data_ptr,
    c10::Allocator* allocator,
    bool resizable,
    c10::optional<at::Device> device_opt);

} // namespace c10