 
 namespace tvm {
 namespace relay {
+constexpr auto Is2DStorage = runtime::IsTextureStorage;
 
-size_t TokenAllocator1D::GetMemorySize(StorageToken* prototype) {
+/*
+ * Mixed mode memory allocator
+ */
+size_t TokenAllocatorMixed::GetMemorySize(StorageToken* prototype) {
   TensorType ttype = prototype->ttype;
   ICHECK(ttype.defined());
   size_t size = 1;
-  for (IndexExpr dim : ttype->shape) {
-    const int64_t* pval = tir::as_const_int(dim);
-    ICHECK(pval != nullptr) << "Cannot allocate memory symbolic tensor shape " << ttype->shape;
-    ICHECK_GE(*pval, 0) << "Cannot allocate memory for tensor with negative shape" << *pval;
-    size *= static_cast<size_t>(pval[0]);
+  if (relay::Is2DStorage(prototype->virtual_device->memory_scope)) {
+    size = GetSize2D(prototype);
+  } else {
+    for (IndexExpr dim : ttype->shape) {
+      const int64_t* pval = tir::as_const_int(dim);
+      ICHECK(pval != nullptr) << "Cannot allocate memory for a symbolic tensor shape " << ttype->shape;
+      ICHECK_GE(*pval, 0) << "Cannot allocate memory for a tensor with negative shape " << *pval;
+      size *= static_cast<size_t>(pval[0]);
+    }
+    size *= DivRoundUp(ttype->dtype.bits() * ttype->dtype.lanes(), 8);
  }
-  size *= DivRoundUp(ttype->dtype.bits() * ttype->dtype.lanes(), 8);
   return size;
 }
 
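The non-texture branch above multiplies the constant dims and then scales by the element size in bytes. A quick worked example (the shape is illustrative, not from this patch): a float32 tensor of shape (1, 64, 56, 56) in a flat memory scope yields

    size_t size = 1 * 64 * 56 * 56;  // 200704 elements
    size *= DivRoundUp(32 * 1, 8);   // 4 bytes per float32 lane
                                     // => 802816 bytes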
-StorageToken* TokenAllocator1D::Request(StorageToken* prototype) {
+bool IsTargetContainsKey(StorageToken* tok, String key) {
+  Target null_tgt{nullptr};
+  if (null_tgt == tok->virtual_device->target) {
+    return false;
+  }
+  auto prototype_keys = tok->virtual_device->target->GetKeys();
+  return std::find(prototype_keys.begin(), prototype_keys.end(), key) != prototype_keys.end();
+}
+
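IsTargetContainsKey is a null-safe lookup into the target's key list. A minimal usage sketch, assuming a token whose virtual device carries an OpenCL target created with -device=adreno (which, if the usual target parsing applies, adds "adreno" to the key list):

    IsTargetContainsKey(tok, "adreno");  // true for such a target
    IsTargetContainsKey(tok, "cuda");    // false
    // a token with a null target returns false instead of dereferencing it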
+StorageToken* TokenAllocatorMixed::Request(StorageToken* prototype) {
   // calculate the size;
   size_t size = GetMemorySize(prototype);
   // search memory block in [size / match_range_, size * match_range_)
@@ -57,144 +74,78 @@ StorageToken* TokenAllocator1D::Request(StorageToken* prototype) {
   auto mid = free_.lower_bound(size);
   auto end = free_.upper_bound(size * match_range_);
   // search for memory blocks larger than requested
+  bool is_prototype_adreno = IsTargetContainsKey(prototype, "adreno");
   for (auto it = mid; it != end; ++it) {
     StorageToken* tok = it->second;
-    if (!tok->is_compatible(*prototype)) continue;
-    ICHECK_EQ(tok->ref_counter, 0);
-    // Use exect matching strategy
-    tok->max_bytes = std::max(size, tok->max_bytes);
-    tok->ref_counter = prototype->ref_counter;
-    // find a exact match, erase from map and return
-    free_.erase(it);
-    return tok;
+    // TODO(Siva): We need additional ways of comparing VirtualDevice.
+    bool is_tok_adreno = IsTargetContainsKey(tok, "adreno");
+
+    if (tok->is_compatible(*prototype) || (is_prototype_adreno && is_tok_adreno)) {
+      ICHECK_EQ(tok->ref_counter, 0);
+      // Use exact matching strategy
+      if (size > tok->max_bytes) {
+        tok->max_bytes = size;
+        tok->ttype = prototype->ttype;
+      }
+      tok->ref_counter = prototype->ref_counter;
+      // found an exact match, erase from map and return
+      free_.erase(it);
+      return tok;
+    }
   }
   // then search for memory blocks smaller than requested space
   for (auto it = mid; it != begin;) {
     --it;
     StorageToken* tok = it->second;
-    if (!tok->is_compatible(*prototype)) continue;
-    ICHECK_EQ(tok->ref_counter, 0);
-    // Use exect matching strategy
-    tok->max_bytes = std::max(size, tok->max_bytes);
-    tok->ref_counter = prototype->ref_counter;
-    // erase from map and return
-    free_.erase(it);
-    return tok;
+    bool is_tok_adreno = IsTargetContainsKey(tok, "adreno");
+    if (tok->is_compatible(*prototype) || (is_prototype_adreno && is_tok_adreno)) {
+      ICHECK_EQ(tok->ref_counter, 0);
+      // Use exact matching strategy
+      if (size > tok->max_bytes) {
+        tok->max_bytes = size;
+        tok->ttype = prototype->ttype;
+      }
+      tok->ref_counter = prototype->ref_counter;
+      // erase from map and return
+      free_.erase(it);
+      return tok;
+    }
   }
   return nullptr;
 }
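Both loops apply the same policy: first scan [size, size * match_range_) upward for a free block at least as large as the request, then scan downward from size and grow a smaller block, widening tok->max_bytes and refreshing tok->ttype so the token records the largest request it must satisfy. A sketch of the search window, assuming match_range_ keeps its usual default of 16 from the allocator header:

    size_t size = 1024;                       // requested bytes
    auto mid = free_.lower_bound(size);       // first free block >= 1024
    auto end = free_.upper_bound(size * 16);  // candidate keys up to 16 * size
    // blocks outside this window are never reused for this request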
 
-StorageToken* TokenAllocator1D::Alloc(StorageToken* prototype, int64_t storage_id) {
+StorageToken* TokenAllocatorMixed::Alloc(StorageToken* prototype, int64_t storage_id) {
   size_t size = GetMemorySize(prototype);
   prototype->max_bytes = size;
   prototype->storage_id = storage_id;
   data_.push_back(prototype);
   return prototype;
 }
 
-void TokenAllocator1D::CheckForRelease(StorageToken* tok) {
+void TokenAllocatorMixed::CheckForRelease(StorageToken* tok) {
   ICHECK_GE(tok->storage_id, 0);
   ICHECK_GE(tok->ref_counter, 0);
   if (tok->ref_counter == 0) {
     free_.insert({tok->max_bytes, tok});
   }
 }
 
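A minimal sketch of how a storage planner is assumed to drive these entry points (the driver loop and next_storage_id are illustrative, not part of this patch):

    TokenAllocatorMixed alloc;
    StorageToken* tok = alloc.Request(prototype);  // try to reuse a free token
    if (tok == nullptr) {
      tok = alloc.Alloc(prototype, next_storage_id++);  // otherwise create one
    }
    // ... after the last consumer of the tensor runs ...
    tok->ref_counter -= 1;
    alloc.CheckForRelease(tok);  // ref count at zero returns it to free_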
-StorageToken* TokenAllocator2D::Request(StorageToken* prototype) {
-  auto shape = GetSize2D(prototype);
-  const int64_t max_ratio = 5;
-  int64_t min_added_size_x = std::numeric_limits<int64_t>::max();
-  int64_t min_added_size_y = std::numeric_limits<int64_t>::max();
-  int64_t min_wasted_size_x = std::numeric_limits<int64_t>::max();
-  int64_t min_wasted_size_y = std::numeric_limits<int64_t>::max();
-  int64_t best_storage_id = -1;
-  MemBlock new_mem;
-  for (int64_t free_id : free_list_) {
-    MemBlock& cached = blocks_[free_id];
-    // Can only reuse texture 2d blocks of the same type
-    if (cached.token_->ttype->dtype != prototype->ttype->dtype) {
-      continue;
-    }
-    // Can only reuse texture 2d blocks of the same scope
-    // Because reusing textures with different memory scope may lead to
-    // accuracy issues, because the data will be packed in a different way for
-    // different memory scopes.
-    if (cached.token_->virtual_device->memory_scope != prototype->virtual_device->memory_scope) {
-      continue;
-    }
-    // avoid reusing too small and too big textures
-    if (shape.width / cached.x_ > max_ratio || cached.x_ / shape.width > max_ratio ||
-        shape.height / cached.y_ > max_ratio || cached.y_ / shape.height > max_ratio) {
-      continue;
-    }
-    int64_t new_width = std::max(cached.x_, shape.width);
-    int64_t new_height = std::max(cached.y_, shape.height);
-    int64_t added_size_x = new_width - cached.x_;
-    int64_t added_size_y = new_height - cached.y_;
-    int64_t wasted_size_x = new_width - shape.width;
-    int64_t wasted_size_y = new_height - shape.height;
-    // Prioritize minimization of added size first, then minimize
-    // wasted size among blocks which would not require expansion
-    if ((min_added_size_x > 0 && added_size_x < min_added_size_x) ||
-        (min_added_size_y > 0 && added_size_y < min_added_size_y) ||
-        (min_added_size_x == added_size_x && wasted_size_x < min_wasted_size_x) ||
-        (min_added_size_y == added_size_y && wasted_size_y < min_wasted_size_y)) {
-      min_added_size_x = added_size_x;
-      min_added_size_y = added_size_y;
-      min_wasted_size_x = wasted_size_x;
-      min_wasted_size_y = wasted_size_y;
-      best_storage_id = free_id;
-      new_mem.x_ = new_width;
-      new_mem.y_ = new_height;
-    }
-  }
-
-  if (min_added_size_x == 0 && min_added_size_y == 0) {
-    // use existing block
-    free_list_.erase(best_storage_id);
-    blocks_[best_storage_id].token_->ref_counter += prototype->ref_counter;
-    return blocks_[best_storage_id].token_;
-  } else if (min_added_size_x <= shape.width || min_added_size_y <= shape.height) {
-    // Reset the reference counter of the now live token
-    free_list_.erase(best_storage_id);
-    new_mem.token_ = prototype;
-    new_mem.token_->ref_counter += 1;
-    new_mem.token_->storage_id = best_storage_id;
-    blocks_[best_storage_id] = new_mem;
-    return new_mem.token_;
-  }
-  return nullptr;
-}
-
-StorageToken* TokenAllocator2D::Alloc(StorageToken* prototype, int64_t storage_id) {
-  auto shape = GetSize2D(prototype);
-  MemBlock block;
-  block.x_ = shape.width;
-  block.y_ = shape.height;
-  prototype->storage_id = storage_id;
-  block.token_ = prototype;
-  blocks_[prototype->storage_id] = block;
-  return prototype;
-}
-
-void TokenAllocator2D::CheckForRelease(StorageToken* tok) {
-  ICHECK_GE(tok->storage_id, 0);
-  ICHECK_GE(tok->ref_counter, 0);
-  if (tok->ref_counter == 0) {
-    free_list_.insert(tok->storage_id);
-  }
-}
-
-runtime::Texture2DShape<int64_t> TokenAllocator2D::GetSize2D(StorageToken* prototype) {
+size_t TokenAllocatorMixed::GetSize2D(StorageToken* prototype) {
   TensorType ttype = prototype->ttype;
   ICHECK(ttype.defined());
-  size_t axis = runtime::DefaultTextureLayoutSeparator(ttype->shape.size(),
-                                                       prototype->virtual_device->memory_scope);
   struct Shape {
     const Array<PrimExpr>& shape;
     int64_t operator[](size_t i) const { return *tir::as_const_int(shape[i]); }
+    int size() { return this->shape.size(); }
   };
-  return runtime::ApplyTexture2DFlattening<int64_t>(Shape{ttype->shape}, ttype->shape.size(), axis);
+  auto shape = Shape{ttype->shape};
+  int image_row_align =
+      prototype->virtual_device->target->GetAttr<Integer>("image_base_address_alignment")
+          .value_or(Integer(64))
+          ->value;
+  return runtime::GetTextureMemorySize<Shape>(shape, ttype->dtype.bits(), ttype->dtype.lanes(),
+                                              prototype->virtual_device->memory_scope,
+                                              image_row_align);
 }
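GetSize2D now delegates the texture-size arithmetic to runtime::GetTextureMemorySize rather than flattening to a Texture2DShape itself, and the row alignment is read from the target's image_base_address_alignment attribute with a 64-byte default. A hedged sketch of an equivalent standalone call, mirroring the arguments used above (the scope string is an assumed example):

    size_t bytes = runtime::GetTextureMemorySize<Shape>(
        Shape{ttype->shape}, ttype->dtype.bits(), ttype->dtype.lanes(),
        "global.texture", /*image_row_align=*/64);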
 
 }  // namespace relay