Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2D: Combine texture state to batch more subsequent commands together #97340

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 46 additions & 4 deletions core/templates/lru.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,21 @@
#include "hash_map.h"
#include "list.h"

template <typename TKey, typename TData, typename Hasher = HashMapHasherDefault, typename Comparator = HashMapComparatorDefault<TKey>>
// Statement-like helpers that push/pop GCC's diagnostic state and silence
// -Waddress in between. In this file they bracket the compile-time
// `BeforeEvict != nullptr` checks (presumably because GCC flags comparing a
// function address against nullptr - confirm against the compiler output).
// The macros deliberately do NOT end in a semicolon: `_Pragma` needs none, so
// both `MACRO;` and bare `MACRO` use sites expand cleanly. On Clang and other
// compilers they expand to nothing.
#if defined(__GNUC__) && !defined(__clang__)
#define ADDRESS_DIAGNOSTIC_WARNING_DISABLE \
	_Pragma("GCC diagnostic push") \
	_Pragma("GCC diagnostic ignored \"-Waddress\"")

#define ADDRESS_DIAGNOSTIC_POP \
	_Pragma("GCC diagnostic pop")
#else
#define ADDRESS_DIAGNOSTIC_WARNING_DISABLE
#define ADDRESS_DIAGNOSTIC_POP
#endif

template <typename TKey, typename TData, typename Hasher = HashMapHasherDefault, typename Comparator = HashMapComparatorDefault<TKey>, void (*BeforeEvict)(TKey &, TData &) = nullptr>
class LRUCache {
private:
public:
struct Pair {
TKey key;
TData data;
Expand All @@ -51,28 +63,39 @@ class LRUCache {

typedef typename List<Pair>::Element *Element;

private:
List<Pair> _list;
HashMap<TKey, Element, Hasher, Comparator> _map;
size_t capacity;

public:
const TData *insert(const TKey &p_key, const TData &p_value) {
const Pair *insert(const TKey &p_key, const TData &p_value) {
Element *e = _map.getptr(p_key);
Element n = _list.push_front(Pair(p_key, p_value));

if (e) {
ADDRESS_DIAGNOSTIC_WARNING_DISABLE;
if constexpr (BeforeEvict != nullptr) {
BeforeEvict((*e)->get().key, (*e)->get().data);
}
ADDRESS_DIAGNOSTIC_POP;
_list.erase(*e);
_map.erase(p_key);
}
_map[p_key] = _list.front();

while (_map.size() > capacity) {
Element d = _list.back();
ADDRESS_DIAGNOSTIC_WARNING_DISABLE
if constexpr (BeforeEvict != nullptr) {
BeforeEvict(d->get().key, d->get().data);
}
ADDRESS_DIAGNOSTIC_POP
_map.erase(d->get().key);
_list.pop_back();
}

return &n->get().data;
return &n->get();
}

void clear() {
Expand All @@ -84,6 +107,17 @@ class LRUCache {
return _map.getptr(p_key);
}

bool erase(const TKey &p_key) {
Element *e = _map.getptr(p_key);
if (!e) {
return false;
}
_list.move_to_front(*e);
_map.erase(p_key);
_list.pop_front();
return true;
}

const TData &get(const TKey &p_key) {
Element *e = _map.getptr(p_key);
CRASH_COND(!e);
Expand All @@ -109,6 +143,11 @@ class LRUCache {
capacity = p_capacity;
while (_map.size() > capacity) {
Element d = _list.back();
ADDRESS_DIAGNOSTIC_WARNING_DISABLE;
if constexpr (BeforeEvict != nullptr) {
BeforeEvict(d->get().key, d->get().data);
}
ADDRESS_DIAGNOSTIC_POP;
_map.erase(d->get().key);
_list.pop_back();
}
Expand All @@ -124,4 +163,7 @@ class LRUCache {
}
};

#undef ADDRESS_DIAGNOSTIC_WARNING_DISABLE
#undef ADDRESS_DIAGNOSTIC_POP

#endif // LRU_H
55 changes: 55 additions & 0 deletions core/typedefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,61 @@ static inline uint64_t BSWAP64(uint64_t x) {
}
#endif

// Portable count-trailing-zeros for a 32-bit value, using no compiler intrinsics.
// Returns the bit index of the lowest set bit, or 32 when `x` is zero.
static inline int __CTZ32_software(uint32_t x) {
	if (x == 0) {
		return 32;
	}
	// Binary search over the low bits: whenever the lowest `width` bits are all
	// zero, account for them and shift them out. Widths visited: 16, 8, 4, 2.
	uint32_t count = 1;
	for (uint32_t width = 16; width > 1; width >>= 1) {
		const uint32_t low_mask = (1u << width) - 1u;
		if ((x & low_mask) == 0) {
			count += width;
			x >>= width;
		}
	}
	// `count` started at 1, so subtract the final bit to settle the last position.
	return count - (x & 1);
}

// CTZ32(x): number of trailing zero bits in the low 32 bits of `x`.
// Yields 32 when those bits are all zero.
#if defined(__GNUC__) || defined(__clang__)
// The argument is parenthesized and truncated to 32 bits *before* the zero
// test, so expressions such as `CTZ32(a & b)` group correctly and the builtin
// (undefined for 0) is never reached with a zero operand.
// NOTE: being a macro, this form evaluates `x` twice; avoid side effects.
#define CTZ32(x) ((((uint32_t)(x)) != 0) ? __builtin_ctz((uint32_t)(x)) : 32)
#elif defined(_MSC_VER)
#include <intrin.h>

static inline int CTZ32(uint32_t x) {
	// _BitScanForward reports the index of the lowest set bit, i.e. the
	// trailing-zero count, and returns 0 when no bit is set.
	unsigned long trailing_zero = 0;
	if (_BitScanForward(&trailing_zero, x)) {
		return (int)trailing_zero;
	}
	return 32;
}
#else
#define CTZ32(x) __CTZ32_software(x)
#endif

// Portable 64-bit count-trailing-zeros assembled from two 32-bit halves via CTZ32.
// The low half is inspected first; only when it is entirely zero does the
// high half contribute, offset by 32.
static inline int __CTZ64_software(uint64_t x) {
	const uint32_t low_half = (uint32_t)x;
	if (low_half != 0) {
		return CTZ32(low_half);
	}
	const uint32_t high_half = (uint32_t)(x >> 32);
	return 32 + CTZ32(high_half);
}

// CTZ64(x): number of trailing zero bits in the 64-bit value `x`.
// Yields 64 when `x` is zero.
#if defined(__GNUC__) || defined(__clang__)
// The argument is parenthesized and converted before the zero test, so
// expressions such as `CTZ64(a & b)` group correctly and the builtin
// (undefined for 0) is never reached with a zero operand.
// NOTE: being a macro, this form evaluates `x` twice; avoid side effects.
#define CTZ64(x) ((((uint64_t)(x)) != 0) ? __builtin_ctzll((uint64_t)(x)) : 64)
#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64))
static inline int CTZ64(uint64_t x) {
	// _BitScanForward64 reports the index of the lowest set bit, i.e. the
	// trailing-zero count, and returns 0 when no bit is set.
	unsigned long trailing_zero = 0;
	if (_BitScanForward64(&trailing_zero, x)) {
		return (int)trailing_zero;
	}
	return 64;
}
#else
#define CTZ64(x) __CTZ64_software(x)
#endif

// Generic comparator used in Map, List, etc.
template <typename T>
struct Comparator {
Expand Down
4 changes: 4 additions & 0 deletions doc/classes/ProjectSettings.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2350,6 +2350,10 @@
<member name="rendering/2d/batching/item_buffer_size" type="int" setter="" getter="" default="16384">
Maximum number of canvas item commands that can be batched into a single draw call.
</member>
<member name="rendering/2d/batching/uniform_set_cache_size" type="int" setter="" getter="" default="256">
Maximum number of uniform sets that will be cached by the 2D renderer when batching draw calls.
[b]Note:[/b] A project that uses a large number of unique sprite textures per frame may benefit from increasing this value.
</member>
Comment on lines +2353 to +2356
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have added the project setting. Let me know if this commentary is ok, or if you'd like to see edits.

<member name="rendering/2d/sdf/oversize" type="int" setter="" getter="" default="1">
Controls how much of the original viewport size should be covered by the 2D signed distance field. This SDF can be sampled in [CanvasItem] shaders and is used for [GPUParticles2D] collision. Higher values allow portions of occluders located outside the viewport to still be taken into account in the generated signed distance field, at the cost of performance. If you notice particles falling through [LightOccluder2D]s as the occluders leave the viewport, increase this setting.
The percentage specified is added on each axis and on both sides. For example, with the default setting of 120%, the signed distance field will cover 20% of the viewport's size outside the viewport on each side (top, right, bottom, left).
Expand Down
3 changes: 1 addition & 2 deletions drivers/metal/metal_objects.mm
Original file line number Diff line number Diff line change
Expand Up @@ -1211,8 +1211,7 @@ vertex VaryingsPos vertClear(AttributesPos attributes [[stage_in]], constant Cle
varyings.layer = uint(attributes.a_position.w);
return varyings;
}
)",
ClearAttKey::DEPTH_INDEX];
)", ClearAttKey::DEPTH_INDEX];

return new_func(msl, @"vertClear", nil);
}
Expand Down
6 changes: 5 additions & 1 deletion drivers/metal/rendering_device_driver_metal.mm
Original file line number Diff line number Diff line change
Expand Up @@ -2060,14 +2060,18 @@ void deserialize(BufReader &p_reader) {

case BT::Sampler: {
primary.dataType = MTLDataTypeSampler;
primary.arrayLength = 1;
for (uint32_t const &a : a_type.array) {
primary.arrayLength *= a;
}
Comment on lines +2063 to +2066
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixes a bug in the Metal driver, which didn't handle sampler state arrays.

} break;

default: {
ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Unexpected BaseType");
} break;
}

// Find array length.
// Find array length of image.
if (basetype == BT::Image || basetype == BT::SampledImage) {
primary.arrayLength = 1;
for (uint32_t const &a : a_type.array) {
Expand Down
Loading
Loading