From d3a35de38667a7f82384dff4773ddf75a3735e17 Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Tue, 23 Jul 2024 23:31:20 -0700 Subject: [PATCH 01/15] fix a crash with bloom when screen dimension smaller than 16px --- filament/src/PostProcessManager.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/filament/src/PostProcessManager.cpp b/filament/src/PostProcessManager.cpp index 85e21d6832e..da5c120b8ef 100644 --- a/filament/src/PostProcessManager.cpp +++ b/filament/src/PostProcessManager.cpp @@ -1965,8 +1965,8 @@ PostProcessManager::BloomPassOutput PostProcessManager::bloom(FrameGraph& fg, // - visible bloom size changes with dynamic resolution in non-homogenous mode // This allows us to use the 9 sample downsampling filter (instead of 13) // for at least 4 levels. - uint32_t width = std::max(1u, uint32_t(std::floor(bloomWidth))); - uint32_t height = std::max(1u, uint32_t(std::floor(bloomHeight))); + uint32_t width = std::max(16u, uint32_t(std::floor(bloomWidth))); + uint32_t height = std::max(16u, uint32_t(std::floor(bloomHeight))); width &= ~((1 << 4) - 1); // at least 4 levels height &= ~((1 << 4) - 1); bloomWidth = float(width); @@ -1978,6 +1978,8 @@ PostProcessManager::BloomPassOutput PostProcessManager::bloom(FrameGraph& fg, // we don't need to do the fireflies reduction if we have TAA (it already does it) bool fireflies = threshold && !taaOptions.enabled; + assert_invariant(bloomWidth && bloomHeight); + while (2 * bloomWidth < float(desc.width) || 2 * bloomHeight < float(desc.height)) { if (inoutBloomOptions.quality == QualityLevel::LOW || inoutBloomOptions.quality == QualityLevel::MEDIUM) { From be4f287b07987f4aead4119dc53ba7fa21676489 Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Wed, 24 Jul 2024 15:34:36 -0700 Subject: [PATCH 02/15] More improvements to the JobSystem (#7988) * improve parallel_for a bit We get about 40% performance increase. The gain comes from not having to copy the JobData structure each time we create a job, by using a new emplaceJob() method, we can create the structure directly into its destination. * avoid calling wakeAll() when possible wakeAll() is very expensive and not always needed when a job finishes because there may not be anyone waiting on that job. We now maintain a waiter count per job, and use that to determine if we need to notify or not. And now that the JobSystem overhead is lower, we can decrease the size of the jobs, which improves the load balancing. * mActiveJobs fixes some comments claimed mActiveJobs needed to be modified before or after accessing the WorkQueue; this couldn't be correct because there were no guaranteed global ordering with the workQueue. --- filament/src/RenderPass.cpp | 2 +- filament/src/RenderPass.h | 2 +- filament/src/details/Scene.cpp | 2 +- libs/utils/include/utils/JobSystem.h | 86 ++++++++-- .../utils/include/utils/WorkStealingDequeue.h | 20 ++- libs/utils/src/JobSystem.cpp | 154 +++++++++--------- 6 files changed, 161 insertions(+), 105 deletions(-) diff --git a/filament/src/RenderPass.cpp b/filament/src/RenderPass.cpp index a428d29d46a..63811f8b17d 100644 --- a/filament/src/RenderPass.cpp +++ b/filament/src/RenderPass.cpp @@ -221,7 +221,7 @@ void RenderPass::appendCommands(FEngine& engine, work(vr.first, vr.size()); } else { auto* jobCommandsParallel = jobs::parallel_for(js, nullptr, vr.first, (uint32_t)vr.size(), - std::cref(work), jobs::CountSplitter()); + std::cref(work), jobs::CountSplitter()); js.runAndWait(jobCommandsParallel); } diff --git a/filament/src/RenderPass.h b/filament/src/RenderPass.h index 23c4cf8249d..ebe4f37642d 100644 --- a/filament/src/RenderPass.h +++ b/filament/src/RenderPass.h @@ -417,7 +417,7 @@ class RenderPass { void instanceify(FEngine& engine, Arena& arena, int32_t eyeCount) noexcept; // We choose the command count per job to minimize JobSystem overhead. - static constexpr size_t JOBS_PARALLEL_FOR_COMMANDS_COUNT = 1024; + static constexpr size_t JOBS_PARALLEL_FOR_COMMANDS_COUNT = 128; static constexpr size_t JOBS_PARALLEL_FOR_COMMANDS_SIZE = sizeof(Command) * JOBS_PARALLEL_FOR_COMMANDS_COUNT; diff --git a/filament/src/details/Scene.cpp b/filament/src/details/Scene.cpp index c5b0d1e19e9..ece2e99f93f 100644 --- a/filament/src/details/Scene.cpp +++ b/filament/src/details/Scene.cpp @@ -248,7 +248,7 @@ void FScene::prepare(utils::JobSystem& js, auto* renderableJob = jobs::parallel_for(js, rootJob, renderableInstances.data(), renderableInstances.size(), - std::cref(renderableWork), jobs::CountSplitter<128, 5>()); + std::cref(renderableWork), jobs::CountSplitter<64>()); auto* lightJob = jobs::parallel_for(js, rootJob, lightInstances.data(), lightInstances.size(), diff --git a/libs/utils/include/utils/JobSystem.h b/libs/utils/include/utils/JobSystem.h index e3786dc3225..7ea711661a4 100644 --- a/libs/utils/include/utils/JobSystem.h +++ b/libs/utils/include/utils/JobSystem.h @@ -44,7 +44,9 @@ namespace utils { class JobSystem { - static constexpr size_t MAX_JOB_COUNT = 16384; + static constexpr size_t MAX_JOB_COUNT = 1 << 14; // 16384 + static constexpr uint32_t JOB_COUNT_MASK = MAX_JOB_COUNT - 1; + static constexpr uint32_t WAITER_COUNT_SHIFT = 24; static_assert(MAX_JOB_COUNT <= 0x7FFE, "MAX_JOB_COUNT must be <= 0x7FFE"); using WorkQueue = WorkStealingDequeue; using Mutex = utils::Mutex; @@ -81,14 +83,18 @@ class JobSystem { void* storage[JOB_STORAGE_SIZE_WORDS]; // 48 | 48 JobFunc function; // 4 | 8 uint16_t parent; // 2 | 2 - std::atomic runningJobCount = { 1 }; // 2 | 2 - mutable std::atomic refCount = { 1 }; // 2 | 2 mutable ThreadId id = invalidThreadId; // 1 | 1 - // 1 | 1 (padding) + mutable std::atomic refCount = { 1 }; // 1 | 1 + std::atomic runningJobCount = { 1 }; // 4 | 4 // 4 | 0 (padding) // 64 | 64 }; +#ifndef WIN32 + // on windows std::function is bigger and forces the whole structure to be larger + static_assert(sizeof(Job) == 64); +#endif + explicit JobSystem(size_t threadCount = 0, size_t adoptableThreadsCount = 1) noexcept; ~JobSystem(); @@ -207,6 +213,21 @@ class JobSystem { return job; } + // creates a job from a KNOWN method pointer w/ object passed by value + template + Job* emplaceJob(Job* parent, ARGS&& ... args) noexcept { + static_assert(sizeof(T) <= sizeof(Job::storage), "user data too large"); + Job* job = create(parent, [](void* storage, JobSystem& js, Job* job) { + T* const that = static_cast(storage); + (that->*method)(js, job); + that->~T(); + }); + if (job) { + new(job->storage) T(std::forward(args)...); + } + return job; + } + // creates a job from a functor passed by value template Job* createJob(Job* parent, T functor) noexcept { @@ -222,6 +243,21 @@ class JobSystem { return job; } + // creates a job from a functor passed by value + template + Job* emplaceJob(Job* parent, ARGS&& ... args) noexcept { + static_assert(sizeof(T) <= sizeof(Job::storage), "functor too large"); + Job* job = create(parent, [](void* storage, JobSystem& js, Job* job){ + T* const that = static_cast(storage); + that->operator()(js, job); + that->~T(); + }); + if (job) { + new(job->storage) T(std::forward(args)...); + } + return job; + } + /* * Jobs are normally finished automatically, this can be used to cancel a job before it is run. @@ -345,6 +381,16 @@ class JobSystem { static constexpr uint32_t m = 0x7fffffffu; uint32_t mState; // must be 0 < seed < 0x7fffffff public: + using result_type = uint32_t; + + static constexpr result_type min() noexcept { + return 1; + } + + static constexpr result_type max() noexcept { + return m - 1; + } + inline constexpr explicit default_random_engine(uint32_t seed = 1u) noexcept : mState(((seed % m) == 0u) ? 1u : seed % m) { } @@ -389,7 +435,9 @@ class JobSystem { Job* pop(WorkQueue& workQueue) noexcept; Job* steal(WorkQueue& workQueue) noexcept; - void wait(std::unique_lock& lock, Job* job = nullptr) noexcept; + [[nodiscard]] + uint32_t wait(std::unique_lock& lock, Job* job) noexcept; + void wait(std::unique_lock& lock) noexcept; void wakeAll() noexcept; void wakeOne() noexcept; @@ -418,7 +466,7 @@ class JobSystem { uint8_t mParallelSplitCount = 0; // # of split allowable in parallel_for Job* mRootJob = nullptr; - utils::Mutex mThreadMapLock; // this should have very little contention + Mutex mThreadMapLock; // this should have very little contention tsl::robin_map mThreadMap; }; @@ -436,12 +484,13 @@ template JobSystem::Job* createJob(JobSystem& js, JobSystem::Job* parent, CALLABLE&& func, ARGS&&... args) noexcept { struct Data { + explicit Data(std::function f) noexcept: f(std::move(f)) {} std::function f; // Renaming the method below could cause an Arrested Development. void gob(JobSystem&, JobSystem::Job*) noexcept { f(); } - } user{ std::bind(std::forward(func), - std::forward(args)...) }; - return js.createJob(parent, std::move(user)); + }; + return js.emplaceJob(parent, + std::bind(std::forward(func), std::forward(args)...)); } template f) noexcept: f(std::move(f)) {} std::function f; // Renaming the method below could cause an Arrested Development. void gob(JobSystem&, JobSystem::Job*) noexcept { f(); } - } user{ std::bind(std::forward(func), std::forward(o), - std::forward(args)...) }; - return js.createJob(parent, std::move(user)); + }; + return js.emplaceJob(parent, + std::bind(std::forward(func), std::forward(o), std::forward(args)...)); } @@ -486,8 +536,8 @@ struct ParallelForJobData { right_side: if (splitter.split(splits, count)) { const size_type lc = count / 2; - JobData ld(start, lc, splits + uint8_t(1), functor, splitter); - JobSystem::Job* l = js.createJob(parent, std::move(ld)); + JobSystem::Job* l = js.emplaceJob(parent, + start, lc, splits + uint8_t(1), functor, splitter); if (UTILS_UNLIKELY(l == nullptr)) { // couldn't create a job, just pretend we're done splitting goto execute; @@ -527,8 +577,8 @@ template JobSystem::Job* parallel_for(JobSystem& js, JobSystem::Job* parent, uint32_t start, uint32_t count, F functor, const S& splitter) noexcept { using JobData = details::ParallelForJobData; - JobData jobData(start, count, 0, std::move(functor), splitter); - return js.createJob(parent, std::move(jobData)); + return js.emplaceJob(parent, + start, count, 0, std::move(functor), splitter); } // parallel jobs with pointer/count @@ -539,8 +589,8 @@ JobSystem::Job* parallel_for(JobSystem& js, JobSystem::Job* parent, f(data + s, c); }; using JobData = details::ParallelForJobData; - JobData jobData(0, count, 0, std::move(user), splitter); - return js.createJob(parent, std::move(jobData)); + return js.emplaceJob(parent, + 0, count, 0, std::move(user), splitter); } // parallel jobs on a Slice<> diff --git a/libs/utils/include/utils/WorkStealingDequeue.h b/libs/utils/include/utils/WorkStealingDequeue.h index 9e737d0aa77..f9f5b5fbc23 100644 --- a/libs/utils/include/utils/WorkStealingDequeue.h +++ b/libs/utils/include/utils/WorkStealingDequeue.h @@ -35,7 +35,13 @@ namespace utils { * steal() push(), pop() * any thread main thread * - * + * References: + * - This code is largely inspired from + * https://blog.molecular-matters.com/2015/09/25/job-system-2-0-lock-free-work-stealing-part-3-going-lock-free/ + * - other implementations + * https://github.com/ConorWilliams/ConcurrentDeque/blob/main/include/riften/deque.hpp + * https://github.com/ssbl/concurrent-deque/blob/master/include/deque.hpp + * https://github.com/taskflow/work-stealing-queue/blob/master/wsq.hpp */ template class WorkStealingDequeue { @@ -117,7 +123,7 @@ TYPE WorkStealingDequeue::pop() noexcept { index_t top = mTop.load(std::memory_order_seq_cst); if (top < bottom) { - // Queue isn't empty and it's not the last item, just return it, this is the common case. + // Queue isn't empty, and it's not the last item, just return it, this is the common case. return getItemAt(bottom); } @@ -132,13 +138,13 @@ TYPE WorkStealingDequeue::pop() noexcept { if (mTop.compare_exchange_strong(top, top + 1, std::memory_order_seq_cst, std::memory_order_relaxed)) { - // success: we stole our last item from ourself, meaning that a concurrent steal() + // Success: we stole our last item from ourselves, meaning that a concurrent steal() // would have failed. // mTop now equals top + 1, we adjust top to make the queue empty. top++; } else { - // failure: mTop was not equal to top, which means the item was stolen under our feet. - // top now equals to mTop. Simply discard the item we just popped. + // Failure: mTop was not equal to top, which means the item was stolen under our feet. + // `top` now equals to mTop. Simply discard the item we just popped. // The queue is now empty. item = TYPE(); } @@ -149,7 +155,7 @@ TYPE WorkStealingDequeue::pop() noexcept { } // std::memory_order_relaxed used because we're not publishing any data. - // no concurrent writes to mBottom possible, it's always safe to write mBottom. + // No concurrent writes to mBottom possible, it's always safe to write mBottom. mBottom.store(top, std::memory_order_relaxed); return item; } @@ -194,6 +200,8 @@ TYPE WorkStealingDequeue::steal() noexcept { } // failure: the item we just tried to steal was pop()'ed under our feet, // simply discard it; nothing to do -- it's okay to try again. + // However, item might be corrupted, so it must be trivially destructible + static_assert(std::is_trivially_destructible_v); } } diff --git a/libs/utils/src/JobSystem.cpp b/libs/utils/src/JobSystem.cpp index b0e199fcef2..57640cd6c90 100644 --- a/libs/utils/src/JobSystem.cpp +++ b/libs/utils/src/JobSystem.cpp @@ -23,9 +23,6 @@ // when SYSTRACE_TAG_JOBSYSTEM is used, enables even heavier systraces #define HEAVY_SYSTRACE 0 -// enable for catching hangs waiting on a job to finish -static constexpr bool DEBUG_FINISH_HANGS = false; - #include #include @@ -249,53 +246,44 @@ inline bool JobSystem::hasActiveJobs() const noexcept { } inline bool JobSystem::hasJobCompleted(JobSystem::Job const* job) noexcept { - return job->runningJobCount.load(std::memory_order_acquire) <= 0; + return (job->runningJobCount.load(std::memory_order_acquire) & JOB_COUNT_MASK) == 0; } -void JobSystem::wait(std::unique_lock& lock, Job* job) noexcept { +inline void JobSystem::wait(std::unique_lock& lock) noexcept { HEAVY_SYSTRACE_CALL(); - if constexpr (!DEBUG_FINISH_HANGS) { - mWaiterCondition.wait(lock); - } else { - do { - // we use a pretty long timeout (4s) so we're very confident that the system is hung - // and nothing else is happening. - std::cv_status status = mWaiterCondition.wait_for(lock, - std::chrono::milliseconds(4000)); - if (status == std::cv_status::no_timeout) { - break; - } + mWaiterCondition.wait(lock); +} - // hang debugging... +inline uint32_t JobSystem::wait(std::unique_lock& lock, Job* const job) noexcept { + HEAVY_SYSTRACE_CALL(); + // signal we are waiting - // We check of we had active jobs or if the job we're waiting on had completed already. - // There is the possibility of a race condition, but our long timeout gives us some - // confidence that we're in an incorrect state. + if (hasActiveJobs() || exitRequested()) { + return job->runningJobCount.load(std::memory_order_acquire); + } - size_t const id = std::distance(mThreadStates.data(), &getState()); - auto activeJobs = mActiveJobs.load(); + uint32_t runningJobCount = + job->runningJobCount.fetch_add(1 << WAITER_COUNT_SHIFT, std::memory_order_relaxed); - if (job) { - auto runningJobCount = job->runningJobCount.load(); - FILAMENT_CHECK_POSTCONDITION(runningJobCount > 0) - << "JobSystem(" << this << ", " << unsigned(id) << "): waiting while job " - << job << " has completed and " << activeJobs << " jobs are active!"; - } + if (runningJobCount & JOB_COUNT_MASK) { + mWaiterCondition.wait(lock); + } - FILAMENT_CHECK_POSTCONDITION(activeJobs <= 0) - << "JobSystem(" << this << ", " << unsigned(id) << "): waiting while " - << activeJobs << " jobs are active!"; + runningJobCount = + job->runningJobCount.fetch_sub(1 << WAITER_COUNT_SHIFT, std::memory_order_acquire); - } while (true); - } + assert_invariant((runningJobCount >> WAITER_COUNT_SHIFT) >= 1); + + return runningJobCount; } +UTILS_NOINLINE void JobSystem::wakeAll() noexcept { // wakeAll() is called when a job finishes (to wake up any thread that might be waiting on it) - HEAVY_SYSTRACE_CALL(); + SYSTRACE_CALL(); mWaiterLock.lock(); // this empty critical section is needed -- it guarantees that notify_all() happens - // after the condition's variables are set. + // either before the condition is checked, or after the condition variable sleeps. mWaiterLock.unlock(); // notify_all() can be pretty slow, and it doesn't need to be inside the lock. mWaiterCondition.notify_all(); @@ -306,7 +294,7 @@ void JobSystem::wakeOne() noexcept { HEAVY_SYSTRACE_CALL(); mWaiterLock.lock(); // this empty critical section is needed -- it guarantees that notify_one() happens - // after the condition's variables are set. + // either before the condition is checked, or after the condition variable sleeps. mWaiterLock.unlock(); // notify_one() can be pretty slow, and it doesn't need to be inside the lock. mWaiterCondition.notify_one(); @@ -328,50 +316,37 @@ void JobSystem::put(WorkQueue& workQueue, Job* job) noexcept { size_t const index = job - mJobStorageBase; assert(index >= 0 && index < MAX_JOB_COUNT); - // put the job into the queue first + // put the job into the queue workQueue.push(uint16_t(index + 1)); - // then increase our active job count - int32_t const oldActiveJobs = mActiveJobs.fetch_add(1, std::memory_order_relaxed); - // But it's possible that the job has already been picked-up, so oldActiveJobs could be - // negative for instance. We signal only if that's not the case. - if (oldActiveJobs >= 0) { - wakeOne(); // wake-up a thread if needed... - } + + // increase our active job count (the order in which we're doing this must not matter + // because we're not using std::memory_order_seq_cst (here or in WorkQueue::push()). + mActiveJobs.fetch_add(1, std::memory_order_relaxed); + + // Note: it's absolutely possible for mActiveJobs to be 0 here, because the job could have + // been handled by a zealous worker already. In that case we could avoid calling wakeOne(), + // but that is not the common case. + + wakeOne(); } JobSystem::Job* JobSystem::pop(WorkQueue& workQueue) noexcept { - // decrement mActiveJobs first, this is to ensure that if there is only a single job left - // (and we're about to pick it up), other threads don't loop trying to do the same. - mActiveJobs.fetch_sub(1, std::memory_order_relaxed); - size_t const index = workQueue.pop(); assert(index <= MAX_JOB_COUNT); Job* const job = !index ? nullptr : &mJobStorageBase[index - 1]; - - // If our guess was wrong, i.e. we couldn't pick up a job (b/c our queue was empty), we - // need to correct mActiveJobs. - if (!job) { - // no need to wake someone else up because, we will go into job-stealing mode - // immediately after this - mActiveJobs.fetch_add(1, std::memory_order_relaxed); + if (UTILS_LIKELY(job)) { + mActiveJobs.fetch_sub(1, std::memory_order_relaxed); } return job; } JobSystem::Job* JobSystem::steal(WorkQueue& workQueue) noexcept { - // decrement mActiveJobs first, this is to ensure that if there is only a single job left - // (and we're about to pick it up), other threads don't loop trying to do the same. - mActiveJobs.fetch_sub(1, std::memory_order_relaxed); - size_t const index = workQueue.steal(); assert_invariant(index <= MAX_JOB_COUNT); Job* const job = !index ? nullptr : &mJobStorageBase[index - 1]; - - if (!job) { - // If we failed taking a job, we need to correct mActiveJobs. - mActiveJobs.fetch_add(1, std::memory_order_relaxed); + if (UTILS_LIKELY(job)) { + mActiveJobs.fetch_sub(1, std::memory_order_relaxed); } - return job; } @@ -402,7 +377,7 @@ JobSystem::Job* JobSystem::steal(JobSystem::ThreadState& state) noexcept { Job* job = nullptr; do { ThreadState* const stateToStealFrom = getStateToStealFrom(state); - if (UTILS_LIKELY(stateToStealFrom)) { + if (stateToStealFrom) { job = steal(stateToStealFrom->workQueue); } // nullptr -> nothing to steal in that queue either, if there are active jobs, @@ -415,14 +390,18 @@ bool JobSystem::execute(JobSystem::ThreadState& state) noexcept { HEAVY_SYSTRACE_CALL(); Job* job = pop(state.workQueue); - if (UTILS_UNLIKELY(job == nullptr)) { + + // It is beneficial for some benchmarks to poll on steal() for a bit, because going back to + // sleep and waking up is pretty expensive. However, it is unclear it helps in practice with + // larger jobs or when parallel_for is used. + constexpr size_t const STEAL_TRY_COUNT = 1; + for (size_t i = 0; UTILS_UNLIKELY(!job && i < STEAL_TRY_COUNT); i++) { // our queue is empty, try to steal a job job = steal(state); } - if (job) { - assert(job->runningJobCount.load(std::memory_order_relaxed) >= 1); - + if (UTILS_LIKELY(job)) { + assert((job->runningJobCount.load(std::memory_order_relaxed) & JOB_COUNT_MASK) >= 1); if (UTILS_LIKELY(job->function)) { HEAVY_SYSTRACE_NAME("job->function"); job->id = std::distance(mThreadStates.data(), &state); @@ -467,11 +446,16 @@ void JobSystem::finish(Job* job) noexcept { do { // std::memory_order_release here is needed to synchronize with JobSystem::wait() // which needs to "see" all changes that happened before the job terminated. - auto runningJobCount = job->runningJobCount.fetch_sub(1, std::memory_order_acq_rel); + uint32_t const v = job->runningJobCount.fetch_sub(1, std::memory_order_acq_rel); + uint32_t const runningJobCount = v & JOB_COUNT_MASK; assert(runningJobCount > 0); + if (runningJobCount == 1) { // no more work, destroy this job and notify its parent - notify = true; + uint32_t const waiters = v >> WAITER_COUNT_SHIFT; + if (waiters) { + notify = true; + } Job* const parent = job->parent == 0x7FFF ? nullptr : &storage[job->parent]; decRef(job); job = parent; @@ -482,7 +466,8 @@ void JobSystem::finish(Job* job) noexcept { } while (job); // wake-up all threads that could potentially be waiting on this job finishing - if (notify) { + if (UTILS_UNLIKELY(notify)) { + // but avoid calling notify_all() at all cost, because it's always expensive wakeAll(); } } @@ -500,10 +485,11 @@ JobSystem::Job* JobSystem::create(JobSystem::Job* parent, JobFunc func) noexcept // add a reference to the parent to make sure it can't be terminated. // memory_order_relaxed is safe because no action is taken at this point // (the job is not started yet). - auto parentJobCount = parent->runningJobCount.fetch_add(1, std::memory_order_relaxed); + UTILS_UNUSED_IN_RELEASE auto const parentJobCount = + parent->runningJobCount.fetch_add(1, std::memory_order_relaxed); // can't create a child job of a terminated parent - assert(parentJobCount > 0); + assert((parentJobCount & JOB_COUNT_MASK) > 0); index = parent - mJobStorageBase; assert(index < MAX_JOB_COUNT); @@ -567,7 +553,7 @@ void JobSystem::waitAndRelease(Job*& job) noexcept { ThreadState& state(getState()); do { - if (!execute(state)) { + if (UTILS_UNLIKELY(!execute(state))) { // test if job has completed first, to possibly avoid taking the lock if (hasJobCompleted(job)) { break; @@ -583,11 +569,23 @@ void JobSystem::waitAndRelease(Job*& job) noexcept { // continue to handle more jobs, as they get added. std::unique_lock lock(mWaiterLock); - if (!hasJobCompleted(job) && !hasActiveJobs() && !exitRequested()) { - wait(lock, job); + uint32_t const runningJobCount = wait(lock, job); + // we could be waking up because either: + // - the job we're waiting on has completed + // - more jobs where added to the JobSystem + // - we're asked to exit + if ((runningJobCount & JOB_COUNT_MASK) == 0 || exitRequested()) { + break; } + + // if we get here, it means that + // - the job we're waiting on is still running, and + // - we're not asked to exit, and + // - there were some active jobs + // So we try to handle one. + continue; } - } while (!hasJobCompleted(job) && !exitRequested()); + } while (UTILS_LIKELY(!hasJobCompleted(job) && !exitRequested())); if (job == mRootJob) { mRootJob = nullptr; From ef4c4a76fcd39a3e5b796f3b7fb01fc3a4ccd29d Mon Sep 17 00:00:00 2001 From: Grant Commodore Date: Thu, 25 Jul 2024 00:31:51 -0700 Subject: [PATCH 03/15] Only sets the layercount if the view has stereo and the stereoscopic type is multiview (#7987) Co-authored-by: Powei Feng --- filament/src/RendererUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/filament/src/RendererUtils.cpp b/filament/src/RendererUtils.cpp index 2e252633c47..f5a7238753f 100644 --- a/filament/src/RendererUtils.cpp +++ b/filament/src/RendererUtils.cpp @@ -172,7 +172,7 @@ FrameGraphId RendererUtils::colorPass( data.color = builder.write(data.color, FrameGraphTexture::Usage::COLOR_ATTACHMENT); data.depth = builder.write(data.depth, FrameGraphTexture::Usage::DEPTH_ATTACHMENT); - if (engine.getConfig().stereoscopicType == StereoscopicType::MULTIVIEW) { + if (view.hasStereo() && engine.getConfig().stereoscopicType == StereoscopicType::MULTIVIEW) { layerCount = engine.getConfig().stereoscopicEyeCount; } From 7441e878bbc8b2c160e96fdcbbac47152a771737 Mon Sep 17 00:00:00 2001 From: Powei Feng Date: Thu, 25 Jul 2024 17:06:56 +0800 Subject: [PATCH 04/15] gltfio: enable escaped unicode for node name (#7989) Fixes #7846 --- libs/gltfio/src/AssetLoader.cpp | 65 +++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 8 deletions(-) diff --git a/libs/gltfio/src/AssetLoader.cpp b/libs/gltfio/src/AssetLoader.cpp index 7a8138392e0..7c7555bd689 100644 --- a/libs/gltfio/src/AssetLoader.cpp +++ b/libs/gltfio/src/AssetLoader.cpp @@ -58,6 +58,8 @@ #include "downcast.h" +#include +#include #include using namespace filament; @@ -88,12 +90,57 @@ static constexpr cgltf_material kDefaultMat = { }, }; -static const char* getNodeName(const cgltf_node* node, const char* defaultNodeName) { - if (node->name) return node->name; - if (node->mesh && node->mesh->name) return node->mesh->name; - if (node->light && node->light->name) return node->light->name; - if (node->camera && node->camera->name) return node->camera->name; - return defaultNodeName; +static std::string getNodeName(cgltf_node const* node, char const* defaultNodeName) { + auto const getNameImpl = [node, defaultNodeName]() -> char const* { + if (node->name) return node->name; + if (node->mesh && node->mesh->name) return node->mesh->name; + if (node->light && node->light->name) return node->light->name; + if (node->camera && node->camera->name) return node->camera->name; + return defaultNodeName; + }; + + std::string strOrig(getNameImpl()); + + // We handle the potential case of escaped characters in the JSON which should be properly + // interpreted as unicode. See spec: + // https://registry.khronos.org/glTF/specs/2.0/glTF-2.0.html#json-encoding + // Also see spec for escaped strings in JSON (Section 2.5) https://www.ietf.org/rfc/rfc4627.txt + + std::string strEscaped; + size_t cur = 0, idx = 0; + std::wstring_convert, char32_t> conv; + + auto const addUnencodedSubstr = [&](size_t cursor, size_t nextPoint) { + assert_invariant(nextPoint >= cursor); + if (cursor == nextPoint) { + return; + } + strEscaped += strOrig.substr(cursor, nextPoint - cursor); + }; + + while ((idx = strOrig.find("\\u", cur)) != std::string::npos) { + if (idx + 6 > strOrig.length()) { + utils::slog.w << "gltfio: Unable to interpret node name=" << strOrig + << " as proper unicode encoding." << utils::io::endl; + return strOrig; + } + + // Turns string of the form \u0062 to 0x0062 + std::string const hexStr = strOrig.substr(idx + 2, 4); + if (hexStr.find_first_not_of("0123456789abcdefABCDEF") != std::string::npos) { + utils::slog.w << "gltfio: Unable to interpret node name=" << strOrig + << " as proper unicode encoding." << utils::io::endl; + return strOrig; + } + + addUnencodedSubstr(cur, idx); + + strEscaped += conv.to_bytes((char32_t) std::stoul(hexStr, nullptr, 16)); + cur = idx + 6; + } + addUnencodedSubstr(cur, strOrig.length()); + + return strEscaped; } static bool primitiveHasVertexColor(const cgltf_primitive& inPrim) { @@ -504,7 +551,8 @@ FFilamentAsset* FAssetLoader::createRootAsset(const cgltf_data* srcAsset) { } void FAssetLoader::recursePrimitives(const cgltf_node* node, FFilamentAsset* fAsset) { - const char* name = getNodeName(node, mDefaultNodeName); + auto nameStr = getNodeName(node, mDefaultNodeName); + const char* name = nameStr.c_str(); name = name ? name : "node"; if (node->mesh) { @@ -573,7 +621,8 @@ void FAssetLoader::recurseEntities(const cgltf_node* node, SceneMask scenes, Ent instance->mEntities.push_back(entity); instance->mNodeMap[node - srcAsset->nodes] = entity; - const char* name = getNodeName(node, mDefaultNodeName); + auto nameStr = getNodeName(node, mDefaultNodeName); + const char* name = nameStr.c_str(); if (name) { fAsset->mNameToEntity[name].push_back(entity); From 730bc99025a6846dc41764f7f5c7f79f0a192350 Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Mon, 22 Jul 2024 16:07:33 -0700 Subject: [PATCH 05/15] Move the ResourceAllocator from Engine to Renderer The ResourceAllocator used to be global and owned by the Engine, this was causing some issues when using several Renderers because each one could cause the eviction of cache data for another. We now have a ResourceAllocator per Renderer, which makes more sense because most resources are allocated by the FrameGraph. We also introduce a ResourceAllocatorDisposer class, which is used for checking in and out a texture from the cache, and destroy the texture when it's checked-out. That objet is still global. --- .../filament-android/src/main/cpp/View.cpp | 9 ++ .../com/google/android/filament/View.java | 14 ++- filament/include/filament/View.h | 12 +++ filament/src/FrameHistory.h | 3 + filament/src/ResourceAllocator.cpp | 91 +++++++++++++++---- filament/src/ResourceAllocator.h | 48 +++++++++- filament/src/View.cpp | 6 ++ filament/src/details/Engine.cpp | 14 +-- filament/src/details/Engine.h | 15 ++- filament/src/details/Renderer.cpp | 12 ++- filament/src/details/Renderer.h | 4 + filament/src/details/View.cpp | 22 +++-- filament/src/details/View.h | 8 +- filament/src/fg/FrameGraph.cpp | 1 + filament/test/filament_framegraph_test.cpp | 8 ++ web/filament-js/jsbindings.cpp | 3 +- 16 files changed, 222 insertions(+), 48 deletions(-) diff --git a/android/filament-android/src/main/cpp/View.cpp b/android/filament-android/src/main/cpp/View.cpp index f16b76c671b..ac04c5326ea 100644 --- a/android/filament-android/src/main/cpp/View.cpp +++ b/android/filament-android/src/main/cpp/View.cpp @@ -531,3 +531,12 @@ Java_com_google_android_filament_View_nGetFogEntity(JNIEnv *env, jclass clazz, View *view = (View *) nativeView; return (jint)view->getFogEntity().getId(); } + +extern "C" +JNIEXPORT void JNICALL +Java_com_google_android_filament_View_nClearFrameHistory(JNIEnv *env, jclass clazz, + jlong nativeView, jlong nativeEngine) { + View *view = (View *) nativeView; + Engine *engine = (Engine *) nativeEngine; + view->clearFrameHistory(*engine); +} diff --git a/android/filament-android/src/main/java/com/google/android/filament/View.java b/android/filament-android/src/main/java/com/google/android/filament/View.java index 460c25d4063..f300d07f885 100644 --- a/android/filament-android/src/main/java/com/google/android/filament/View.java +++ b/android/filament-android/src/main/java/com/google/android/filament/View.java @@ -1233,6 +1233,18 @@ public int getFogEntity() { return nGetFogEntity(getNativeObject()); } + /** + * When certain temporal features are used (e.g.: TAA or Screen-space reflections), the view + * keeps a history of previous frame renders associated with the Renderer the view was last + * used with. When switching Renderer, it may be necessary to clear that history by calling + * this method. Similarly, if the whole content of the screen change, like when a cut-scene + * starts, clearing the history might be needed to avoid artifacts due to the previous frame + * being very different. + */ + public void clearFrameHistory(Engine engine) { + nClearFrameHistory(getNativeObject(), engine.getNativeObject()); + } + public long getNativeObject() { if (mNativeObject == 0) { throw new IllegalStateException("Calling method on destroyed View"); @@ -1294,7 +1306,7 @@ private static native void nSetDepthOfFieldOptions(long nativeView, float cocSca private static native void nSetMaterialGlobal(long nativeView, int index, float x, float y, float z, float w); private static native void nGetMaterialGlobal(long nativeView, int index, float[] out); private static native int nGetFogEntity(long nativeView); - + private static native void nClearFrameHistory(long nativeView, long nativeEngine); /** * List of available ambient occlusion techniques. diff --git a/filament/include/filament/View.h b/filament/include/filament/View.h index 1a1ed96f511..5264cdae493 100644 --- a/filament/include/filament/View.h +++ b/filament/include/filament/View.h @@ -40,6 +40,7 @@ class CallbackHandler; class Camera; class ColorGrading; +class Engine; class MaterialInstance; class RenderTarget; class Scene; @@ -878,6 +879,17 @@ class UTILS_PUBLIC View : public FilamentAPI { */ utils::Entity getFogEntity() const noexcept; + + /** + * When certain temporal features are used (e.g.: TAA or Screen-space reflections), the view + * keeps a history of previous frame renders associated with the Renderer the view was last + * used with. When switching Renderer, it may be necessary to clear that history by calling + * this method. Similarly, if the whole content of the screen change, like when a cut-scene + * starts, clearing the history might be needed to avoid artifacts due to the previous frame + * being very different. + */ + void clearFrameHistory(Engine& engine) noexcept; + /** * List of available ambient occlusion techniques * @deprecated use AmbientOcclusionOptions::enabled instead diff --git a/filament/src/FrameHistory.h b/filament/src/FrameHistory.h index 905130f5c8f..c016ec1bc29 100644 --- a/filament/src/FrameHistory.h +++ b/filament/src/FrameHistory.h @@ -21,6 +21,9 @@ #include #include +#include + +#include namespace filament { diff --git a/filament/src/ResourceAllocator.cpp b/filament/src/ResourceAllocator.cpp index 2990629bcea..3f92acf7849 100644 --- a/filament/src/ResourceAllocator.cpp +++ b/filament/src/ResourceAllocator.cpp @@ -29,13 +29,14 @@ #include #include -#include #include #include #include #include #include +#include +#include #include #include @@ -89,8 +90,15 @@ void ResourceAllocator::AssociativeContainer::emplace(ARGS&& ... args) } // ------------------------------------------------------------------------------------------------ + ResourceAllocatorInterface::~ResourceAllocatorInterface() = default; +// ------------------------------------------------------------------------------------------------ + +ResourceAllocatorDisposerInterface::~ResourceAllocatorDisposerInterface() = default; + +// ------------------------------------------------------------------------------------------------ + size_t ResourceAllocator::TextureKey::getSize() const noexcept { size_t const pixelCount = width * height * depth; size_t size = pixelCount * FTexture::getFormatSize(format); @@ -110,16 +118,22 @@ size_t ResourceAllocator::TextureKey::getSize() const noexcept { ResourceAllocator::ResourceAllocator(Engine::Config const& config, DriverApi& driverApi) noexcept : mCacheMaxAge(config.resourceAllocatorCacheMaxAge), - mBackend(driverApi) { + mBackend(driverApi), + mDisposer(std::make_shared(driverApi)) { +} + +ResourceAllocator::ResourceAllocator(std::shared_ptr disposer, + Engine::Config const& config, DriverApi& driverApi) noexcept + : mCacheMaxAge(config.resourceAllocatorCacheMaxAge), + mBackend(driverApi), + mDisposer(std::move(disposer)) { } ResourceAllocator::~ResourceAllocator() noexcept { - assert_invariant(!mTextureCache.size()); - assert_invariant(!mInUseTextures.size()); + assert_invariant(mTextureCache.empty()); } void ResourceAllocator::terminate() noexcept { - assert_invariant(!mInUseTextures.size()); auto& textureCache = mTextureCache; for (auto it = textureCache.begin(); it != textureCache.end();) { mBackend.destroyTexture(it->second.handle); @@ -174,7 +188,7 @@ backend::TextureHandle ResourceAllocator::createTexture(const char* name, swizzle[0], swizzle[1], swizzle[2], swizzle[3]); } } - mInUseTextures.emplace(handle, key); + mDisposer->checkout(handle, key); } else { if (swizzle == defaultSwizzle) { handle = mBackend.createTexture( @@ -190,24 +204,21 @@ backend::TextureHandle ResourceAllocator::createTexture(const char* name, void ResourceAllocator::destroyTexture(TextureHandle h) noexcept { if constexpr (mEnabled) { - // find the texture in the in-use list (it must be there!) - auto it = mInUseTextures.find(h); - assert_invariant(it != mInUseTextures.end()); - - // move it to the cache - const TextureKey key = it->second; - uint32_t const size = key.getSize(); - - mTextureCache.emplace(key, TextureCachePayload{ h, mAge, size }); - mCacheSize += size; - - // remove it from the in-use list - mInUseTextures.erase(it); + auto const key = mDisposer->checkin(h); + if (UTILS_LIKELY(key.has_value())) { + uint32_t const size = key.value().getSize(); + mTextureCache.emplace(key.value(), TextureCachePayload{ h, mAge, size }); + mCacheSize += size; + } } else { mBackend.destroyTexture(h); } } +ResourceAllocatorDisposerInterface& ResourceAllocator::getDisposer() noexcept { + return *mDisposer; +} + void ResourceAllocator::gc() noexcept { // this is called regularly -- usually once per frame of each Renderer @@ -254,4 +265,46 @@ void ResourceAllocator::purge( mTextureCache.erase(pos); } +// ------------------------------------------------------------------------------------------------ + +ResourceAllocatorDisposer::ResourceAllocatorDisposer(DriverApi& driverApi) noexcept + : mBackend(driverApi) { +} + +ResourceAllocatorDisposer::~ResourceAllocatorDisposer() noexcept { + assert_invariant(mInUseTextures.empty()); +} + +void ResourceAllocatorDisposer::terminate() noexcept { + assert_invariant(mInUseTextures.empty()); +} + +void ResourceAllocatorDisposer::destroy(backend::TextureHandle handle) noexcept { + if (handle) { + auto r = checkin(handle); + if (r.has_value()) { + mBackend.destroyTexture(handle); + } + } +} + +void ResourceAllocatorDisposer::checkout(backend::TextureHandle handle, + ResourceAllocator::TextureKey key) { + mInUseTextures.emplace(handle, key); +} + +std::optional ResourceAllocatorDisposer::checkin( + backend::TextureHandle handle) { + // find the texture in the in-use list (it must be there!) + auto it = mInUseTextures.find(handle); + assert_invariant(it != mInUseTextures.end()); + if (it == mInUseTextures.end()) { + return std::nullopt; + } + TextureKey const key = it->second; + // remove it from the in-use list + mInUseTextures.erase(it); + return key; +} + } // namespace filament diff --git a/filament/src/ResourceAllocator.h b/filament/src/ResourceAllocator.h index 5486592fa53..40f15f3b4ac 100644 --- a/filament/src/ResourceAllocator.h +++ b/filament/src/ResourceAllocator.h @@ -28,16 +28,29 @@ #include #include -#include +#include +#include #include +#include +#include #include #include namespace filament { +class ResourceAllocatorDisposer; + // The only reason we use an interface here is for unit-tests, so we can mock this allocator. // This is not too time-critical, so that's okay. + +class ResourceAllocatorDisposerInterface { +public: + virtual void destroy(backend::TextureHandle handle) noexcept = 0; +protected: + virtual ~ResourceAllocatorDisposerInterface(); +}; + class ResourceAllocatorInterface { public: virtual backend::RenderTargetHandle createRenderTarget(const char* name, @@ -60,15 +73,20 @@ class ResourceAllocatorInterface { virtual void destroyTexture(backend::TextureHandle h) noexcept = 0; + virtual ResourceAllocatorDisposerInterface& getDisposer() noexcept = 0; + protected: virtual ~ResourceAllocatorInterface(); }; - class ResourceAllocator final : public ResourceAllocatorInterface { public: + explicit ResourceAllocator(std::shared_ptr disposer, + Engine::Config const& config, backend::DriverApi& driverApi) noexcept; + explicit ResourceAllocator( Engine::Config const& config, backend::DriverApi& driverApi) noexcept; + ~ResourceAllocator() noexcept override; void terminate() noexcept; @@ -93,6 +111,8 @@ class ResourceAllocator final : public ResourceAllocatorInterface { void destroyTexture(backend::TextureHandle h) noexcept override; + ResourceAllocatorDisposerInterface& getDisposer() noexcept override; + void gc() noexcept; private: @@ -181,6 +201,7 @@ class ResourceAllocator final : public ResourceAllocatorInterface { using value_type = typename Container::value_type::second_type; size_t size() const { return mContainer.size(); } + bool empty() const { return size() == 0; } iterator begin() { return mContainer.begin(); } const_iterator begin() const { return mContainer.begin(); } iterator end() { return mContainer.end(); } @@ -193,16 +214,35 @@ class ResourceAllocator final : public ResourceAllocatorInterface { }; using CacheContainer = AssociativeContainer; - using InUseContainer = AssociativeContainer; void purge(ResourceAllocator::CacheContainer::iterator const& pos); backend::DriverApi& mBackend; + std::shared_ptr mDisposer; CacheContainer mTextureCache; - InUseContainer mInUseTextures; size_t mAge = 0; uint32_t mCacheSize = 0; static constexpr bool mEnabled = true; + + friend class ResourceAllocatorDisposer; +}; + +class ResourceAllocatorDisposer final : public ResourceAllocatorDisposerInterface { + using TextureKey = ResourceAllocator::TextureKey; +public: + explicit ResourceAllocatorDisposer(backend::DriverApi& driverApi) noexcept; + ~ResourceAllocatorDisposer() noexcept override; + void terminate() noexcept; + void destroy(backend::TextureHandle handle) noexcept override; + +private: + friend class ResourceAllocator; + void checkout(backend::TextureHandle handle, TextureKey key); + std::optional checkin(backend::TextureHandle handle); + + using InUseContainer = ResourceAllocator::AssociativeContainer; + backend::DriverApi& mBackend; + InUseContainer mInUseTextures; }; } // namespace filament diff --git a/filament/src/View.cpp b/filament/src/View.cpp index 58ac1a6afc6..abdd615ab8c 100644 --- a/filament/src/View.cpp +++ b/filament/src/View.cpp @@ -15,6 +15,8 @@ */ #include "details/View.h" +#include "filament/View.h" + namespace filament { @@ -312,4 +314,8 @@ utils::Entity View::getFogEntity() const noexcept { return downcast(this)->getFogEntity(); } +void View::clearFrameHistory(Engine& engine) noexcept { + downcast(this)->clearFrameHistory(downcast(engine)); +} + } // namespace filament diff --git a/filament/src/details/Engine.cpp b/filament/src/details/Engine.cpp index 72b5b599c3f..8276ebd40b5 100644 --- a/filament/src/details/Engine.cpp +++ b/filament/src/details/Engine.cpp @@ -163,8 +163,7 @@ FEngine* FEngine::getEngine(void* token) { FILAMENT_CHECK_PRECONDITION(ThreadUtils::isThisThread(instance->mMainThreadId)) << "Engine::createAsync() and Engine::getEngine() must be called on the same thread."; - // we use mResourceAllocator as a proxy for "am I already initialized" - if (!instance->mResourceAllocator) { + if (!instance->mInitialized) { if (UTILS_UNLIKELY(!instance->mDriver)) { // something went horribly wrong during driver initialization instance->mDriverThread.join(); @@ -262,7 +261,7 @@ void FEngine::init() { slog.i << "FEngine feature level: " << int(mActiveFeatureLevel) << io::endl; - mResourceAllocator = new ResourceAllocator(mConfig, driverApi); + mResourceAllocatorDisposer = std::make_shared(driverApi); mFullScreenTriangleVb = downcast(VertexBuffer::Builder() .vertexCount(3) @@ -425,11 +424,13 @@ void FEngine::init() { } }); }); + + mInitialized = true; } FEngine::~FEngine() noexcept { SYSTRACE_CALL(); - delete mResourceAllocator; + assert_invariant(!mResourceAllocatorDisposer); delete mDriver; if (mOwnPlatform) { PlatformFactory::destroy(&mPlatform); @@ -440,7 +441,7 @@ void FEngine::shutdown() { SYSTRACE_CALL(); // by construction this should never be nullptr - assert_invariant(mResourceAllocator); + assert_invariant(mResourceAllocatorDisposer); FILAMENT_CHECK_PRECONDITION(ThreadUtils::isThisThread(mMainThreadId)) << "Engine::shutdown() called from the wrong thread!"; @@ -460,7 +461,8 @@ void FEngine::shutdown() { */ mPostProcessManager.terminate(driver); // free-up post-process manager resources - mResourceAllocator->terminate(); + mResourceAllocatorDisposer->terminate(); + mResourceAllocatorDisposer.reset(); mDFG.terminate(*this); // free-up the DFG mRenderableManager.terminate(); // free-up all renderables mLightManager.terminate(); // free-up all lights diff --git a/filament/src/details/Engine.h b/filament/src/details/Engine.h index b8cdb498773..18abcc2b004 100644 --- a/filament/src/details/Engine.h +++ b/filament/src/details/Engine.h @@ -86,6 +86,7 @@ namespace filament { class Renderer; class MaterialParser; +class ResourceAllocatorDisposer; namespace backend { class Driver; @@ -254,9 +255,13 @@ class FEngine : public Engine { } } - ResourceAllocator& getResourceAllocator() noexcept { - assert_invariant(mResourceAllocator); - return *mResourceAllocator; + ResourceAllocatorDisposer& getResourceAllocatorDisposer() noexcept { + assert_invariant(mResourceAllocatorDisposer); + return *mResourceAllocatorDisposer; + } + + std::shared_ptr const& getSharedResourceAllocatorDisposer() noexcept { + return mResourceAllocatorDisposer; } void* streamAlloc(size_t size, size_t alignment) noexcept; @@ -490,7 +495,7 @@ class FEngine : public Engine { FTransformManager mTransformManager; FLightManager mLightManager; FCameraManager mCameraManager; - ResourceAllocator* mResourceAllocator = nullptr; + std::shared_ptr mResourceAllocatorDisposer; HwVertexBufferInfoFactory mHwVertexBufferInfoFactory; ResourceList mBufferObjects{ "BufferObject" }; @@ -562,6 +567,8 @@ class FEngine : public Engine { std::thread::id mMainThreadId{}; + bool mInitialized = false; + // Creation parameters Config mConfig; diff --git a/filament/src/details/Renderer.cpp b/filament/src/details/Renderer.cpp index 8b537644c0a..9fd1758b82e 100644 --- a/filament/src/details/Renderer.cpp +++ b/filament/src/details/Renderer.cpp @@ -60,6 +60,7 @@ #include #include +#include #include #include @@ -88,7 +89,11 @@ FRenderer::FRenderer(FEngine& engine) : mHdrQualityMedium(TextureFormat::R11F_G11F_B10F), mHdrQualityHigh(TextureFormat::RGB16F), mIsRGB8Supported(false), - mUserEpoch(engine.getEngineEpoch()) + mUserEpoch(engine.getEngineEpoch()), + mResourceAllocator(std::make_unique( + engine.getSharedResourceAllocatorDisposer(), + engine.getConfig(), + engine.getDriverApi())) { FDebugRegistry& debugRegistry = engine.getDebugRegistry(); debugRegistry.registerProperty("d.renderer.doFrameCapture", @@ -176,6 +181,7 @@ void FRenderer::terminate(FEngine& engine) { } mFrameInfoManager.terminate(driver); mFrameSkipper.terminate(driver); + mResourceAllocator->terminate(); } void FRenderer::resetUserTime() { @@ -380,7 +386,7 @@ void FRenderer::endFrame() { } // do this before engine.flush() - engine.getResourceAllocator().gc(); + mResourceAllocator->gc(); // Run the component managers' GC in parallel // WARNING: while doing this we can't access any component manager @@ -750,7 +756,7 @@ void FRenderer::renderJob(RootArenaScope& rootArenaScope, FView& view) { * Frame graph */ - FrameGraph fg(engine.getResourceAllocator(), + FrameGraph fg(*mResourceAllocator, isProtectedContent ? FrameGraph::Mode::PROTECTED : FrameGraph::Mode::UNPROTECTED); auto& blackboard = fg.getBlackboard(); diff --git a/filament/src/details/Renderer.h b/filament/src/details/Renderer.h index e946e2a0dbc..e2043964342 100644 --- a/filament/src/details/Renderer.h +++ b/filament/src/details/Renderer.h @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -53,6 +54,8 @@ namespace filament { +class ResourceAllocator; + namespace backend { class Driver; } // namespace backend @@ -206,6 +209,7 @@ class FRenderer : public Renderer { tsl::robin_set mPreviousRenderTargets; std::function mBeginFrameInternal; uint64_t mVsyncSteadyClockTimeNano = 0; + std::unique_ptr mResourceAllocator{}; }; FILAMENT_DOWNCAST(Renderer) diff --git a/filament/src/details/View.cpp b/filament/src/details/View.cpp index af0e48ca994..1a3b54deb2a 100644 --- a/filament/src/details/View.cpp +++ b/filament/src/details/View.cpp @@ -17,6 +17,7 @@ #include "details/View.h" #include "Culler.h" +#include "FrameHistory.h" #include "Froxelizer.h" #include "RenderPrimitive.h" #include "ResourceAllocator.h" @@ -118,7 +119,7 @@ void FView::terminate(FEngine& engine) { DriverApi& driver = engine.getDriverApi(); driver.destroyBufferObject(mLightUbh); driver.destroyBufferObject(mRenderableUbh); - drainFrameHistory(engine); + clearFrameHistory(engine); ShadowMapManager::terminate(engine, mShadowMapManager); mPerViewUniforms.terminate(driver); @@ -1007,20 +1008,29 @@ FrameGraphId FView::renderShadowMaps(FEngine& engine, FrameGr void FView::commitFrameHistory(FEngine& engine) noexcept { // Here we need to destroy resources in mFrameHistory.back() + auto& disposer = engine.getResourceAllocatorDisposer(); auto& frameHistory = mFrameHistory; FrameHistoryEntry& last = frameHistory.back(); - last.taa.color.destroy(engine.getResourceAllocator()); - last.ssr.color.destroy(engine.getResourceAllocator()); + disposer.destroy(last.taa.color.handle); + disposer.destroy(last.ssr.color.handle); + last.taa.color.handle.clear(); + last.ssr.color.handle.clear(); // and then push the new history entry to the history stack frameHistory.commit(); } -void FView::drainFrameHistory(FEngine& engine) noexcept { +void FView::clearFrameHistory(FEngine& engine) noexcept { // make sure we free all resources in the history - for (size_t i = 0; i < mFrameHistory.size(); ++i) { - commitFrameHistory(engine); + auto& disposer = engine.getResourceAllocatorDisposer(); + auto& frameHistory = mFrameHistory; + for (size_t i = 0; i < frameHistory.size(); ++i) { + FrameHistoryEntry& last = frameHistory[i]; + disposer.destroy(last.taa.color.handle); + disposer.destroy(last.ssr.color.handle); + last.taa.color.handle.clear(); + last.ssr.color.handle.clear(); } } diff --git a/filament/src/details/View.h b/filament/src/details/View.h index 0624af1ce61..73e7c9dc611 100644 --- a/filament/src/details/View.h +++ b/filament/src/details/View.h @@ -422,6 +422,10 @@ class FView : public View { // (e.g.: after the FrameGraph execution). void commitFrameHistory(FEngine& engine) noexcept; + // Clean-up the whole history, free all resources. This is typically called when the View is + // being terminated. Or we're changing Renderer. + void clearFrameHistory(FEngine& engine) noexcept; + // create the picking query View::PickingQuery& pick(uint32_t x, uint32_t y, backend::CallbackHandler* handler, View::PickingQueryResultCallback callback) noexcept; @@ -485,10 +489,6 @@ class FView : public View { Culler::result_type* visibleMask, size_t count); - // Clean-up the whole history, free all resources. This is typically called when the View is - // being terminated. - void drainFrameHistory(FEngine& engine) noexcept; - // we don't inline this one, because the function is quite large and there is not much to // gain from inlining. static FScene::RenderableSoa::iterator partition( diff --git a/filament/src/fg/FrameGraph.cpp b/filament/src/fg/FrameGraph.cpp index e52cdef5c4f..fec8580326a 100644 --- a/filament/src/fg/FrameGraph.cpp +++ b/filament/src/fg/FrameGraph.cpp @@ -24,6 +24,7 @@ #include "FrameGraphPass.h" #include "FrameGraphRenderPass.h" #include "FrameGraphTexture.h" +#include "ResourceAllocator.h" #include "details/Engine.h" diff --git a/filament/test/filament_framegraph_test.cpp b/filament/test/filament_framegraph_test.cpp index 019bcb6776b..684f3e2bf2d 100644 --- a/filament/test/filament_framegraph_test.cpp +++ b/filament/test/filament_framegraph_test.cpp @@ -34,6 +34,10 @@ using namespace backend; class MockResourceAllocator : public ResourceAllocatorInterface { uint32_t handle = 0; + struct MockDisposer : public ResourceAllocatorDisposerInterface { + void destroy(backend::TextureHandle) noexcept override {} + } disposer; + public: backend::RenderTargetHandle createRenderTarget(const char* name, backend::TargetBufferFlags targetBufferFlags, @@ -60,6 +64,10 @@ class MockResourceAllocator : public ResourceAllocatorInterface { void destroyTexture(backend::TextureHandle h) noexcept override { } + + ResourceAllocatorDisposerInterface& getDisposer() noexcept override { + return disposer; + } }; class FrameGraphTest : public testing::Test { diff --git a/web/filament-js/jsbindings.cpp b/web/filament-js/jsbindings.cpp index e1bc5c37557..e5ec3fad9bb 100644 --- a/web/filament-js/jsbindings.cpp +++ b/web/filament-js/jsbindings.cpp @@ -683,7 +683,8 @@ class_("View") .function("isStencilBufferEnabled", &View::isStencilBufferEnabled) .function("setMaterialGlobal", &View::setMaterialGlobal) .function("getMaterialGlobal", &View::getMaterialGlobal) - .function("getFogEntity", &View::getFogEntity); + .function("getFogEntity", &View::getFogEntity) + .function("clearFrameHistory", &View::clearFrameHistory); /// Scene ::core class:: Flat container of renderables and lights. /// See also the [Engine] methods `createScene` and `destroyScene`. From e7c96cd12499b3daba4d3927c73a90637908c43f Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Tue, 23 Jul 2024 19:04:47 -0700 Subject: [PATCH 06/15] better handle skipped frames and cache eviction Add Renderer::skipFrame() which should be called when intentionally skipping frames, for instance because the screen content hasn't changed, allowing Filament to performance needed periodic tasks, such as cache garbage collection and callback dispatches. We also improve the ResourceAllocator cache eviction policy: - the cache is aggressively purged when skipping a frame - we aggressively evict entries older than 3 frames The default Config is now set to a more agressive setting. --- .../src/main/cpp/Renderer.cpp | 8 +++ .../com/google/android/filament/Engine.java | 7 +- .../com/google/android/filament/Renderer.java | 15 ++++ filament/include/filament/Engine.h | 7 +- filament/include/filament/Renderer.h | 8 +++ filament/src/Renderer.cpp | 4 ++ filament/src/ResourceAllocator.cpp | 72 +++++++++++++++---- filament/src/ResourceAllocator.h | 3 +- filament/src/details/Renderer.cpp | 33 +++++++++ filament/src/details/Renderer.h | 3 + web/filament-js/jsbindings.cpp | 1 + 11 files changed, 143 insertions(+), 18 deletions(-) diff --git a/android/filament-android/src/main/cpp/Renderer.cpp b/android/filament-android/src/main/cpp/Renderer.cpp index 1dc252d3560..0020ea21d64 100644 --- a/android/filament-android/src/main/cpp/Renderer.cpp +++ b/android/filament-android/src/main/cpp/Renderer.cpp @@ -28,6 +28,14 @@ using namespace filament; using namespace backend; + +extern "C" JNIEXPORT void JNICALL +Java_com_google_android_filament_Renderer_nSkipFrame(JNIEnv *, jclass, jlong nativeRenderer, + jlong vsyncSteadyClockTimeNano) { + Renderer *renderer = (Renderer *) nativeRenderer; + renderer->skipFrame(uint64_t(vsyncSteadyClockTimeNano)); +} + extern "C" JNIEXPORT jboolean JNICALL Java_com_google_android_filament_Renderer_nBeginFrame(JNIEnv *, jclass, jlong nativeRenderer, jlong nativeSwapChain, jlong frameTimeNanos) { diff --git a/android/filament-android/src/main/java/com/google/android/filament/Engine.java b/android/filament-android/src/main/java/com/google/android/filament/Engine.java index 6c4c5119e3d..6cadf7d5c7d 100644 --- a/android/filament-android/src/main/java/com/google/android/filament/Engine.java +++ b/android/filament-android/src/main/java/com/google/android/filament/Engine.java @@ -425,9 +425,12 @@ public static class Config { public long resourceAllocatorCacheSizeMB = 64; /* - * This value determines for how many frames are texture entries kept in the cache. + * This value determines how many frames texture entries are kept for in the cache. This + * is a soft limit, meaning some texture older than this are allowed to stay in the cache. + * Typically only one texture is evicted per frame. + * The default is 1. */ - public long resourceAllocatorCacheMaxAge = 2; + public long resourceAllocatorCacheMaxAge = 1; /* * Disable backend handles use-after-free checks. diff --git a/android/filament-android/src/main/java/com/google/android/filament/Renderer.java b/android/filament-android/src/main/java/com/google/android/filament/Renderer.java index 4fb447db2c7..509a3c080ce 100644 --- a/android/filament-android/src/main/java/com/google/android/filament/Renderer.java +++ b/android/filament-android/src/main/java/com/google/android/filament/Renderer.java @@ -297,6 +297,20 @@ public void setVsyncTime(long steadyClockTimeNano) { nSetVsyncTime(getNativeObject(), steadyClockTimeNano); } + /** + * Call skipFrame when momentarily skipping frames, for instance if the content of the + * scene doesn't change. + * + * @param vsyncSteadyClockTimeNano The time in nanoseconds when the frame started being rendered, + * in the {@link System#nanoTime()} timebase. Divide this value by 1000000 to + * convert it to the {@link android.os.SystemClock#uptimeMillis()} + * time base. This typically comes from + * {@link android.view.Choreographer.FrameCallback}. + */ + public void skipFrame(long vsyncSteadyClockTimeNano) { + nSkipFrame(getNativeObject(), vsyncSteadyClockTimeNano); + } + /** * Sets up a frame for this Renderer. *

beginFrame manages frame pacing, and returns whether or not a frame should be @@ -716,6 +730,7 @@ void clearNativeObject() { private static native void nSetPresentationTime(long nativeObject, long monotonicClockNanos); private static native void nSetVsyncTime(long nativeObject, long steadyClockTimeNano); + private static native void nSkipFrame(long nativeObject, long vsyncSteadyClockTimeNano); private static native boolean nBeginFrame(long nativeRenderer, long nativeSwapChain, long frameTimeNanos); private static native void nEndFrame(long nativeRenderer); private static native void nRender(long nativeRenderer, long nativeView); diff --git a/filament/include/filament/Engine.h b/filament/include/filament/Engine.h index 6c3d87c29c4..f728e0dde34 100644 --- a/filament/include/filament/Engine.h +++ b/filament/include/filament/Engine.h @@ -350,9 +350,12 @@ class UTILS_PUBLIC Engine { uint32_t resourceAllocatorCacheSizeMB = 64; /* - * This value determines for how many frames are texture entries kept in the cache. + * This value determines how many frames texture entries are kept for in the cache. This + * is a soft limit, meaning some texture older than this are allowed to stay in the cache. + * Typically only one texture is evicted per frame. + * The default is 1. */ - uint32_t resourceAllocatorCacheMaxAge = 2; + uint32_t resourceAllocatorCacheMaxAge = 1; /* * Disable backend handles use-after-free checks. diff --git a/filament/include/filament/Renderer.h b/filament/include/filament/Renderer.h index 9bce76867e4..a4e919610db 100644 --- a/filament/include/filament/Renderer.h +++ b/filament/include/filament/Renderer.h @@ -271,6 +271,14 @@ class UTILS_PUBLIC Renderer : public FilamentAPI { */ void setVsyncTime(uint64_t steadyClockTimeNano) noexcept; + /** + * Call skipFrame when momentarily skipping frames, for instance if the content of the + * scene doesn't change. + * + * @param vsyncSteadyClockTimeNano + */ + void skipFrame(uint64_t vsyncSteadyClockTimeNano = 0u); + /** * Set-up a frame for this Renderer. * diff --git a/filament/src/Renderer.cpp b/filament/src/Renderer.cpp index 01546a325bd..258fd5ed327 100644 --- a/filament/src/Renderer.cpp +++ b/filament/src/Renderer.cpp @@ -45,6 +45,10 @@ void Renderer::setPresentationTime(int64_t monotonic_clock_ns) { downcast(this)->setPresentationTime(monotonic_clock_ns); } +void Renderer::skipFrame(uint64_t vsyncSteadyClockTimeNano) { + downcast(this)->skipFrame(vsyncSteadyClockTimeNano); +} + bool Renderer::beginFrame(SwapChain* swapChain, uint64_t vsyncSteadyClockTimeNano) { return downcast(this)->beginFrame(downcast(swapChain), vsyncSteadyClockTimeNano); } diff --git a/filament/src/ResourceAllocator.cpp b/filament/src/ResourceAllocator.cpp index 3f92acf7849..471f8778bd8 100644 --- a/filament/src/ResourceAllocator.cpp +++ b/filament/src/ResourceAllocator.cpp @@ -27,6 +27,8 @@ #include "private/backend/DriverApi.h" +#include +#include #include #include #include @@ -209,6 +211,7 @@ void ResourceAllocator::destroyTexture(TextureHandle h) noexcept { uint32_t const size = key.value().getSize(); mTextureCache.emplace(key.value(), TextureCachePayload{ h, mAge, size }); mCacheSize += size; + mCacheSizeHiWaterMark = std::max(mCacheSizeHiWaterMark, mCacheSize); } } else { mBackend.destroyTexture(h); @@ -219,40 +222,83 @@ ResourceAllocatorDisposerInterface& ResourceAllocator::getDisposer() noexcept { return *mDisposer; } -void ResourceAllocator::gc() noexcept { - // this is called regularly -- usually once per frame of each Renderer +void ResourceAllocator::gc(bool skippedFrame) noexcept { + // this is called regularly -- usually once per frame - // increase our age - const size_t age = mAge++; + // increase our age at each (non-skipped) frame + const size_t age = mAge; + if (!skippedFrame) { + mAge++; + } // Purging strategy: - // - remove entries that are older than a certain age - // - remove only one entry per gc(), + // - remove all entries older than MAX_AGE_SKIPPED_FRAME when skipping a frame + // - remove entries older than mCacheMaxAgeSoft + // - remove only MAX_EVICTION_COUNT entry per gc(), + // - look for the number of unique resource ages present in the cache (this basically gives + // us how many buckets of resources we have corresponding to previous frames. + // - remove all resources that have an age older than the MAX_UNIQUE_AGE_COUNT'th bucket auto& textureCache = mTextureCache; + + // when skipping a frame, the maximum age to keep in the cache + constexpr size_t MAX_AGE_SKIPPED_FRAME = 1; + + // maximum entry count to evict per GC, under the mCacheMaxAgeSoft limit + constexpr size_t MAX_EVICTION_COUNT = 1; + + // maximum number of unique ages in the cache + constexpr size_t MAX_UNIQUE_AGE_COUNT = 3; + + utils::bitset32 ages; + uint32_t evictedCount = 0; for (auto it = textureCache.begin(); it != textureCache.end();) { - const size_t ageDiff = age - it->second.age; - if (ageDiff >= mCacheMaxAge) { + size_t const ageDiff = age - it->second.age; + if ((ageDiff >= MAX_AGE_SKIPPED_FRAME && skippedFrame) || + (ageDiff >= mCacheMaxAge && evictedCount < MAX_EVICTION_COUNT)) { + evictedCount++; purge(it); - // only purge a single entry per gc - break; } else { + // build the set of ages present in the cache after eviction + ages.set(std::min(size_t(31), ageDiff)); ++it; } } + + // if we have MAX_UNIQUE_AGE_COUNT ages or more, we evict all the resources that + // are older than the MAX_UNIQUE_AGE_COUNT'th age. + if (!skippedFrame && ages.count() >= MAX_UNIQUE_AGE_COUNT) { + uint32_t bits = ages.getValue(); + // remove from the set the ages we keep + for (size_t i = 0; i < MAX_UNIQUE_AGE_COUNT - 1; i++) { + bits &= ~(1 << utils::ctz(bits)); + } + size_t const maxAge = utils::ctz(bits); + for (auto it = textureCache.begin(); it != textureCache.end();) { + const size_t ageDiff = age - it->second.age; + if (ageDiff >= maxAge) { + purge(it); + } else { + ++it; + } + } + } } UTILS_NOINLINE void ResourceAllocator::dump(bool brief) const noexcept { - slog.d << "# entries=" << mTextureCache.size() << ", sz=" << mCacheSize / float(1u << 20u) - << " MiB" << io::endl; + constexpr float MiB = 1.0f / float(1u << 20u); + slog.d << "# entries=" << mTextureCache.size() + << ", sz=" << (float)mCacheSize * MiB << " MiB" + << ", max=" << (float)mCacheSizeHiWaterMark * MiB << " MiB" + << io::endl; if (!brief) { for (auto const& it : mTextureCache) { auto w = it.first.width; auto h = it.first.height; auto f = FTexture::getFormatSize(it.first.format); slog.d << it.first.name << ": w=" << w << ", h=" << h << ", f=" << f << ", sz=" - << it.second.size / float(1u << 20u) << io::endl; + << (float)it.second.size * MiB << io::endl; } } } diff --git a/filament/src/ResourceAllocator.h b/filament/src/ResourceAllocator.h index 40f15f3b4ac..0bcc9aefa23 100644 --- a/filament/src/ResourceAllocator.h +++ b/filament/src/ResourceAllocator.h @@ -113,7 +113,7 @@ class ResourceAllocator final : public ResourceAllocatorInterface { ResourceAllocatorDisposerInterface& getDisposer() noexcept override; - void gc() noexcept; + void gc(bool skippedFrame = false) noexcept; private: size_t const mCacheMaxAge; @@ -222,6 +222,7 @@ class ResourceAllocator final : public ResourceAllocatorInterface { CacheContainer mTextureCache; size_t mAge = 0; uint32_t mCacheSize = 0; + uint32_t mCacheSizeHiWaterMark = 0; static constexpr bool mEnabled = true; friend class ResourceAllocatorDisposer; diff --git a/filament/src/details/Renderer.cpp b/filament/src/details/Renderer.cpp index 9fd1758b82e..a756c0cb9c6 100644 --- a/filament/src/details/Renderer.cpp +++ b/filament/src/details/Renderer.cpp @@ -242,6 +242,39 @@ void FRenderer::setVsyncTime(uint64_t steadyClockTimeNano) noexcept { mVsyncSteadyClockTimeNano = steadyClockTimeNano; } +void FRenderer::skipFrame(uint64_t vsyncSteadyClockTimeNano) { + SYSTRACE_CALL(); + + FILAMENT_CHECK_PRECONDITION(!mSwapChain) << + "skipFrame() can't be called between beginFrame() and endFrame()"; + + if (!vsyncSteadyClockTimeNano) { + vsyncSteadyClockTimeNano = mVsyncSteadyClockTimeNano; + mVsyncSteadyClockTimeNano = 0; + } + + FEngine& engine = mEngine; + FEngine::DriverApi& driver = engine.getDriverApi(); + + // Gives the backend a chance to execute periodic tasks. This must be called before + // the frame skipper. + driver.tick(); + + // do this before engine.flush() + mResourceAllocator->gc(true); + + // Run the component managers' GC in parallel + // WARNING: while doing this we can't access any component manager + auto& js = engine.getJobSystem(); + + auto *job = js.runAndRetain(jobs::createJob(js, nullptr, &FEngine::gc, &engine)); // gc all managers + + engine.flush(); // flush command stream + + // make sure we're done with the gcs + js.waitAndRelease(job); +} + bool FRenderer::beginFrame(FSwapChain* swapChain, uint64_t vsyncSteadyClockTimeNano) { assert_invariant(swapChain); diff --git a/filament/src/details/Renderer.h b/filament/src/details/Renderer.h index e2043964342..c0c6d5aa1be 100644 --- a/filament/src/details/Renderer.h +++ b/filament/src/details/Renderer.h @@ -91,6 +91,9 @@ class FRenderer : public Renderer { void setVsyncTime(uint64_t steadyClockTimeNano) noexcept; + // skip a frame + void skipFrame(uint64_t vsyncSteadyClockTimeNano); + // start a frame bool beginFrame(FSwapChain* swapChain, uint64_t vsyncSteadyClockTimeNano); diff --git a/web/filament-js/jsbindings.cpp b/web/filament-js/jsbindings.cpp index e5ec3fad9bb..7dcdb7c1838 100644 --- a/web/filament-js/jsbindings.cpp +++ b/web/filament-js/jsbindings.cpp @@ -627,6 +627,7 @@ class_("Renderer") .function("getClearOptions", &Renderer::getClearOptions) .function("setPresentationTime", &Renderer::setPresentationTime) .function("setVsyncTime", &Renderer::setVsyncTime) + .function("skipFrame", &Renderer::skipFrame) .function("beginFrame", EMBIND_LAMBDA(bool, (Renderer* self, SwapChain* swapChain), { return self->beginFrame(swapChain); }), allow_raw_pointers()) From 8add6ae1ac9a6ee4b28418e239e69691f1d41979 Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Thu, 25 Jul 2024 17:21:40 -0700 Subject: [PATCH 07/15] fix a potential crash is AssetLoader if it can't find a name for a node, it will revert to the config's defaultNodeName, however, if that is nullptr also, a crash will occur. so we provide a last-resort hardcoded name in that case. --- libs/gltfio/src/AssetLoader.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libs/gltfio/src/AssetLoader.cpp b/libs/gltfio/src/AssetLoader.cpp index 7c7555bd689..cf3007a67f6 100644 --- a/libs/gltfio/src/AssetLoader.cpp +++ b/libs/gltfio/src/AssetLoader.cpp @@ -96,7 +96,8 @@ static std::string getNodeName(cgltf_node const* node, char const* defaultNodeNa if (node->mesh && node->mesh->name) return node->mesh->name; if (node->light && node->light->name) return node->light->name; if (node->camera && node->camera->name) return node->camera->name; - return defaultNodeName; + if (defaultNodeName) return defaultNodeName; + return ""; }; std::string strOrig(getNameImpl()); From b54a2046cb12f47c34e6e45ffda298c3ad34033f Mon Sep 17 00:00:00 2001 From: Grant Commodore Date: Fri, 26 Jul 2024 00:41:00 -0700 Subject: [PATCH 08/15] Multiview support for vulkan (#7892) Adds multiview support for vulkan. This is done by adding a layerCount to the renderTarget, which is used to determine if multiview is available and being used in the current renderpass. FIXES=[332425392] Co-authored-by: Powei Feng --- filament/backend/src/vulkan/VulkanContext.cpp | 8 ++++++-- filament/backend/src/vulkan/VulkanContext.h | 2 ++ filament/backend/src/vulkan/VulkanDriver.cpp | 12 ++++++++++- .../backend/src/vulkan/VulkanFboCache.cpp | 20 +++++++++++++++++++ filament/backend/src/vulkan/VulkanFboCache.h | 2 +- filament/backend/src/vulkan/VulkanHandles.cpp | 5 +++-- filament/backend/src/vulkan/VulkanHandles.h | 4 +++- filament/backend/src/vulkan/VulkanTexture.cpp | 5 ++++- filament/backend/src/vulkan/VulkanTexture.h | 4 ++++ filament/src/RendererUtils.cpp | 2 +- filament/src/fg/FrameGraphRenderPass.h | 2 +- 11 files changed, 56 insertions(+), 10 deletions(-) diff --git a/filament/backend/src/vulkan/VulkanContext.cpp b/filament/backend/src/vulkan/VulkanContext.cpp index 5352f639a84..fc634eaebfe 100644 --- a/filament/backend/src/vulkan/VulkanContext.cpp +++ b/filament/backend/src/vulkan/VulkanContext.cpp @@ -59,7 +59,11 @@ VkExtent2D VulkanAttachment::getExtent2D() const { VkImageView VulkanAttachment::getImageView() { assert_invariant(texture); - return texture->getAttachmentView(getSubresourceRange()); + VkImageSubresourceRange range = getSubresourceRange(); + if (range.layerCount > 1) { + return texture->getMultiviewAttachmentView(range); + } + return texture->getAttachmentView(range); } bool VulkanAttachment::isDepth() const { @@ -73,7 +77,7 @@ VkImageSubresourceRange VulkanAttachment::getSubresourceRange() const { .baseMipLevel = uint32_t(level), .levelCount = 1, .baseArrayLayer = uint32_t(layer), - .layerCount = 1, + .layerCount = layerCount, }; } diff --git a/filament/backend/src/vulkan/VulkanContext.h b/filament/backend/src/vulkan/VulkanContext.h index cb0a26aea32..9f506e03f76 100644 --- a/filament/backend/src/vulkan/VulkanContext.h +++ b/filament/backend/src/vulkan/VulkanContext.h @@ -43,6 +43,8 @@ struct VulkanCommandBuffer; struct VulkanAttachment { VulkanTexture* texture = nullptr; uint8_t level = 0; + uint8_t baseViewIndex = 0; + uint8_t layerCount = 1; uint16_t layer = 0; bool isDepth() const; diff --git a/filament/backend/src/vulkan/VulkanDriver.cpp b/filament/backend/src/vulkan/VulkanDriver.cpp index 971647e229e..bf5604a0df5 100644 --- a/filament/backend/src/vulkan/VulkanDriver.cpp +++ b/filament/backend/src/vulkan/VulkanDriver.cpp @@ -595,6 +595,8 @@ void VulkanDriver::createRenderTargetR(Handle rth, colorTargets[i] = { .texture = mResourceAllocator.handle_cast(color[i].handle), .level = color[i].level, + .baseViewIndex = color[i].baseViewIndex, + .layerCount = layerCount, .layer = color[i].layer, }; UTILS_UNUSED_IN_RELEASE VkExtent2D extent = colorTargets[i].getExtent2D(); @@ -609,6 +611,8 @@ void VulkanDriver::createRenderTargetR(Handle rth, depthStencil[0] = { .texture = mResourceAllocator.handle_cast(depth.handle), .level = depth.level, + .baseViewIndex = depth.baseViewIndex, + .layerCount = layerCount, .layer = depth.layer, }; UTILS_UNUSED_IN_RELEASE VkExtent2D extent = depthStencil[0].getExtent2D(); @@ -621,6 +625,8 @@ void VulkanDriver::createRenderTargetR(Handle rth, depthStencil[1] = { .texture = mResourceAllocator.handle_cast(stencil.handle), .level = stencil.level, + .baseViewIndex = stencil.baseViewIndex, + .layerCount = layerCount, .layer = stencil.layer, }; UTILS_UNUSED_IN_RELEASE VkExtent2D extent = depthStencil[1].getExtent2D(); @@ -637,7 +643,7 @@ void VulkanDriver::createRenderTargetR(Handle rth, auto renderTarget = mResourceAllocator.construct(rth, mPlatform->getDevice(), mPlatform->getPhysicalDevice(), mContext, mAllocator, &mCommands, width, height, - samples, colorTargets, depthStencil, mStagePool); + samples, colorTargets, depthStencil, mStagePool, layerCount); mResourceManager.acquire(renderTarget); } @@ -1267,6 +1273,8 @@ void VulkanDriver::beginRenderPass(Handle rth, const RenderPassP } } + uint8_t const renderTargetLayerCount = rt->getLayerCount(); + // Create the VkRenderPass or fetch it from cache. VulkanFboCache::RenderPassKey rpkey = { .initialColorLayoutMask = 0, @@ -1277,10 +1285,12 @@ void VulkanDriver::beginRenderPass(Handle rth, const RenderPassP .discardEnd = discardEndVal, .samples = rt->getSamples(), .subpassMask = uint8_t(params.subpassMask), + .viewCount = renderTargetLayerCount, }; for (int i = 0; i < MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT; i++) { const VulkanAttachment& info = rt->getColor(i); if (info.texture) { + assert_invariant(info.layerCount == renderTargetLayerCount); rpkey.initialColorLayoutMask |= 1 << i; rpkey.colorFormat[i] = info.getFormat(); if (rpkey.samples > 1 && info.texture->samples == 1) { diff --git a/filament/backend/src/vulkan/VulkanFboCache.cpp b/filament/backend/src/vulkan/VulkanFboCache.cpp index 3b8b85cdeb8..40e3db2ee3c 100644 --- a/filament/backend/src/vulkan/VulkanFboCache.cpp +++ b/filament/backend/src/vulkan/VulkanFboCache.cpp @@ -43,6 +43,7 @@ bool VulkanFboCache::RenderPassEq::operator()(const RenderPassKey& k1, if (k1.samples != k2.samples) return false; if (k1.needsResolveMask != k2.needsResolveMask) return false; if (k1.subpassMask != k2.subpassMask) return false; + if (k1.viewCount != k2.viewCount) return false; return true; } @@ -186,6 +187,25 @@ VkRenderPass VulkanFboCache::getRenderPass(RenderPassKey config) noexcept { .pDependencies = dependencies }; + VkRenderPassMultiviewCreateInfo multiviewCreateInfo = {}; + uint32_t const subpassViewMask = (1 << config.viewCount) - 1; + // Prepare a view mask array for the maximum number of subpasses. All subpasses have all views + // activated. + uint32_t const viewMasks[2] = {subpassViewMask, subpassViewMask}; + if (config.viewCount > 1) { + // Fill the multiview create info. + multiviewCreateInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO; + multiviewCreateInfo.pNext = nullptr; + multiviewCreateInfo.subpassCount = hasSubpasses ? 2u : 1u; + multiviewCreateInfo.pViewMasks = viewMasks; + multiviewCreateInfo.dependencyCount = 0; + multiviewCreateInfo.pViewOffsets = nullptr; + multiviewCreateInfo.correlationMaskCount = 1; + multiviewCreateInfo.pCorrelationMasks = &subpassViewMask; + + renderPassInfo.pNext = &multiviewCreateInfo; + } + int attachmentIndex = 0; // Populate the Color Attachments. diff --git a/filament/backend/src/vulkan/VulkanFboCache.h b/filament/backend/src/vulkan/VulkanFboCache.h index 2f4b1a9101c..6af444a2032 100644 --- a/filament/backend/src/vulkan/VulkanFboCache.h +++ b/filament/backend/src/vulkan/VulkanFboCache.h @@ -64,7 +64,7 @@ class VulkanFboCache { uint8_t samples; // 1 byte uint8_t needsResolveMask; // 1 byte uint8_t subpassMask; // 1 byte - uint8_t padding2; // 1 byte + uint8_t viewCount; // 1 byte }; struct RenderPassVal { VkRenderPass handle; diff --git a/filament/backend/src/vulkan/VulkanHandles.cpp b/filament/backend/src/vulkan/VulkanHandles.cpp index ff3f137c108..4222f361a6f 100644 --- a/filament/backend/src/vulkan/VulkanHandles.cpp +++ b/filament/backend/src/vulkan/VulkanHandles.cpp @@ -323,11 +323,12 @@ VulkanRenderTarget::VulkanRenderTarget(VkDevice device, VkPhysicalDevice physica VulkanContext const& context, VmaAllocator allocator, VulkanCommands* commands, uint32_t width, uint32_t height, uint8_t samples, VulkanAttachment color[MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT], - VulkanAttachment depthStencil[2], VulkanStagePool& stagePool) + VulkanAttachment depthStencil[2], VulkanStagePool& stagePool, uint8_t layerCount) : HwRenderTarget(width, height), VulkanResource(VulkanResourceType::RENDER_TARGET), mOffscreen(true), - mSamples(samples) { + mSamples(samples), + mLayerCount(layerCount) { for (int index = 0; index < MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT; index++) { mColor[index] = color[index]; } diff --git a/filament/backend/src/vulkan/VulkanHandles.h b/filament/backend/src/vulkan/VulkanHandles.h index 75596ea93cd..ee6ffed346a 100644 --- a/filament/backend/src/vulkan/VulkanHandles.h +++ b/filament/backend/src/vulkan/VulkanHandles.h @@ -304,7 +304,7 @@ struct VulkanRenderTarget : private HwRenderTarget, VulkanResource { VulkanContext const& context, VmaAllocator allocator, VulkanCommands* commands, uint32_t width, uint32_t height, uint8_t samples, VulkanAttachment color[MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT], - VulkanAttachment depthStencil[2], VulkanStagePool& stagePool); + VulkanAttachment depthStencil[2], VulkanStagePool& stagePool, uint8_t layerCount); // Creates a special "default" render target (i.e. associated with the swap chain) explicit VulkanRenderTarget(); @@ -319,6 +319,7 @@ struct VulkanRenderTarget : private HwRenderTarget, VulkanResource { VulkanAttachment& getMsaaDepth(); uint8_t getColorTargetCount(const VulkanRenderPass& pass) const; uint8_t getSamples() const { return mSamples; } + uint8_t getLayerCount() const { return mLayerCount; } bool hasDepth() const { return mDepth.texture; } bool isSwapChain() const { return !mOffscreen; } void bindToSwapChain(VulkanSwapChain& surf); @@ -330,6 +331,7 @@ struct VulkanRenderTarget : private HwRenderTarget, VulkanResource { VulkanAttachment mMsaaDepthAttachment = {}; const bool mOffscreen : 1; uint8_t mSamples : 7; + uint8_t mLayerCount = 1; }; struct VulkanBufferObject; diff --git a/filament/backend/src/vulkan/VulkanTexture.cpp b/filament/backend/src/vulkan/VulkanTexture.cpp index adf0f4ad9ef..9ca88a2c78c 100644 --- a/filament/backend/src/vulkan/VulkanTexture.cpp +++ b/filament/backend/src/vulkan/VulkanTexture.cpp @@ -387,12 +387,15 @@ void VulkanTexture::setPrimaryRange(uint32_t minMiplevel, uint32_t maxMiplevel) } VkImageView VulkanTexture::getAttachmentView(VkImageSubresourceRange range) { - // Attachments should only have one mipmap level and one layer. range.levelCount = 1; range.layerCount = 1; return getImageView(range, VK_IMAGE_VIEW_TYPE_2D, {}); } +VkImageView VulkanTexture::getMultiviewAttachmentView(VkImageSubresourceRange range) { + return getImageView(range, VK_IMAGE_VIEW_TYPE_2D_ARRAY, {}); +} + VkImageView VulkanTexture::getViewForType(VkImageSubresourceRange const& range, VkImageViewType type) { return getImageView(range, type, mSwizzle); } diff --git a/filament/backend/src/vulkan/VulkanTexture.h b/filament/backend/src/vulkan/VulkanTexture.h index 7b0b64a8c4c..df995fc7ed8 100644 --- a/filament/backend/src/vulkan/VulkanTexture.h +++ b/filament/backend/src/vulkan/VulkanTexture.h @@ -73,6 +73,10 @@ struct VulkanTexture : public HwTexture, VulkanResource { // and the identity swizzle. VkImageView getAttachmentView(VkImageSubresourceRange range); + // Gets or creates a cached VkImageView for a single subresource that can be used as a render + // target attachment when rendering with multiview. + VkImageView getMultiviewAttachmentView(VkImageSubresourceRange range); + // This is a workaround for the first few frames where we're waiting for the texture to actually // be uploaded. In that case, we bind the sampler to an empty texture, but the corresponding // imageView needs to be of the right type. Hence, we provide an option to indicate the diff --git a/filament/src/RendererUtils.cpp b/filament/src/RendererUtils.cpp index f5a7238753f..d465a03288b 100644 --- a/filament/src/RendererUtils.cpp +++ b/filament/src/RendererUtils.cpp @@ -75,7 +75,7 @@ FrameGraphId RendererUtils::colorPass( TargetBufferFlags const clearColorFlags = config.clearFlags & TargetBufferFlags::COLOR; TargetBufferFlags clearDepthFlags = config.clearFlags & TargetBufferFlags::DEPTH; TargetBufferFlags clearStencilFlags = config.clearFlags & TargetBufferFlags::STENCIL; - uint8_t layerCount = 0; + uint8_t layerCount = 1; data.shadows = blackboard.get("shadows"); data.ssao = blackboard.get("ssao"); diff --git a/filament/src/fg/FrameGraphRenderPass.h b/filament/src/fg/FrameGraphRenderPass.h index e073dcb321a..a45aaf5a002 100644 --- a/filament/src/fg/FrameGraphRenderPass.h +++ b/filament/src/fg/FrameGraphRenderPass.h @@ -48,7 +48,7 @@ struct FrameGraphRenderPass { Viewport viewport{}; math::float4 clearColor{}; uint8_t samples = 0; // # of samples (0 = unset, default) - uint8_t layerCount = 0; // # of layer (# > 1 = multiview) + uint8_t layerCount = 1; // # of layer (# > 1 = multiview) backend::TargetBufferFlags clearFlags{}; backend::TargetBufferFlags discardStart{}; }; From 40ae416715604fb9eb390a4fcfe56ee90cbe9cb9 Mon Sep 17 00:00:00 2001 From: Ben Doherty Date: Mon, 29 Jul 2024 13:14:50 -0600 Subject: [PATCH 09/15] Metal: redistribute pool sizes (#8000) --- .../backend/include/private/backend/HandleAllocator.h | 2 +- filament/backend/src/metal/MetalDriver.mm | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/filament/backend/include/private/backend/HandleAllocator.h b/filament/backend/include/private/backend/HandleAllocator.h index d083fe3da92..9c31b594875 100644 --- a/filament/backend/include/private/backend/HandleAllocator.h +++ b/filament/backend/include/private/backend/HandleAllocator.h @@ -39,7 +39,7 @@ #define HandleAllocatorGL HandleAllocator<32, 64, 136> // ~4520 / pool / MiB #define HandleAllocatorVK HandleAllocator<64, 160, 312> // ~1820 / pool / MiB -#define HandleAllocatorMTL HandleAllocator<32, 48, 552> // ~1660 / pool / MiB +#define HandleAllocatorMTL HandleAllocator<32, 64, 552> // ~1660 / pool / MiB namespace filament::backend { diff --git a/filament/backend/src/metal/MetalDriver.mm b/filament/backend/src/metal/MetalDriver.mm index 92b1b22f736..a0c8217972a 100644 --- a/filament/backend/src/metal/MetalDriver.mm +++ b/filament/backend/src/metal/MetalDriver.mm @@ -53,14 +53,14 @@ // MetalRenderPrimitive : 24 many // MetalVertexBuffer : 32 moderate // -- less than or equal 32 bytes - // MetalIndexBuffer : 40 moderate // MetalFence : 48 few - // MetalBufferObject : 48 many - // -- less than or equal 48 bytes + // MetalIndexBuffer : 56 moderate + // MetalBufferObject : 64 many + // -- less than or equal 64 bytes // MetalSamplerGroup : 112 few // MetalProgram : 152 moderate // MetalTexture : 152 moderate - // MetalSwapChain : 184 few + // MetalSwapChain : 208 few // MetalRenderTarget : 272 few // MetalVertexBufferInfo : 552 moderate // -- less than or equal to 552 bytes From 3f4dac62485bfebea92ed5afc51806b5dfd2da81 Mon Sep 17 00:00:00 2001 From: Sungun Park Date: Mon, 29 Jul 2024 22:25:36 +0000 Subject: [PATCH 10/15] Fix a crash for IBL resource loading (#8001) This fixes a crash introduced by a8ace2891d2d83e8b9dd7d06a01591d3ff53c764 The refactored FrameInfoManager can cause a crash when IBL resource loading happens because now the getLastFrameInfo() references an invalid value via the `front` method. Return the default FrameInfo to resolve this. Also fix a null pointer reference bug for OpenGLTimer::State, which happenes when the renderer for IBLPrefilterContext is destroyed. --- .../backend/src/opengl/OpenGLTimerQuery.cpp | 32 +++++++++++-------- filament/src/FrameInfo.h | 6 ++-- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/filament/backend/src/opengl/OpenGLTimerQuery.cpp b/filament/backend/src/opengl/OpenGLTimerQuery.cpp index 83a9d834c1e..3815a1f0d8e 100644 --- a/filament/backend/src/opengl/OpenGLTimerQuery.cpp +++ b/filament/backend/src/opengl/OpenGLTimerQuery.cpp @@ -143,21 +143,25 @@ void TimerQueryNativeFactory::endTimeElapsedQuery(OpenGLDriver& driver, GLTimerQ driver.runEveryNowAndThen([&context = mContext, weak]() -> bool { auto state = weak.lock(); - if (state) { - GLuint available = 0; - context.procs.getQueryObjectuiv(state->gl.query, GL_QUERY_RESULT_AVAILABLE, &available); - CHECK_GL_ERROR(utils::slog.e) - if (!available) { - // we need to try this one again later - return false; - } - GLuint64 elapsedTime = 0; - // we won't end-up here if we're on ES and don't have GL_EXT_disjoint_timer_query - context.procs.getQueryObjectui64v(state->gl.query, GL_QUERY_RESULT, &elapsedTime); - state->elapsed.store((int64_t)elapsedTime, std::memory_order_relaxed); - } else { - state->elapsed.store(int64_t(TimerQueryResult::ERROR), std::memory_order_relaxed); + if (!state) { + // The timer query state has been destroyed on the way, very likely due to the IBL + // prefilter context destruction. We still return true to get this element removed from + // the query list. + return true; } + + GLuint available = 0; + context.procs.getQueryObjectuiv(state->gl.query, GL_QUERY_RESULT_AVAILABLE, &available); + CHECK_GL_ERROR(utils::slog.e) + if (!available) { + // we need to try this one again later + return false; + } + GLuint64 elapsedTime = 0; + // we won't end-up here if we're on ES and don't have GL_EXT_disjoint_timer_query + context.procs.getQueryObjectui64v(state->gl.query, GL_QUERY_RESULT, &elapsedTime); + state->elapsed.store((int64_t)elapsedTime, std::memory_order_relaxed); + return true; }); } diff --git a/filament/src/FrameInfo.h b/filament/src/FrameInfo.h index 28b99394014..76563e36f75 100644 --- a/filament/src/FrameInfo.h +++ b/filament/src/FrameInfo.h @@ -163,9 +163,9 @@ class FrameInfoManager { // call this immediately before "swap buffers" void endFrame(backend::DriverApi& driver) noexcept; - details::FrameInfo const& getLastFrameInfo() const noexcept { - // if pFront is not set yet, return front() but in this case front().valid will be false - return pFront ? *pFront : mFrameTimeHistory.front(); + details::FrameInfo getLastFrameInfo() const noexcept { + // if pFront is not set yet, return FrameInfo(). But the `valid` field will be false in this case. + return pFront ? *pFront : details::FrameInfo{}; } utils::FixedCapacityVector getFrameInfoHistory(size_t historySize) const noexcept; From 2d21fcbe55f74ccec8d75b0b6a234a6fad3cd374 Mon Sep 17 00:00:00 2001 From: Balaji M Date: Tue, 30 Jul 2024 22:12:04 -0400 Subject: [PATCH 11/15] [gltfio] Initialize mCgltfBuffersLoaded to fix crash (#7999) Value of mCgltfBuffersLoaded is sometimes retained across creation of FAssetLoader which skips loading the buffer in AssetLoaderExtended#createPrimitive leading to null pointer crash --- libs/gltfio/src/extended/AssetLoaderExtended.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libs/gltfio/src/extended/AssetLoaderExtended.h b/libs/gltfio/src/extended/AssetLoaderExtended.h index a803fb4f132..bac61c85f05 100644 --- a/libs/gltfio/src/extended/AssetLoaderExtended.h +++ b/libs/gltfio/src/extended/AssetLoaderExtended.h @@ -70,7 +70,8 @@ struct AssetLoaderExtended { : mEngine(engine), mGltfPath(config.gltfPath), mMaterials(materials), - mUriDataCache(std::make_shared()) {} + mUriDataCache(std::make_shared()), + mCgltfBuffersLoaded(false) {} ~AssetLoaderExtended() = default; From 4125802644d666ce1b431465244c781de027f5c2 Mon Sep 17 00:00:00 2001 From: Sungun Park Date: Wed, 31 Jul 2024 16:22:54 +0000 Subject: [PATCH 12/15] Release Filament 1.53.3 --- README.md | 4 ++-- RELEASE_NOTES.md | 3 +++ android/gradle.properties | 2 +- ios/CocoaPods/Filament.podspec | 4 ++-- web/filament-js/package.json | 2 +- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 97e81b5e083..51e921b31d0 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ repositories { } dependencies { - implementation 'com.google.android.filament:filament-android:1.53.2' + implementation 'com.google.android.filament:filament-android:1.53.3' } ``` @@ -51,7 +51,7 @@ Here are all the libraries available in the group `com.google.android.filament`: iOS projects can use CocoaPods to install the latest release: ```shell -pod 'Filament', '~> 1.53.2' +pod 'Filament', '~> 1.53.3' ``` ### Snapshots diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index d1392c5a600..805b8f277c1 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -7,6 +7,9 @@ A new header is inserted each time a *tag* is created. Instead, if you are authoring a PR for the main branch, add your release note to [NEW_RELEASE_NOTES.md](./NEW_RELEASE_NOTES.md). +## v1.53.4 + + ## v1.53.3 - Add drag and drop support for IBL files for desktop gltf_viewer. diff --git a/android/gradle.properties b/android/gradle.properties index 7994c61d4a1..79f32859701 100644 --- a/android/gradle.properties +++ b/android/gradle.properties @@ -1,5 +1,5 @@ GROUP=com.google.android.filament -VERSION_NAME=1.53.2 +VERSION_NAME=1.53.3 POM_DESCRIPTION=Real-time physically based rendering engine for Android. diff --git a/ios/CocoaPods/Filament.podspec b/ios/CocoaPods/Filament.podspec index f1f1105fa3c..07eccef0bdc 100644 --- a/ios/CocoaPods/Filament.podspec +++ b/ios/CocoaPods/Filament.podspec @@ -1,12 +1,12 @@ Pod::Spec.new do |spec| spec.name = "Filament" - spec.version = "1.53.2" + spec.version = "1.53.3" spec.license = { :type => "Apache 2.0", :file => "LICENSE" } spec.homepage = "https://google.github.io/filament" spec.authors = "Google LLC." spec.summary = "Filament is a real-time physically based rendering engine for Android, iOS, Windows, Linux, macOS, and WASM/WebGL." spec.platform = :ios, "11.0" - spec.source = { :http => "https://github.com/google/filament/releases/download/v1.53.2/filament-v1.53.2-ios.tgz" } + spec.source = { :http => "https://github.com/google/filament/releases/download/v1.53.3/filament-v1.53.3-ios.tgz" } # Fix linking error with Xcode 12; we do not yet support the simulator on Apple silicon. spec.pod_target_xcconfig = { diff --git a/web/filament-js/package.json b/web/filament-js/package.json index c381eef5718..a79fd893ab7 100644 --- a/web/filament-js/package.json +++ b/web/filament-js/package.json @@ -1,6 +1,6 @@ { "name": "filament", - "version": "1.53.2", + "version": "1.53.3", "description": "Real-time physically based rendering engine", "main": "filament.js", "module": "filament.js", From c43c58af5dfe24c6e08d6694ec178223debcb571 Mon Sep 17 00:00:00 2001 From: Sungun Park Date: Wed, 31 Jul 2024 16:23:06 +0000 Subject: [PATCH 13/15] Bump version to 1.53.4 --- README.md | 4 ++-- android/gradle.properties | 2 +- ios/CocoaPods/Filament.podspec | 4 ++-- web/filament-js/package.json | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 51e921b31d0..9f4ffb12b23 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ repositories { } dependencies { - implementation 'com.google.android.filament:filament-android:1.53.3' + implementation 'com.google.android.filament:filament-android:1.53.4' } ``` @@ -51,7 +51,7 @@ Here are all the libraries available in the group `com.google.android.filament`: iOS projects can use CocoaPods to install the latest release: ```shell -pod 'Filament', '~> 1.53.3' +pod 'Filament', '~> 1.53.4' ``` ### Snapshots diff --git a/android/gradle.properties b/android/gradle.properties index 79f32859701..4b7e670a417 100644 --- a/android/gradle.properties +++ b/android/gradle.properties @@ -1,5 +1,5 @@ GROUP=com.google.android.filament -VERSION_NAME=1.53.3 +VERSION_NAME=1.53.4 POM_DESCRIPTION=Real-time physically based rendering engine for Android. diff --git a/ios/CocoaPods/Filament.podspec b/ios/CocoaPods/Filament.podspec index 07eccef0bdc..49be1bc40a4 100644 --- a/ios/CocoaPods/Filament.podspec +++ b/ios/CocoaPods/Filament.podspec @@ -1,12 +1,12 @@ Pod::Spec.new do |spec| spec.name = "Filament" - spec.version = "1.53.3" + spec.version = "1.53.4" spec.license = { :type => "Apache 2.0", :file => "LICENSE" } spec.homepage = "https://google.github.io/filament" spec.authors = "Google LLC." spec.summary = "Filament is a real-time physically based rendering engine for Android, iOS, Windows, Linux, macOS, and WASM/WebGL." spec.platform = :ios, "11.0" - spec.source = { :http => "https://github.com/google/filament/releases/download/v1.53.3/filament-v1.53.3-ios.tgz" } + spec.source = { :http => "https://github.com/google/filament/releases/download/v1.53.4/filament-v1.53.4-ios.tgz" } # Fix linking error with Xcode 12; we do not yet support the simulator on Apple silicon. spec.pod_target_xcconfig = { diff --git a/web/filament-js/package.json b/web/filament-js/package.json index a79fd893ab7..aef76f68202 100644 --- a/web/filament-js/package.json +++ b/web/filament-js/package.json @@ -1,6 +1,6 @@ { "name": "filament", - "version": "1.53.3", + "version": "1.53.4", "description": "Real-time physically based rendering engine", "main": "filament.js", "module": "filament.js", From 239b43e34d31ed69fec13ab58f1e6618e0768112 Mon Sep 17 00:00:00 2001 From: Benjamin Doherty Date: Mon, 5 Aug 2024 10:59:14 -0700 Subject: [PATCH 14/15] Add type_traits header --- libs/utils/include/utils/WorkStealingDequeue.h | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/utils/include/utils/WorkStealingDequeue.h b/libs/utils/include/utils/WorkStealingDequeue.h index f9f5b5fbc23..9ac980b799d 100644 --- a/libs/utils/include/utils/WorkStealingDequeue.h +++ b/libs/utils/include/utils/WorkStealingDequeue.h @@ -18,6 +18,7 @@ #define TNT_UTILS_WORKSTEALINGDEQUEUE_H #include +#include #include #include From 44d082049c4832f59c977c6a58508fc7b5aa438b Mon Sep 17 00:00:00 2001 From: Benjamin Doherty Date: Tue, 6 Aug 2024 15:11:25 -0700 Subject: [PATCH 15/15] Revert: inject the missing packing/unpacking function in ESSL 3.0 --- .../src/opengl/ShaderCompilerService.cpp | 115 +++--------------- .../src/opengl/ShaderCompilerService.h | 2 +- 2 files changed, 19 insertions(+), 98 deletions(-) diff --git a/filament/backend/src/opengl/ShaderCompilerService.cpp b/filament/backend/src/opengl/ShaderCompilerService.cpp index b40bda0249f..dd64a1b794c 100644 --- a/filament/backend/src/opengl/ShaderCompilerService.cpp +++ b/filament/backend/src/opengl/ShaderCompilerService.cpp @@ -572,23 +572,16 @@ void ShaderCompilerService::compileShaders(OpenGLContext& context, // split shader source, so we can insert the specialization constants and the packing // functions - auto [version, prolog, body] = splitShaderSource({ shader_src, shader_len }); + auto const [prolog, body] = splitShaderSource({ shader_src, shader_len }); - // enable ESSL 3.10 if available - if (context.isAtLeastGLES<3, 1>()) { - version = "#version 310 es\n"; - } - - const std::array sources = { - version.data(), + const std::array sources = { prolog.data(), specializationConstantString.c_str(), packingFunctions.data(), body.data() }; - const std::array lengths = { - (GLint)version.length(), + const std::array lengths = { (GLint)prolog.length(), (GLint)specializationConstantString.length(), (GLint)packingFunctions.length(), @@ -668,7 +661,6 @@ void ShaderCompilerService::process_OVR_multiview2(OpenGLContext& context, // Tragically, OpenGL 4.1 doesn't support unpackHalf2x16 (appeared in 4.2) and // macOS doesn't support GL_ARB_shading_language_packing -// Also GLES3.0 didn't have the full set of packing/unpacking functions std::string_view ShaderCompilerService::process_ARB_shading_language_packing(OpenGLContext& context) noexcept { using namespace std::literals; #ifdef BACKEND_OPENGL_VERSION_GL @@ -708,102 +700,31 @@ highp uint packHalf2x16(vec2 v) { highp uint y = fp32tou16(v.y); return (y << 16u) | x; } -highp uint packUnorm4x8(mediump vec4 v) { - v = round(clamp(v, 0.0, 1.0) * 255.0); - highp uint a = uint(v.x); - highp uint b = uint(v.y) << 8; - highp uint c = uint(v.z) << 16; - highp uint d = uint(v.w) << 24; - return (a|b|c|d); -} -highp uint packSnorm4x8(mediump vec4 v) { - v = round(clamp(v, -1.0, 1.0) * 127.0); - highp uint a = uint((int(v.x) & 0xff)); - highp uint b = uint((int(v.y) & 0xff)) << 8; - highp uint c = uint((int(v.z) & 0xff)) << 16; - highp uint d = uint((int(v.w) & 0xff)) << 24; - return (a|b|c|d); -} -mediump vec4 unpackUnorm4x8(highp uint v) { - return vec4(float((v & 0x000000ffu) ), - float((v & 0x0000ff00u) >> 8), - float((v & 0x00ff0000u) >> 16), - float((v & 0xff000000u) >> 24)) / 255.0; -} -mediump vec4 unpackSnorm4x8(highp uint v) { - int a = int(((v ) & 0xffu) << 24u) >> 24 ; - int b = int(((v >> 8u) & 0xffu) << 24u) >> 24 ; - int c = int(((v >> 16u) & 0xffu) << 24u) >> 24 ; - int d = int(((v >> 24u) & 0xffu) << 24u) >> 24 ; - return clamp(vec4(float(a), float(b), float(c), float(d)) / 127.0, -1.0, 1.0); -} )"sv; } #endif // BACKEND_OPENGL_VERSION_GL - -#ifdef BACKEND_OPENGL_VERSION_GLES - if (!context.isES2() && !context.isAtLeastGLES<3, 1>()) { - return R"( - -highp uint packUnorm4x8(mediump vec4 v) { - v = round(clamp(v, 0.0, 1.0) * 255.0); - highp uint a = uint(v.x); - highp uint b = uint(v.y) << 8; - highp uint c = uint(v.z) << 16; - highp uint d = uint(v.w) << 24; - return (a|b|c|d); -} -highp uint packSnorm4x8(mediump vec4 v) { - v = round(clamp(v, -1.0, 1.0) * 127.0); - highp uint a = uint((int(v.x) & 0xff)); - highp uint b = uint((int(v.y) & 0xff)) << 8; - highp uint c = uint((int(v.z) & 0xff)) << 16; - highp uint d = uint((int(v.w) & 0xff)) << 24; - return (a|b|c|d); -} -mediump vec4 unpackUnorm4x8(highp uint v) { - return vec4(float((v & 0x000000ffu) ), - float((v & 0x0000ff00u) >> 8), - float((v & 0x00ff0000u) >> 16), - float((v & 0xff000000u) >> 24)) / 255.0; -} -mediump vec4 unpackSnorm4x8(highp uint v) { - int a = int(((v ) & 0xffu) << 24u) >> 24 ; - int b = int(((v >> 8u) & 0xffu) << 24u) >> 24 ; - int c = int(((v >> 16u) & 0xffu) << 24u) >> 24 ; - int d = int(((v >> 24u) & 0xffu) << 24u) >> 24 ; - return clamp(vec4(float(a), float(b), float(c), float(d)) / 127.0, -1.0, 1.0); -} -)"sv; - } -#endif // BACKEND_OPENGL_VERSION_GLES return ""sv; } -// split shader source code in three: -// - the version line -// - extensions -// - everything else -std::array ShaderCompilerService::splitShaderSource(std::string_view source) noexcept { - auto version_start = source.find("#version"); - assert_invariant(version_start != std::string_view::npos); +// split shader source code in two, the first section goes from the start to the line after the +// last #extension, and the 2nd part goes from there to the end. +std::array ShaderCompilerService::splitShaderSource(std::string_view source) noexcept { + auto start = source.find("#version"); + assert_invariant(start != std::string_view::npos); - auto version_eol = source.find('\n', version_start) + 1; - assert_invariant(version_eol != std::string_view::npos); - - auto prolog_start = version_eol; - auto prolog_eol = source.rfind("\n#extension"); // last #extension line - if (prolog_eol == std::string_view::npos) { - prolog_eol = prolog_start; + auto pos = source.rfind("\n#extension"); + if (pos == std::string_view::npos) { + pos = start; } else { - prolog_eol = source.find('\n', prolog_eol + 1) + 1; + ++pos; } - auto body_start = prolog_eol; - std::string_view const version = source.substr(version_start, version_eol - version_start); - std::string_view const prolog = source.substr(prolog_start, prolog_eol - prolog_start); - std::string_view const body = source.substr(body_start, source.length() - body_start); - return { version, prolog, body }; + auto eol = source.find('\n', pos) + 1; + assert_invariant(eol != std::string_view::npos); + + std::string_view const version = source.substr(start, eol - start); + std::string_view const body = source.substr(version.length(), source.length() - version.length()); + return { version, body }; } /* diff --git a/filament/backend/src/opengl/ShaderCompilerService.h b/filament/backend/src/opengl/ShaderCompilerService.h index ef4059fc0bc..27255ea0002 100644 --- a/filament/backend/src/opengl/ShaderCompilerService.h +++ b/filament/backend/src/opengl/ShaderCompilerService.h @@ -146,7 +146,7 @@ class ShaderCompilerService { static std::string_view process_ARB_shading_language_packing(OpenGLContext& context) noexcept; - static std::array splitShaderSource(std::string_view source) noexcept; + static std::array splitShaderSource(std::string_view source) noexcept; static GLuint linkProgram(OpenGLContext& context, std::array shaders,