diff --git a/ext/CMakeLists.txt b/ext/CMakeLists.txt index 956b7a584..08f96e1db 100644 --- a/ext/CMakeLists.txt +++ b/ext/CMakeLists.txt @@ -54,7 +54,7 @@ if (MTS_ENABLE_EMBREE) set(EMBREE_GEOMETRY_GRID OFF CACHE BOOL " " FORCE) set(EMBREE_GEOMETRY_SUBDIVISION OFF CACHE BOOL " " FORCE) set(EMBREE_GEOMETRY_INSTANCE OFF CACHE BOOL " " FORCE) - set(EMBREE_GEOMETRY_USER OFF CACHE BOOL " " FORCE) + set(EMBREE_GEOMETRY_USER ON CACHE BOOL " " FORCE) string(TOUPPER "${ENOKI_ARCH_FLAGS}" ENOKI_ARCH_FLAGS_UPPER) diff --git a/ext/embree b/ext/embree index 1f39b74e4..2420a1230 160000 --- a/ext/embree +++ b/ext/embree @@ -1 +1 @@ -Subproject commit 1f39b74e4a374920874a098e017d801bf5e352bb +Subproject commit 2420a12303ff2900e47268931b226014ce43a1bc diff --git a/include/mitsuba/render/kdtree.h b/include/mitsuba/render/kdtree.h index 4e62725ea..626f65469 100644 --- a/include/mitsuba/render/kdtree.h +++ b/include/mitsuba/render/kdtree.h @@ -30,7 +30,7 @@ * Temporary scratch space that is used to cache intersection information * (# of floats) */ -#define MTS_KD_INTERSECTION_CACHE_SIZE 6 +#define MTS_KD_INTERSECTION_CACHE_SIZE 7 NAMESPACE_BEGIN(mitsuba) @@ -2427,7 +2427,7 @@ class MTS_EXPORT_RENDER ShapeKDTree : public TShapeKDTreeray_test(ray, active); else - std::tie(hit, t) = shape->ray_intersect(ray, cache + 2, active); + std::tie(hit, t) = shape->ray_intersect(ray, cache + 3, active); if (!ShadowRay && any(hit)) { Float shape_index_v = reinterpret_array(UInt(shape_index)); @@ -2441,13 +2441,16 @@ class MTS_EXPORT_RENDER ShapeKDTree : public TShapeKDTree) { - cache[2] = u; - cache[3] = v; + cache[3] = u; + cache[4] = v; } else { - masked(cache[2], hit) = u; - masked(cache[3], hit) = v; + masked(cache[3], hit) = u; + masked(cache[4], hit) = v; } } } diff --git a/include/mitsuba/render/shape.h b/include/mitsuba/render/shape.h index c7935147a..9e4c65e6c 100644 --- a/include/mitsuba/render/shape.h +++ b/include/mitsuba/render/shape.h @@ -160,6 +160,11 @@ class MTS_EXPORT_RENDER Shape : public Object { * field \c wi is initialized by the caller following the call to \ref * fill_surface_interaction(), and \c duv_dx, and \c duv_dy are left * uninitialized. + * + * \param cache + * Cached information about the previously computed intersection. The + * first entry of the cache indicates which lanes in the entries are + * valid. For invalid lanes, the information needs to be recomputed. */ virtual void fill_surface_interaction(const Ray3f &ray, const Float *cache, SurfaceInteraction3f &si, Mask active = true) const; @@ -343,6 +348,7 @@ NAMESPACE_END(mitsuba) ENOKI_CALL_SUPPORT_TEMPLATE_BEGIN(mitsuba::Shape) ENOKI_CALL_SUPPORT_METHOD(normal_derivative) ENOKI_CALL_SUPPORT_METHOD(fill_surface_interaction) + ENOKI_CALL_SUPPORT_METHOD(is_mesh) ENOKI_CALL_SUPPORT_GETTER_TYPE(emitter, m_emitter, const typename Class::Emitter *) ENOKI_CALL_SUPPORT_GETTER_TYPE(sensor, m_sensor, const typename Class::Sensor *) ENOKI_CALL_SUPPORT_GETTER_TYPE(bsdf, m_bsdf, const typename Class::BSDF *) diff --git a/src/librender/mesh.cpp b/src/librender/mesh.cpp index 84a8bfc49..36b78f18e 100644 --- a/src/librender/mesh.cpp +++ b/src/librender/mesh.cpp @@ -265,6 +265,10 @@ MTS_VARIANT void Mesh::fill_surface_interaction(const Ray3f & / Mask active) const { MTS_MASK_ARGUMENT(active); + // Only fill surface interaction for lanes with valid cache + Mask invalid_cache = neq(*cache++, 1.f); + active &= !invalid_cache; + // Barycentric coordinates within triangle Float b1 = cache[0], b2 = cache[1]; diff --git a/src/librender/scene_embree.inl b/src/librender/scene_embree.inl index d680e1859..bd8617d78 100644 --- a/src/librender/scene_embree.inl +++ b/src/librender/scene_embree.inl @@ -74,9 +74,10 @@ Scene::ray_intersect_cpu(const Ray3f &ray, Mask active) const { si.shape = m_shapes[shape_index]; si.prim_index = prim_index; - Float cache[2] = { rh.hit.u, rh.hit.v }; + // Create the cache for the Mesh shape + Float cache[3] = { (si.shape->is_mesh() ? 1.f : 0.f), rh.hit.u, rh.hit.v }; - // Ask shape(s) to fill in the rest using the cache + // Ask shape to fill in the rest si.shape->fill_surface_interaction(ray, cache, si); // Gram-schmidt orthogonalization to compute local shading frame @@ -128,9 +129,13 @@ Scene::ray_intersect_cpu(const Ray3f &ray, Mask active) const { si.shape = gather(m_shapes.data(), shape_index, hit); si.prim_index = prim_index; - Float cache[2] = { load(rh.hit.u), load(rh.hit.v) }; + // Create the cache for the Mesh shapes + Float cache[3] = { + select(si.shape->is_mesh(), Float(1.f), Float(0.f)), + load(rh.hit.u), load(rh.hit.v) + }; - // Ask shape(s) to fill in the rest using the cache + // Ask shape(s) to fill in the rest si.shape->fill_surface_interaction(ray, cache, si, active); // Gram-schmidt orthogonalization to compute local shading frame diff --git a/src/librender/shape.cpp b/src/librender/shape.cpp index 09895ce8d..b1fe0aed0 100644 --- a/src/librender/shape.cpp +++ b/src/librender/shape.cpp @@ -6,8 +6,30 @@ #include #include +#if defined(MTS_ENABLE_EMBREE) + #include +#endif + NAMESPACE_BEGIN(mitsuba) +#if defined(MTS_ENABLE_EMBREE) +#if defined(ENOKI_X86_AVX512F) +# define MTS_RAY_WIDTH 16 +#elif defined(ENOKI_X86_AVX2) +# define MTS_RAY_WIDTH 8 +#elif defined(ENOKI_X86_SSE42) +# define MTS_RAY_WIDTH 4 +#else +# error Expected to use vectorization +#endif + +#define JOIN(x, y) JOIN_AGAIN(x, y) +#define JOIN_AGAIN(x, y) x ## y +#define RTCRayHitW JOIN(RTCRayHit, MTS_RAY_WIDTH) +#define RTCRayW JOIN(RTCRay, MTS_RAY_WIDTH) +#define RTCHitW JOIN(RTCHit, MTS_RAY_WIDTH) +#endif + MTS_VARIANT Shape::Shape(const Properties &props) : m_id(props.id()) { for (auto &kv : props.objects()) { Emitter *emitter = dynamic_cast(kv.second.get()); @@ -59,8 +81,132 @@ MTS_VARIANT Float Shape::pdf_position(const PositionSample3f & } #if defined(MTS_ENABLE_EMBREE) -MTS_VARIANT RTCGeometry Shape::embree_geometry(RTCDevice) const { - NotImplementedError("embree_geometry"); +template +void embree_bbox(const struct RTCBoundsFunctionArguments* args) { + MTS_IMPORT_TYPES(Shape) + const Shape* shape = (const Shape*) args->geometryUserPtr; + ScalarBoundingBox3f bbox = shape->bbox(); + RTCBounds* bounds_o = args->bounds_o; + bounds_o->lower_x = bbox.min.x(); + bounds_o->lower_y = bbox.min.y(); + bounds_o->lower_z = bbox.min.z(); + bounds_o->upper_x = bbox.max.x(); + bounds_o->upper_y = bbox.max.y(); + bounds_o->upper_z = bbox.max.z(); +} + +template +void embree_intersect_scalar(int* valid, + void* geometryUserPtr, + unsigned int geomID, + RTCRay* rtc_ray, + RTCHit* rtc_hit) { + MTS_IMPORT_TYPES(Shape) + + const Shape* shape = (const Shape*) geometryUserPtr; + + if (!valid[0]) + return; + + // Create a Mitsuba ray + Ray3f ray; + ray.o.x() = rtc_ray->org_x; + ray.o.y() = rtc_ray->org_y; + ray.o.z() = rtc_ray->org_z; + ray.d.x() = rtc_ray->dir_x; + ray.d.y() = rtc_ray->dir_y; + ray.d.z() = rtc_ray->dir_z; + ray.mint = rtc_ray->tnear; + ray.maxt = rtc_ray->tfar; + ray.time = rtc_ray->time; + ray.update(); + + // Check whether this is a shadow ray or not + if (rtc_hit) { + auto [success, tt] = shape->ray_intersect(ray, nullptr); + if (success) { + rtc_ray->tfar = tt; + rtc_hit->geomID = geomID; + } + } else { + if (shape->ray_test(ray)) + rtc_ray->tfar = -math::Infinity; + } +} + +template +void embree_intersect_packet(int* valid, + void* geometryUserPtr, + unsigned int geomID, + RTCRayW* rays, + RTCHitW* hits) { + MTS_IMPORT_TYPES(Shape) + using Int = replace_scalar_t; + + const Shape* shape = (const Shape*) geometryUserPtr; + + Mask active = neq(load(valid), 0); + if (none(active)) + return; + + // Create Mitsuba ray + Ray3f ray; + ray.o.x() = load(rays->org_x); + ray.o.y() = load(rays->org_y); + ray.o.z() = load(rays->org_z); + ray.d.x() = load(rays->dir_x); + ray.d.y() = load(rays->dir_y); + ray.d.z() = load(rays->dir_z); + ray.mint = load(rays->tnear); + ray.maxt = load(rays->tfar); + ray.time = load(rays->time); + ray.update(); + + // Check whether this is a shadow ray or not + if (hits) { + auto [success, tt] = shape->ray_intersect(ray, nullptr, active); + active &= success; + store(rays->tfar, tt, active); + store(hits->geomID, Int(geomID), active); + } else { + active &= shape->ray_test(ray); + store(rays->tfar, Float(-math::Infinity), active); + } +} + +template +void embree_intersect(const RTCIntersectFunctionNArguments* args) { + if constexpr (!is_array_v) { + RTCRayHit *rh = (RTCRayHit *) args->rayhit; + embree_intersect_scalar(args->valid, args->geometryUserPtr, args->geomID, + (RTCRay*) &rh->ray, (RTCHit*) &rh->hit); + } else { + RTCRayHitW *rh = (RTCRayHitW *) args->rayhit; + embree_intersect_packet(args->valid, args->geometryUserPtr, args->geomID, + (RTCRayW*) &rh->ray, (RTCHitW*) &rh->hit); + } +} + +template +void embree_occluded(const RTCOccludedFunctionNArguments* args) { + if constexpr (!is_array_v) { + embree_intersect_scalar(args->valid, args->geometryUserPtr, args->geomID, + (RTCRay*) args->ray, nullptr); + } else { + embree_intersect_packet(args->valid, args->geometryUserPtr, args->geomID, + (RTCRayW*) args->ray, nullptr); + } +} + +MTS_VARIANT RTCGeometry Shape::embree_geometry(RTCDevice device) const { + RTCGeometry geom = rtcNewGeometry(device, RTC_GEOMETRY_TYPE_USER); + rtcSetGeometryUserPrimitiveCount(geom, 1); + rtcSetGeometryUserData(geom, (void *) this); + rtcSetGeometryBoundsFunction(geom, embree_bbox, nullptr); + rtcSetGeometryIntersectFunction(geom, embree_intersect); + rtcSetGeometryOccludedFunction(geom, embree_occluded); + rtcCommitGeometry(geom); + return geom; } #endif @@ -130,9 +276,12 @@ Shape::ray_intersect(const Ray3f &ray, Mask active) const { SurfaceInteraction3f si = zero(); Float cache[MTS_KD_INTERSECTION_CACHE_SIZE]; - Mask success = false; - std::tie(success, si.t) = ray_intersect(ray, cache, active); + cache[0] = Float(1.f); // Indicates that all lanes have a valid cache + + auto [success, t] = ray_intersect(ray, cache + 1, active); active &= success; + si.t = select(active, t, math::Infinity); + if (any(active)) fill_surface_interaction(ray, cache, si, active); return si; diff --git a/src/shapes/disk.cpp b/src/shapes/disk.cpp index 50ae91907..6385a4f69 100644 --- a/src/shapes/disk.cpp +++ b/src/shapes/disk.cpp @@ -13,7 +13,6 @@ NAMESPACE_BEGIN(mitsuba) - /**! .. _shape-disk: @@ -150,7 +149,7 @@ class Disk final : public Shape { MTS_MASK_ARGUMENT(active); Ray3f ray = m_world_to_object.transform_affine(ray_); - Float t = -ray.o.z() / ray.d.z(); + Float t = -ray.o.z() / ray.d.z(); Point3f local = ray(t); // Is intersection within ray segment and disk? @@ -183,16 +182,28 @@ class Disk final : public Shape { SurfaceInteraction3f &si_out, Mask active) const override { MTS_MASK_ARGUMENT(active); + // Load and/or recompute cache if necessary + Mask invalid_cache = neq(*cache++, 1.f); + Float local_x = cache[0]; + Float local_y = cache[1]; + if (any(invalid_cache)) { + Ray3f ray_ = m_world_to_object.transform_affine(ray); + Float t = -ray_.o.z() / ray_.d.z(); + Point3f local = ray_(t); + masked(local_x, invalid_cache) = local.x(); + masked(local_y, invalid_cache) = local.y(); + } + SurfaceInteraction3f si(si_out); - Float r = norm(Point2f(cache[0], cache[1])), + Float r = norm(Point2f(local_x, local_y)), inv_r = rcp(r); - Float v = atan2(cache[1], cache[0]) * math::InvTwoPi; + Float v = atan2(local_y, local_x) * math::InvTwoPi; masked(v, v < 0.f) += 1.f; - Float cos_phi = select(neq(r, 0.f), cache[0] * inv_r, 1.f), - sin_phi = select(neq(r, 0.f), cache[1] * inv_r, 0.f); + Float cos_phi = select(neq(r, 0.f), local_x * inv_r, 1.f), + sin_phi = select(neq(r, 0.f), local_y * inv_r, 0.f); si.dp_du = m_object_to_world * Vector3f( cos_phi, sin_phi, 0.f); si.dp_dv = m_object_to_world * Vector3f(-sin_phi, cos_phi, 0.f); diff --git a/src/shapes/rectangle.cpp b/src/shapes/rectangle.cpp index b13ef0dfd..b0a44c7ce 100644 --- a/src/shapes/rectangle.cpp +++ b/src/shapes/rectangle.cpp @@ -208,20 +208,32 @@ class Rectangle final : public Shape { && abs(local.y()) <= 1.f; } - void fill_surface_interaction(const Ray3f &ray, const Float *cache, + void fill_surface_interaction(const Ray3f &ray_, const Float *cache, SurfaceInteraction3f &si_out, Mask active) const override { MTS_MASK_ARGUMENT(active); + // Load and/or recompute cache if necessary + Mask invalid_cache = neq(*cache++, 1.f); + Float local_x = cache[0]; + Float local_y = cache[1]; + if (any(invalid_cache)) { + Ray3f ray = m_world_to_object.transform_affine(ray_); + Float t = -ray.o.z() * ray.d_rcp.z(); + Point3f local = ray(t); + masked(local_x, invalid_cache) = local.x(); + masked(local_y, invalid_cache) = local.y(); + } + SurfaceInteraction3f si(si_out); si.n = m_frame.n; si.sh_frame.n = m_frame.n; si.dp_du = m_du * m_frame.s; si.dp_dv = m_dv * m_frame.t; - si.p = ray(si.t); - si.time = ray.time; - si.uv = Point2f(fmadd(cache[0], .5f, .5f), - fmadd(cache[1], .5f, .5f)); + si.p = ray_(si.t); + si.time = ray_.time; + si.uv = Point2f(fmadd(local_x, .5f, .5f), + fmadd(local_y, .5f, .5f)); si_out[active] = si; } diff --git a/src/shapes/sphere.cpp b/src/shapes/sphere.cpp index c67584804..48bb8943d 100644 --- a/src/shapes/sphere.cpp +++ b/src/shapes/sphere.cpp @@ -11,6 +11,10 @@ #include #include +#if defined(MTS_ENABLE_EMBREE) + #include +#endif + NAMESPACE_BEGIN(mitsuba) /**! @@ -419,6 +423,18 @@ class Sphere final : public Shape { m_inv_surface_area = 1.f / surface_area(); } +#if defined(MTS_ENABLE_EMBREE) + RTCGeometry embree_geometry(RTCDevice device) const override { + RTCGeometry geom = rtcNewGeometry(device, RTC_GEOMETRY_TYPE_SPHERE_POINT); + float *buffer = (float*) rtcSetNewGeometryBuffer(geom, RTC_BUFFER_TYPE_VERTEX, 0, + RTC_FORMAT_FLOAT4, 4 * sizeof(float), 1); + buffer[0] = m_center.x(); buffer[1] = m_center.y(); buffer[2] = m_center.z(); + buffer[3] = m_radius; + rtcCommitGeometry(geom); + return geom; + } +#endif + std::string to_string() const override { std::ostringstream oss; oss << "Sphere[" << std::endl