Optimize atomic rendering for input attachments

Pack clip data into an RGBA8 attachment and turn on src-over blending for both color and clip. Handle advanced blend modes by rearranging the math so that the correct color isn't reached until *AFTER* the hardware blend state is applied. This allows us to preserve clip and color contents by just emitting a == 0 instead of loading the current value. It also saves flops by offloading the blending work onto the ROP blending unit, and serves as a hint to the hardware that it doesn't need to read or write anything when a == 0.

Diffs=
1b5e50fce Optimize atomic rendering for input attachments (#8310)

Co-authored-by: Chris Dalton <[email protected]>
rive-app · Oct 11, 2024 · ee9325b · ee9325b
1 parent fc31e5e
commit ee9325b
Show file tree

Hide file tree

Showing 36 changed files with 696 additions and 470 deletions.
diff --git a/.rive_head b/.rive_head
@@ -1 +1 @@
-1cc5f2b6f6cf02d15bcb3654343e4f3322a95402
+1b5e50fcec4a38fc5056c0dff630d115f547cfb9
diff --git a/renderer/include/rive/renderer/gpu.hpp b/renderer/include/rive/renderer/gpu.hpp
@@ -270,11 +270,11 @@ constexpr uint16_t kImageRectIndices[14 * 3] = {
 
 enum class PaintType : uint32_t
 {
+    clipUpdate, // Update the clip buffer instead of drawing to the framebuffer.
     solidColor,
     linearGradient,
     radialGradient,
     image,
-    clipUpdate, // Update the clip buffer instead of drawing to the framebuffer.
 };
 
 // Specifies the location of a simple or complex horizontal color ramp within the gradient texture.
@@ -637,10 +637,9 @@ enum class ShaderMiscFlags : uint32_t
 {
     none = 0,
 
-    // InterlockMode::atomics only. Render color to a standard attachment instead of PLS. The
-    // backend implementation is responsible to turn on src-over blending. In atomic mode, we don't
-    // need to read the color buffer when advanced blend is not used.
-    fixedFunctionColorBlend = 1 << 0,
+    // InterlockMode::atomics only (without advanced blend). Render color to a standard attachment
+    // instead of PLS. The backend implementation is responsible to turn on src-over blending.
+    fixedFunctionColorOutput = 1 << 0,
 
     // DrawType::atomicInitialize only. Also store the color clear value to PLS when drawing a
     // clear, in addition to clearing the other PLS planes.

diff --git a/renderer/include/rive/renderer/vulkan/render_context_vulkan_impl.hpp b/renderer/include/rive/renderer/vulkan/render_context_vulkan_impl.hpp
@@ -58,9 +58,9 @@ class RenderTargetVulkan : public RenderTarget
     // getters that lazy load if needed.
 
     vkutil::TextureView* ensureOffscreenColorTextureView();
-    vkutil::TextureView* ensureCoverageTextureView();
     vkutil::TextureView* ensureClipTextureView();
     vkutil::TextureView* ensureScratchColorTextureView();
+    vkutil::TextureView* ensureCoverageTextureView();
     vkutil::TextureView* ensureCoverageAtomicTextureView();
 
 private:

diff --git a/renderer/include/rive/renderer/vulkan/vulkan_context.hpp b/renderer/include/rive/renderer/vulkan/vulkan_context.hpp
@@ -65,6 +65,7 @@ class VulkanContext : public RefCnt<VulkanContext>
     F(CmdDraw)                                                                                     \
     F(CmdDrawIndexed)                                                                              \
     F(CmdEndRenderPass)                                                                            \
+    F(CmdNextSubpass)                                                                              \
     F(CmdPipelineBarrier)                                                                          \
     F(CmdSetScissor)                                                                               \
     F(CmdSetViewport)                                                                              \

diff --git a/renderer/path_fiddle/fiddle_context_vulkan.cpp b/renderer/path_fiddle/fiddle_context_vulkan.cpp
@@ -143,11 +143,15 @@ class FiddleContextVulkanPLS : public FiddleContext
         vkb::SwapchainBuilder swapchainBuilder(m_device, m_windowSurface);
         swapchainBuilder
             .set_desired_format({
-                .format = VK_FORMAT_B8G8R8A8_UNORM,
+                // Swap the target format in "vkcore" mode, just for fun so we test both
+                // configurations.
+                .format = m_options.coreFeaturesOnly ? VK_FORMAT_B8G8R8A8_UNORM
+                                                     : VK_FORMAT_R8G8B8A8_UNORM,
                 .colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
             })
             .add_fallback_format({
-                .format = VK_FORMAT_R8G8B8A8_UNORM,
+                .format = m_options.coreFeaturesOnly ? VK_FORMAT_R8G8B8A8_UNORM
+                                                     : VK_FORMAT_B8G8R8A8_UNORM,
                 .colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
             })
             .set_desired_present_mode(VK_PRESENT_MODE_IMMEDIATE_KHR)

diff --git a/renderer/rive_vk_bootstrap/include/rive_vk_bootstrap/rive_vk_bootstrap.hpp b/renderer/rive_vk_bootstrap/include/rive_vk_bootstrap/rive_vk_bootstrap.hpp
@@ -27,10 +27,12 @@ T vkb_check(vkb::Result<T> result, const char* code, int line, const char* file)
 
 vkb::SystemInfo load_vulkan();
 
+#ifdef DEBUG
 VKAPI_ATTR VkBool32 VKAPI_CALL default_debug_callback(VkDebugUtilsMessageSeverityFlagBitsEXT,
                                                       VkDebugUtilsMessageTypeFlagsEXT,
                                                       const VkDebugUtilsMessengerCallbackDataEXT*,
                                                       void* pUserData);
+#endif
 
 enum class FeatureSet
 {

diff --git a/renderer/rive_vk_bootstrap/rive_vk_bootstrap.cpp b/renderer/rive_vk_bootstrap/rive_vk_bootstrap.cpp
@@ -25,6 +25,7 @@ vkb::SystemInfo load_vulkan()
     return VKB_CHECK(vkb::SystemInfo::get_system_info(fp_vkGetInstanceProcAddr));
 }
 
+#ifdef DEBUG
 VKAPI_ATTR VkBool32 VKAPI_CALL
 default_debug_callback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
                        VkDebugUtilsMessageTypeFlagsEXT messageType,
@@ -63,8 +64,9 @@ default_debug_callback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
                     pCallbackData->pMessage);
             break;
     }
-    return VK_FALSE;
+    return VK_TRUE;
 }
+#endif
 
 static const char* physical_device_type_name(VkPhysicalDeviceType type)
 {

diff --git a/renderer/src/d3d/render_context_d3d_impl.cpp b/renderer/src/d3d/render_context_d3d_impl.cpp
@@ -966,9 +966,9 @@ void RenderContextD3DImpl::setPipelineLayoutAndShaders(DrawType drawType,
         {
             s << "#define " << GLSL_ENABLE_MIN_16_PRECISION << '\n';
         }
-        if (pixelShaderMiscFlags & gpu::ShaderMiscFlags::fixedFunctionColorBlend)
+        if (pixelShaderMiscFlags & gpu::ShaderMiscFlags::fixedFunctionColorOutput)
         {
-            s << "#define " << GLSL_FIXED_FUNCTION_COLOR_BLEND << '\n';
+            s << "#define " << GLSL_FIXED_FUNCTION_COLOR_OUTPUT << '\n';
         }
         if (pixelShaderMiscFlags & gpu::ShaderMiscFlags::coalescedResolveAndTransfer)
         {
@@ -1482,7 +1482,7 @@ void RenderContextD3DImpl::flush(const FlushDescriptor& desc)
                 : gpu::ShaderMiscFlags::none;
         if (renderDirectToRasterPipeline)
         {
-            pixelShaderMiscFlags |= gpu::ShaderMiscFlags::fixedFunctionColorBlend;
+            pixelShaderMiscFlags |= gpu::ShaderMiscFlags::fixedFunctionColorOutput;
         }
         setPipelineLayoutAndShaders(drawType,
                                     shaderFeatures,

diff --git a/renderer/src/draw.cpp b/renderer/src/draw.cpp
@@ -394,7 +394,6 @@ DrawUniquePtr RiveRenderPathDraw::Make(RenderContext* context,
         // Use interior triangulation to draw filled paths if they're large enough to benefit from
         // it.
         // FIXME! Implement interior triangulation in msaa mode.
-
         if (context->frameInterlockMode() != gpu::InterlockMode::msaa &&
             path->getRawPath().verbs().count() < 1000 &&
             gpu::FindTransformedArea(localBounds, matrix) > 512 * 512)

diff --git a/renderer/src/gl/pls_impl_rw_texture.cpp b/renderer/src/gl/pls_impl_rw_texture.cpp
@@ -144,7 +144,7 @@ class RenderContextGLImpl::PLSImplRWTexture : public RenderContextGLImpl::PixelL
         {
             if (needs_atomic_fixed_function_color_blend(desc))
             {
-                flags |= gpu::ShaderMiscFlags::fixedFunctionColorBlend;
+                flags |= gpu::ShaderMiscFlags::fixedFunctionColorOutput;
             }
             if (drawType == gpu::DrawType::atomicResolve &&
                 needs_coalesced_atomic_resolve_and_transfer(desc))

diff --git a/renderer/src/gl/render_context_gl_impl.cpp b/renderer/src/gl/render_context_gl_impl.cpp
@@ -610,9 +610,9 @@ RenderContextGLImpl::DrawShader::DrawShader(RenderContextGLImpl* renderContextIm
         // Atomics are currently always done on storage textures.
         defines.push_back(GLSL_USING_PLS_STORAGE_TEXTURES);
     }
-    if (shaderMiscFlags & gpu::ShaderMiscFlags::fixedFunctionColorBlend)
+    if (shaderMiscFlags & gpu::ShaderMiscFlags::fixedFunctionColorOutput)
     {
-        defines.push_back(GLSL_FIXED_FUNCTION_COLOR_BLEND);
+        defines.push_back(GLSL_FIXED_FUNCTION_COLOR_OUTPUT);
     }
     for (size_t i = 0; i < kShaderFeatureCount; ++i)
     {

diff --git a/renderer/src/gpu.cpp b/renderer/src/gpu.cpp
@@ -297,11 +297,11 @@ void ClipRectInverseMatrix::reset(const Mat2D& clipMatrix, const AABB& clipRect)
 static uint32_t paint_type_to_glsl_id(PaintType paintType)
 {
     return static_cast<uint32_t>(paintType);
+    static_assert((int)PaintType::clipUpdate == CLIP_UPDATE_PAINT_TYPE);
     static_assert((int)PaintType::solidColor == SOLID_COLOR_PAINT_TYPE);
     static_assert((int)PaintType::linearGradient == LINEAR_GRADIENT_PAINT_TYPE);
     static_assert((int)PaintType::radialGradient == RADIAL_GRADIENT_PAINT_TYPE);
     static_assert((int)PaintType::image == IMAGE_PAINT_TYPE);
-    static_assert((int)PaintType::clipUpdate == CLIP_UPDATE_PAINT_TYPE);
 }
 
 uint32_t ConvertBlendModeToPLSBlendMode(BlendMode riveMode)

diff --git a/renderer/src/metal/background_shader_compiler.mm b/renderer/src/metal/background_shader_compiler.mm
@@ -108,7 +108,7 @@
             }
             if (!(shaderFeatures & gpu::ShaderFeatures::ENABLE_ADVANCED_BLEND))
             {
-                defines[@GLSL_FIXED_FUNCTION_COLOR_BLEND] = @"";
+                defines[@GLSL_FIXED_FUNCTION_COLOR_OUTPUT] = @"";
             }
         }
 

diff --git a/renderer/src/metal/render_context_metal_impl.mm b/renderer/src/metal/render_context_metal_impl.mm
@@ -165,7 +165,7 @@
                 case gpu::InterlockMode::atomics:
                     // In atomic mode, the PLS planes are accessed as device buffers. We only use
                     // the "framebuffer" attachment configured above.
-                    if (shaderMiscFlags & gpu::ShaderMiscFlags::fixedFunctionColorBlend)
+                    if (shaderMiscFlags & gpu::ShaderMiscFlags::fixedFunctionColorOutput)
                     {
                         // The shader expectes a "src-over" blend function in order to to implement
                         // antialiasing and opacity.
@@ -798,7 +798,7 @@ static MTLViewport make_viewport(uint32_t x, uint32_t y, uint32_t width, uint32_
         // In atomic mode, the PLS planes are buffers that we need to bind separately.
         // Since the PLS plane indices collide with other buffer bindings, offset the binding
         // indices of these buffers by DEFAULT_BINDINGS_SET_SIZE.
-        if (!(baselineShaderMiscFlags & gpu::ShaderMiscFlags::fixedFunctionColorBlend))
+        if (!(baselineShaderMiscFlags & gpu::ShaderMiscFlags::fixedFunctionColorOutput))
         {
             [encoder setFragmentBuffer:renderTarget->colorAtomicBuffer()
                                 offset:0
@@ -957,9 +957,7 @@ static MTLViewport make_viewport(uint32_t x, uint32_t y, uint32_t width, uint32_
         pass.colorAttachments[CLIP_PLANE_IDX].texture = renderTarget->m_clipMemorylessTexture;
         pass.colorAttachments[CLIP_PLANE_IDX].loadAction = MTLLoadActionClear;
         pass.colorAttachments[CLIP_PLANE_IDX].clearColor = MTLClearColorMake(0, 0, 0, 0);
-        pass.colorAttachments[CLIP_PLANE_IDX].storeAction =
-            desc.interlockMode == gpu::InterlockMode::atomics ? MTLStoreActionStore
-                                                              : MTLStoreActionDontCare;
+        pass.colorAttachments[CLIP_PLANE_IDX].storeAction = MTLStoreActionDontCare;
 
         pass.colorAttachments[SCRATCH_COLOR_PLANE_IDX].texture =
             renderTarget->m_scratchColorMemorylessTexture;
@@ -971,14 +969,12 @@ static MTLViewport make_viewport(uint32_t x, uint32_t y, uint32_t width, uint32_
         pass.colorAttachments[COVERAGE_PLANE_IDX].loadAction = MTLLoadActionClear;
         pass.colorAttachments[COVERAGE_PLANE_IDX].clearColor =
             MTLClearColorMake(desc.coverageClearValue, 0, 0, 0);
-        pass.colorAttachments[COVERAGE_PLANE_IDX].storeAction =
-            desc.interlockMode == gpu::InterlockMode::atomics ? MTLStoreActionStore
-                                                              : MTLStoreActionDontCare;
+        pass.colorAttachments[COVERAGE_PLANE_IDX].storeAction = MTLStoreActionDontCare;
     }
     else if (!(desc.combinedShaderFeatures & gpu::ShaderFeatures::ENABLE_ADVANCED_BLEND))
     {
         assert(desc.interlockMode == gpu::InterlockMode::atomics);
-        baselineShaderMiscFlags |= gpu::ShaderMiscFlags::fixedFunctionColorBlend;
+        baselineShaderMiscFlags |= gpu::ShaderMiscFlags::fixedFunctionColorOutput;
     }
     else if (desc.colorLoadAction == gpu::LoadAction::preserveRenderTarget)
     {
@@ -1019,7 +1015,7 @@ static MTLViewport make_viewport(uint32_t x, uint32_t y, uint32_t width, uint32_
                                                  ? desc.combinedShaderFeatures
                                                  : batch.shaderFeatures;
         gpu::ShaderMiscFlags batchMiscFlags = baselineShaderMiscFlags;
-        if (!(batchMiscFlags & gpu::ShaderMiscFlags::fixedFunctionColorBlend))
+        if (!(batchMiscFlags & gpu::ShaderMiscFlags::fixedFunctionColorOutput))
         {
             if (batch.drawType == gpu::DrawType::atomicResolve)
             {

diff --git a/renderer/src/render_context.cpp b/renderer/src/render_context.cpp
@@ -1850,7 +1850,6 @@ gpu::DrawBatch& RenderContext::LogicalFlush::pushDraw(Draw* draw,
         case DrawType::midpointFanPatches:
         case DrawType::outerCurvePatches:
         case DrawType::atomicInitialize:
-        case DrawType::atomicResolve:
         case DrawType::stencilClipReset:
             needsNewBatch =
                 m_drawList.empty() || m_drawList.tail().drawType != drawType ||
@@ -1860,6 +1859,7 @@ gpu::DrawBatch& RenderContext::LogicalFlush::pushDraw(Draw* draw,
         case DrawType::interiorTriangulation:
         case DrawType::imageRect:
         case DrawType::imageMesh:
+        case DrawType::atomicResolve:
             // We can't combine interior triangulations or image draws yet.
             needsNewBatch = true;
             break;

diff --git a/renderer/src/shaders/Makefile b/renderer/src/shaders/Makefile
@@ -91,11 +91,11 @@ SPIRV_INPUTS := $(wildcard spirv/*.main)
 SPIRV_OUTPUTS := \
     $(addprefix $(OUT)/, $(patsubst %.main, %.vert.h, $(SPIRV_INPUTS))) \
     $(addprefix $(OUT)/, $(patsubst %.main, %.frag.h, $(SPIRV_INPUTS))) \
-    $(OUT)/spirv/atomic_draw_image_mesh.fixedblend_frag.h \
-    $(OUT)/spirv/atomic_draw_image_rect.fixedblend_frag.h \
-    $(OUT)/spirv/atomic_draw_interior_triangles.fixedblend_frag.h \
-    $(OUT)/spirv/atomic_draw_path.fixedblend_frag.h \
-    $(OUT)/spirv/atomic_resolve_pls.fixedblend_frag.h \
+    $(OUT)/spirv/atomic_draw_image_mesh.fixedcolor_frag.h \
+    $(OUT)/spirv/atomic_draw_image_rect.fixedcolor_frag.h \
+    $(OUT)/spirv/atomic_draw_interior_triangles.fixedcolor_frag.h \
+    $(OUT)/spirv/atomic_draw_path.fixedcolor_frag.h \
+    $(OUT)/spirv/atomic_resolve_pls.fixedcolor_frag.h \
 
 ## Compile *.main into vertex shaders.
 $(OUT)/spirv/%.vert.h: spirv/%.main $(MINIFY_STAMP)
@@ -107,10 +107,10 @@ $(OUT)/spirv/%.frag.h: spirv/%.main $(MINIFY_STAMP)
 	@mkdir -p $(OUT)/spirv
 	@glslangValidator -S frag -DTARGET_VULKAN -DFRAGMENT -I$(OUT) -V --vn $(subst .main,_frag,$(notdir $<)) -o $@ $<
 
-## Compile atomic fragment shaders again with FIXED_FUNCTION_COLOR_BLEND defined.
-$(OUT)/spirv/%.fixedblend_frag.h: spirv/%.main $(MINIFY_STAMP)
+## Compile atomic fragment shaders again with FIXED_FUNCTION_COLOR_OUTPUT defined.
+$(OUT)/spirv/%.fixedcolor_frag.h: spirv/%.main $(MINIFY_STAMP)
 	@mkdir -p $(OUT)/spirv
-	@glslangValidator -S frag -DTARGET_VULKAN -DFRAGMENT -DFIXED_FUNCTION_COLOR_BLEND -I$(OUT) -V --vn $(subst .main,_fixedblend_frag,$(notdir $<)) -o $@ $<
+	@glslangValidator -S frag -DTARGET_VULKAN -DFRAGMENT -DFIXED_FUNCTION_COLOR_OUTPUT -I$(OUT) -V --vn $(subst .main,_fixedcolor_frag,$(notdir $<)) -o $@ $<
 
 spirv: $(SPIRV_OUTPUTS)
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		1cc5f2b6f6cf02d15bcb3654343e4f3322a95402
		1b5e50fcec4a38fc5056c0dff630d115f547cfb9
-Original file line number
+Diff line change
@@ Expand Up / @@ -108,7 +108,7 @@ @@
                 }
                 if (!(shaderFeatures & gpu::ShaderFeatures::ENABLE_ADVANCED_BLEND))
                 {
-                    defines[@GLSL_FIXED_FUNCTION_COLOR_BLEND] = @"";
+                    defines[@GLSL_FIXED_FUNCTION_COLOR_OUTPUT] = @"";
                 }
             }
@@ Expand Down @@