Skip to content

Commit

Permalink
Optimize atomic rendering for input attachments
Browse files Browse the repository at this point in the history
Pack clip data into an RGBA8 attachment and turn on src-over blending for both color and clip.

Handle advanced blend modes by rearranging the math such that the correct color isn't reached until *AFTER* the hardware blend state is applied.

This allows us to preserve clip and color contents by just emitting a=0 instead of loading the current value. It also saves flops by offloading the blending work onto the ROP blending unit, and serves as a hint to the hardware that it doesn't need to read or write anything when a == 0.

Diffs=
1b5e50fce Optimize atomic rendering for input attachments (#8310)

Co-authored-by: Chris Dalton <[email protected]>
  • Loading branch information
csmartdalton and csmartdalton committed Oct 11, 2024
1 parent fc31e5e commit ee9325b
Show file tree
Hide file tree
Showing 36 changed files with 696 additions and 470 deletions.
2 changes: 1 addition & 1 deletion .rive_head
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1cc5f2b6f6cf02d15bcb3654343e4f3322a95402
1b5e50fcec4a38fc5056c0dff630d115f547cfb9
9 changes: 4 additions & 5 deletions renderer/include/rive/renderer/gpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,11 +270,11 @@ constexpr uint16_t kImageRectIndices[14 * 3] = {

enum class PaintType : uint32_t
{
clipUpdate, // Update the clip buffer instead of drawing to the framebuffer.
solidColor,
linearGradient,
radialGradient,
image,
clipUpdate, // Update the clip buffer instead of drawing to the framebuffer.
};

// Specifies the location of a simple or complex horizontal color ramp within the gradient texture.
Expand Down Expand Up @@ -637,10 +637,9 @@ enum class ShaderMiscFlags : uint32_t
{
none = 0,

// InterlockMode::atomics only. Render color to a standard attachment instead of PLS. The
// backend implementation is responsible to turn on src-over blending. In atomic mode, we don't
// need to read the color buffer when advanced blend is not used.
fixedFunctionColorBlend = 1 << 0,
// InterlockMode::atomics only (without advanced blend). Render color to a standard attachment
// instead of PLS. The backend implementation is responsible for turning on src-over blending.
fixedFunctionColorOutput = 1 << 0,

// DrawType::atomicInitialize only. Also store the color clear value to PLS when drawing a
// clear, in addition to clearing the other PLS planes.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@ class RenderTargetVulkan : public RenderTarget
// getters that lazy load if needed.

vkutil::TextureView* ensureOffscreenColorTextureView();
vkutil::TextureView* ensureCoverageTextureView();
vkutil::TextureView* ensureClipTextureView();
vkutil::TextureView* ensureScratchColorTextureView();
vkutil::TextureView* ensureCoverageTextureView();
vkutil::TextureView* ensureCoverageAtomicTextureView();

private:
Expand Down
1 change: 1 addition & 0 deletions renderer/include/rive/renderer/vulkan/vulkan_context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class VulkanContext : public RefCnt<VulkanContext>
F(CmdDraw) \
F(CmdDrawIndexed) \
F(CmdEndRenderPass) \
F(CmdNextSubpass) \
F(CmdPipelineBarrier) \
F(CmdSetScissor) \
F(CmdSetViewport) \
Expand Down
8 changes: 6 additions & 2 deletions renderer/path_fiddle/fiddle_context_vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,11 +143,15 @@ class FiddleContextVulkanPLS : public FiddleContext
vkb::SwapchainBuilder swapchainBuilder(m_device, m_windowSurface);
swapchainBuilder
.set_desired_format({
.format = VK_FORMAT_B8G8R8A8_UNORM,
// Swap the target format in "vkcore" mode, just for fun so we test both
// configurations.
.format = m_options.coreFeaturesOnly ? VK_FORMAT_B8G8R8A8_UNORM
: VK_FORMAT_R8G8B8A8_UNORM,
.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
})
.add_fallback_format({
.format = VK_FORMAT_R8G8B8A8_UNORM,
.format = m_options.coreFeaturesOnly ? VK_FORMAT_R8G8B8A8_UNORM
: VK_FORMAT_B8G8R8A8_UNORM,
.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
})
.set_desired_present_mode(VK_PRESENT_MODE_IMMEDIATE_KHR)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@ T vkb_check(vkb::Result<T> result, const char* code, int line, const char* file)

vkb::SystemInfo load_vulkan();

#ifdef DEBUG
VKAPI_ATTR VkBool32 VKAPI_CALL default_debug_callback(VkDebugUtilsMessageSeverityFlagBitsEXT,
VkDebugUtilsMessageTypeFlagsEXT,
const VkDebugUtilsMessengerCallbackDataEXT*,
void* pUserData);
#endif

enum class FeatureSet
{
Expand Down
4 changes: 3 additions & 1 deletion renderer/rive_vk_bootstrap/rive_vk_bootstrap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ vkb::SystemInfo load_vulkan()
return VKB_CHECK(vkb::SystemInfo::get_system_info(fp_vkGetInstanceProcAddr));
}

#ifdef DEBUG
VKAPI_ATTR VkBool32 VKAPI_CALL
default_debug_callback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
VkDebugUtilsMessageTypeFlagsEXT messageType,
Expand Down Expand Up @@ -63,8 +64,9 @@ default_debug_callback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
pCallbackData->pMessage);
break;
}
return VK_FALSE;
return VK_TRUE;
}
#endif

static const char* physical_device_type_name(VkPhysicalDeviceType type)
{
Expand Down
6 changes: 3 additions & 3 deletions renderer/src/d3d/render_context_d3d_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -966,9 +966,9 @@ void RenderContextD3DImpl::setPipelineLayoutAndShaders(DrawType drawType,
{
s << "#define " << GLSL_ENABLE_MIN_16_PRECISION << '\n';
}
if (pixelShaderMiscFlags & gpu::ShaderMiscFlags::fixedFunctionColorBlend)
if (pixelShaderMiscFlags & gpu::ShaderMiscFlags::fixedFunctionColorOutput)
{
s << "#define " << GLSL_FIXED_FUNCTION_COLOR_BLEND << '\n';
s << "#define " << GLSL_FIXED_FUNCTION_COLOR_OUTPUT << '\n';
}
if (pixelShaderMiscFlags & gpu::ShaderMiscFlags::coalescedResolveAndTransfer)
{
Expand Down Expand Up @@ -1482,7 +1482,7 @@ void RenderContextD3DImpl::flush(const FlushDescriptor& desc)
: gpu::ShaderMiscFlags::none;
if (renderDirectToRasterPipeline)
{
pixelShaderMiscFlags |= gpu::ShaderMiscFlags::fixedFunctionColorBlend;
pixelShaderMiscFlags |= gpu::ShaderMiscFlags::fixedFunctionColorOutput;
}
setPipelineLayoutAndShaders(drawType,
shaderFeatures,
Expand Down
1 change: 0 additions & 1 deletion renderer/src/draw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,6 @@ DrawUniquePtr RiveRenderPathDraw::Make(RenderContext* context,
// Use interior triangulation to draw filled paths if they're large enough to benefit from
// it.
// FIXME! Implement interior triangulation in msaa mode.

if (context->frameInterlockMode() != gpu::InterlockMode::msaa &&
path->getRawPath().verbs().count() < 1000 &&
gpu::FindTransformedArea(localBounds, matrix) > 512 * 512)
Expand Down
2 changes: 1 addition & 1 deletion renderer/src/gl/pls_impl_rw_texture.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ class RenderContextGLImpl::PLSImplRWTexture : public RenderContextGLImpl::PixelL
{
if (needs_atomic_fixed_function_color_blend(desc))
{
flags |= gpu::ShaderMiscFlags::fixedFunctionColorBlend;
flags |= gpu::ShaderMiscFlags::fixedFunctionColorOutput;
}
if (drawType == gpu::DrawType::atomicResolve &&
needs_coalesced_atomic_resolve_and_transfer(desc))
Expand Down
4 changes: 2 additions & 2 deletions renderer/src/gl/render_context_gl_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -610,9 +610,9 @@ RenderContextGLImpl::DrawShader::DrawShader(RenderContextGLImpl* renderContextIm
// Atomics are currently always done on storage textures.
defines.push_back(GLSL_USING_PLS_STORAGE_TEXTURES);
}
if (shaderMiscFlags & gpu::ShaderMiscFlags::fixedFunctionColorBlend)
if (shaderMiscFlags & gpu::ShaderMiscFlags::fixedFunctionColorOutput)
{
defines.push_back(GLSL_FIXED_FUNCTION_COLOR_BLEND);
defines.push_back(GLSL_FIXED_FUNCTION_COLOR_OUTPUT);
}
for (size_t i = 0; i < kShaderFeatureCount; ++i)
{
Expand Down
2 changes: 1 addition & 1 deletion renderer/src/gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -297,11 +297,11 @@ void ClipRectInverseMatrix::reset(const Mat2D& clipMatrix, const AABB& clipRect)
static uint32_t paint_type_to_glsl_id(PaintType paintType)
{
return static_cast<uint32_t>(paintType);
static_assert((int)PaintType::clipUpdate == CLIP_UPDATE_PAINT_TYPE);
static_assert((int)PaintType::solidColor == SOLID_COLOR_PAINT_TYPE);
static_assert((int)PaintType::linearGradient == LINEAR_GRADIENT_PAINT_TYPE);
static_assert((int)PaintType::radialGradient == RADIAL_GRADIENT_PAINT_TYPE);
static_assert((int)PaintType::image == IMAGE_PAINT_TYPE);
static_assert((int)PaintType::clipUpdate == CLIP_UPDATE_PAINT_TYPE);
}

uint32_t ConvertBlendModeToPLSBlendMode(BlendMode riveMode)
Expand Down
2 changes: 1 addition & 1 deletion renderer/src/metal/background_shader_compiler.mm
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@
}
if (!(shaderFeatures & gpu::ShaderFeatures::ENABLE_ADVANCED_BLEND))
{
defines[@GLSL_FIXED_FUNCTION_COLOR_BLEND] = @"";
defines[@GLSL_FIXED_FUNCTION_COLOR_OUTPUT] = @"";
}
}

Expand Down
16 changes: 6 additions & 10 deletions renderer/src/metal/render_context_metal_impl.mm
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@
case gpu::InterlockMode::atomics:
// In atomic mode, the PLS planes are accessed as device buffers. We only use
// the "framebuffer" attachment configured above.
if (shaderMiscFlags & gpu::ShaderMiscFlags::fixedFunctionColorBlend)
if (shaderMiscFlags & gpu::ShaderMiscFlags::fixedFunctionColorOutput)
{
// The shader expects a "src-over" blend function in order to implement
// antialiasing and opacity.
Expand Down Expand Up @@ -798,7 +798,7 @@ static MTLViewport make_viewport(uint32_t x, uint32_t y, uint32_t width, uint32_
// In atomic mode, the PLS planes are buffers that we need to bind separately.
// Since the PLS plane indices collide with other buffer bindings, offset the binding
// indices of these buffers by DEFAULT_BINDINGS_SET_SIZE.
if (!(baselineShaderMiscFlags & gpu::ShaderMiscFlags::fixedFunctionColorBlend))
if (!(baselineShaderMiscFlags & gpu::ShaderMiscFlags::fixedFunctionColorOutput))
{
[encoder setFragmentBuffer:renderTarget->colorAtomicBuffer()
offset:0
Expand Down Expand Up @@ -957,9 +957,7 @@ static MTLViewport make_viewport(uint32_t x, uint32_t y, uint32_t width, uint32_
pass.colorAttachments[CLIP_PLANE_IDX].texture = renderTarget->m_clipMemorylessTexture;
pass.colorAttachments[CLIP_PLANE_IDX].loadAction = MTLLoadActionClear;
pass.colorAttachments[CLIP_PLANE_IDX].clearColor = MTLClearColorMake(0, 0, 0, 0);
pass.colorAttachments[CLIP_PLANE_IDX].storeAction =
desc.interlockMode == gpu::InterlockMode::atomics ? MTLStoreActionStore
: MTLStoreActionDontCare;
pass.colorAttachments[CLIP_PLANE_IDX].storeAction = MTLStoreActionDontCare;

pass.colorAttachments[SCRATCH_COLOR_PLANE_IDX].texture =
renderTarget->m_scratchColorMemorylessTexture;
Expand All @@ -971,14 +969,12 @@ static MTLViewport make_viewport(uint32_t x, uint32_t y, uint32_t width, uint32_
pass.colorAttachments[COVERAGE_PLANE_IDX].loadAction = MTLLoadActionClear;
pass.colorAttachments[COVERAGE_PLANE_IDX].clearColor =
MTLClearColorMake(desc.coverageClearValue, 0, 0, 0);
pass.colorAttachments[COVERAGE_PLANE_IDX].storeAction =
desc.interlockMode == gpu::InterlockMode::atomics ? MTLStoreActionStore
: MTLStoreActionDontCare;
pass.colorAttachments[COVERAGE_PLANE_IDX].storeAction = MTLStoreActionDontCare;
}
else if (!(desc.combinedShaderFeatures & gpu::ShaderFeatures::ENABLE_ADVANCED_BLEND))
{
assert(desc.interlockMode == gpu::InterlockMode::atomics);
baselineShaderMiscFlags |= gpu::ShaderMiscFlags::fixedFunctionColorBlend;
baselineShaderMiscFlags |= gpu::ShaderMiscFlags::fixedFunctionColorOutput;
}
else if (desc.colorLoadAction == gpu::LoadAction::preserveRenderTarget)
{
Expand Down Expand Up @@ -1019,7 +1015,7 @@ static MTLViewport make_viewport(uint32_t x, uint32_t y, uint32_t width, uint32_
? desc.combinedShaderFeatures
: batch.shaderFeatures;
gpu::ShaderMiscFlags batchMiscFlags = baselineShaderMiscFlags;
if (!(batchMiscFlags & gpu::ShaderMiscFlags::fixedFunctionColorBlend))
if (!(batchMiscFlags & gpu::ShaderMiscFlags::fixedFunctionColorOutput))
{
if (batch.drawType == gpu::DrawType::atomicResolve)
{
Expand Down
2 changes: 1 addition & 1 deletion renderer/src/render_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1850,7 +1850,6 @@ gpu::DrawBatch& RenderContext::LogicalFlush::pushDraw(Draw* draw,
case DrawType::midpointFanPatches:
case DrawType::outerCurvePatches:
case DrawType::atomicInitialize:
case DrawType::atomicResolve:
case DrawType::stencilClipReset:
needsNewBatch =
m_drawList.empty() || m_drawList.tail().drawType != drawType ||
Expand All @@ -1860,6 +1859,7 @@ gpu::DrawBatch& RenderContext::LogicalFlush::pushDraw(Draw* draw,
case DrawType::interiorTriangulation:
case DrawType::imageRect:
case DrawType::imageMesh:
case DrawType::atomicResolve:
// We can't combine interior triangulations or image draws yet.
needsNewBatch = true;
break;
Expand Down
16 changes: 8 additions & 8 deletions renderer/src/shaders/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,11 @@ SPIRV_INPUTS := $(wildcard spirv/*.main)
SPIRV_OUTPUTS := \
$(addprefix $(OUT)/, $(patsubst %.main, %.vert.h, $(SPIRV_INPUTS))) \
$(addprefix $(OUT)/, $(patsubst %.main, %.frag.h, $(SPIRV_INPUTS))) \
$(OUT)/spirv/atomic_draw_image_mesh.fixedblend_frag.h \
$(OUT)/spirv/atomic_draw_image_rect.fixedblend_frag.h \
$(OUT)/spirv/atomic_draw_interior_triangles.fixedblend_frag.h \
$(OUT)/spirv/atomic_draw_path.fixedblend_frag.h \
$(OUT)/spirv/atomic_resolve_pls.fixedblend_frag.h \
$(OUT)/spirv/atomic_draw_image_mesh.fixedcolor_frag.h \
$(OUT)/spirv/atomic_draw_image_rect.fixedcolor_frag.h \
$(OUT)/spirv/atomic_draw_interior_triangles.fixedcolor_frag.h \
$(OUT)/spirv/atomic_draw_path.fixedcolor_frag.h \
$(OUT)/spirv/atomic_resolve_pls.fixedcolor_frag.h \

## Compile *.main into vertex shaders.
$(OUT)/spirv/%.vert.h: spirv/%.main $(MINIFY_STAMP)
Expand All @@ -107,10 +107,10 @@ $(OUT)/spirv/%.frag.h: spirv/%.main $(MINIFY_STAMP)
@mkdir -p $(OUT)/spirv
@glslangValidator -S frag -DTARGET_VULKAN -DFRAGMENT -I$(OUT) -V --vn $(subst .main,_frag,$(notdir $<)) -o $@ $<

## Compile atomic fragment shaders again with FIXED_FUNCTION_COLOR_BLEND defined.
$(OUT)/spirv/%.fixedblend_frag.h: spirv/%.main $(MINIFY_STAMP)
## Compile atomic fragment shaders again with FIXED_FUNCTION_COLOR_OUTPUT defined.
$(OUT)/spirv/%.fixedcolor_frag.h: spirv/%.main $(MINIFY_STAMP)
@mkdir -p $(OUT)/spirv
@glslangValidator -S frag -DTARGET_VULKAN -DFRAGMENT -DFIXED_FUNCTION_COLOR_BLEND -I$(OUT) -V --vn $(subst .main,_fixedblend_frag,$(notdir $<)) -o $@ $<
@glslangValidator -S frag -DTARGET_VULKAN -DFRAGMENT -DFIXED_FUNCTION_COLOR_OUTPUT -I$(OUT) -V --vn $(subst .main,_fixedcolor_frag,$(notdir $<)) -o $@ $<

spirv: $(SPIRV_OUTPUTS)

Expand Down
Loading

0 comments on commit ee9325b

Please sign in to comment.