Skip to content

DX12: Align copies b/w textures and buffers when D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported is false #7721

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: trunk
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ Bottom level categories:
#### DX12

- Get `vertex_index` & `instance_index` builtins working for indirect draws. By @teoxoy in [#7535](https://github.com/gfx-rs/wgpu/pull/7535)
- Align copies b/w textures and buffers when `D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported` is `false`. By @ErichDonGubler in [#7721](https://github.com/gfx-rs/wgpu/pull/7721).

#### Metal

Expand Down
12 changes: 1 addition & 11 deletions tests/tests/wgpu-gpu/regression/issue_6827.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,7 @@ static TEST_SCATTER: GpuTestConfiguration = GpuTestConfiguration::new()
.expect_fail(FailureCase::backend_adapter(
wgpu::Backends::METAL,
"Apple Paravirtual device", // CI on M1
))
.expect_fail(
// Unfortunately this depends on if `D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported`
// is true, which we have no way to encode. This reproduces in CI though, so not too worried about it.
FailureCase::backend(wgpu::Backends::DX12)
.flaky()
.validation_error(
"D3D12_PLACED_SUBRESOURCE_FOOTPRINT::Offset must be a multiple of 512",
)
.panic("GraphicsCommandList::close failed: The parameter is incorrect"),
),
)),
)
.run_async(|ctx| async move { run_test(ctx, true).await });

Expand Down
17 changes: 17 additions & 0 deletions wgpu-hal/src/dx12/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,21 @@ impl super::Adapter {
.is_ok()
};

let unrestricted_buffer_texture_copy_pitch_supported = {
let mut features13 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS13::default();
unsafe {
device.CheckFeatureSupport(
Direct3D12::D3D12_FEATURE_D3D12_OPTIONS13,
<*mut _>::cast(&mut features13),
size_of_val(&features13) as u32,
)
}
.is_ok()
&& features13
.UnrestrictedBufferTextureCopyPitchSupported
.as_bool()
};

let mut max_sampler_descriptor_heap_size =
Direct3D12::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE;
{
Expand Down Expand Up @@ -302,6 +317,7 @@ impl super::Adapter {
suballocation_supported: !info.name.contains("Iris(R) Xe"),
shader_model,
max_sampler_descriptor_heap_size,
unrestricted_buffer_texture_copy_pitch_supported,
};

// Theoretically vram limited, but in practice 2^20 is the limit
Expand Down Expand Up @@ -667,6 +683,7 @@ impl crate::Adapter for super::Adapter {
queue: super::Queue {
raw: queue,
temp_lists: Mutex::new(Vec::new()),
intermediate_copy_bufs: Mutex::new(Vec::new()),
},
})
}
Expand Down
144 changes: 139 additions & 5 deletions wgpu-hal/src/dx12/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use crate::{
dxgi::{name::ObjectExt, result::HResult as _},
},
dx12::borrow_interface_temporarily,
AccelerationStructureEntries,
AccelerationStructureEntries, CommandEncoder as _,
};

fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> Direct3D12::D3D12_BOX {
Expand Down Expand Up @@ -312,6 +312,78 @@ impl super::CommandEncoder {
}
}
}

unsafe fn buf_tex_intermediate<T>(
&mut self,
region: crate::BufferTextureCopy,
tex_fmt: wgt::TextureFormat,
copy_op: impl FnOnce(&mut Self, &super::Buffer, wgt::BufferSize, crate::BufferTextureCopy) -> T,
) -> Option<(T, super::Buffer)> {
let size = {
let copy_info = region.buffer_layout.get_buffer_texture_copy_info(
tex_fmt,
region.texture_base.aspect.map(),
&region.size.into(),
);
copy_info.bytes_in_copy
};

let size = wgt::BufferSize::new(size)?;

let buffer = {
let (resource, allocation) =
super::suballocation::DeviceAllocationContext::from(&*self)
.create_buffer(&crate::BufferDescriptor {
label: None,
size: size.get(),
usage: wgt::BufferUses::COPY_SRC | wgt::BufferUses::COPY_DST,
memory_flags: crate::MemoryFlags::empty(),
})
.expect(concat!(
"internal error: ",
"failed to allocate intermediate buffer ",
"for offset alignment"
));
super::Buffer {
resource,
size: size.get(),
allocation,
}
};

let mut region = region;
region.buffer_layout.offset = 0;

unsafe {
self.transition_buffers(
[crate::BufferBarrier {
buffer: &buffer,
usage: crate::StateTransition {
from: wgt::BufferUses::empty(),
to: wgt::BufferUses::COPY_DST,
},
}]
.into_iter(),
)
};

let t = copy_op(self, &buffer, size, region);

unsafe {
self.transition_buffers(
[crate::BufferBarrier {
buffer: &buffer,
usage: crate::StateTransition {
from: wgt::BufferUses::COPY_DST,
to: wgt::BufferUses::COPY_SRC,
},
}]
.into_iter(),
)
};

Some((t, buffer))
}
}

impl crate::CommandEncoder for super::CommandEncoder {
Expand Down Expand Up @@ -363,7 +435,10 @@ impl crate::CommandEncoder for super::CommandEncoder {
unsafe fn end_encoding(&mut self) -> Result<super::CommandBuffer, crate::DeviceError> {
let raw = self.list.take().unwrap();
unsafe { raw.Close() }.into_device_result("GraphicsCommandList::close")?;
Ok(super::CommandBuffer { raw })
Ok(super::CommandBuffer {
raw,
intermediate_copy_bufs: mem::take(&mut self.intermediate_copy_bufs).into(),
})
}
unsafe fn reset_all<I: Iterator<Item = super::CommandBuffer>>(&mut self, command_buffers: I) {
for cmd_buf in command_buffers {
Expand Down Expand Up @@ -612,8 +687,34 @@ impl crate::CommandEncoder for super::CommandEncoder {
) where
T: Iterator<Item = crate::BufferTextureCopy>,
{
let list = self.list.as_ref().unwrap();
let offset_alignment = self.shared.private_caps.texture_data_placement_alignment();

for r in regions {
let is_offset_aligned = r.buffer_layout.offset % offset_alignment == 0;
let (r, src) = if is_offset_aligned {
(r, src)
} else {
let Some((r, src)) = (unsafe {
let src_offset = r.buffer_layout.offset;
self.buf_tex_intermediate(r, dst.format, |this, buf, size, r| {
let layout = crate::BufferCopy {
src_offset,
dst_offset: 0,
size,
};
this.copy_buffer_to_buffer(src, buf, [layout].into_iter());
r
})
}) else {
continue;
};
self.intermediate_copy_bufs.push(src);
let src = self.intermediate_copy_bufs.last().unwrap();
(r, src)
};

let list = self.list.as_ref().unwrap();

let src_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION {
pResource: unsafe { borrow_interface_temporarily(&src.resource) },
Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
Expand Down Expand Up @@ -652,8 +753,12 @@ impl crate::CommandEncoder for super::CommandEncoder {
) where
T: Iterator<Item = crate::BufferTextureCopy>,
{
let list = self.list.as_ref().unwrap();
for r in regions {
let copy_aligned = |this: &mut Self,
src: &super::Texture,
dst: &super::Buffer,
r: crate::BufferTextureCopy| {
let list = this.list.as_ref().unwrap();

let src_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION {
pResource: unsafe { borrow_interface_temporarily(&src.resource) },
Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
Expand All @@ -673,6 +778,35 @@ impl crate::CommandEncoder for super::CommandEncoder {
unsafe {
list.CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, Some(&src_box))
};
};

let offset_alignment = self.shared.private_caps.texture_data_placement_alignment();

for r in regions {
let is_offset_aligned = r.buffer_layout.offset % offset_alignment == 0;
if is_offset_aligned {
copy_aligned(self, src, dst, r)
} else {
let orig_offset = r.buffer_layout.offset;
let Some((r, src)) = (unsafe {
self.buf_tex_intermediate(r, src.format, |this, buf, size, r| {
copy_aligned(this, src, buf, r);
crate::BufferCopy {
src_offset: orig_offset,
dst_offset: 0,
size,
}
})
}) else {
continue;
};

unsafe {
self.copy_buffer_to_buffer(&src, dst, [r].into_iter());
}

self.intermediate_copy_bufs.push(src);
};
}
}

Expand Down
1 change: 1 addition & 0 deletions wgpu-hal/src/dx12/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -756,6 +756,7 @@ impl crate::Device for super::Device {
mem_allocator: self.mem_allocator.clone(),
rtv_pool: Arc::clone(&self.rtv_pool),
temp_rtv_handles: Vec::new(),
intermediate_copy_bufs: Vec::new(),
null_rtv_handle: self.null_rtv_handle,
list: None,
free_lists: Vec::new(),
Expand Down
24 changes: 23 additions & 1 deletion wgpu-hal/src/dx12/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,11 @@ use windows::{
core::{Free, Interface},
Win32::{
Foundation,
Graphics::{Direct3D, Direct3D12, DirectComposition, Dxgi},
Graphics::{
Direct3D,
Direct3D12::{self, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT},
DirectComposition, Dxgi,
},
System::Threading,
},
};
Expand Down Expand Up @@ -574,6 +578,17 @@ struct PrivateCapabilities {
suballocation_supported: bool,
shader_model: naga::back::hlsl::ShaderModel,
max_sampler_descriptor_heap_size: u32,
unrestricted_buffer_texture_copy_pitch_supported: bool,
}

impl PrivateCapabilities {
fn texture_data_placement_alignment(&self) -> u64 {
if self.unrestricted_buffer_texture_copy_pitch_supported {
D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT.into()
} else {
4
}
}
}

#[derive(Default)]
Expand Down Expand Up @@ -681,6 +696,7 @@ unsafe impl Sync for Device {}
pub struct Queue {
raw: Direct3D12::ID3D12CommandQueue,
temp_lists: Mutex<Vec<Option<Direct3D12::ID3D12CommandList>>>,
intermediate_copy_bufs: Mutex<Vec<Arc<Vec<Buffer>>>>,
}

impl Queue {
Expand Down Expand Up @@ -803,6 +819,8 @@ pub struct CommandEncoder {
rtv_pool: Arc<Mutex<descriptor::CpuPool>>,
temp_rtv_handles: Vec<descriptor::Handle>,

intermediate_copy_bufs: Vec<Buffer>,

null_rtv_handle: descriptor::Handle,
list: Option<Direct3D12::ID3D12GraphicsCommandList>,
free_lists: Vec<Direct3D12::ID3D12GraphicsCommandList>,
Expand Down Expand Up @@ -831,6 +849,7 @@ impl fmt::Debug for CommandEncoder {
#[derive(Debug)]
pub struct CommandBuffer {
raw: Direct3D12::ID3D12GraphicsCommandList,
intermediate_copy_bufs: Arc<Vec<Buffer>>,
}

impl crate::DynCommandBuffer for CommandBuffer {}
Expand Down Expand Up @@ -1435,8 +1454,11 @@ impl crate::Queue for Queue {
) -> Result<(), crate::DeviceError> {
let mut temp_lists = self.temp_lists.lock();
temp_lists.clear();
let mut intermediate_copy_bufs = self.intermediate_copy_bufs.lock();
for cmd_buf in command_buffers {
temp_lists.push(Some(cmd_buf.raw.clone().into()));
intermediate_copy_bufs.push(Arc::clone(&cmd_buf.intermediate_copy_bufs));
// TODO: When to clean accumulated copy buffers up?
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm thinking that the best time to clean these up might be during a Poll, where we can guarantee that temporary buffers are no longer necessary. Validation, please? 🥺

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can store them in the CommandEncoder and they will be cleaned up on CommandEncoder::Drop, wgpu-core holds HAL command encoders alive until the end of the submission.

}

{
Expand Down
30 changes: 30 additions & 0 deletions wgpu-hal/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2315,6 +2315,36 @@ pub struct CopyExtent {
pub depth: u32,
}

impl From<wgt::Extent3d> for CopyExtent {
fn from(value: wgt::Extent3d) -> Self {
let wgt::Extent3d {
width,
height,
depth_or_array_layers,
} = value;
Self {
width,
height,
depth: depth_or_array_layers,
}
}
}

impl From<CopyExtent> for wgt::Extent3d {
fn from(value: CopyExtent) -> Self {
let CopyExtent {
width,
height,
depth,
} = value;
Self {
width,
height,
depth_or_array_layers: depth,
}
}
}

#[derive(Clone, Debug)]
pub struct TextureCopy {
pub src_base: TextureCopyBase,
Expand Down
Loading