Skip to content

Commit 505438d

Browse files
WIP: fix(dx12): align tex. <-> buf. copies via intermediate buffer if !UnrestrictedBufferTextureCopyPitchSupported
TODO: resolve `TODO` comments
1 parent b804b94 commit 505438d

File tree

6 files changed

+156
-18
lines changed

6 files changed

+156
-18
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ Bottom level categories:
8484
#### DX12
8585

8686
- Get `vertex_index` & `instance_index` builtins working for indirect draws. By @teoxoy in [#7535](https://github.com/gfx-rs/wgpu/pull/7535)
87+
- Align copies b/w textures and buffers when `D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported` is `false`. By @ErichDonGubler in [#7721](https://github.com/gfx-rs/wgpu/pull/7721).
8788

8889
#### Metal
8990

tests/tests/wgpu-gpu/regression/issue_6827.rs

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,7 @@ static TEST_SCATTER: GpuTestConfiguration = GpuTestConfiguration::new()
1919
.expect_fail(FailureCase::backend_adapter(
2020
wgpu::Backends::METAL,
2121
"Apple Paravirtual device", // CI on M1
22-
))
23-
.expect_fail(
24-
// Unfortunately this depends on if `D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported`
25-
// is true, which we have no way to encode. This reproduces in CI though, so not too worried about it.
26-
FailureCase::backend(wgpu::Backends::DX12)
27-
.flaky()
28-
.validation_error(
29-
"D3D12_PLACED_SUBRESOURCE_FOOTPRINT::Offset must be a multiple of 512",
30-
)
31-
.panic("GraphicsCommandList::close failed: The parameter is incorrect"),
32-
),
22+
)),
3323
)
3424
.run_async(|ctx| async move { run_test(ctx, true).await });
3525

wgpu-hal/src/dx12/adapter.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -317,8 +317,7 @@ impl super::Adapter {
317317
suballocation_supported: !info.name.contains("Iris(R) Xe"),
318318
shader_model,
319319
max_sampler_descriptor_heap_size,
320-
_unrestricted_buffer_texture_copy_pitch_supported:
321-
unrestricted_buffer_texture_copy_pitch_supported,
320+
unrestricted_buffer_texture_copy_pitch_supported,
322321
};
323322

324323
// Theoretically vram limited, but in practice 2^20 is the limit
@@ -684,6 +683,7 @@ impl crate::Adapter for super::Adapter {
684683
queue: super::Queue {
685684
raw: queue,
686685
temp_lists: Mutex::new(Vec::new()),
686+
intermediate_copy_bufs: Mutex::new(Vec::new()),
687687
},
688688
})
689689
}

wgpu-hal/src/dx12/command.rs

Lines changed: 128 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use crate::{
1414
dxgi::{name::ObjectExt, result::HResult as _},
1515
},
1616
dx12::borrow_interface_temporarily,
17-
AccelerationStructureEntries,
17+
AccelerationStructureEntries, CommandEncoder as _,
1818
};
1919

2020
fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> Direct3D12::D3D12_BOX {
@@ -312,6 +312,78 @@ impl super::CommandEncoder {
312312
}
313313
}
314314
}
315+
316+
unsafe fn buf_tex_intermediate<T>(
317+
&mut self,
318+
region: crate::BufferTextureCopy,
319+
tex_fmt: wgt::TextureFormat,
320+
copy_op: impl FnOnce(&mut Self, &super::Buffer, wgt::BufferSize, crate::BufferTextureCopy) -> T,
321+
) -> Option<(T, super::Buffer)> {
322+
let size = {
323+
let copy_info = region.buffer_layout.get_buffer_texture_copy_info(
324+
tex_fmt,
325+
region.texture_base.aspect.map(),
326+
&region.size.into(),
327+
);
328+
copy_info.bytes_in_copy
329+
};
330+
331+
let size = wgt::BufferSize::new(size)?;
332+
333+
let buffer = {
334+
let (resource, allocation) =
335+
super::suballocation::DeviceAllocationContext::from(&*self)
336+
.create_buffer(&crate::BufferDescriptor {
337+
label: None,
338+
size: size.get(),
339+
usage: wgt::BufferUses::COPY_SRC | wgt::BufferUses::COPY_DST,
340+
memory_flags: crate::MemoryFlags::empty(),
341+
})
342+
.expect(concat!(
343+
"internal error: ",
344+
"failed to allocate intermediate buffer ",
345+
"for offset alignment"
346+
));
347+
super::Buffer {
348+
resource,
349+
size: size.get(),
350+
allocation,
351+
}
352+
};
353+
354+
let mut region = region;
355+
region.buffer_layout.offset = 0;
356+
357+
unsafe {
358+
self.transition_buffers(
359+
[crate::BufferBarrier {
360+
buffer: &buffer,
361+
usage: crate::StateTransition {
362+
from: wgt::BufferUses::empty(),
363+
to: wgt::BufferUses::COPY_DST,
364+
},
365+
}]
366+
.into_iter(),
367+
)
368+
};
369+
370+
let t = copy_op(self, &buffer, size, region);
371+
372+
unsafe {
373+
self.transition_buffers(
374+
[crate::BufferBarrier {
375+
buffer: &buffer,
376+
usage: crate::StateTransition {
377+
from: wgt::BufferUses::COPY_DST,
378+
to: wgt::BufferUses::COPY_SRC,
379+
},
380+
}]
381+
.into_iter(),
382+
)
383+
};
384+
385+
Some((t, buffer))
386+
}
315387
}
316388

317389
impl crate::CommandEncoder for super::CommandEncoder {
@@ -363,7 +435,10 @@ impl crate::CommandEncoder for super::CommandEncoder {
363435
unsafe fn end_encoding(&mut self) -> Result<super::CommandBuffer, crate::DeviceError> {
364436
let raw = self.list.take().unwrap();
365437
unsafe { raw.Close() }.into_device_result("GraphicsCommandList::close")?;
366-
Ok(super::CommandBuffer { raw })
438+
Ok(super::CommandBuffer {
439+
raw,
440+
intermediate_copy_bufs: mem::take(&mut self.intermediate_copy_bufs).into(),
441+
})
367442
}
368443
unsafe fn reset_all<I: Iterator<Item = super::CommandBuffer>>(&mut self, command_buffers: I) {
369444
for cmd_buf in command_buffers {
@@ -612,7 +687,32 @@ impl crate::CommandEncoder for super::CommandEncoder {
612687
) where
613688
T: Iterator<Item = crate::BufferTextureCopy>,
614689
{
690+
let offset_alignment = self.shared.private_caps.texture_data_placement_alignment();
691+
615692
for r in regions {
693+
let is_offset_aligned = r.buffer_layout.offset % offset_alignment == 0;
694+
let (r, src) = if is_offset_aligned {
695+
(r, src)
696+
} else {
697+
let Some((r, src)) = (unsafe {
698+
let src_offset = r.buffer_layout.offset;
699+
self.buf_tex_intermediate(r, dst.format, |this, buf, size, r| {
700+
let layout = crate::BufferCopy {
701+
src_offset,
702+
dst_offset: 0,
703+
size,
704+
};
705+
this.copy_buffer_to_buffer(src, buf, [layout].into_iter());
706+
r
707+
})
708+
}) else {
709+
continue;
710+
};
711+
self.intermediate_copy_bufs.push(src);
712+
let src = self.intermediate_copy_bufs.last().unwrap();
713+
(r, src)
714+
};
715+
616716
let list = self.list.as_ref().unwrap();
617717

618718
let src_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION {
@@ -680,8 +780,33 @@ impl crate::CommandEncoder for super::CommandEncoder {
680780
};
681781
};
682782

783+
let offset_alignment = self.shared.private_caps.texture_data_placement_alignment();
784+
683785
for r in regions {
684-
copy_aligned(this, src, dst, r);
786+
let is_offset_aligned = r.buffer_layout.offset % offset_alignment == 0;
787+
if is_offset_aligned {
788+
copy_aligned(self, src, dst, r)
789+
} else {
790+
let orig_offset = r.buffer_layout.offset;
791+
let Some((r, src)) = (unsafe {
792+
self.buf_tex_intermediate(r, src.format, |this, buf, size, r| {
793+
copy_aligned(this, src, buf, r);
794+
crate::BufferCopy {
795+
src_offset: orig_offset,
796+
dst_offset: 0,
797+
size,
798+
}
799+
})
800+
}) else {
801+
continue;
802+
};
803+
804+
unsafe {
805+
self.copy_buffer_to_buffer(&src, dst, [r].into_iter());
806+
}
807+
808+
self.intermediate_copy_bufs.push(src);
809+
};
685810
}
686811
}
687812

wgpu-hal/src/dx12/device.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -756,6 +756,7 @@ impl crate::Device for super::Device {
756756
mem_allocator: self.mem_allocator.clone(),
757757
rtv_pool: Arc::clone(&self.rtv_pool),
758758
temp_rtv_handles: Vec::new(),
759+
intermediate_copy_bufs: Vec::new(),
759760
null_rtv_handle: self.null_rtv_handle,
760761
list: None,
761762
free_lists: Vec::new(),

wgpu-hal/src/dx12/mod.rs

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,11 @@ use windows::{
9595
core::{Free, Interface},
9696
Win32::{
9797
Foundation,
98-
Graphics::{Direct3D, Direct3D12, DirectComposition, Dxgi},
98+
Graphics::{
99+
Direct3D,
100+
Direct3D12::{self, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT},
101+
DirectComposition, Dxgi,
102+
},
99103
System::Threading,
100104
},
101105
};
@@ -574,7 +578,17 @@ struct PrivateCapabilities {
574578
suballocation_supported: bool,
575579
shader_model: naga::back::hlsl::ShaderModel,
576580
max_sampler_descriptor_heap_size: u32,
577-
_unrestricted_buffer_texture_copy_pitch_supported: bool,
581+
unrestricted_buffer_texture_copy_pitch_supported: bool,
582+
}
583+
584+
impl PrivateCapabilities {
585+
fn texture_data_placement_alignment(&self) -> u64 {
586+
if self.unrestricted_buffer_texture_copy_pitch_supported {
587+
D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT.into()
588+
} else {
589+
4
590+
}
591+
}
578592
}
579593

580594
#[derive(Default)]
@@ -682,6 +696,7 @@ unsafe impl Sync for Device {}
682696
pub struct Queue {
683697
raw: Direct3D12::ID3D12CommandQueue,
684698
temp_lists: Mutex<Vec<Option<Direct3D12::ID3D12CommandList>>>,
699+
intermediate_copy_bufs: Mutex<Vec<Arc<Vec<Buffer>>>>,
685700
}
686701

687702
impl Queue {
@@ -804,6 +819,8 @@ pub struct CommandEncoder {
804819
rtv_pool: Arc<Mutex<descriptor::CpuPool>>,
805820
temp_rtv_handles: Vec<descriptor::Handle>,
806821

822+
intermediate_copy_bufs: Vec<Buffer>,
823+
807824
null_rtv_handle: descriptor::Handle,
808825
list: Option<Direct3D12::ID3D12GraphicsCommandList>,
809826
free_lists: Vec<Direct3D12::ID3D12GraphicsCommandList>,
@@ -832,6 +849,7 @@ impl fmt::Debug for CommandEncoder {
832849
#[derive(Debug)]
833850
pub struct CommandBuffer {
834851
raw: Direct3D12::ID3D12GraphicsCommandList,
852+
intermediate_copy_bufs: Arc<Vec<Buffer>>,
835853
}
836854

837855
impl crate::DynCommandBuffer for CommandBuffer {}
@@ -1436,8 +1454,11 @@ impl crate::Queue for Queue {
14361454
) -> Result<(), crate::DeviceError> {
14371455
let mut temp_lists = self.temp_lists.lock();
14381456
temp_lists.clear();
1457+
let mut intermediate_copy_bufs = self.intermediate_copy_bufs.lock();
14391458
for cmd_buf in command_buffers {
14401459
temp_lists.push(Some(cmd_buf.raw.clone().into()));
1460+
intermediate_copy_bufs.push(Arc::clone(&cmd_buf.intermediate_copy_bufs));
1461+
// TODO: When to clean accumulated copy buffers up?
14411462
}
14421463

14431464
{

0 commit comments

Comments
 (0)