Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions naga/src/back/msl/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7235,6 +7235,15 @@ template <typename A>
}
}

//https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf - 1.6.6
if ep.stage == crate::ShaderStage::Compute && options.lang_version >= (3, 0) {
let total_threads =
ep.workgroup_size[0] * ep.workgroup_size[1] * ep.workgroup_size[2];
write!(
self.out,
"[[max_total_threads_per_threadgroup({total_threads})]] "
)?;
}
// Write the entry point function's name, and begin its argument list.
writeln!(self.out, "{em_str} {result_type_name} {fun_name}(")?;

Expand Down
2 changes: 1 addition & 1 deletion naga/tests/out/msl/wgsl-atomicOps-float32.metal
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ struct Struct {

struct cs_mainInput {
};
kernel void cs_main(
[[max_total_threads_per_threadgroup(2)]] kernel void cs_main(
metal::uint3 id [[thread_position_in_threadgroup]]
, device metal::atomic_float& storage_atomic_scalar [[user(fake0)]]
, device type_2& storage_atomic_arr [[user(fake0)]]
Expand Down
2 changes: 1 addition & 1 deletion naga/tests/out/msl/wgsl-atomicTexture-int64.metal
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ using metal::uint;

struct cs_mainInput {
};
kernel void cs_main(
[[max_total_threads_per_threadgroup(2)]] kernel void cs_main(
metal::uint3 id [[thread_position_in_threadgroup]]
, metal::texture2d<ulong, metal::access::read_write> image [[user(fake0)]]
) {
Expand Down
2 changes: 1 addition & 1 deletion naga/tests/out/msl/wgsl-atomicTexture.metal
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ using metal::uint;

struct cs_mainInput {
};
kernel void cs_main(
[[max_total_threads_per_threadgroup(2)]] kernel void cs_main(
metal::uint3 id [[thread_position_in_threadgroup]]
, metal::texture2d<uint, metal::access::read_write> image_u [[user(fake0)]]
, metal::texture2d<int, metal::access::read_write> image_s [[user(fake0)]]
Expand Down
2 changes: 1 addition & 1 deletion naga/tests/out/msl/wgsl-memory-decorations-coherent.metal
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ struct Data {
type_1 values;
};

kernel void main_(
[[max_total_threads_per_threadgroup(1)]] kernel void main_(
coherent device Data& coherent_buf [[user(fake0)]]
, device Data const& plain_buf [[user(fake0)]]
, constant _mslBufferSizes& _buffer_sizes [[user(fake0)]]
Expand Down
3 changes: 3 additions & 0 deletions wgpu-types/src/limits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,9 @@ pub struct Limits {
pub max_compute_workgroup_storage_size: u32,
/// Maximum value of the product of the `workgroup_size` dimensions for a compute entry-point.
/// Defaults to 256. Higher is "better".
/// **Note:** On Metal, this is used to emit the `[[max_total_threads_per_threadgroup]]`
/// attribute. This is how we guarantee that compute dispatches will always run
/// and not silently fail due to hardware register pressure or occupancy limits.
pub max_compute_invocations_per_workgroup: u32,
/// The maximum value of the `workgroup_size` X dimension for a compute stage `ShaderModule` entry-point.
/// Defaults to 256. Higher is "better".
Expand Down
Loading