Skip to content

Commit

Permalink
Change memory order and scope for atomics that gate final results bei…
Browse files Browse the repository at this point in the history
…ng stored. (KhronosGroup#1377)

* Change memory order and scope for atomics that gate final results being stored.

memory_order_acq_rel with memory_scope_device is now used to guarantee that the correct memory consistency is observed before final results are stored.

Previously it was possible for kernels to be generated that all used relaxed memory ordering, which could lead to false-positive failures.

Fixes KhronosGroup#1370

* Disable atomics tests with global, in-program atomics.

If the device does not support `memory_order_relaxed` or `memory_scope_device`, disable atomics tests that declare their atomics in-program with global memory.

There is now an implicit requirement to support `memory_order_relaxed` and `memory_scope_device` for these tests.

* Fix misplaced parentheses.

* Change memory scope for atomic fetch and load calls in kernel

Change the memory scope from memory_scope_work_group to
memory_scope_device so the ordering applies across all work items

Co-authored-by: Sreelakshmi Haridas <[email protected]>
  • Loading branch information
Jeremy-Kemp and Sreelakshmi Haridas authored Apr 28, 2022
1 parent 35c21a8 commit 5d6ca3e
Showing 1 changed file with 40 additions and 11 deletions.
51 changes: 40 additions & 11 deletions test_conformance/c11_atomics/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -1031,20 +1031,40 @@ CBasicTest<HostAtomicType, HostDataType>::KernelCode(cl_uint maxNumDestItems)
}
code += "\n";
}
if (LocalMemory() || DeclaredInProgram())
if (LocalMemory())
{
code += " // Copy final values to host reachable buffer\n";
if (LocalMemory())
code += " barrier(CLK_LOCAL_MEM_FENCE);\n"
" if(get_local_id(0) == 0) // first thread in workgroup\n";
code += " barrier(CLK_LOCAL_MEM_FENCE);\n"
" if(get_local_id(0) == 0) // first thread in workgroup\n";
code += " for(uint dstItemIdx = 0; dstItemIdx < numDestItems; "
"dstItemIdx++)\n";
if (aTypeName == "atomic_flag")
{
code += R"(
finalDest[dstItemIdx] =
atomic_flag_test_and_set_explicit(destMemory+dstItemIdx,
memory_order_relaxed,
memory_scope_work_group);)";
}
else
// global atomics declared in program scope
{
code += R"(
if(atomic_fetch_add_explicit(&finishedThreads, 1u,
memory_order_relaxed,
memory_scope_work_group)
finalDest[dstItemIdx] =
atomic_load_explicit(destMemory+dstItemIdx,
memory_order_relaxed,
memory_scope_work_group);)";
}
}
else if (DeclaredInProgram())
{
// global atomics declared in program scope
code += " // Copy final values to host reachable buffer\n";
code += R"(
if(atomic_fetch_add_explicit(&finishedThreads, 1u,
memory_order_acq_rel,
memory_scope_device)
== get_global_size(0)-1) // last finished thread
)";
)";
code += " for(uint dstItemIdx = 0; dstItemIdx < numDestItems; "
"dstItemIdx++)\n";
if (aTypeName == "atomic_flag")
Expand All @@ -1053,15 +1073,15 @@ CBasicTest<HostAtomicType, HostDataType>::KernelCode(cl_uint maxNumDestItems)
finalDest[dstItemIdx] =
atomic_flag_test_and_set_explicit(destMemory+dstItemIdx,
memory_order_relaxed,
memory_scope_work_group);)";
memory_scope_device);)";
}
else
{
code += R"(
finalDest[dstItemIdx] =
atomic_load_explicit(destMemory+dstItemIdx,
memory_order_relaxed,
memory_scope_work_group);)";
memory_scope_device);)";
}
}
code += "}\n"
Expand Down Expand Up @@ -1108,6 +1128,15 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
log_info("\t\tTest disabled\n");
return 0;
}
if (!LocalMemory() && DeclaredInProgram())
{
if (((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE) == 0)
|| ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) == 0))
{
log_info("\t\tTest disabled\n");
return 0;
}
}

// set up work sizes based on device capabilities and test configuration
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE,
Expand Down

0 comments on commit 5d6ca3e

Please sign in to comment.