Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add recognition of modern Intel processors to port library and compiler #7350

Merged
merged 2 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 22 additions & 18 deletions compiler/env/ProcessorInfo.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,27 +243,31 @@ inline uint32_t getFeatureFlags10Mask()

enum TR_ProcessorDescription
{
TR_ProcessorUnknown = 0x00000000,
TR_ProcessorIntelPentium = 0x00000001,
TR_ProcessorIntelP6 = 0x00000002,
TR_ProcessorIntelPentium4 = 0x00000003,
TR_ProcessorUnknown = 0x00000000,
TR_ProcessorIntelPentium = 0x00000001,
TR_ProcessorIntelP6 = 0x00000002,
TR_ProcessorIntelPentium4 = 0x00000003,

TR_ProcessorAMDK5 = 0x00000004,
TR_ProcessorAMDK6 = 0x00000005,
TR_ProcessorAMDAthlonDuron = 0x00000006,
TR_ProcessorAMDOpteron = 0x00000007,
TR_ProcessorAMDK5 = 0x00000004,
TR_ProcessorAMDK6 = 0x00000005,
TR_ProcessorAMDAthlonDuron = 0x00000006,
TR_ProcessorAMDOpteron = 0x00000007,

TR_ProcessorIntelCore2 = 0x00000008,
TR_ProcessorIntelTulsa = 0x00000009,
TR_ProcessorIntelNehalem = 0x0000000a,
TR_ProcessorIntelCore2 = 0x00000008,
TR_ProcessorIntelTulsa = 0x00000009,
TR_ProcessorIntelNehalem = 0x0000000a,

TR_ProcessorAMDFamily15h = 0x0000000b,
TR_ProcessorIntelWestmere = 0x0000000c,
TR_ProcessorIntelSandyBridge = 0x0000000d,
TR_ProcessorIntelIvyBridge = 0x0000000e,
TR_ProcessorIntelHaswell = 0x0000000f,
TR_ProcessorIntelBroadwell = 0x00000010,
TR_ProcessorIntelSkylake = 0x00000011,
TR_ProcessorAMDFamily15h = 0x0000000b,
TR_ProcessorIntelWestmere = 0x0000000c,
TR_ProcessorIntelSandyBridge = 0x0000000d,
TR_ProcessorIntelIvyBridge = 0x0000000e,
TR_ProcessorIntelHaswell = 0x0000000f,
TR_ProcessorIntelBroadwell = 0x00000010,
TR_ProcessorIntelSkylake = 0x00000011,
TR_ProcessorIntelCascadeLake = 0x00000012,
TR_ProcessorIntelIceLake = 0x00000013,
TR_ProcessorIntelSapphireRapids = 0x00000014,
TR_ProcessorIntelEmeraldRapids = 0x00000015,
};

#endif
24 changes: 12 additions & 12 deletions compiler/x/codegen/IntegerMultiplyDecomposer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,21 +98,21 @@ TR::Register *TR_X86IntegerMultiplyDecomposer::decomposeIntegerMultiplier(int32_
const integerMultiplyComposition& composition = _integerMultiplySolutions[decompositionIndex];
if (composition._subsequentShiftTooExpensive == false)
{
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.is(OMR_PROCESSOR_X86_INTELCORE2) == cg()->getX86ProcessorInfo().isIntelCore2(), "isIntelCore2 failed\n");
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.is(OMR_PROCESSOR_X86_INTELNEHALEM) == cg()->getX86ProcessorInfo().isIntelNehalem(), "isIntelNehalem failed\n");
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.is(OMR_PROCESSOR_X86_INTELWESTMERE) == cg()->getX86ProcessorInfo().isIntelWestmere(), "isIntelWestmere failed\n");
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.is(OMR_PROCESSOR_X86_INTELSANDYBRIDGE) == cg()->getX86ProcessorInfo().isIntelSandyBridge(), "isIntelSandyBridge failed\n");
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.is(OMR_PROCESSOR_X86_AMDFAMILY15H) == cg()->getX86ProcessorInfo().isAMD15h(), "isAMD15h failed\n");
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.is(OMR_PROCESSOR_X86_AMDOPTERON) == cg()->getX86ProcessorInfo().isAMDOpteron(), "isAMDOpteron failed\n");
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.is(OMR_PROCESSOR_X86_INTEL_CORE2) == cg()->getX86ProcessorInfo().isIntelCore2(), "isIntelCore2 failed\n");
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.is(OMR_PROCESSOR_X86_INTEL_NEHALEM) == cg()->getX86ProcessorInfo().isIntelNehalem(), "isIntelNehalem failed\n");
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.is(OMR_PROCESSOR_X86_INTEL_WESTMERE) == cg()->getX86ProcessorInfo().isIntelWestmere(), "isIntelWestmere failed\n");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these asserts really necessary?

Copy link
Contributor Author

@0xdaryl 0xdaryl May 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure, tbh. It has been decades since I looked at this code, but I think there used to be performance considerations for choosing instructions on various processors. But asserts aren't the way to enforce that.

Given what I've come to understand of the CPUID code as part of this PR, it can only be one of those exact processors coming through this path or the assertion will trip. Most of those processors are fairly old, so it seems this path is not taken very frequently (or never at all). (Upon reading the code more closely, I'm not sure this statement is true).

Your question is much broader than what this PR is attempting to address. I'd say it be revisited separately.

And if there is value to keeping these asserts for some reason, we should be able to come up with a more efficient way of expressing these conditions. There is a lot of redundant overhead here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Created an issue: #7352

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe these asserts were added when, at least in openJ9, we switched to using the portlib for the processor info instead of the X86ProcessorInfo class; I guess it was to ensure that the two sources agreed on all queries.

I doubt these asserts are still needed.

TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.is(OMR_PROCESSOR_X86_INTEL_SANDYBRIDGE) == cg()->getX86ProcessorInfo().isIntelSandyBridge(), "isIntelSandyBridge failed\n");
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.is(OMR_PROCESSOR_X86_AMD_FAMILY15H) == cg()->getX86ProcessorInfo().isAMD15h(), "isAMD15h failed\n");
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.is(OMR_PROCESSOR_X86_AMD_OPTERON) == cg()->getX86ProcessorInfo().isAMDOpteron(), "isAMDOpteron failed\n");

target = generateDecompositionInstructions(decompositionIndex, tempRegArraySize, tempRegArray);
if (shiftAmount < 3 &&
!comp->target().cpu.is(OMR_PROCESSOR_X86_INTELCORE2) &&
!comp->target().cpu.is(OMR_PROCESSOR_X86_INTELNEHALEM) &&
!comp->target().cpu.is(OMR_PROCESSOR_X86_INTELWESTMERE) &&
!comp->target().cpu.is(OMR_PROCESSOR_X86_INTELSANDYBRIDGE) &&
!comp->target().cpu.is(OMR_PROCESSOR_X86_AMDFAMILY15H) &&
!comp->target().cpu.is(OMR_PROCESSOR_X86_AMDOPTERON)) // TODO:: P3 should go straight to else and use shift always
!comp->target().cpu.is(OMR_PROCESSOR_X86_INTEL_CORE2) &&
!comp->target().cpu.is(OMR_PROCESSOR_X86_INTEL_NEHALEM) &&
!comp->target().cpu.is(OMR_PROCESSOR_X86_INTEL_WESTMERE) &&
!comp->target().cpu.is(OMR_PROCESSOR_X86_INTEL_SANDYBRIDGE) &&
!comp->target().cpu.is(OMR_PROCESSOR_X86_AMD_FAMILY15H) &&
!comp->target().cpu.is(OMR_PROCESSOR_X86_AMD_OPTERON)) // TODO:: P3 should go straight to else and use shift always
{
for (; shiftAmount > 0; --shiftAmount)
{
Expand Down
37 changes: 29 additions & 8 deletions compiler/x/codegen/OMRCodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,30 @@ void TR_X86ProcessorInfo::initialize(bool force)
case 0x06:
{
uint32_t extended_model = getCPUModel(_processorSignature) + (getCPUExtendedModel(_processorSignature) << 4);
uint32_t processorStepping = getCPUStepping(_processorSignature);
switch (extended_model)
{
case 0x55:
case 0xcf:
_processorDescription |= TR_ProcessorIntelEmeraldRapids; break;
case 0x8f:
_processorDescription |= TR_ProcessorIntelSapphireRapids; break;
case 0x6a: // IceLake_X
case 0x6c: // IceLake_D
case 0x7d: // IceLake
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could not find 7D documented. I assume its a client processor?

For the client ice lake processors I see

Processor Family Extended Family Model
Ice Lake Client (U) 0x6 0x7 0xe
Ice Lake Client (Y) 0x6 0x7 0xe

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, see Vol 4., Table 2-1

case 0x7e: // IceLake_L
_processorDescription |= TR_ProcessorIntelIceLake; break;
case 0x55: // Skylake_X
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure how skylake 0x55 will be differentiated from Cooper lake and Cascade lake. The model numbers overlap.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both are considered "optimizations" on Skylake. I believe the stepping is what distinguishes them. I didn't include them because I did not find a verifiable stepping value for both, and distinguishing the processor with that much refinement was not my goal (it may be part of your future CPUID refactoring work).

Nevertheless I have since found a stepping value for Cascade Lake, and I modified the PR to support it.

if (processorStepping == 7)
{
_processorDescription |= TR_ProcessorIntelCascadeLake;
}
else
{
_processorDescription |= TR_ProcessorIntelSkylake;
}
break;
case 0x4e: // Skylake_L
case 0x5e: // Skylake
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whats the meaning behind the suffix in your comment?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a comment in my latest forced push. I borrowed the suffix conventions for the processors we recognize from the Linux kernel.

_processorDescription |= TR_ProcessorIntelSkylake; break;
case 0x4f:
_processorDescription |= TR_ProcessorIntelBroadwell; break;
Expand Down Expand Up @@ -270,8 +291,8 @@ OMR::X86::CodeGenerator::initializeX86(TR::Compilation *comp)
*
* TODO: Need to figure out from which mode of Broadwell start supporting TM
*/
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.is(OMR_PROCESSOR_X86_INTELHASWELL) == getX86ProcessorInfo().isIntelHaswell(), "isIntelHaswell() failed\n");
if (!comp->target().cpu.is(OMR_PROCESSOR_X86_INTELHASWELL))
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.is(OMR_PROCESSOR_X86_INTEL_HASWELL) == getX86ProcessorInfo().isIntelHaswell(), "isIntelHaswell() failed\n");
if (!comp->target().cpu.is(OMR_PROCESSOR_X86_INTEL_HASWELL))
{
if (comp->target().is64Bit())
{
Expand Down Expand Up @@ -303,9 +324,9 @@ OMR::X86::CodeGenerator::initializeX86(TR::Compilation *comp)
// Enable software prefetch of the TLH and configure the TLH prefetching
// geometry.
//
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.is(OMR_PROCESSOR_X86_INTELCORE2) == comp->cg()->getX86ProcessorInfo().isIntelCore2(), "isIntelCore2() failed\n");
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.is(OMR_PROCESSOR_X86_INTELNEHALEM) == comp->cg()->getX86ProcessorInfo().isIntelNehalem(), "isIntelNehalem() failed\n");
if (((!comp->getOption(TR_DisableTLHPrefetch) && (comp->target().cpu.is(OMR_PROCESSOR_X86_INTELCORE2) || comp->target().cpu.is(OMR_PROCESSOR_X86_INTELNEHALEM))) ||
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.is(OMR_PROCESSOR_X86_INTEL_CORE2) == comp->cg()->getX86ProcessorInfo().isIntelCore2(), "isIntelCore2() failed\n");
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.is(OMR_PROCESSOR_X86_INTEL_NEHALEM) == comp->cg()->getX86ProcessorInfo().isIntelNehalem(), "isIntelNehalem() failed\n");
if (((!comp->getOption(TR_DisableTLHPrefetch) && (comp->target().cpu.is(OMR_PROCESSOR_X86_INTEL_CORE2) || comp->target().cpu.is(OMR_PROCESSOR_X86_INTEL_NEHALEM))) ||
(comp->getOption(TR_TLHPrefetch) && self()->targetSupportsSoftwarePrefetches())))
{
self()->setEnableTLHPrefetching();
Expand Down Expand Up @@ -459,9 +480,9 @@ OMR::X86::CodeGenerator::initializeX86(TR::Compilation *comp)
//
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.isGenuineIntel() == getX86ProcessorInfo().isGenuineIntel(), "isGenuineIntel() failed\n");
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.isAuthenticAMD() == getX86ProcessorInfo().isAuthenticAMD(), "isAuthenticAMD() failed\n");
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.is(OMR_PROCESSOR_X86_AMDFAMILY15H) == getX86ProcessorInfo().isAMD15h(), "isAMD15h() failed\n");
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.is(OMR_PROCESSOR_X86_AMD_FAMILY15H) == getX86ProcessorInfo().isAMD15h(), "isAMD15h() failed\n");
int32_t boundary;
if (comp->target().cpu.isGenuineIntel() || (comp->target().cpu.isAuthenticAMD() && comp->target().cpu.is(OMR_PROCESSOR_X86_AMDFAMILY15H)))
if (comp->target().cpu.isGenuineIntel() || (comp->target().cpu.isAuthenticAMD() && comp->target().cpu.is(OMR_PROCESSOR_X86_AMD_FAMILY15H)))
boundary = 32;
else
{
Expand Down
42 changes: 23 additions & 19 deletions compiler/x/codegen/OMRCodeGenerator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,25 +195,29 @@ struct TR_X86ProcessorInfo
uint32_t getCPUExtendedModel(uint32_t signature) {return (signature & CPUID_SIGNATURE_EXTENDEDMODEL_MASK) >> 16;}
uint32_t getCPUExtendedFamily(uint32_t signature) {return (signature & CPUID_SIGNATURE_EXTENDEDFAMILY_MASK) >> 20;}

bool isIntelPentium() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelPentium; }
bool isIntelP6() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelP6; }
bool isIntelPentium4() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelPentium4; }
bool isIntelCore2() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelCore2; }
bool isIntelTulsa() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelTulsa; }
bool isIntelNehalem() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelNehalem; }
bool isIntelWestmere() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelWestmere; }
bool isIntelSandyBridge() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelSandyBridge; }
bool isIntelIvyBridge() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelIvyBridge; }
bool isIntelHaswell() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelHaswell; }
bool isIntelBroadwell() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelBroadwell; }
bool isIntelSkylake() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelSkylake; }

bool isIntelOldMachine() { return (isIntelPentium() || isIntelP6() || isIntelPentium4() || isIntelCore2() || isIntelTulsa() || isIntelNehalem()); }

bool isAMDK6() { return (_processorDescription & 0x000000fe) == TR_ProcessorAMDK5; } // accept either K5 or K6
bool isAMDAthlonDuron() { return (_processorDescription & 0x000000ff) == TR_ProcessorAMDAthlonDuron; }
bool isAMDOpteron() { return (_processorDescription & 0x000000ff) == TR_ProcessorAMDOpteron; }
bool isAMD15h() { return (_processorDescription & 0x000000ff) == TR_ProcessorAMDFamily15h; }
bool isIntelPentium() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelPentium; }
bool isIntelP6() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelP6; }
bool isIntelPentium4() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelPentium4; }
bool isIntelCore2() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelCore2; }
bool isIntelTulsa() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelTulsa; }
bool isIntelNehalem() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelNehalem; }
bool isIntelWestmere() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelWestmere; }
bool isIntelSandyBridge() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelSandyBridge; }
bool isIntelIvyBridge() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelIvyBridge; }
bool isIntelHaswell() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelHaswell; }
bool isIntelBroadwell() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelBroadwell; }
bool isIntelSkylake() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelSkylake; }
bool isIntelCascadeLake() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelCascadeLake; }
bool isIntelIceLake() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelIceLake; }
bool isIntelSapphireRapids() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelSapphireRapids; }
bool isIntelEmeraldRapids() { return (_processorDescription & 0x000000ff) == TR_ProcessorIntelEmeraldRapids; }

bool isIntelOldMachine() { return (isIntelPentium() || isIntelP6() || isIntelPentium4() || isIntelCore2() || isIntelTulsa() || isIntelNehalem()); }

bool isAMDK6() { return (_processorDescription & 0x000000fe) == TR_ProcessorAMDK5; } // accept either K5 or K6
bool isAMDAthlonDuron() { return (_processorDescription & 0x000000ff) == TR_ProcessorAMDAthlonDuron; }
bool isAMDOpteron() { return (_processorDescription & 0x000000ff) == TR_ProcessorAMDOpteron; }
bool isAMD15h() { return (_processorDescription & 0x000000ff) == TR_ProcessorAMDFamily15h; }

bool isGenuineIntel() {return _vendorFlags.testAny(TR_GenuineIntel);}
bool isAuthenticAMD() {return _vendorFlags.testAny(TR_AuthenticAMD);}
Expand Down
Loading