-
Notifications
You must be signed in to change notification settings - Fork 603
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AVX512 Support #3776
base: master
Are you sure you want to change the base?
AVX512 Support #3776
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -66,17 +66,59 @@ static bool reg_in_range(GdbServerRegister regno, GdbServerRegister low, GdbServ | |
return true; | ||
} | ||
|
||
static const int AVX_FEATURE_BIT = 2; | ||
static const int PKRU_FEATURE_BIT = 9; | ||
static constexpr int AVX_FEATURE_BIT = 2; | ||
static constexpr int AVX_OPMASK_FEATURE_BIT = 5; | ||
static constexpr int AVX_ZMM_HI256_FEATURE_BIT = 6; | ||
static constexpr int AVX_ZMM_HI16_FEATURE_BIT = 7; | ||
static constexpr int PKRU_FEATURE_BIT = 9; | ||
|
||
static const uint64_t PKRU_FEATURE_MASK = 1 << PKRU_FEATURE_BIT; | ||
|
||
static const size_t xsave_header_offset = 512; | ||
static const size_t xsave_header_size = 64; | ||
static const size_t xsave_header_end = xsave_header_offset + xsave_header_size; | ||
// This is always at 576 since AVX is always the first optional feature, | ||
// if present. | ||
static const size_t AVX_xsave_offset = 576; | ||
struct RegisterConfig { | ||
int8_t feature; | ||
GdbServerRegister base; | ||
int8_t size; | ||
int stride; | ||
|
||
int register_offset(GdbServerRegister reg, int base_offset) const noexcept { | ||
const auto& layout = xsave_native_layout(); | ||
return layout.feature_layouts[feature].offset + base_offset + (reg - base) * stride; | ||
} | ||
}; | ||
|
||
static constexpr std::array<RegisterConfig, 6> RegisterConfigLookupTable{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Call this |
||
{ { AVX_FEATURE_BIT, DREG_64_YMM0H, 16, 16 }, | ||
{ AVX_ZMM_HI16_FEATURE_BIT, DREG_64_XMM16, 16, 64 }, | ||
{ AVX_ZMM_HI16_FEATURE_BIT, DREG_64_YMM16H, 16, 64 }, | ||
{ AVX_ZMM_HI256_FEATURE_BIT, DREG_64_ZMM0H, 32, 32 }, | ||
{ AVX_ZMM_HI16_FEATURE_BIT, DREG_64_ZMM16H, 32, 64 }, | ||
{ AVX_OPMASK_FEATURE_BIT, DREG_64_K0, 8, 8 } } | ||
}; | ||
|
||
static constexpr auto YMM16_31 = 0b10; | ||
static constexpr auto ZMM16_31 = 0b100; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please document these. |
||
|
||
// Every range of registers (except K0-7) are 16 registers long. We use this fact to build | ||
// a lookup table, for the AVX2 and AVX512 registers. | ||
static bool reg_is_avx2_or_512(GdbServerRegister reg, RegData& out) noexcept { | ||
if(reg < DREG_64_YMM0H || reg > DREG_64_K7) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Space after |
||
return false; | ||
} | ||
|
||
const auto selector = (reg - DREG_64_YMM0H) >> 4; | ||
DEBUG_ASSERT(selector >= 0 && selector <= 5 && "GdbServerRegister enum values has been changed."); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. "have been changed" There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This selector thing seems a bit hard to understand. I think it's probably easier to just write out explicit |
||
const auto cfg = RegisterConfigLookupTable[selector]; | ||
out.xsave_feature_bit = cfg.feature; | ||
out.size = cfg.size; | ||
|
||
// only YMM16-31 and ZMM16-31 have a base offset (16 and 32 respectively) | ||
const auto base_offset = cfg.size * (selector == YMM16_31) | cfg.size * (selector == ZMM16_31); | ||
out.offset = cfg.register_offset(reg, base_offset); | ||
return true; | ||
} | ||
|
||
// Return the size and data location of register |regno|. | ||
// If we can't read the register, returns -1 in 'offset'. | ||
|
@@ -95,6 +137,14 @@ static RegData xsave_register_data(SupportedArch arch, GdbServerRegister regno) | |
regno = (GdbServerRegister)(regno - DREG_YMM0H + DREG_64_YMM0H); | ||
break; | ||
} | ||
if(regno >= DREG_ZMM0H && regno <= DREG_ZMM7H) { | ||
regno = (GdbServerRegister)(regno - DREG_ZMM0H + DREG_64_ZMM0H); | ||
break; | ||
} | ||
if(regno >= DREG_K0 && regno <= DREG_K7) { | ||
regno = (GdbServerRegister)(regno - DREG_K0 + DREG_64_K0); | ||
break; | ||
} | ||
if (regno == DREG_MXCSR) { | ||
regno = DREG_64_MXCSR; | ||
} else if (regno == DREG_PKRU) { | ||
|
@@ -123,9 +173,7 @@ static RegData xsave_register_data(SupportedArch arch, GdbServerRegister regno) | |
return result; | ||
} | ||
|
||
if (reg_in_range(regno, DREG_64_YMM0H, DREG_64_YMM15H, AVX_xsave_offset, 16, | ||
16, &result)) { | ||
result.xsave_feature_bit = AVX_FEATURE_BIT; | ||
if(reg_is_avx2_or_512(regno, result)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Space after |
||
return result; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -87,10 +87,14 @@ static uint32_t get_cpu_features(SupportedArch arch) { | |
auto cpuid_data = cpuid(CPUID_GETEXTENDEDFEATURES, 0); | ||
if ((cpuid_data.ecx & PKU_FEATURE_FLAG) == PKU_FEATURE_FLAG) { | ||
// PKU (Skylake) implies AVX (Sandy Bridge). | ||
cpu_features |= GdbServerConnection::CPU_AVX | GdbServerConnection::CPU_PKU; | ||
cpu_features |= GdbServerConnection::CPU_AVX | GdbServerConnection::CPU_AVX512 | GdbServerConnection::CPU_PKU; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does PKU imply AVX512? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In general nothing implies anything because these feature sets can be independently disabled. We tell Pernosco customers to disable AVX-512 all the time ... |
||
break; | ||
} | ||
|
||
if((cpuid_data.ebx & AVX_512_FOUNDATION_FLAG) == AVX_512_FOUNDATION_FLAG) { | ||
cpu_features |= GdbServerConnection::CPU_AVX512 | GdbServerConnection::CPU_AVX; | ||
} | ||
|
||
cpuid_data = cpuid(CPUID_GETFEATURES, 0); | ||
// We're assuming here that AVX support on the system making the recording | ||
// is the same as the AVX support during replay. But if that's not true, | ||
|
@@ -108,6 +112,8 @@ static uint32_t get_cpu_features(SupportedArch arch) { | |
return 0; | ||
} | ||
|
||
LOG(debug) << "cpu features " << std::hex << cpu_features; | ||
|
||
return cpu_features; | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we really need this? Do we actually need to support compilers that don't understand AVX512 assembly?