Skip to content

Commit

Permalink
Initial AVX512 Support
Browse files Browse the repository at this point in the history
CMake Changes:
Added the AVX512F_SUPPORT flag to CMake. It is set by checking
whether the system supports AVX512F. If it doesn't, none of the AVX512 tests are built (and therefore none are run).

Added the target descriptions. Assumed that PKRU support also implies AVX512 support as I could find no information that said otherwise.

Added gdb_avx512 test - sets 3 ZMM registers and reads them back in a gdb session of a replay to verify the contents.

We place the mask registers K0-K7 after ZMM31H internally, because this simplifies the logic that parses the XSAVE area. The target description we provide takes this into account (and also places them last), so they come after the ZMM registers in the 'g' packet. GDB doesn't care, so long as we do what we say in the target description.

Changed the test harness util.sh to also export the TESTNAME variable, as we may (I do, at least) want to query from the Python script which mode we are running in (64-bit/32-bit). See gdb_avx512.py for further info.
  • Loading branch information
theIDinside committed Feb 19, 2025
1 parent 1d59f70 commit f72d79d
Show file tree
Hide file tree
Showing 17 changed files with 475 additions and 34 deletions.
32 changes: 31 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ if (SUPPORTS_MACRO_PREFIX_MAP)
set(FLAGS_COMMON "${FLAGS_COMMON} -fmacro-prefix-map=${CMAKE_SOURCE_DIR}/=")
endif()

# Probe whether the AVX512 test program compiles when targeting the build
# machine's own CPU (-march=native). AVX512F_SUPPORT gates every AVX512 test
# target and test registration further down in this file.
# The probe's scratch directory is the build tree, so configuring never writes
# temporary files into the source tree.
try_compile(AVX512F_SUPPORT
  "${CMAKE_BINARY_DIR}"
  "${CMAKE_SOURCE_DIR}/src/test/gdb_avx512.c"
  CMAKE_FLAGS -DCOMPILE_DEFINITIONS=-march=native)

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAGS_COMMON} -Wstrict-prototypes -std=gnu11")
# Define __STDC_LIMIT_MACROS so |#include <stdint.h>| works as expected.
# Define __STDC_FORMAT_MACROS so |#include <inttypes.h>| works as expected.
Expand Down Expand Up @@ -788,11 +790,13 @@ set_source_files_properties(src/exec_stub.c

set(RR_GDB_RESOURCES
32bit-avx.xml
32bit-avx512.xml
32bit-core.xml
32bit-linux.xml
32bit-sse.xml
32bit-pkeys.xml
64bit-avx.xml
64bit-avx512.xml
64bit-core.xml
64bit-linux.xml
64bit-seg.xml
Expand Down Expand Up @@ -1798,6 +1802,10 @@ set(TESTS_WITHOUT_PROGRAM
when
)

# Register the gdb_avx512 test only when the AVX512F compile probe succeeded.
if(AVX512F_SUPPORT)
  list(APPEND TESTS_WITHOUT_PROGRAM gdb_avx512)
endif()

if(BUILD_TESTS)
# Part of the installable testsuite (test files).
if(INSTALL_TESTSUITE)
Expand Down Expand Up @@ -1872,6 +1880,13 @@ if(BUILD_TESTS)

add_executable(prctl_tsc_supported src/test/prctl_tsc_supported.c)
post_build_executable(prctl_tsc_supported)
# Build the gdb_avx512 test program, but only when the AVX512F compile probe
# succeeded. It is compiled with -mavx512f and -g3 on top of the common test
# flags.
if(AVX512F_SUPPORT)
  add_executable(gdb_avx512 src/test/gdb_avx512.c)
  post_build_executable(gdb_avx512)
  set_property(TARGET gdb_avx512
    PROPERTY COMPILE_FLAGS "${RR_TEST_FLAGS} -g3 -mavx512f")
  add_dependencies(gdb_avx512 Generated)
endif()

# Test disabled because it requires libuvc to be built and installed, and a
# working USB camera
Expand Down Expand Up @@ -1993,7 +2008,7 @@ if(BUILD_TESTS)
bash source_dir/src/test/${test}.run ${testname} -n bin_dir ${TEST_MONITOR_DEFAULT_TIMEOUT})
configure_test(${test}-no-syscallbuf)
endforeach(test)

# Run 32-bit tests on 64-bit builds.
# We copy the test files into '32' subdirectories in the output
# directory, so we can set different compile options on them.
Expand All @@ -2014,6 +2029,11 @@ if(BUILD_TESTS)
PROPERTIES COMPILE_FLAGS "-m32 ${RR_TEST_FLAGS}")
endforeach(test)

# Stage a private copy of the AVX512 test source under the '32' subdirectory so
# the 32-bit build can give it its own compile options.
if(AVX512F_SUPPORT)
  configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/test/gdb_avx512.c"
    "${CMAKE_CURRENT_BINARY_DIR}/32/gdb_avx512.c"
    COPYONLY)
  # Name the copied file explicitly. `${test}` is the loop variable of the
  # foreach() loops around this block and does not refer to gdb_avx512 here,
  # so using it would attach the flags to the wrong (nonexistent) file.
  set_source_files_properties("${CMAKE_CURRENT_BINARY_DIR}/32/gdb_avx512.c"
    PROPERTIES COMPILE_FLAGS "-m32 ${RR_TEST_FLAGS} -g3 -mavx512f")
endif()

foreach(test ${BASIC_CPP_TESTS})
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/test/${test}.cc"
"${CMAKE_CURRENT_BINARY_DIR}/32/${test}.cc"
Expand Down Expand Up @@ -2082,6 +2102,16 @@ if(BUILD_TESTS)
COMPILE_FLAGS "-m32 ${RR_TEST_FLAGS} -g -O3")
add_dependencies(watchpoint_unaligned2_32 Generated)

# 32-bit build of the gdb_avx512 test program, compiled from the copy staged in
# the '32' subdirectory of the build tree. Both compiling and linking use -m32.
if(AVX512F_SUPPORT)
  add_executable(gdb_avx512_32 "${CMAKE_CURRENT_BINARY_DIR}/32/gdb_avx512.c")
  post_build_executable(gdb_avx512_32)
  set_property(TARGET gdb_avx512_32
    PROPERTY LINK_FLAGS "-m32")
  set_property(TARGET gdb_avx512_32
    PROPERTY COMPILE_FLAGS "-m32 ${RR_TEST_FLAGS} -g3 -mavx512f")
  add_dependencies(gdb_avx512_32 Generated)
endif()

add_library(test_lib_32
"${CMAKE_CURRENT_BINARY_DIR}/32/test_lib.c"
)
Expand Down
64 changes: 56 additions & 8 deletions src/ExtraRegisters.cc
Original file line number Diff line number Diff line change
Expand Up @@ -66,17 +66,59 @@ static bool reg_in_range(GdbServerRegister regno, GdbServerRegister low, GdbServ
return true;
}

static const int AVX_FEATURE_BIT = 2;
static const int PKRU_FEATURE_BIT = 9;
// XSAVE feature numbers. The AVX* values are used by
// RegisterConfigLookupTable as indices into
// xsave_native_layout().feature_layouts and are reported to callers through
// RegData::xsave_feature_bit.
static constexpr int AVX_FEATURE_BIT = 2;
static constexpr int AVX_OPMASK_FEATURE_BIT = 5;
static constexpr int AVX_ZMM_HI256_FEATURE_BIT = 6;
static constexpr int AVX_ZMM_HI16_FEATURE_BIT = 7;
static constexpr int PKRU_FEATURE_BIT = 9;

// Mask with only the PKRU feature bit set.
static const uint64_t PKRU_FEATURE_MASK = 1 << PKRU_FEATURE_BIT;

// Position of the XSAVE header: it starts at offset 512 and is 64 long, so
// xsave_header_end is 576 (presumably byte offsets into the XSAVE area —
// TODO confirm).
static const size_t xsave_header_offset = 512;
static const size_t xsave_header_size = 64;
static const size_t xsave_header_end = xsave_header_offset + xsave_header_size;
// This is always at 576 since AVX is always the first optional feature,
// if present.
static const size_t AVX_xsave_offset = 576;
struct RegisterConfig {
int8_t feature;
GdbServerRegister base;
int8_t size;
int stride;

int register_offset(GdbServerRegister reg, int base_offset) const noexcept {
const auto& layout = xsave_native_layout();
return layout.feature_layouts[feature].offset + base_offset + (reg - base) * stride;
}
};

// One entry per register range, in the order { feature, base, size, stride }.
// reg_is_avx2_or_512() picks the entry with (reg - DREG_64_YMM0H) >> 4, so the
// row order must follow the order of the ranges in the GdbServerRegister enum:
//   0: YMM0H..   1: XMM16..   2: YMM16H..   3: ZMM0H..   4: ZMM16H..   5: K0..
// Rows 1, 2 and 4 all use AVX_ZMM_HI16_FEATURE_BIT with a stride of 64; rows 2
// and 4 additionally get a base offset added by reg_is_avx2_or_512().
// NOTE(review): sizes and strides are presumably in bytes — confirm.
static constexpr std::array<RegisterConfig, 6> RegisterConfigLookupTable{
{ { AVX_FEATURE_BIT, DREG_64_YMM0H, 16, 16 },
{ AVX_ZMM_HI16_FEATURE_BIT, DREG_64_XMM16, 16, 64 },
{ AVX_ZMM_HI16_FEATURE_BIT, DREG_64_YMM16H, 16, 64 },
{ AVX_ZMM_HI256_FEATURE_BIT, DREG_64_ZMM0H, 32, 32 },
{ AVX_ZMM_HI16_FEATURE_BIT, DREG_64_ZMM16H, 32, 64 },
{ AVX_OPMASK_FEATURE_BIT, DREG_64_K0, 8, 8 } }
};

// Indices into RegisterConfigLookupTable of the two ranges that need an extra
// base offset inside their feature area.
static constexpr auto YMM16_31 = 0b10;
static constexpr auto ZMM16_31 = 0b100;

// Looks up |reg| in RegisterConfigLookupTable and, on success, fills in
// |out.xsave_feature_bit|, |out.size| and |out.offset|.
//
// Returns false (leaving |out| untouched) when |reg| lies outside
// DREG_64_YMM0H..DREG_64_K7, true otherwise.
//
// Every register range covered by the table (except K0-7) is 16 registers
// long, so dividing the distance from DREG_64_YMM0H by 16 yields the table
// index.
static bool reg_is_avx2_or_512(GdbServerRegister reg, RegData& out) noexcept {
  const bool covered_by_table = reg >= DREG_64_YMM0H && reg <= DREG_64_K7;
  if (!covered_by_table) {
    return false;
  }

  const auto selector = (reg - DREG_64_YMM0H) >> 4;
  DEBUG_ASSERT(selector >= 0 && selector <= 5 && "GdbServerRegister enum values has been changed.");
  const RegisterConfig& entry = RegisterConfigLookupTable[selector];

  // Only YMM16-31 and ZMM16-31 start at a non-zero position within their
  // entry (16 and 32 respectively, i.e. the entry's own size).
  int base_offset = 0;
  if (selector == YMM16_31 || selector == ZMM16_31) {
    base_offset = entry.size;
  }

  out.xsave_feature_bit = entry.feature;
  out.size = entry.size;
  out.offset = entry.register_offset(reg, base_offset);
  return true;
}

// Return the size and data location of register |regno|.
// If we can't read the register, returns -1 in 'offset'.
Expand All @@ -95,6 +137,14 @@ static RegData xsave_register_data(SupportedArch arch, GdbServerRegister regno)
regno = (GdbServerRegister)(regno - DREG_YMM0H + DREG_64_YMM0H);
break;
}
if(regno >= DREG_ZMM0H && regno <= DREG_ZMM7H) {
regno = (GdbServerRegister)(regno - DREG_ZMM0H + DREG_64_ZMM0H);
break;
}
if(regno >= DREG_K0 && regno <= DREG_K7) {
regno = (GdbServerRegister)(regno - DREG_K0 + DREG_64_K0);
break;
}
if (regno == DREG_MXCSR) {
regno = DREG_64_MXCSR;
} else if (regno == DREG_PKRU) {
Expand Down Expand Up @@ -123,9 +173,7 @@ static RegData xsave_register_data(SupportedArch arch, GdbServerRegister regno)
return result;
}

if (reg_in_range(regno, DREG_64_YMM0H, DREG_64_YMM15H, AVX_xsave_offset, 16,
16, &result)) {
result.xsave_feature_bit = AVX_FEATURE_BIT;
if(reg_is_avx2_or_512(regno, result)) {
return result;
}

Expand Down
63 changes: 44 additions & 19 deletions src/GdbServer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -186,25 +186,7 @@ static void maybe_singlestep_for_event(Task* t, GdbRequest* req) {

void GdbServer::dispatch_regs_request(const Registers& regs,
const ExtraRegisters& extra_regs) {
GdbServerRegister end;
// Send values for all the registers we sent XML register descriptions for.
// Those descriptions are controlled by GdbServerConnection::cpu_features().
bool have_PKU = dbg->cpu_features() & GdbServerConnection::CPU_PKU;
bool have_AVX = dbg->cpu_features() & GdbServerConnection::CPU_AVX;
switch (regs.arch()) {
case x86:
end = have_PKU ? DREG_PKRU : (have_AVX ? DREG_YMM7H : DREG_ORIG_EAX);
break;
case x86_64:
end = have_PKU ? DREG_64_PKRU : (have_AVX ? DREG_64_YMM15H : DREG_GS_BASE);
break;
case aarch64:
end = DREG_FPCR;
break;
default:
FATAL() << "Unknown architecture";
return;
}
const GdbServerRegister end = arch_reg_end(regs.arch());
vector<GdbServerRegisterValue> rs;
rs.reserve(end);
for (GdbServerRegister r = GdbServerRegister(0); r <= end; r = GdbServerRegister(r + 1)) {
Expand Down Expand Up @@ -2317,6 +2299,49 @@ void GdbServer::read_back_debugger_mem(DiversionSession& session) {
}
}

// Returns the last register that is sent to the debugger for |arch|.
//
// The answer is computed on the first call and stored in target_regs_end;
// later calls return the stored value as long as it is not
// GdbServerRegister(0).
// NOTE(review): the stored value is returned regardless of |arch| on later
// calls — confirm that |arch| cannot change over the lifetime of this
// GdbServer.
GdbServerRegister GdbServer::arch_reg_end(SupportedArch arch) noexcept {
  if (target_regs_end == GdbServerRegister(0)) {
    // Send values for all the registers we sent XML register descriptions for.
    // Those descriptions are controlled by GdbServerConnection::cpu_features().
    const bool pku = dbg->cpu_features() & GdbServerConnection::CPU_PKU;
    const bool avx = dbg->cpu_features() & GdbServerConnection::CPU_AVX;
    const bool avx512 = dbg->cpu_features() & GdbServerConnection::CPU_AVX512;

    // Within each x86 flavour the checks run in the order PKU, AVX512, AVX,
    // and the first feature present decides the final register.
    switch (arch) {
      case x86:
        target_regs_end = pku      ? DREG_PKRU
                          : avx512 ? DREG_K7
                          : avx    ? DREG_YMM7H
                                   : DREG_ORIG_EAX;
        break;
      case x86_64:
        target_regs_end = pku      ? DREG_64_PKRU
                          : avx512 ? DREG_64_K7
                          : avx    ? DREG_64_YMM15H
                                   : DREG_GS_BASE;
        break;
      case aarch64:
        target_regs_end = DREG_FPCR;
        break;
      default:
        FATAL() << "Unknown architecture";
        break;
    }
  }
  return target_regs_end;
}

bool GdbServer::debugger_mem_region(ThreadGroupUid tguid, remote_ptr<void> addr,
int* prot, MemoryRange* mem_range) {
auto it = debugger_mem.find(tguid);
Expand Down
5 changes: 5 additions & 0 deletions src/GdbServer.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,9 @@ class GdbServer {
// Read back the contents of all debugger memory regions from the session.
void read_back_debugger_mem(DiversionSession& session);

// Get the last GdbServerRegister for |arch|. If it hasn't been determined yet,
// it is computed from the connection's CPU features and remembered in
// target_regs_end.
GdbServerRegister arch_reg_end(SupportedArch arch) noexcept;

// dbg is never null.
std::unique_ptr<GdbServerConnection> dbg;
// The ThreadGroupUid of the task being debugged.
Expand Down Expand Up @@ -316,6 +319,8 @@ class GdbServer {
ExtraRegisters extra_regs;
};
std::unordered_map<int, SavedRegisters> saved_register_states;

// Cached result of arch_reg_end(). GdbServerRegister(0) means it has not been
// computed yet.
GdbServerRegister target_regs_end = GdbServerRegister(0);
};

} // namespace rr
Expand Down
8 changes: 7 additions & 1 deletion src/GdbServerConnection.cc
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,14 @@ static uint32_t get_cpu_features(SupportedArch arch) {
auto cpuid_data = cpuid(CPUID_GETEXTENDEDFEATURES, 0);
if ((cpuid_data.ecx & PKU_FEATURE_FLAG) == PKU_FEATURE_FLAG) {
// PKU (Skylake) implies AVX (Sandy Bridge).
cpu_features |= GdbServerConnection::CPU_AVX | GdbServerConnection::CPU_PKU;
cpu_features |= GdbServerConnection::CPU_AVX | GdbServerConnection::CPU_AVX512 | GdbServerConnection::CPU_PKU;
break;
}

if((cpuid_data.ebx & AVX_512_FOUNDATION_FLAG) == AVX_512_FOUNDATION_FLAG) {
cpu_features |= GdbServerConnection::CPU_AVX512 | GdbServerConnection::CPU_AVX;
}

cpuid_data = cpuid(CPUID_GETFEATURES, 0);
// We're assuming here that AVX support on the system making the recording
// is the same as the AVX support during replay. But if that's not true,
Expand All @@ -108,6 +112,8 @@ static uint32_t get_cpu_features(SupportedArch arch) {
return 0;
}

LOG(debug) << "cpu features " << std::hex << cpu_features;

return cpu_features;
}

Expand Down
9 changes: 5 additions & 4 deletions src/GdbServerConnection.h
Original file line number Diff line number Diff line change
Expand Up @@ -748,10 +748,11 @@ class GdbServerConnection {
const Features& features() { return features_; }

// Bit flags describing the target CPU. They are OR-ed together into the value
// returned by cpu_features(), and callers test individual flags with '&'.
enum {
CPU_X86_64 = 0x1,
CPU_AVX = 0x2,
CPU_AARCH64 = 0x4,
CPU_PKU = 0x8
CPU_X86_64 = 1 << 0,
CPU_AVX = 1 << 1,
CPU_AARCH64 = 1 << 2,
CPU_PKU = 1 << 3,
CPU_AVX512 = 1 << 4
};

void set_cpu_features(SupportedArch arch);
Expand Down
Loading

0 comments on commit f72d79d

Please sign in to comment.