Skip to content

Commit

Permalink
Patch EXTRQ
Browse files Browse the repository at this point in the history
  • Loading branch information
OFFTKP committed Sep 17, 2024
1 parent b42034d commit 074c2d7
Show file tree
Hide file tree
Showing 2 changed files with 287 additions and 5 deletions.
132 changes: 131 additions & 1 deletion src/core/cpu_patches.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <set>
#include <Zydis/Zydis.h>
#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/decoder.h"
Expand All @@ -27,6 +28,16 @@

using namespace Xbyak::util;

// Emits an instruction through the in-scope code generator `c`: the `v`-prefixed variant
// (c.v<OPCODE>) when the host CPU reports AVX support, otherwise the plain variant (c.<OPCODE>).
// The remaining macro arguments are forwarded unchanged as the instruction operands.
#define MAYBE_AVX(OPCODE, ...) \
    [&] { \
        Cpu host_cpu; \
        if (!host_cpu.has(Cpu::tAVX)) { \
            c.OPCODE(__VA_ARGS__); \
            return; \
        } \
        c.v##OPCODE(__VA_ARGS__); \
    }()

namespace Core {

static Xbyak::Reg ZydisToXbyakRegister(const ZydisRegister reg) {
Expand Down Expand Up @@ -587,6 +598,114 @@ static void GenerateTcbAccess(const ZydisDecodedOperand* operands, Xbyak::CodeGe

#endif // __APPLE__

/// Patch filter that accepts an instruction only when the host CPU does not report SSE4a.
/// The decoded operands are not inspected.
static bool FilterNoSSE4a(const ZydisDecodedOperand*) {
    const bool host_has_sse4a = Cpu().has(Cpu::tSSE4a);
    return !host_has_sse4a;
}

static void GenerateEXTRQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;

ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER, "operand 0 must be a register");

const auto dst = ZydisToXbyakRegisterOperand(operands[0]);

ASSERT_MSG(dst.isXMM(), "operand 0 must be an XMM register");

Xbyak::Xmm xmm_dst = *reinterpret_cast<const Xbyak::Xmm*>(&dst);

if (immediateForm) {
u8 length = operands[1].imm.value.u & 0x3F;
u8 index = operands[2].imm.value.u & 0x3F;
if (length == 0) {
length = 64;
}

LOG_DEBUG(Core, "Patching immediate form EXTRQ, length: {}, index: {}", length, index);

const Xbyak::Reg64 scratch1 = rax;
const Xbyak::Reg64 scratch2 = rcx;

// Set rsp to before red zone and save scratch registers
c.lea(rsp, ptr[rsp - 128]);
c.pushfq();
c.push(scratch1);
c.push(scratch2);

u64 mask = (1ULL << length) - 1;

// Get lower qword from xmm register
MAYBE_AVX(movq, scratch1, xmm_dst);

if (index != 0) {
c.shr(scratch1, index);
}

// We need to move mask to a register because we can't use all the possible
// immediate values with `and reg, imm32`
c.mov(scratch2, mask);
c.and_(scratch1, scratch2);

// Writeback to xmm register, extrq instruction says top 64-bits are undefined so we don't
// care to preserve them
MAYBE_AVX(movq, xmm_dst, scratch1);

c.pop(scratch2);
c.pop(scratch1);
c.popfq();
c.lea(rsp, ptr[rsp + 128]);
} else {
ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER &&
operands[0].reg.value >= ZYDIS_REGISTER_XMM0 &&
operands[0].reg.value <= ZYDIS_REGISTER_XMM15 &&
operands[1].reg.value >= ZYDIS_REGISTER_XMM0 &&
operands[1].reg.value <= ZYDIS_REGISTER_XMM15,
"Unexpected operand types for EXTRQ instruction");

const auto src = ZydisToXbyakRegisterOperand(operands[1]);

ASSERT_MSG(src.isXMM(), "operand 1 must be an XMM register");

Xbyak::Xmm xmm_src = *reinterpret_cast<const Xbyak::Xmm*>(&src);

const Xbyak::Reg64 scratch1 = rax;
const Xbyak::Reg64 scratch2 = rcx;
const Xbyak::Reg64 mask = rdx;

c.lea(rsp, ptr[rsp - 128]);
c.pushfq();
c.push(scratch1);
c.push(scratch2);
c.push(mask);

// Construct the mask out of the length that resides in bottom 6 bits of source xmm
MAYBE_AVX(movq, scratch1, xmm_src);
c.mov(scratch2, scratch1);
c.and_(scratch2, 0x3F);
c.mov(mask, 1);
c.shl(mask, cl);
c.dec(mask);

// Get the shift amount and store it in scratch2
c.shr(scratch1, 8);
c.and_(scratch1, 0x3F);
c.mov(scratch2, scratch1); // cl now contains the shift amount

MAYBE_AVX(movq, scratch1, xmm_dst);
c.shr(scratch1, cl);
c.and_(scratch1, mask);
MAYBE_AVX(movq, xmm_dst, scratch1);

c.pop(mask);
c.pop(scratch2);
c.pop(scratch1);
c.popfq();
c.lea(rsp, ptr[rsp + 128]);
}
}

using PatchFilter = bool (*)(const ZydisDecodedOperand*);
using InstructionGenerator = void (*)(const ZydisDecodedOperand*, Xbyak::CodeGenerator&);
struct PatchInfo {
Expand All @@ -608,6 +727,8 @@ static const std::unordered_map<ZydisMnemonic, PatchInfo> Patches = {
{ZYDIS_MNEMONIC_MOV, {FilterTcbAccess, GenerateTcbAccess, false}},
#endif

{ZYDIS_MNEMONIC_EXTRQ, {FilterNoSSE4a, GenerateEXTRQ, true}},

#ifdef __APPLE__
// Patches for instruction sets not supported by Rosetta 2.
// BMI1
Expand Down Expand Up @@ -671,14 +792,23 @@ static std::pair<bool, u64> TryPatch(u8* code, PatchModule* module) {

if (Patches.contains(instruction.mnemonic)) {
const auto& patch_info = Patches.at(instruction.mnemonic);
bool needs_trampoline = patch_info.trampoline;
if (patch_info.filter(operands)) {
auto& patch_gen = module->patch_gen;

if (needs_trampoline && instruction.length < 5) {
// Trampoline is needed but instruction is too short to patch.
// Should be handled at illegal instruction handler.
// This if is for Linux which does some AOT patching,
// should be removed if that gets removed.
return std::make_pair(false, instruction.length);
}

// Reset state and move to current code position.
patch_gen.reset();
patch_gen.setSize(code - patch_gen.getCode());

if (patch_info.trampoline) {
if (needs_trampoline) {
auto& trampoline_gen = module->trampoline_gen;
const auto trampoline_ptr = trampoline_gen.getCurr();

Expand Down
160 changes: 156 additions & 4 deletions src/core/signals.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,150 @@
#include <csignal>
#ifdef ARCH_X86_64
#include <Zydis/Formatter.h>
#include <sys/ucontext.h>
#endif
#endif

namespace {

#if defined(ARCH_X86_64)

// Advances the saved instruction pointer stored in the platform-specific context `ctx` by
// `length` bytes, so that execution resumes after an instruction emulated by the handler.
// `ctx` is the raw context pointer handed to the exception/signal handler.
#if defined(_WIN32)
#define INCREMENT_RIP(ctx, length) (((CONTEXT*)(ctx))->Rip += (length))
#elif defined(__APPLE__)
// On macOS uc_mcontext is a pointer and the thread state has no gregs array.
#define INCREMENT_RIP(ctx, length) (((ucontext_t*)(ctx))->uc_mcontext->__ss.__rip += (length))
#else
#define INCREMENT_RIP(ctx, length) (((ucontext_t*)(ctx))->uc_mcontext.gregs[REG_RIP] += (length))
#endif

void* getXmmPointer(void* ctx, u32 index) {
#if defined(_WIN32)
#define CASE(index) \
case index: \
return (void*)(&(((CONTEXT*)ctx)->Xmm##index.Low))
#elif defined(__APPLE__)
#define CASE(index) \
case index: \
return (void*)(&((ucontext_t*)ctx)->uc_mcontext.__fs.fpu_xmm##index);
#else
#define CASE(index) return (void*)(&((ucontext_t*)ctx)->uc_mcontext.fpregs->_xmm[index].element[0])
#endif
switch (index) {
CASE(0);
CASE(1);
CASE(2);
CASE(3);
CASE(4);
CASE(5);
CASE(6);
CASE(7);
CASE(8);
CASE(9);
CASE(10);
CASE(11);
CASE(12);
CASE(13);
CASE(14);
CASE(15);
default: {
UNREACHABLE_MSG("Invalid XMM register index: {}", index);
return nullptr;
}
}
#undef CASE
}

// We need to check, before patching, if there's enough space for a relative jump to the trampoline.
// If there isn't, the instruction must be handled specially in the illegal instruction handler
// itself.
//
// Returns true when the instruction at `code_address` is shorter than the 5 bytes needed for the
// jump, false otherwise. If the instruction cannot be decoded the error is logged and false is
// returned, so that the regular illegal-instruction dispatch decides what to do with it.
bool shouldNotBePatched(void* code_address) {
    ZydisDecodedInstruction instruction;
    ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
    const auto status =
        Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address);
    if (!ZYAN_SUCCESS(status)) {
        LOG_ERROR(Core, "Failed to decode instruction at: {}", fmt::ptr(code_address));
        // `instruction` was not filled in; reading its length would use an indeterminate value.
        return false;
    }

    // not enough bytes for a relative jump for the trampoline
    return instruction.length < 5;
}

bool handleIllegalInstruction(void* ctx, void* code_address) {
ZydisDecodedInstruction instruction;
ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
const auto status =
Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address);

switch (instruction.mnemonic) {
case ZYDIS_MNEMONIC_EXTRQ: {
bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
if (immediateForm) {
LOG_ERROR(Core, "EXTRQ immediate form should have been patched at code address: {}",
fmt::ptr(code_address));
return false;
} else {
ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER &&
operands[0].reg.value >= ZYDIS_REGISTER_XMM0 &&
operands[0].reg.value <= ZYDIS_REGISTER_XMM15 &&
operands[1].reg.value >= ZYDIS_REGISTER_XMM0 &&
operands[1].reg.value <= ZYDIS_REGISTER_XMM15,
"Unexpected operand types for EXTRQ instruction");

const auto dstIndex = operands[0].reg.value - ZYDIS_REGISTER_XMM0;
const auto srcIndex = operands[1].reg.value - ZYDIS_REGISTER_XMM0;

const auto dst = getXmmPointer(ctx, dstIndex);
const auto src = getXmmPointer(ctx, srcIndex);

u64 lowQWordSrc;
memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc));

u64 lowQWordDst;
memcpy(&lowQWordDst, dst, sizeof(lowQWordDst));

u64 mask = lowQWordSrc & 0x3F;
mask = (1ULL << mask) - 1;

u64 shift = (lowQWordSrc >> 8) & 0x3F;

lowQWordDst >>= shift;
lowQWordDst &= mask;

memcpy(dst, &lowQWordDst, sizeof(lowQWordDst));

INCREMENT_RIP(ctx, instruction.length);

return true;
}
break;
}
default: {
LOG_ERROR(Core, "Unhandled illegal instruction at code address {}: {}",
fmt::ptr(code_address), ZydisMnemonicGetString(instruction.mnemonic));
return false;
}
}
}
#elif defined(ARCH_ARM64)
// These functions shouldn't be needed for ARM as it will use a JIT so there's no need to patch
// instructions. Returning false lets it go through with whatever handler is set up.
// ARM64 variant: always reports false, i.e. no instruction is ever singled out for special
// handling in the illegal instruction handler. `code_address` is not inspected.
bool shouldNotBePatched(void* code_address) {
return false;
}

// ARM64 variant: never emulates anything and always returns false. It takes the same
// (context, code address) parameters as the x86_64 implementation so that the signal handlers,
// which pass both arguments, compile on every architecture.
bool handleIllegalInstruction(void* ctx, void* code_address) {
    return false;
}
#else
#error "Unsupported architecture"
#endif
} // namespace

namespace Core {

#if defined(_WIN32)
Expand All @@ -32,7 +173,11 @@ static LONG WINAPI SignalHandler(EXCEPTION_POINTERS* pExp) noexcept {
pExp->ExceptionRecord->ExceptionInformation[0] == 1);
break;
case EXCEPTION_ILLEGAL_INSTRUCTION:
handled = signals->DispatchIllegalInstruction(code_address);
if (shouldNotBePatched(code_address)) {
handled = handleIllegalInstruction((void*)pExp->ContextRecord, code_address);
} else {
handled = signals->DispatchIllegalInstruction(code_address);
}
break;
default:
break;
Expand Down Expand Up @@ -99,9 +244,16 @@ static void SignalHandler(int sig, siginfo_t* info, void* raw_context) {
}
break;
case SIGILL:
if (!signals->DispatchIllegalInstruction(code_address)) {
UNREACHABLE_MSG("Unhandled illegal instruction at code address {}: {}",
fmt::ptr(code_address), DisassembleInstruction(code_address));
if (shouldNotBePatched(code_address)) {
if (!handleIllegalInstruction(raw_context, code_address)) {
UNREACHABLE_MSG("Unhandled illegal instruction at code address {}: {}",
fmt::ptr(code_address), DisassembleInstruction(code_address));
}
} else {
if (!signals->DispatchIllegalInstruction(code_address)) {
UNREACHABLE_MSG("Unhandled illegal instruction at code address {}: {}",
fmt::ptr(code_address), DisassembleInstruction(code_address));
}
}
break;
default:
Expand Down

0 comments on commit 074c2d7

Please sign in to comment.