- Add RETGUARD to clang for amd64. This security mechanism uses per-function random cookies to protect access to function return instructions, with the effect that the integrity of the return address is protected, and function return instructions are harder to use in ROP gadgets. On function entry the return address is combined with a per-function random cookie and stored in the stack frame. The integrity of this value is verified before function return, and if this check fails, the program aborts. In this way RETGUARD is an improved stack protector, since the cookies are per-function. The verification routine is constructed such that the binary space immediately before each ret instruction is padded with int3 instructions, which makes these return instructions difficult to use in ROP gadgets. In the kernel, this has the effect of removing approximately 50% of total ROP gadgets, and 15% of unique ROP gadgets compared to the 6.3 release kernel. Function epilogues are essentially gadget free, leaving only the polymorphic gadgets that result from jumping into the instruction stream partway through other instructions. Work to remove these gadgets will continue through other mechanisms. - Refactor RETGUARD to make adding additional architectures easier. - Implement -msave-args in clang/llvm, as Sun did for GCC. Index: lib/Target/X86/X86FrameLowering.cpp --- lib/Target/X86/X86FrameLowering.cpp.orig +++ lib/Target/X86/X86FrameLowering.cpp @@ -14,6 +14,7 @@ #include "X86InstrBuilder.h" #include "X86InstrInfo.h" #include "X86MachineFunctionInfo.h" +#include "X86ReturnProtectorLowering.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" #include "llvm/ADT/SmallSet.h" @@ -46,7 +47,7 @@ X86FrameLowering::X86FrameLowering(const X86Subtarget MaybeAlign StackAlignOverride) : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(), STI.is64Bit() ? 
-8 : -4), - STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) { + STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()), RPL() { // Cache a bunch of frame-related predicates for this subtarget. SlotSize = TRI->getSlotSize(); Is64Bit = STI.is64Bit(); @@ -54,6 +55,7 @@ X86FrameLowering::X86FrameLowering(const X86Subtarget // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64(); StackPtr = TRI->getStackRegister(); + SaveArgs = Is64Bit ? STI.getSaveArgs() : 0; } bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { @@ -99,7 +101,8 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() || MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() || MFI.hasStackMap() || MFI.hasPatchPoint() || - MFI.hasCopyImplyingStackAdjustment()); + MFI.hasCopyImplyingStackAdjustment() || + SaveArgs); } static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) { @@ -1213,6 +1216,24 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasic } } +// FIXME: Get this from tablegen. +static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv, + const X86Subtarget &Subtarget) { + assert(Subtarget.is64Bit()); + + if (Subtarget.isCallingConvWin64(CallConv)) { + static const MCPhysReg GPR64ArgRegsWin64[] = { + X86::RCX, X86::RDX, X86::R8, X86::R9 + }; + return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64)); + } + + static const MCPhysReg GPR64ArgRegs64Bit[] = { + X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 + }; + return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit)); +} + bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const { // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be // clobbered by any interrupt handler. 
@@ -1537,6 +1558,43 @@ void X86FrameLowering::emitPrologue(MachineFunction &M .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); + if (SaveArgs && !Fn.arg_empty()) { + ArrayRef<MCPhysReg> GPRs = + get64BitArgumentGPRs(Fn.getCallingConv(), STI); + unsigned arg_size = Fn.arg_size(); + unsigned RI = 0; + int64_t SaveSize = 0; + + if (Fn.hasStructRetAttr()) { + GPRs = GPRs.drop_front(1); + arg_size--; + } + + for (MCPhysReg Reg : GPRs) { + if (++RI > arg_size) + break; + + SaveSize += SlotSize; + + BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r)) + .addReg(Reg) + .setMIFlag(MachineInstr::FrameSetup); + } + + // Realign the stack. PUSHes are the most space efficient. + while (SaveSize % getStackAlignment()) { + BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r)) + .addReg(GPRs.front()) + .setMIFlag(MachineInstr::FrameSetup); + + SaveSize += SlotSize; + } + + //dlg StackSize -= SaveSize; + //dlg MFI.setStackSize(StackSize); + X86FI->setSaveArgSize(SaveSize); + } + if (NeedsDwarfCFI) { // Mark effective beginning of when frame pointer becomes valid. // Define the current CFA to use the EBP/RBP register. @@ -2047,10 +2105,17 @@ void X86FrameLowering::emitEpilogue(MachineFunction &M int Offset = 16 + mergeSPUpdates(MBB, MBBI, true); emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/true); } - // Pop EBP. - BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), + + if (X86FI->getSaveArgSize()) { + // LEAVE is effectively mov rbp,rsp; pop rbp + BuildMI(MBB, MBBI, DL, TII.get(X86::LEAVE64)) + .setMIFlag(MachineInstr::FrameDestroy); + } else { + // Pop EBP. + BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr) .setMIFlag(MachineInstr::FrameDestroy); + } // We need to reset FP to its untagged state on return. Bit 60 is currently // used to show the presence of an extended frame. 
@@ -2087,6 +2152,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &M if (Opc != X86::DBG_VALUE && !PI->isTerminator()) { if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) && (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) && + (Opc != X86::LEAVE64 || !PI->getFlag(MachineInstr::FrameDestroy)) && (Opc != X86::BTR64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)) && (Opc != X86::ADD64ri8 || !PI->getFlag(MachineInstr::FrameDestroy))) break; @@ -2266,6 +2332,8 @@ StackOffset X86FrameLowering::getFrameIndexReference(c "FPDelta isn't aligned per the Win64 ABI!"); } + if (FI >= 0) + Offset -= X86FI->getSaveArgSize(); if (TRI->hasBasePointer(MF)) { assert(HasFP && "VLAs and dynamic stack realign, but no FP?!"); @@ -3699,6 +3767,10 @@ void X86FrameLowering::adjustFrameForMsvcCxxEh(Machine addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)), UnwindHelpFI) .addImm(-2); +} + +const ReturnProtectorLowering *X86FrameLowering::getReturnProtector() const { + return &RPL; } void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced(