175 lines
7.4 KiB
Text
175 lines
7.4 KiB
Text
- Add RETGUARD to clang for amd64. This security mechanism uses per-function
|
|
random cookies to protect access to function return instructions, with the
|
|
effect that the integrity of the return address is protected, and function
|
|
return instructions are harder to use in ROP gadgets.
|
|
|
|
On function entry, the return address is combined with a per-function random
|
|
cookie and stored in the stack frame. The integrity of this value is verified
|
|
before function return, and if this check fails, the program aborts. In this way
|
|
RETGUARD is an improved stack protector, since the cookies are per-function. The
|
|
verification routine is constructed such that the binary space immediately
|
|
before each ret instruction is padded with int3 instructions, which makes these
|
|
return instructions difficult to use in ROP gadgets. In the kernel, this has the
|
|
effect of removing approximately 50% of total ROP gadgets, and 15% of unique
|
|
ROP gadgets compared to the 6.3 release kernel. Function epilogues are
|
|
essentially gadget free, leaving only the polymorphic gadgets that result from
|
|
jumping into the instruction stream partway through other instructions. Work to
|
|
remove these gadgets will continue through other mechanisms.
|
|
- Refactor RETGUARD to make adding additional architectures easier.
|
|
- Implement -msave-args in clang/llvm, as Sun did for gcc.
|
|
|
|
Index: lib/Target/X86/X86FrameLowering.cpp
|
|
--- lib/Target/X86/X86FrameLowering.cpp.orig
|
|
+++ lib/Target/X86/X86FrameLowering.cpp
|
|
@@ -14,6 +14,7 @@
|
|
#include "X86InstrBuilder.h"
|
|
#include "X86InstrInfo.h"
|
|
#include "X86MachineFunctionInfo.h"
|
|
+#include "X86ReturnProtectorLowering.h"
|
|
#include "X86Subtarget.h"
|
|
#include "X86TargetMachine.h"
|
|
#include "llvm/ADT/SmallSet.h"
|
|
@@ -46,7 +47,7 @@ X86FrameLowering::X86FrameLowering(const X86Subtarget
|
|
MaybeAlign StackAlignOverride)
|
|
: TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
|
|
STI.is64Bit() ? -8 : -4),
|
|
- STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
|
|
+ STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()), RPL() {
|
|
// Cache a bunch of frame-related predicates for this subtarget.
|
|
SlotSize = TRI->getSlotSize();
|
|
Is64Bit = STI.is64Bit();
|
|
@@ -54,6 +55,7 @@ X86FrameLowering::X86FrameLowering(const X86Subtarget
|
|
// standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
|
|
Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
|
|
StackPtr = TRI->getStackRegister();
|
|
+ SaveArgs = Is64Bit ? STI.getSaveArgs() : 0;
|
|
}
|
|
|
|
bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
|
|
@@ -99,7 +101,8 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF
|
|
MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
|
|
MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
|
|
MFI.hasStackMap() || MFI.hasPatchPoint() ||
|
|
- MFI.hasCopyImplyingStackAdjustment());
|
|
+ MFI.hasCopyImplyingStackAdjustment() ||
|
|
+ SaveArgs);
|
|
}
|
|
|
|
static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) {
|
|
@@ -1213,6 +1216,24 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasic
|
|
}
|
|
}
|
|
|
|
+// FIXME: Get this from tablegen.
|
|
+static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
|
|
+ const X86Subtarget &Subtarget) {
|
|
+ assert(Subtarget.is64Bit());
|
|
+
|
|
+ if (Subtarget.isCallingConvWin64(CallConv)) {
|
|
+ static const MCPhysReg GPR64ArgRegsWin64[] = {
|
|
+ X86::RCX, X86::RDX, X86::R8, X86::R9
|
|
+ };
|
|
+ return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
|
|
+ }
|
|
+
|
|
+ static const MCPhysReg GPR64ArgRegs64Bit[] = {
|
|
+ X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
|
|
+ };
|
|
+ return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
|
|
+}
|
|
+
|
|
bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {
|
|
// x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
|
|
// clobbered by any interrupt handler.
|
|
@@ -1537,6 +1558,43 @@ void X86FrameLowering::emitPrologue(MachineFunction &M
|
|
.addReg(StackPtr)
|
|
.setMIFlag(MachineInstr::FrameSetup);
|
|
|
|
+ if (SaveArgs && !Fn.arg_empty()) {
|
|
+ ArrayRef<MCPhysReg> GPRs =
|
|
+ get64BitArgumentGPRs(Fn.getCallingConv(), STI);
|
|
+ unsigned arg_size = Fn.arg_size();
|
|
+ unsigned RI = 0;
|
|
+ int64_t SaveSize = 0;
|
|
+
|
|
+ if (Fn.hasStructRetAttr()) {
|
|
+ GPRs = GPRs.drop_front(1);
|
|
+ arg_size--;
|
|
+ }
|
|
+
|
|
+ for (MCPhysReg Reg : GPRs) {
|
|
+ if (++RI > arg_size)
|
|
+ break;
|
|
+
|
|
+ SaveSize += SlotSize;
|
|
+
|
|
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
|
|
+ .addReg(Reg)
|
|
+ .setMIFlag(MachineInstr::FrameSetup);
|
|
+ }
|
|
+
|
|
+ // Realign the stack. PUSHes are the most space efficient.
|
|
+ while (SaveSize % getStackAlignment()) {
|
|
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
|
|
+ .addReg(GPRs.front())
|
|
+ .setMIFlag(MachineInstr::FrameSetup);
|
|
+
|
|
+ SaveSize += SlotSize;
|
|
+ }
|
|
+
|
|
+ //dlg StackSize -= SaveSize;
|
|
+ //dlg MFI.setStackSize(StackSize);
|
|
+ X86FI->setSaveArgSize(SaveSize);
|
|
+ }
|
|
+
|
|
if (NeedsDwarfCFI) {
|
|
// Mark effective beginning of when frame pointer becomes valid.
|
|
// Define the current CFA to use the EBP/RBP register.
|
|
@@ -2047,10 +2105,17 @@ void X86FrameLowering::emitEpilogue(MachineFunction &M
|
|
int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
|
|
emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/true);
|
|
}
|
|
- // Pop EBP.
|
|
- BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
|
|
+
|
|
+ if (X86FI->getSaveArgSize()) {
|
|
+ // LEAVE is effectively mov rbp,rsp; pop rbp
|
|
+ BuildMI(MBB, MBBI, DL, TII.get(X86::LEAVE64))
|
|
+ .setMIFlag(MachineInstr::FrameDestroy);
|
|
+ } else {
|
|
+ // Pop EBP.
|
|
+ BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
|
|
MachineFramePtr)
|
|
.setMIFlag(MachineInstr::FrameDestroy);
|
|
+ }
|
|
|
|
// We need to reset FP to its untagged state on return. Bit 60 is currently
|
|
// used to show the presence of an extended frame.
|
|
@@ -2087,6 +2152,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &M
|
|
if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
|
|
if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
|
|
(Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
|
|
+ (Opc != X86::LEAVE64 || !PI->getFlag(MachineInstr::FrameDestroy)) &&
|
|
(Opc != X86::BTR64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)) &&
|
|
(Opc != X86::ADD64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)))
|
|
break;
|
|
@@ -2266,6 +2332,8 @@ StackOffset X86FrameLowering::getFrameIndexReference(c
|
|
"FPDelta isn't aligned per the Win64 ABI!");
|
|
}
|
|
|
|
+ if (FI >= 0)
|
|
+ Offset -= X86FI->getSaveArgSize();
|
|
|
|
if (TRI->hasBasePointer(MF)) {
|
|
assert(HasFP && "VLAs and dynamic stack realign, but no FP?!");
|
|
@@ -3699,6 +3767,10 @@ void X86FrameLowering::adjustFrameForMsvcCxxEh(Machine
|
|
addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
|
|
UnwindHelpFI)
|
|
.addImm(-2);
|
|
+}
|
|
+
|
|
+const ReturnProtectorLowering *X86FrameLowering::getReturnProtector() const {
|
|
+ return &RPL;
|
|
}
|
|
|
|
void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced(
|