Index: src/aarch64/sysv.S --- src/aarch64/sysv.S.orig +++ src/aarch64/sysv.S @@ -78,6 +78,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. cfi_startproc CNAME(ffi_call_SYSV): + bti c /* Sign the lr with x1 since that is where it will be stored */ SIGN_LR_WITH_REG(x1) @@ -138,78 +139,142 @@ CNAME(ffi_call_SYSV): /* Save the return value as directed. */ adr x5, 0f and w4, w4, #AARCH64_RET_MASK - add x5, x5, x4, lsl #3 + add x5, x5, x4, lsl #4 br x5 - /* Note that each table entry is 2 insns, and thus 8 bytes. + /* Note that each table entry is 4 insns, and thus 16 bytes, matching the lsl #4 index scaling above; each populated entry starts with bti j since it is entered through br x5. For integer data, note that we're storing into ffi_arg and therefore we want to extend to 64 bits; these types have two consecutive entries allocated for them. */ .align 4 -0: b 99f /* VOID */ +0: bti j /* VOID */ + b 99f + nop nop -1: str x0, [x3] /* INT64 */ +1: bti j /* INT64 */ + str x0, [x3] b 99f -2: stp x0, x1, [x3] /* INT128 */ + nop +2: bti j /* INT128 */ + stp x0, x1, [x3] b 99f + nop 3: brk #1000 /* UNUSED */ b 99f + nop + nop 4: brk #1000 /* UNUSED */ b 99f + nop + nop 5: brk #1000 /* UNUSED */ b 99f + nop + nop 6: brk #1000 /* UNUSED */ b 99f + nop + nop 7: brk #1000 /* UNUSED */ b 99f -8: st4 { v0.s, v1.s, v2.s, v3.s }[0], [x3] /* S4 */ + nop + nop +8: bti j /* S4 */ + st4 { v0.s, v1.s, v2.s, v3.s }[0], [x3] b 99f -9: st3 { v0.s, v1.s, v2.s }[0], [x3] /* S3 */ + nop +9: bti j /* S3 */ + st3 { v0.s, v1.s, v2.s }[0], [x3] b 99f -10: stp s0, s1, [x3] /* S2 */ + nop +10: bti j /* S2 */ + stp s0, s1, [x3] b 99f -11: str s0, [x3] /* S1 */ + nop +11: bti j /* S1 */ + str s0, [x3] b 99f -12: st4 { v0.d, v1.d, v2.d, v3.d }[0], [x3] /* D4 */ + nop +12: bti j /* D4 */ + st4 { v0.d, v1.d, v2.d, v3.d }[0], [x3] b 99f -13: st3 { v0.d, v1.d, v2.d }[0], [x3] /* D3 */ + nop +13: bti j /* D3 */ + st3 { v0.d, v1.d, v2.d }[0], [x3] b 99f -14: stp d0, d1, [x3] /* D2 */ + nop +14: bti j /* D2 */ + stp d0, d1, [x3] b 99f -15: str d0, [x3] /* D1 */ + nop +15: bti j /* D1 */ + str d0, [x3] b 99f -16: 
str q3, [x3, #48] /* Q4 */ nop -17: str q2, [x3, #32] /* Q3 */ +16: bti j /* Q4 */ + str q3, [x3, #48] nop -18: stp q0, q1, [x3] /* Q2 */ + nop +17: bti j /* Q3 */ + str q2, [x3, #32] + nop + nop +18: bti j /* Q2 */ + stp q0, q1, [x3] b 99f -19: str q0, [x3] /* Q1 */ + nop +19: bti j /* Q1 */ + str q0, [x3] b 99f -20: uxtb w0, w0 /* UINT8 */ + nop +20: bti j /* UINT8 */ + uxtb w0, w0 str x0, [x3] + nop 21: b 99f /* reserved */ nop -22: uxth w0, w0 /* UINT16 */ + nop + nop +22: bti j /* UINT16 */ + uxth w0, w0 str x0, [x3] + nop 23: b 99f /* reserved */ nop -24: mov w0, w0 /* UINT32 */ + nop + nop +24: bti j /* UINT32 */ + mov w0, w0 str x0, [x3] + nop 25: b 99f /* reserved */ nop -26: sxtb x0, w0 /* SINT8 */ + nop + nop +26: bti j /* SINT8 */ + sxtb x0, w0 str x0, [x3] + nop 27: b 99f /* reserved */ nop -28: sxth x0, w0 /* SINT16 */ + nop + nop +28: bti j /* SINT16 */ + sxth x0, w0 str x0, [x3] + nop 29: b 99f /* reserved */ nop -30: sxtw x0, w0 /* SINT32 */ + nop + nop +30: bti j /* SINT32 */ + sxtw x0, w0 str x0, [x3] + nop 31: b 99f /* reserved */ nop + nop + nop /* Return now that result has been populated. */ 99: @@ -246,6 +311,7 @@ CNAME(ffi_call_SYSV): .align 4 CNAME(ffi_closure_SYSV_V): cfi_startproc + bti c SIGN_LR stp x29, x30, [sp, #-ffi_closure_SYSV_FS]! cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) @@ -270,6 +336,7 @@ CNAME(ffi_closure_SYSV_V): .align 4 cfi_startproc CNAME(ffi_closure_SYSV): + bti c SIGN_LR stp x29, x30, [sp, #-ffi_closure_SYSV_FS]! cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) @@ -299,74 +366,136 @@ CNAME(ffi_closure_SYSV): /* Load the return value as directed. */ adr x1, 0f and w0, w0, #AARCH64_RET_MASK - add x1, x1, x0, lsl #3 + add x1, x1, x0, lsl #4 add x3, sp, #16+CALL_CONTEXT_SIZE br x1 - /* Note that each table entry is 2 insns, and thus 8 bytes. */ + /* Note that each table entry is 4 insns, and thus 16 bytes, matching the lsl #4 index scaling above; each populated entry starts with bti j since it is entered through br x1. 
*/ .align 4 -0: b 99f /* VOID */ +0: bti j /* VOID */ + b 99f nop -1: ldr x0, [x3] /* INT64 */ + nop +1: bti j /* INT64 */ + ldr x0, [x3] b 99f -2: ldp x0, x1, [x3] /* INT128 */ + nop +2: bti j /* INT128 */ + ldp x0, x1, [x3] b 99f + nop 3: brk #1000 /* UNUSED */ nop + nop + nop 4: brk #1000 /* UNUSED */ nop + nop + nop 5: brk #1000 /* UNUSED */ nop + nop + nop 6: brk #1000 /* UNUSED */ nop + nop + nop 7: brk #1000 /* UNUSED */ nop -8: ldr s3, [x3, #12] /* S4 */ nop -9: ldr s2, [x3, #8] /* S3 */ nop -10: ldp s0, s1, [x3] /* S2 */ +8: bti j /* S4 */ + ldr s3, [x3, #12] + nop + nop +9: bti j /* S3 */ + ldr s2, [x3, #8] + nop + nop +10: bti j /* S2 */ + ldp s0, s1, [x3] b 99f -11: ldr s0, [x3] /* S1 */ + nop +11: bti j /* S1 */ + ldr s0, [x3] b 99f -12: ldr d3, [x3, #24] /* D4 */ nop -13: ldr d2, [x3, #16] /* D3 */ +12: bti j /* D4 */ + ldr d3, [x3, #24] nop -14: ldp d0, d1, [x3] /* D2 */ + nop +13: bti j /* D3 */ + ldr d2, [x3, #16] + nop + nop +14: bti j /* D2 */ + ldp d0, d1, [x3] b 99f -15: ldr d0, [x3] /* D1 */ + nop +15: bti j /* D1 */ + ldr d0, [x3] b 99f -16: ldr q3, [x3, #48] /* Q4 */ nop -17: ldr q2, [x3, #32] /* Q3 */ +16: bti j /* Q4 */ + ldr q3, [x3, #48] nop -18: ldp q0, q1, [x3] /* Q2 */ + nop +17: bti j /* Q3 */ + ldr q2, [x3, #32] + nop + nop +18: bti j /* Q2 */ + ldp q0, q1, [x3] b 99f -19: ldr q0, [x3] /* Q1 */ + nop +19: bti j /* Q1 */ + ldr q0, [x3] b 99f -20: ldrb w0, [x3, #BE(7)] /* UINT8 */ + nop +20: bti j /* UINT8 */ + ldrb w0, [x3, #BE(7)] b 99f + nop 21: brk #1000 /* reserved */ nop -22: ldrh w0, [x3, #BE(6)] /* UINT16 */ + nop + nop +22: bti j /* UINT16 */ + ldrh w0, [x3, #BE(6)] b 99f + nop 23: brk #1000 /* reserved */ nop -24: ldr w0, [x3, #BE(4)] /* UINT32 */ + nop + nop +24: bti j /* UINT32 */ + ldr w0, [x3, #BE(4)] b 99f + nop 25: brk #1000 /* reserved */ nop -26: ldrsb x0, [x3, #BE(7)] /* SINT8 */ + nop + nop +26: bti j /* SINT8 */ + ldrsb x0, [x3, #BE(7)] b 99f + nop 27: brk #1000 /* reserved */ nop -28: ldrsh x0, [x3, #BE(6)] /* 
SINT16 */ + nop + nop +28: bti j /* SINT16 */ + ldrsh x0, [x3, #BE(6)] b 99f + nop 29: brk #1000 /* reserved */ nop -30: ldrsw x0, [x3, #BE(4)] /* SINT32 */ nop + nop +30: bti j /* SINT32 */ + ldrsw x0, [x3, #BE(4)] + nop + nop 31: /* reserved */ 99: ldp x29, x30, [sp], #ffi_closure_SYSV_FS cfi_adjust_cfa_offset (-ffi_closure_SYSV_FS) @@ -479,6 +608,7 @@ CNAME(ffi_closure_trampoline_table_page): .align 4 CNAME(ffi_go_closure_SYSV_V): cfi_startproc + bti c stp x29, x30, [sp, #-ffi_closure_SYSV_FS]! cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) cfi_rel_offset (x29, 0) @@ -502,6 +632,7 @@ CNAME(ffi_go_closure_SYSV_V): .align 4 cfi_startproc CNAME(ffi_go_closure_SYSV): + bti c stp x29, x30, [sp, #-ffi_closure_SYSV_FS]! cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) cfi_rel_offset (x29, 0)