ports/graphics/ffmpeg/patches/patch-libavcodec_x86_fft_asm

128 lines
2.5 KiB
Text

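- place the FFT dispatch table in .data.rel.ro (read-only after relocation) and drop the PIC section-relative offset handling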
- add _CET_ENDBR at the entry of each function
Index: libavcodec/x86/fft.asm
--- libavcodec/x86/fft.asm.orig
+++ libavcodec/x86/fft.asm
@@ -325,6 +325,7 @@ INIT_YMM avx
%if HAVE_AVX_EXTERNAL
align 16
fft8_avx:
+ _CET_ENDBR
mova m0, Z(0)
mova m1, Z(1)
T8_AVX m0, m1, m2, m3, m4
@@ -335,6 +336,7 @@ fft8_avx:
align 16
fft16_avx:
+ _CET_ENDBR
mova m2, Z(2)
mova m3, Z(3)
T4_SSE m2, m3, m7
@@ -372,6 +374,7 @@ fft16_avx:
align 16
fft32_avx:
+ _CET_ENDBR
call fft16_avx
mova m0, Z(4)
@@ -396,6 +399,7 @@ fft32_avx:
ret
fft32_interleave_avx:
+ _CET_ENDBR
call fft32_avx
mov r2d, 32
.deint_loop:
@@ -419,6 +423,7 @@ INIT_XMM sse
align 16
fft4_avx:
fft4_sse:
+ _CET_ENDBR
mova m0, Z(0)
mova m1, Z(1)
T4_SSE m0, m1, m2
@@ -428,6 +433,7 @@ fft4_sse:
align 16
fft8_sse:
+ _CET_ENDBR
mova m0, Z(0)
mova m1, Z(1)
T4_SSE m0, m1, m2
@@ -442,6 +448,7 @@ fft8_sse:
align 16
fft16_sse:
+ _CET_ENDBR
mova m0, Z(0)
mova m1, Z(1)
T4_SSE m0, m1, m2
@@ -465,6 +472,7 @@ fft16_sse:
%macro FFT48_3DNOW 0
align 16
fft4 %+ SUFFIX:
+ _CET_ENDBR
T2_3DNOW m0, m1, Z(0), Z(1)
mova m2, Z(2)
mova m3, Z(3)
@@ -479,6 +487,7 @@ fft4 %+ SUFFIX:
align 16
fft8 %+ SUFFIX:
+ _CET_ENDBR
T2_3DNOW m0, m1, Z(0), Z(1)
mova m2, Z(2)
mova m3, Z(3)
@@ -532,6 +541,7 @@ FFT48_3DNOW
%macro DECL_PASS 2+ ; name, payload
align 16
%1:
+ _CET_ENDBR
DEFINE_ARGS zc, w, n, o1, o3
lea o3q, [nq*3]
lea o1q, [nq*8]
@@ -548,10 +558,6 @@ DEFINE_ARGS zc, w, n, o1, o3
%macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs
lea r2, [dispatch_tab%1]
mov r2, [r2 + (%2q-2)*gprsize]
-%ifdef PIC
- lea r3, [$$]
- add r2, r3
-%endif
call r2
%endmacro ; FFT_DISPATCH
@@ -731,11 +737,7 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0
%define pass_interleave_3dnowext pass_interleave_3dnow
%endif
-%ifdef PIC
-%define SECTION_REL - $$
-%else
%define SECTION_REL
-%endif
%macro DECL_FFT 1-2 ; nbits, suffix
%ifidn %0, 1
@@ -759,6 +761,7 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0
align 16
fft %+ n %+ fullsuffix:
+ _CET_ENDBR
call fft %+ n2 %+ SUFFIX
add r0, n*4 - (n&(-2<<%1))
call fft %+ n4 %+ SUFFIX
@@ -773,8 +776,10 @@ fft %+ n %+ fullsuffix:
%endrep
%undef n
+[SECTION .data.rel.ro write]
align 8
dispatch_tab %+ fullsuffix: pointer list_of_fft
+__?SECT?__
%endmacro ; DECL_FFT
%if HAVE_AVX_EXTERNAL