63 lines
1.8 KiB
Text
63 lines
1.8 KiB
Text
Index: src/x86/filmgrain16_avx2.asm
|
|
--- src/x86/filmgrain16_avx2.asm.orig
|
|
+++ src/x86/filmgrain16_avx2.asm
|
|
@@ -163,6 +163,7 @@ cglobal generate_grain_y_16bpc, 3, 9, 14, buf, fg_data
|
|
jmp r5
|
|
|
|
.ar1:
|
|
+ _CET_ENDBR
|
|
DEFINE_ARGS buf, fg_data, max, shift, val3, min, cf3, x, val0
|
|
mov shiftd, [fg_dataq+FGData.ar_coeff_shift]
|
|
movsx cf3d, byte [fg_dataq+FGData.ar_coeffs_y+3]
|
|
@@ -214,9 +215,11 @@ cglobal generate_grain_y_16bpc, 3, 9, 14, buf, fg_data
|
|
dec hd
|
|
jg .y_loop_ar1
|
|
.ar0:
|
|
+ _CET_ENDBR
|
|
RET
|
|
|
|
.ar2:
|
|
+ _CET_ENDBR
|
|
DEFINE_ARGS buf, fg_data, bdmax, shift
|
|
mov shiftd, [fg_dataq+FGData.ar_coeff_shift]
|
|
movq xm0, [fg_dataq+FGData.ar_coeffs_y+5] ; cf5-11
|
|
@@ -286,6 +289,7 @@ cglobal generate_grain_y_16bpc, 3, 9, 14, buf, fg_data
|
|
RET
|
|
|
|
.ar3:
|
|
+ _CET_ENDBR
|
|
DEFINE_ARGS buf, fg_data, bdmax, shift
|
|
mov shiftd, [fg_dataq+FGData.ar_coeff_shift]
|
|
sar bdmaxd, 1
|
|
@@ -447,6 +451,7 @@ cglobal generate_grain_uv_%1_16bpc, 4, 11, 8, buf, buf
|
|
|
|
INIT_YMM avx2
|
|
.ar0:
|
|
+ _CET_ENDBR
|
|
DEFINE_ARGS buf, bufy, fg_data, uv, bdmax, shift
|
|
imul uvd, 28
|
|
mov shiftd, [fg_dataq+FGData.ar_coeff_shift]
|
|
@@ -566,6 +571,7 @@ INIT_YMM avx2
|
|
|
|
INIT_XMM avx2
|
|
.ar1:
|
|
+ _CET_ENDBR
|
|
DEFINE_ARGS buf, bufy, fg_data, uv, max, cf3, min, val3, x, shift
|
|
imul uvd, 28
|
|
mov shiftd, [fg_dataq+FGData.ar_coeff_shift]
|
|
@@ -645,6 +651,7 @@ INIT_XMM avx2
|
|
|
|
INIT_YMM avx2
|
|
.ar2:
|
|
+ _CET_ENDBR
|
|
%if WIN64
|
|
; xmm6 and xmm7 already saved
|
|
%assign xmm_regs_used 13 + %2
|
|
@@ -746,6 +753,7 @@ INIT_YMM avx2
|
|
RET
|
|
|
|
.ar3:
|
|
+ _CET_ENDBR
|
|
%if WIN64
|
|
; xmm6 and xmm7 already saved
|
|
%assign stack_offset 32
|