As we discussed in the last meeting, we reset the ports tree and began from scratch, even though this change involves porting all the packages. Starting small and growing gradually, this approach will reduce build times and consequently lower energy consumption in a world affected by climate change. We will add new ports as users needs arise; ok h3artbl33d@

This commit is contained in:
purplerain 2024-05-26 03:08:12 +00:00
parent 83a0aaf92c
commit 9a3af55370
Signed by: purplerain
GPG key ID: F42C07F07E2E35B7
59377 changed files with 98673 additions and 4712155 deletions

View file

@ -49,7 +49,15 @@ Index: src/x86/itx_avx512.asm
vextracti32x4 xm2, m0, 1
vextracti32x4 xm3, m1, 1
pshufd xm4, xm0, q1032
@@ -818,6 +824,7 @@ cglobal iidentity_4x8_internal_8bpc, 0, 6, 0, dst, str
@@ -787,6 +793,7 @@ cglobal iflipadst_4x8_internal_8bpc, 0, 6, 0, dst, str
punpckhwd m1, m3
jmp tx2q
.pass2:
+ _CET_ENDBR
vextracti32x4 xm2, m0, 1
vextracti32x4 xm3, m1, 1
pshufd xm4, xm0, q1032
@@ -818,6 +825,7 @@ cglobal iidentity_4x8_internal_8bpc, 0, 6, 0, dst, str
vextracti32x8 ym1, m0, 1
jmp tx2q
.pass2:
@ -57,7 +65,7 @@ Index: src/x86/itx_avx512.asm
vpbroadcastd ym4, [o(pw_4096)]
jmp m(iadst_4x8_internal_8bpc).end2
@@ -935,6 +942,7 @@ cglobal idct_4x16_internal_8bpc, 0, 6, 0, dst, stride,
@@ -935,6 +943,7 @@ cglobal idct_4x16_internal_8bpc, 0, 6, 0, dst, stride,
pmulhrsw m1, m4
jmp tx2q
.pass2:
@ -65,7 +73,7 @@ Index: src/x86/itx_avx512.asm
vextracti32x4 xm2, ym0, 1
vextracti32x4 xm3, ym1, 1
vextracti32x4 xm4, m0, 2
@@ -975,6 +983,7 @@ cglobal iadst_4x16_internal_8bpc, 0, 6, 0, dst, stride
@@ -975,6 +984,7 @@ cglobal iadst_4x16_internal_8bpc, 0, 6, 0, dst, stride
punpcklwd m0, m2
jmp tx2q
.pass2:
@ -73,7 +81,7 @@ Index: src/x86/itx_avx512.asm
call .main
vpbroadcastd m5, [o(pw_2048)]
psrlq m10, 4
@@ -1082,6 +1091,7 @@ cglobal iflipadst_4x16_internal_8bpc, 0, 6, 0, dst, st
@@ -1082,6 +1092,7 @@ cglobal iflipadst_4x16_internal_8bpc, 0, 6, 0, dst, st
punpckhwd m1, m2
jmp tx2q
.pass2:
@ -81,7 +89,7 @@ Index: src/x86/itx_avx512.asm
call m(iadst_4x16_internal_8bpc).main
vpbroadcastd m6, [o(pw_2048)]
psrlq m10, 12
@@ -1109,6 +1119,7 @@ cglobal iidentity_4x16_internal_8bpc, 0, 6, 0, dst, st
@@ -1109,6 +1120,7 @@ cglobal iidentity_4x16_internal_8bpc, 0, 6, 0, dst, st
punpckhdq m1, m2
jmp tx2q
.pass2:
@ -89,7 +97,7 @@ Index: src/x86/itx_avx512.asm
vpbroadcastd m3, [o(pw_1697x16)]
vpbroadcastd m5, [o(pw_2048)]
pmulhrsw m2, m3, m0
@@ -1181,6 +1192,7 @@ cglobal idct_8x4_internal_8bpc, 0, 6, 0, dst, stride,
@@ -1181,6 +1193,7 @@ cglobal idct_8x4_internal_8bpc, 0, 6, 0, dst, stride,
pshufb m1, m4
jmp tx2q
.pass2:
@ -97,7 +105,7 @@ Index: src/x86/itx_avx512.asm
IDCT4_1D_PACKED
vpermq m0, m0, q3120
vpermq m1, m1, q2031
@@ -1210,6 +1222,7 @@ cglobal iadst_8x4_internal_8bpc, 0, 6, 0, dst, stride,
@@ -1210,6 +1223,7 @@ cglobal iadst_8x4_internal_8bpc, 0, 6, 0, dst, stride,
punpcklwd m0, m3
jmp tx2q
.pass2:
@ -105,7 +113,7 @@ Index: src/x86/itx_avx512.asm
call .main
.end:
vpermq m0, m0, q3120
@@ -1253,6 +1266,7 @@ cglobal iflipadst_8x4_internal_8bpc, 0, 6, 0, dst, str
@@ -1253,6 +1267,7 @@ cglobal iflipadst_8x4_internal_8bpc, 0, 6, 0, dst, str
punpcklwd m0, m3
jmp tx2q
.pass2:
@ -113,7 +121,7 @@ Index: src/x86/itx_avx512.asm
call m(iadst_8x4_internal_8bpc).main
mova m2, m1
vpermq m1, m0, q2031
@@ -1280,6 +1294,7 @@ cglobal iidentity_8x4_internal_8bpc, 0, 6, 0, dst, str
@@ -1280,6 +1295,7 @@ cglobal iidentity_8x4_internal_8bpc, 0, 6, 0, dst, str
paddsw m1, m1
jmp tx2q
.pass2:
@ -121,7 +129,7 @@ Index: src/x86/itx_avx512.asm
vpbroadcastd m3, [o(pw_1697x8)]
pmulhrsw m2, m3, m0
pmulhrsw m3, m1
@@ -1349,6 +1364,7 @@ cglobal idct_8x8_internal_8bpc, 0, 6, 0, dst, stride,
@@ -1349,6 +1365,7 @@ cglobal idct_8x8_internal_8bpc, 0, 6, 0, dst, stride,
vshufi32x4 m3, m5, m3, 0x03
jmp tx2q
.pass2:
@ -129,7 +137,7 @@ Index: src/x86/itx_avx512.asm
call .main
vpbroadcastd m4, [o(pw_2048)]
vpermq m0, m0, q3120
@@ -1388,6 +1404,7 @@ cglobal iadst_8x8_internal_8bpc, 0, 6, 0, dst, stride,
@@ -1388,6 +1405,7 @@ cglobal iadst_8x8_internal_8bpc, 0, 6, 0, dst, stride,
vinserti32x4 m1, m4, xm1, 1
jmp tx2q
.pass2:
@ -137,7 +145,7 @@ Index: src/x86/itx_avx512.asm
pshufd m4, m0, q1032
pshufd m5, m1, q1032
call .main_pass2
@@ -1455,6 +1472,7 @@ cglobal iflipadst_8x8_internal_8bpc, 0, 6, 0, dst, str
@@ -1455,6 +1473,7 @@ cglobal iflipadst_8x8_internal_8bpc, 0, 6, 0, dst, str
vshufi32x4 m2, m4, m2, 0x03
jmp tx2q
.pass2:
@ -145,7 +153,7 @@ Index: src/x86/itx_avx512.asm
pshufd m4, m0, q1032
pshufd m5, m1, q1032
call m(iadst_8x8_internal_8bpc).main_pass2
@@ -1493,6 +1511,7 @@ cglobal iidentity_8x8_internal_8bpc, 0, 6, 0, dst, str
@@ -1493,6 +1512,7 @@ cglobal iidentity_8x8_internal_8bpc, 0, 6, 0, dst, str
punpckhdq m3, m4
jmp tx2q
.pass2:
@ -153,7 +161,7 @@ Index: src/x86/itx_avx512.asm
vpbroadcastd m4, [o(pw_4096)]
jmp m(iadst_8x8_internal_8bpc).end
@@ -1553,6 +1572,7 @@ cglobal idct_8x16_internal_8bpc, 0, 6, 0, dst, stride,
@@ -1553,6 +1573,7 @@ cglobal idct_8x16_internal_8bpc, 0, 6, 0, dst, stride,
punpckhdq m3, m4 ; 3 7 11 15
jmp tx2q
.pass2:
@ -161,7 +169,7 @@ Index: src/x86/itx_avx512.asm
vprord m5, [o(int16_perm)], 16
vshufi32x4 m2, m2, q1320 ; 2 10 14 6
vshufi32x4 m4, m1, m3, q2310 ; 1 5 15 11
@@ -1686,6 +1706,7 @@ cglobal iadst_8x16_internal_8bpc, 0, 6, 0, dst, stride
@@ -1686,6 +1707,7 @@ cglobal iadst_8x16_internal_8bpc, 0, 6, 0, dst, stride
punpckhqdq m3, m5
jmp tx2q
.pass2:
@ -169,7 +177,7 @@ Index: src/x86/itx_avx512.asm
call .main_pass2
vpbroadcastd m6, [o(pw_2048)]
psrlq m10, 4
@@ -1794,6 +1815,7 @@ cglobal iflipadst_8x16_internal_8bpc, 0, 6, 0, dst, st
@@ -1794,6 +1816,7 @@ cglobal iflipadst_8x16_internal_8bpc, 0, 6, 0, dst, st
pshufb m2, m1, m6 ; e0 f0 e1 f1 e2 f2 e3 f3
jmp m(iadst_8x16_internal_8bpc).pass1_end
.pass2:
@ -177,7 +185,7 @@ Index: src/x86/itx_avx512.asm
call m(iadst_8x16_internal_8bpc).main_pass2
vpbroadcastd m7, [o(pw_2048)]
psrlq m10, 36
@@ -1823,6 +1845,7 @@ cglobal iidentity_8x16_internal_8bpc, 0, 6, 0, dst, st
@@ -1823,6 +1846,7 @@ cglobal iidentity_8x16_internal_8bpc, 0, 6, 0, dst, st
punpckhqdq m3, m4 ; a3 b3 c3 d3 e3 f3 g3 h3
jmp tx2q
.pass2:
@ -185,7 +193,7 @@ Index: src/x86/itx_avx512.asm
vpbroadcastd m7, [o(pw_1697x16)]
mova ym8, [o(gather8b)]
lea r3, [dstq+strideq*2]
@@ -1897,6 +1920,7 @@ cglobal idct_16x4_internal_8bpc, 0, 6, 0, dst, stride,
@@ -1897,6 +1921,7 @@ cglobal idct_16x4_internal_8bpc, 0, 6, 0, dst, stride,
punpcklwd m0, m2
jmp tx2q
.pass2:
@ -193,7 +201,7 @@ Index: src/x86/itx_avx512.asm
IDCT4_1D_PACKED
mova m2, [o(permA)]
jmp m(iadst_16x4_internal_8bpc).end
@@ -1936,6 +1960,7 @@ cglobal iadst_16x4_internal_8bpc, 0, 6, 0, dst, stride
@@ -1936,6 +1961,7 @@ cglobal iadst_16x4_internal_8bpc, 0, 6, 0, dst, stride
pmulhrsw m1, m6
jmp tx2q
.pass2:
@ -201,7 +209,7 @@ Index: src/x86/itx_avx512.asm
call .main
movu m2, [o(permA+1)]
.end:
@@ -1986,6 +2011,7 @@ cglobal iflipadst_16x4_internal_8bpc, 0, 6, 0, dst, st
@@ -1986,6 +2012,7 @@ cglobal iflipadst_16x4_internal_8bpc, 0, 6, 0, dst, st
psrlq m10, 16
jmp m(iadst_16x4_internal_8bpc).pass1_end
.pass2:
@ -209,7 +217,7 @@ Index: src/x86/itx_avx512.asm
call m(iadst_16x4_internal_8bpc).main
movu m2, [o(permA+2)]
jmp m(iadst_16x4_internal_8bpc).end
@@ -2013,6 +2039,7 @@ cglobal iidentity_16x4_internal_8bpc, 0, 6, 0, dst, st
@@ -2013,6 +2040,7 @@ cglobal iidentity_16x4_internal_8bpc, 0, 6, 0, dst, st
vpermb m1, m5, m1
jmp tx2q
.pass2:
@ -217,7 +225,7 @@ Index: src/x86/itx_avx512.asm
vpbroadcastd m3, [o(pw_1697x8)]
pmulhrsw m2, m3, m0
pmulhrsw m3, m1
@@ -2112,6 +2139,7 @@ cglobal idct_16x8_internal_8bpc, 0, 6, 0, dst, stride,
@@ -2112,6 +2140,7 @@ cglobal idct_16x8_internal_8bpc, 0, 6, 0, dst, stride,
punpckhdq m5, m6, m7 ; i2 j2 k2 l2 i3 j3 k3 l3
jmp tx2q
.pass2:
@ -225,7 +233,7 @@ Index: src/x86/itx_avx512.asm
vshufi32x4 m0, m2, m4, q2020 ; 0 1
vshufi32x4 m2, m4, q3131 ; 4 5
vshufi32x4 m1, m3, m5, q2020 ; 2 3
@@ -2211,6 +2239,7 @@ cglobal iadst_16x8_internal_8bpc, 0, 6, 0, dst, stride
@@ -2211,6 +2240,7 @@ cglobal iadst_16x8_internal_8bpc, 0, 6, 0, dst, stride
REPX {pmulhrsw x, m7}, m2, m3, m4, m5
jmp tx2q
.pass2:
@ -233,7 +241,7 @@ Index: src/x86/itx_avx512.asm
vshufi32x4 m0, m2, m4, q2020
vshufi32x4 m2, m4, q3131 ; 4 5
vshufi32x4 m1, m3, m5, q2020
@@ -2265,6 +2294,7 @@ cglobal iflipadst_16x8_internal_8bpc, 0, 6, 0, dst, st
@@ -2265,6 +2295,7 @@ cglobal iflipadst_16x8_internal_8bpc, 0, 6, 0, dst, st
psrlq m10, 20
jmp m(iadst_16x8_internal_8bpc).pass1_end
.pass2:
@ -241,7 +249,7 @@ Index: src/x86/itx_avx512.asm
vshufi32x4 m0, m2, m4, q2020
vshufi32x4 m2, m4, q3131 ; 4 5
vshufi32x4 m1, m3, m5, q2020
@@ -2314,6 +2344,7 @@ cglobal iidentity_16x8_internal_8bpc, 0, 6, 0, dst, st
@@ -2314,6 +2345,7 @@ cglobal iidentity_16x8_internal_8bpc, 0, 6, 0, dst, st
REPX {vpermb x, m9, x}, m2, m3, m4, m5
jmp tx2q
.pass2:
@ -249,7 +257,7 @@ Index: src/x86/itx_avx512.asm
mova m7, [o(permB)]
vpbroadcastd m6, [o(pw_4096)]
vpermq m0, m7, m2
@@ -2373,6 +2404,7 @@ cglobal idct_16x16_internal_8bpc, 0, 6, 0, dst, stride
@@ -2373,6 +2405,7 @@ cglobal idct_16x16_internal_8bpc, 0, 6, 0, dst, stride
punpckldq m6, m11
jmp tx2q
.pass2:
@ -257,7 +265,7 @@ Index: src/x86/itx_avx512.asm
vshufi32x4 m8, m4, m6, q3232 ; i8 ic m8 mc
vinserti32x8 m4, ym6, 1 ; i0 i4 m0 m4
vshufi32x4 m6, m0, m2, q3232 ; a8 ac e8 ec
@@ -2538,6 +2570,7 @@ cglobal iadst_16x16_internal_8bpc, 0, 6, 0, dst, strid
@@ -2538,6 +2571,7 @@ cglobal iadst_16x16_internal_8bpc, 0, 6, 0, dst, strid
REPX {pmulhrsw x, m10}, m0, m1, m2, m3, m4, m5, m6, m7
jmp tx2q
.pass2:
@ -265,7 +273,7 @@ Index: src/x86/itx_avx512.asm
call .main_pass2
mova m10, [o(permD)]
psrlq m8, m10, 8
@@ -2720,6 +2753,7 @@ cglobal iflipadst_16x16_internal_8bpc, 0, 6, 0, dst, s
@@ -2720,6 +2754,7 @@ cglobal iflipadst_16x16_internal_8bpc, 0, 6, 0, dst, s
punpckhwd m5, m8, m9 ; i2 j2 k2 l2 i3 j3 k3 l3
jmp m(iadst_16x16_internal_8bpc).pass1_end
.pass2:
@ -273,7 +281,7 @@ Index: src/x86/itx_avx512.asm
call m(iadst_16x16_internal_8bpc).main_pass2
mova m10, [o(permD)]
psrlq m8, m10, 8
@@ -2789,6 +2823,7 @@ cglobal iidentity_16x16_internal_8bpc, 0, 6, 0, dst, s
@@ -2789,6 +2824,7 @@ cglobal iidentity_16x16_internal_8bpc, 0, 6, 0, dst, s
jmp tx2q
ALIGN function_align
.pass2:
@ -281,7 +289,7 @@ Index: src/x86/itx_avx512.asm
vpbroadcastd m11, [o(pw_1697x16)]
pmulhrsw m12, m11, m0
pmulhrsw m13, m11, m1
@@ -3131,6 +3166,7 @@ cglobal inv_txfm_add_dct_dct_32x8_8bpc, 4, 4, 0, dst,
@@ -3131,6 +3167,7 @@ cglobal inv_txfm_add_dct_dct_32x8_8bpc, 4, 4, 0, dst,
call m(idct_8x16_internal_8bpc).main
call m(inv_txfm_add_dct_dct_8x32_8bpc).main_fast
.pass2:
@ -289,7 +297,7 @@ Index: src/x86/itx_avx512.asm
vpbroadcastd m10, [o(pw_8192)]
vpermt2q m0, m15, m4 ; t0 t1 t9 t8
vpermt2q m20, m15, m18 ; t31 t30a t23a t22
@@ -3586,6 +3622,7 @@ cglobal inv_txfm_add_dct_dct_16x32_8bpc, 4, 4, 22, dst
@@ -3586,6 +3623,7 @@ cglobal inv_txfm_add_dct_dct_16x32_8bpc, 4, 4, 22, dst
punpckhwd m17, m17
call .main_oddhalf_fast
.pass2:
@ -297,7 +305,7 @@ Index: src/x86/itx_avx512.asm
vpbroadcastd m10, [o(pw_2048)]
mova m11, [o(end_16x32p)]
lea r3, [strideq*3]
@@ -3798,6 +3835,7 @@ cglobal inv_txfm_add_dct_dct_32x16_8bpc, 4, 6, 22, dst
@@ -3798,6 +3836,7 @@ cglobal inv_txfm_add_dct_dct_32x16_8bpc, 4, 6, 22, dst
punpckhwd m17, m17 ; 15
call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast
.pass2:
@ -305,7 +313,7 @@ Index: src/x86/itx_avx512.asm
vpbroadcastd m9, [o(pw_16384)]
call .transpose_round
vshufi32x4 m16, m14, m2, q3131 ; 5
@@ -5683,6 +5721,7 @@ ALIGN function_align
@@ -5683,6 +5722,7 @@ ALIGN function_align
vinserti32x8 m17, ym21, 1 ; c30 c31 d30 d31
ret
.pass2: