120 lines
3.9 KiB
Text
Index: libavcodec/aarch64/vp9itxfm_neon.S
|
|
--- libavcodec/aarch64/vp9itxfm_neon.S.orig
|
|
+++ libavcodec/aarch64/vp9itxfm_neon.S
|
|
@@ -787,7 +787,7 @@ function \txfm\()16_1d_8x16_pass1_neon
|
|
.irp i, 16, 24, 17, 25, 18, 26, 19, 27, 20, 28, 21, 29, 22, 30, 23, 31
|
|
store \i, x0, #16
|
|
.endr
|
|
- br x14
|
|
+ ret x14
|
|
1:
|
|
// Special case: For the last input column (x1 == 8),
|
|
// which would be stored as the last row in the temp buffer,
|
|
@@ -806,7 +806,7 @@ function \txfm\()16_1d_8x16_pass1_neon
|
|
mov v29.16b, v21.16b
|
|
mov v30.16b, v22.16b
|
|
mov v31.16b, v23.16b
|
|
- br x14
|
|
+ ret x14
|
|
endfunc
|
|
|
|
// Read a vertical 8x16 slice out of a 16x16 matrix, do a transform on it,
|
|
@@ -834,7 +834,7 @@ function \txfm\()16_1d_8x16_pass2_neon
|
|
load_add_store v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v16.8b, v17.8b
|
|
load_add_store v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h, v16.8b, v17.8b
|
|
|
|
- br x14
|
|
+ ret x14
|
|
endfunc
|
|
.endm
|
|
|
|
@@ -925,7 +925,7 @@ function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_neon, ex
|
|
ldp d12, d13, [sp], 0x10
|
|
ldp d14, d15, [sp], 0x10
|
|
.endif
|
|
- br x15
|
|
+ ret x15
|
|
endfunc
|
|
.endm
|
|
|
|
@@ -960,7 +960,7 @@ function idct16_1d_8x16_pass1_quarter_neon
|
|
.irp i, 24, 25, 26, 27
|
|
store \i, x0, x9
|
|
.endr
|
|
- br x14
|
|
+ ret x14
|
|
endfunc
|
|
|
|
function idct16_1d_8x16_pass2_quarter_neon
|
|
@@ -978,7 +978,7 @@ function idct16_1d_8x16_pass2_quarter_neon
|
|
load_add_store v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v16.8b, v17.8b
|
|
load_add_store v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h, v16.8b, v17.8b
|
|
|
|
- br x14
|
|
+ ret x14
|
|
endfunc
|
|
|
|
function idct16_1d_8x16_pass1_half_neon
|
|
@@ -1003,7 +1003,7 @@ function idct16_1d_8x16_pass1_half_neon
|
|
.irp i, 24, 25, 26, 27, 28, 29, 30, 31
|
|
store \i, x0, x9
|
|
.endr
|
|
- br x14
|
|
+ ret x14
|
|
endfunc
|
|
|
|
function idct16_1d_8x16_pass2_half_neon
|
|
@@ -1021,7 +1021,7 @@ function idct16_1d_8x16_pass2_half_neon
|
|
load_add_store v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v16.8b, v17.8b
|
|
load_add_store v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h, v16.8b, v17.8b
|
|
|
|
- br x14
|
|
+ ret x14
|
|
endfunc
|
|
|
|
.macro idct16_partial size
|
|
@@ -1038,7 +1038,7 @@ function idct16x16_\size\()_add_neon
|
|
.endr
|
|
|
|
add sp, sp, #512
|
|
- br x15
|
|
+ ret x15
|
|
endfunc
|
|
.endm
|
|
|
|
@@ -1349,7 +1349,7 @@ function idct32_1d_8x32_pass1\suffix\()_neon
|
|
store_rev v25.8h, v17.8h
|
|
store_rev v24.8h, v16.8h
|
|
.purgem store_rev
|
|
- br x14
|
|
+ ret x14
|
|
endfunc
|
|
|
|
// This is mostly the same as 8x32_pass1, but without the transpose,
|
|
@@ -1466,7 +1466,7 @@ function idct32_1d_8x32_pass2\suffix\()_neon
|
|
load_acc_store v24.8h, v25.8h, v26.8h, v27.8h, 1
|
|
load_acc_store v28.8h, v29.8h, v30.8h, v31.8h, 1
|
|
.purgem load_acc_store
|
|
- br x14
|
|
+ ret x14
|
|
endfunc
|
|
.endm
|
|
|
|
@@ -1547,7 +1547,7 @@ function ff_vp9_idct_idct_32x32_add_neon, export=1
|
|
ldp d8, d9, [sp], 0x10
|
|
ldp d10, d11, [sp], 0x10
|
|
|
|
- br x15
|
|
+ ret x15
|
|
endfunc
|
|
|
|
.macro idct32_partial size
|
|
@@ -1572,7 +1572,7 @@ function idct32x32_\size\()_add_neon
|
|
ldp d8, d9, [sp], 0x10
|
|
ldp d10, d11, [sp], 0x10
|
|
|
|
- br x15
|
|
+ ret x15
|
|
endfunc
|
|
.endm
|
|
|