Index: libavcodec/aarch64/vp9itxfm_neon.S
--- libavcodec/aarch64/vp9itxfm_neon.S.orig
+++ libavcodec/aarch64/vp9itxfm_neon.S
@@ -787,7 +787,7 @@ function \txfm\()16_1d_8x16_pass1_neon
 .irp i, 16, 24, 17, 25, 18, 26, 19, 27, 20, 28, 21, 29, 22, 30, 23, 31
         store           \i,  x0,  #16
 .endr
-        br              x14
+        ret             x14
 1:
         // Special case: For the last input column (x1 == 8),
         // which would be stored as the last row in the temp buffer,
@@ -806,7 +806,7 @@ function \txfm\()16_1d_8x16_pass1_neon
         mov             v29.16b, v21.16b
         mov             v30.16b, v22.16b
         mov             v31.16b, v23.16b
-        br              x14
+        ret             x14
 endfunc
 
 // Read a vertical 8x16 slice out of a 16x16 matrix, do a transform on it,
@@ -834,7 +834,7 @@ function \txfm\()16_1d_8x16_pass2_neon
         load_add_store  v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v16.8b, v17.8b
         load_add_store  v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h, v16.8b, v17.8b
 
-        br              x14
+        ret             x14
 endfunc
 .endm
 
@@ -925,7 +925,7 @@ function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_neon, ex
         ldp             d12, d13, [sp], 0x10
         ldp             d14, d15, [sp], 0x10
 .endif
-        br              x15
+        ret             x15
 endfunc
 .endm
 
@@ -960,7 +960,7 @@ function idct16_1d_8x16_pass1_quarter_neon
 .irp i, 24, 25, 26, 27
         store           \i,  x0,  x9
 .endr
-        br              x14
+        ret             x14
 endfunc
 
 function idct16_1d_8x16_pass2_quarter_neon
@@ -978,7 +978,7 @@ function idct16_1d_8x16_pass2_quarter_neon
         load_add_store  v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v16.8b, v17.8b
         load_add_store  v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h, v16.8b, v17.8b
 
-        br              x14
+        ret             x14
 endfunc
 
 function idct16_1d_8x16_pass1_half_neon
@@ -1003,7 +1003,7 @@ function idct16_1d_8x16_pass1_half_neon
 .irp i, 24, 25, 26, 27, 28, 29, 30, 31
         store           \i,  x0,  x9
 .endr
-        br              x14
+        ret             x14
 endfunc
 
 function idct16_1d_8x16_pass2_half_neon
@@ -1021,7 +1021,7 @@ function idct16_1d_8x16_pass2_half_neon
         load_add_store  v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v16.8b, v17.8b
         load_add_store  v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h, v16.8b, v17.8b
 
-        br              x14
+        ret             x14
 endfunc
 
 .macro idct16_partial size
@@ -1038,7 +1038,7 @@ function idct16x16_\size\()_add_neon
 .endr
 
         add             sp,  sp,  #512
-        br              x15
+        ret             x15
 endfunc
 .endm
 
@@ -1349,7 +1349,7 @@ function idct32_1d_8x32_pass1\suffix\()_neon
         store_rev       v25.8h, v17.8h
         store_rev       v24.8h, v16.8h
 .purgem store_rev
-        br              x14
+        ret             x14
 endfunc
 
 // This is mostly the same as 8x32_pass1, but without the transpose,
@@ -1466,7 +1466,7 @@ function idct32_1d_8x32_pass2\suffix\()_neon
         load_acc_store  v24.8h, v25.8h, v26.8h, v27.8h, 1
         load_acc_store  v28.8h, v29.8h, v30.8h, v31.8h, 1
 .purgem load_acc_store
-        br              x14
+        ret             x14
 endfunc
 .endm
 
@@ -1547,7 +1547,7 @@ function ff_vp9_idct_idct_32x32_add_neon, export=1
         ldp             d8,  d9,  [sp], 0x10
         ldp             d10, d11, [sp], 0x10
 
-        br              x15
+        ret             x15
 endfunc
 
 .macro idct32_partial size
@@ -1572,7 +1572,7 @@ function idct32x32_\size\()_add_neon
         ldp             d8,  d9,  [sp], 0x10
         ldp             d10, d11, [sp], 0x10
 
-        br              x15
+        ret             x15
 endfunc
 .endm