# Patch against libavcodec/riscv/h264idct_rvv.S.
#
# NOTE(review): in this copy the hunks are folded onto two physical lines; the
# original line breaks are not present and have to be restored before the hunk
# headers (@@ -a,b +c,d @@) can be matched against the target file.
#
# What the visible hunks change:
#
#  * idct4_adds macro (ff_h264_idct_add\type\()_\depth\()_rvv) and
#    ff_h264_idct8_add4_\depth\()_rvv:
#      - the stack adjustment goes from 96 to 64 bytes, in both the prologue
#        (addi sp, sp, -N) and the epilogue (addi sp, sp, N);
#      - the sd/ld pairs for s6 and s7, and for s8 and s9 under
#        ".if \depth > 8", are removed together with "mv s8, a5" and
#        "mv s9, a6";
#      - "mv s6, a2" / "mv s7, a3" become "mv a1, a2" / "mv a2, a3", and the
#        per-iteration reloads "mv a1, s6", "mv a2, s7" and "mv a5, s8" are
#        dropped, so a1, a2 and a5 are no longer re-materialised inside the
#        loop;
#      - the DC-only path no longer uses "call ff_h264_idct[8]_dc_add_..._c"
#        (depth == 8) or "jalr s9" (other depths); it uses
#        "jal ff_h264_idct4_dc_add_\depth\()_rvv" and
#        "jal ff_h264_idct8_dc_add_\depth\()_rvv" respectively;
#      - the per-iteration pointer increment moves from s6 to a1.
#
#  * ff_h264_idct8_add4_\depth\()_rvv only: the a1 increment now sits at
#    label 3, ahead of a new label 4. After "jal .Lidct8_add_\depth\()_rvv" the
#    code jumps to 3 when depth == 8 and to 4 otherwise, i.e. it skips the
#    increment; the in-patch comment gives the reason ("idct8_add_16 updates
#    a1").
#
#  * .irp depth, 9, 10, 12, 14 trampolines: the "lla a6, ..._c" line is removed
#    from each; they still load a5 with (1 << \depth) - 1 and jump to the
#    corresponding *_16_rvv function.
#
# NOTE(review): with the s-register copies gone, a1, a2 and a5 stay live across
# the "jal" calls inside the loops. This presumably relies on the callees
# (ff_h264_idct4_dc_add_*_rvv, ff_h264_idct8_dc_add_*_rvv, .Lidct8_add_*_rvv
# and the 4x4 routine called in the part of the loop not shown in these hunks)
# leaving those registers intact, or updating a1 exactly as label 3/4 expects.
# Those routines are outside this patch; confirm against the full file.
#
diff --git a/libavcodec/riscv/h264idct_rvv.S b/libavcodec/riscv/h264idct_rvv.S index 2648e06aeb..c42db6ef29 100644 --- a/libavcodec/riscv/h264idct_rvv.S +++ b/libavcodec/riscv/h264idct_rvv.S @@ -536,7 +536,7 @@ endconst .macro idct4_adds type, depth func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x csrwi vxrm, 0 - addi sp, sp, -96 + addi sp, sp, -64 lla t0, ff_h264_scan8 sd s0, (sp) li t1, 32 * (\depth / 8) @@ -547,14 +547,6 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x sd s3, 32(sp) sd s4, 40(sp) sd s5, 48(sp) - sd s6, 56(sp) - sd s7, 64(sp) -.if \depth > 8 - sd s8, 72(sp) - sd s9, 80(sp) - mv s8, a5 - mv s9, a6 -.endif vsetivli zero, 16, e8, m1, ta, ma vle8.v v8, (t0) .if \depth == 8 @@ -583,8 +575,8 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x li s1, 16 mv s4, a0 mv s5, a1 - mv s6, a2 - mv s7, a3 + mv a1, a2 + mv a2, a3 1: andi t0, s2, 1 addi s1, s1, -1 @@ -594,12 +586,7 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x .endif lw t2, (s5) # block_offset[i] andi t1, s3, 1 - mv a1, s6 - mv a2, s7 add a0, s4, t2 -.if \depth > 8 - mv a5, s8 -.endif .ifc \type, 16 bnez t1, 2f # if (nnz == 1 && block[i * 16]) .else @@ -611,23 +598,13 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x .ifnc \type, 16 beqz t1, 3f # if (block[i * 16]) .endif -.if \depth == 8 - call ff_h264_idct_dc_add_\depth\()_c -.else - jalr s9 -.endif + jal ff_h264_idct4_dc_add_\depth\()_rvv 3: srli s3, s3, 1 addi s5, s5, 4 - addi s6, s6, 16 * 2 * (\depth / 8) + addi a1, a1, 16 * 2 * (\depth / 8) bnez s1, 1b -.if \depth > 8 - ld s9, 80(sp) - ld s8, 72(sp) -.endif - ld s7, 64(sp) - ld s6, 56(sp) ld s5, 48(sp) ld s4, 40(sp) ld s3, 32(sp) @@ -635,7 +612,7 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x ld s1, 16(sp) ld ra, 8(sp) ld s0, 0(sp) - addi sp, sp, 96 + addi sp, sp, 64 ret endfunc .endm @@ -646,7 +623,7 @@ idct4_adds 16intra, \depth func ff_h264_idct8_add4_\depth\()_rvv, zve32x csrwi vxrm, 0 - addi sp, sp, -96 + addi sp, sp, -64 lla t0, ff_h264_scan8 sd s0, 
(sp) li t1, 4 * 32 * (\depth / 8) @@ -658,14 +635,6 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x sd s3, 32(sp) sd s4, 40(sp) sd s5, 48(sp) - sd s6, 56(sp) - sd s7, 64(sp) -.if \depth > 8 - sd s8, 72(sp) - sd s9, 80(sp) - mv s8, a5 - mv s9, a6 -.endif vsetivli zero, 4, e8, mf4, ta, ma vlse8.v v8, (t0), t2 .if \depth == 8 @@ -689,8 +658,8 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x li s1, 4 mv s4, a0 mv s5, a1 - mv s6, a2 - mv s7, a3 + mv a1, a2 + mv a2, a3 1: andi t0, s2, 1 addi s1, s1, -1 @@ -698,33 +667,23 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x beqz t0, 3f # if (nnz) lw t2, (s5) # block_offset[i] andi t1, s3, 1 - mv a1, s6 - mv a2, s7 add a0, s4, t2 -.if \depth > 8 - mv a5, s8 -.endif bnez t1, 2f # if (nnz == 1 && block[i * 16]) jal .Lidct8_add_\depth\()_rvv - j 3f -2: .if \depth == 8 - call ff_h264_idct8_dc_add_\depth\()_c + j 3f .else - jalr s9 + j 4f # idct8_add_16 updates a1 .endif +2: + jal ff_h264_idct8_dc_add_\depth\()_rvv 3: + addi a1, a1, 4 * 16 * 2 * (\depth / 8) +4: srli s3, s3, 1 addi s5, s5, 4 * 4 - addi s6, s6, 4 * 16 * 2 * (\depth / 8) bnez s1, 1b -.if \depth > 8 - ld s9, 80(sp) - ld s8, 72(sp) -.endif - ld s7, 64(sp) - ld s6, 56(sp) ld s5, 48(sp) ld s4, 40(sp) ld s3, 32(sp) @@ -732,7 +691,7 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x ld s1, 16(sp) ld ra, 8(sp) ld s0, 0(sp) - addi sp, sp, 96 + addi sp, sp, 64 ret endfunc .endr @@ -740,19 +699,16 @@ endfunc .irp depth, 9, 10, 12, 14 func ff_h264_idct_add16_\depth\()_rvv, zve32x li a5, (1 << \depth) - 1 - lla a6, ff_h264_idct_dc_add_\depth\()_c j ff_h264_idct_add16_16_rvv endfunc func ff_h264_idct_add16intra_\depth\()_rvv, zve32x li a5, (1 << \depth) - 1 - lla a6, ff_h264_idct_dc_add_\depth\()_c j ff_h264_idct_add16intra_16_rvv endfunc func ff_h264_idct8_add4_\depth\()_rvv, zve32x li a5, (1 << \depth) - 1 - lla a6, ff_h264_idct8_dc_add_\depth\()_c j ff_h264_idct8_add4_16_rvv endfunc .endr