From 1a4bd76ea5493954806607b015546206ff854f48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Fri, 29 Sep 2023 20:14:26 +0300 Subject: [PATCH] swscale/rgb2rgb: remove R-V V shuffle_bytes_3210 This is slower than the Zbb version on real hardware due to register strides. Proper support for vector byte-swap requires the Zvbb extension, but it's much too early for me to worry about it. --- libswscale/riscv/rgb2rgb.c | 2 -- libswscale/riscv/rgb2rgb_rvv.S | 25 ------------------------- 2 files changed, 27 deletions(-) diff --git a/libswscale/riscv/rgb2rgb.c b/libswscale/riscv/rgb2rgb.c index 21baaa21c5..162a4082b0 100644 --- a/libswscale/riscv/rgb2rgb.c +++ b/libswscale/riscv/rgb2rgb.c @@ -30,7 +30,6 @@ void ff_shuffle_bytes_2103_rvv(const uint8_t *src, uint8_t *dst, int src_len); void ff_shuffle_bytes_1230_rvv(const uint8_t *src, uint8_t *dst, int src_len); void ff_shuffle_bytes_3012_rvv(const uint8_t *src, uint8_t *dst, int src_len); void ff_shuffle_bytes_3210_rvb(const uint8_t *src, uint8_t *dst, int src_len); -void ff_shuffle_bytes_3210_rvv(const uint8_t *src, uint8_t *dst, int src_len); void ff_interleave_bytes_rvv(const uint8_t *src1, const uint8_t *src2, uint8_t *dst, int width, int height, int s1stride, int s2stride, int dstride); @@ -55,7 +54,6 @@ av_cold void rgb2rgb_init_riscv(void) shuffle_bytes_2103 = ff_shuffle_bytes_2103_rvv; shuffle_bytes_1230 = ff_shuffle_bytes_1230_rvv; shuffle_bytes_3012 = ff_shuffle_bytes_3012_rvv; - shuffle_bytes_3210 = ff_shuffle_bytes_3210_rvv; interleaveBytes = ff_interleave_bytes_rvv; #if (__riscv_xlen == 64) uyvytoyuv422 = ff_uyvytoyuv422_rvv; diff --git a/libswscale/riscv/rgb2rgb_rvv.S b/libswscale/riscv/rgb2rgb_rvv.S index 0ac3afff7c..008f098bfe 100644 --- a/libswscale/riscv/rgb2rgb_rvv.S +++ b/libswscale/riscv/rgb2rgb_rvv.S @@ -74,31 +74,6 @@ func ff_shuffle_bytes_3012_rvv, zve32x ret endfunc -func ff_shuffle_bytes_3210_rvv, zve32x - addi t1, a0, 2 - addi t2, a0, 1 - addi t3, a0, 0 - addi a0, a0, 3 
- srai a2, a2, 2 - li t4, 4 -1: - vsetvli t0, a2, e8, m1, ta, ma - sub a2, a2, t0 - vlse8.v v8, (a0), t4 - sh2add a0, t0, a0 - vlse8.v v9, (t1), t4 - sh2add t1, t0, t1 - vlse8.v v10, (t2), t4 - sh2add t2, t0, t2 - vlse8.v v11, (t3), t4 - sh2add t3, t0, t3 - vsseg4e8.v v8, (a1) - sh2add a1, t0, a1 - bnez a2, 1b - - ret -endfunc - func ff_interleave_bytes_rvv, zve32x 1: mv t0, a0