mirror of
https://github.com/librempeg/librempeg
synced 2024-11-22 18:49:58 +00:00
swscale/rgb2rgb: remove R-V V shuffle_bytes_3210
This is slower than the Zbb version on real hardware due to register strides. Proper support for vector byte-swap requires the Zvbb extension, but it's much too early for me to worry about it.
This commit is contained in:
parent
c4a144c29d
commit
1a4bd76ea5
@ -30,7 +30,6 @@ void ff_shuffle_bytes_2103_rvv(const uint8_t *src, uint8_t *dst, int src_len);
|
|||||||
void ff_shuffle_bytes_1230_rvv(const uint8_t *src, uint8_t *dst, int src_len);
|
void ff_shuffle_bytes_1230_rvv(const uint8_t *src, uint8_t *dst, int src_len);
|
||||||
void ff_shuffle_bytes_3012_rvv(const uint8_t *src, uint8_t *dst, int src_len);
|
void ff_shuffle_bytes_3012_rvv(const uint8_t *src, uint8_t *dst, int src_len);
|
||||||
void ff_shuffle_bytes_3210_rvb(const uint8_t *src, uint8_t *dst, int src_len);
|
void ff_shuffle_bytes_3210_rvb(const uint8_t *src, uint8_t *dst, int src_len);
|
||||||
void ff_shuffle_bytes_3210_rvv(const uint8_t *src, uint8_t *dst, int src_len);
|
|
||||||
void ff_interleave_bytes_rvv(const uint8_t *src1, const uint8_t *src2,
|
void ff_interleave_bytes_rvv(const uint8_t *src1, const uint8_t *src2,
|
||||||
uint8_t *dst, int width, int height, int s1stride,
|
uint8_t *dst, int width, int height, int s1stride,
|
||||||
int s2stride, int dstride);
|
int s2stride, int dstride);
|
||||||
@ -55,7 +54,6 @@ av_cold void rgb2rgb_init_riscv(void)
|
|||||||
shuffle_bytes_2103 = ff_shuffle_bytes_2103_rvv;
|
shuffle_bytes_2103 = ff_shuffle_bytes_2103_rvv;
|
||||||
shuffle_bytes_1230 = ff_shuffle_bytes_1230_rvv;
|
shuffle_bytes_1230 = ff_shuffle_bytes_1230_rvv;
|
||||||
shuffle_bytes_3012 = ff_shuffle_bytes_3012_rvv;
|
shuffle_bytes_3012 = ff_shuffle_bytes_3012_rvv;
|
||||||
shuffle_bytes_3210 = ff_shuffle_bytes_3210_rvv;
|
|
||||||
interleaveBytes = ff_interleave_bytes_rvv;
|
interleaveBytes = ff_interleave_bytes_rvv;
|
||||||
#if (__riscv_xlen == 64)
|
#if (__riscv_xlen == 64)
|
||||||
uyvytoyuv422 = ff_uyvytoyuv422_rvv;
|
uyvytoyuv422 = ff_uyvytoyuv422_rvv;
|
||||||
|
@ -74,31 +74,6 @@ func ff_shuffle_bytes_3012_rvv, zve32x
|
|||||||
ret
|
ret
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
func ff_shuffle_bytes_3210_rvv, zve32x
|
|
||||||
addi t1, a0, 2
|
|
||||||
addi t2, a0, 1
|
|
||||||
addi t3, a0, 0
|
|
||||||
addi a0, a0, 3
|
|
||||||
srai a2, a2, 2
|
|
||||||
li t4, 4
|
|
||||||
1:
|
|
||||||
vsetvli t0, a2, e8, m1, ta, ma
|
|
||||||
sub a2, a2, t0
|
|
||||||
vlse8.v v8, (a0), t4
|
|
||||||
sh2add a0, t0, a0
|
|
||||||
vlse8.v v9, (t1), t4
|
|
||||||
sh2add t1, t0, t1
|
|
||||||
vlse8.v v10, (t2), t4
|
|
||||||
sh2add t2, t0, t2
|
|
||||||
vlse8.v v11, (t3), t4
|
|
||||||
sh2add t3, t0, t3
|
|
||||||
vsseg4e8.v v8, (a1)
|
|
||||||
sh2add a1, t0, a1
|
|
||||||
bnez a2, 1b
|
|
||||||
|
|
||||||
ret
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
func ff_interleave_bytes_rvv, zve32x
|
func ff_interleave_bytes_rvv, zve32x
|
||||||
1:
|
1:
|
||||||
mv t0, a0
|
mv t0, a0
|
||||||
|
Loading…
Reference in New Issue
Block a user