mirror of
https://github.com/librempeg/librempeg
synced 2024-11-22 00:51:37 +00:00
avcodec/ac3: Implement sum_square_butterfly_float for aarch64 NEON
Signed-off-by: Geoff Hill <geoff@geoffhill.org> Signed-off-by: Martin Storsjö <martin@martin.st> Signed-off-by: Paul B Mahol <onemda@gmail.com>
This commit is contained in:
parent
dc8865d219
commit
9b7f43c04a
@ -32,6 +32,10 @@ void ff_ac3_sum_square_butterfly_int32_neon(int64_t sum[4],
|
|||||||
const int32_t *coef0,
|
const int32_t *coef0,
|
||||||
const int32_t *coef1,
|
const int32_t *coef1,
|
||||||
int len);
|
int len);
|
||||||
|
/*
 * AArch64 NEON version of the AC-3 float sum-square butterfly; the body is
 * the assembly routine of the same name.
 *
 * Writes four totals accumulated over the two input arrays:
 *   sum[0] = sum of coef0[i]^2
 *   sum[1] = sum of coef1[i]^2
 *   sum[2] = sum of (coef0[i] + coef1[i])^2
 *   sum[3] = sum of (coef0[i] - coef1[i])^2
 *
 * The assembly consumes four floats per loop iteration and always runs at
 * least one iteration, so len should be positive and a multiple of 4.
 */
void ff_ac3_sum_square_butterfly_float_neon(float sum[4],
|
||||||
|
const float *coef0,
|
||||||
|
const float *coef1,
|
||||||
|
int len);
|
||||||
|
|
||||||
av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
|
av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
|
||||||
{
|
{
|
||||||
@ -42,4 +46,5 @@ av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
|
|||||||
c->extract_exponents = ff_ac3_extract_exponents_neon;
|
c->extract_exponents = ff_ac3_extract_exponents_neon;
|
||||||
c->float_to_fixed24 = ff_float_to_fixed24_neon;
|
c->float_to_fixed24 = ff_float_to_fixed24_neon;
|
||||||
c->sum_square_butterfly_int32 = ff_ac3_sum_square_butterfly_int32_neon;
|
c->sum_square_butterfly_int32 = ff_ac3_sum_square_butterfly_int32_neon;
|
||||||
|
c->sum_square_butterfly_float = ff_ac3_sum_square_butterfly_float_neon;
|
||||||
}
|
}
|
||||||
|
@ -87,3 +87,33 @@ function ff_ac3_sum_square_butterfly_int32_neon, export=1
|
|||||||
st1 {v0.1d-v3.1d}, [x0]
|
st1 {v0.1d-v3.1d}, [x0]
|
||||||
ret
|
ret
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
|
// Float sum-square butterfly: accumulates coef0^2, coef1^2, (coef0+coef1)^2
// and (coef0-coef1)^2 over the input and stores the four totals.
// Arguments, following the C prototype's order under the standard AArch64
// calling convention: x0 = sum[4], x1 = coef0, x2 = coef1, w3 = len.
// Four floats are consumed per iteration and the loop body runs at least
// once, so len is expected to be positive and a multiple of 4.
function ff_ac3_sum_square_butterfly_float_neon, export=1
|
||||||
|
movi v0.4s, #0 // v0: per-lane accumulator for coef0^2
|
||||||
|
movi v1.4s, #0 // v1: per-lane accumulator for coef1^2
|
||||||
|
movi v2.4s, #0 // v2: per-lane accumulator for (coef0 + coef1)^2
|
||||||
|
movi v3.4s, #0 // v3: per-lane accumulator for (coef0 - coef1)^2
|
||||||
|
1: ld1 {v30.4s}, [x1], #16 // load 4 floats from coef0, advance pointer
|
||||||
|
ld1 {v31.4s}, [x2], #16 // load 4 floats from coef1, advance pointer
|
||||||
|
fadd v16.4s, v30.4s, v31.4s // v16 = coef0 + coef1
|
||||||
|
fsub v17.4s, v30.4s, v31.4s // v17 = coef0 - coef1
|
||||||
|
fmla v0.4s, v30.4s, v30.4s // v0 += coef0 * coef0
|
||||||
|
fmla v1.4s, v31.4s, v31.4s // v1 += coef1 * coef1
|
||||||
|
fmla v2.4s, v16.4s, v16.4s // v2 += (coef0 + coef1)^2
|
||||||
|
fmla v3.4s, v17.4s, v17.4s // v3 += (coef0 - coef1)^2
|
||||||
|
subs w3, w3, #4 // 4 elements done; sets flags for the branch
|
||||||
|
b.gt 1b // loop while remaining count > 0
|
||||||
|
faddp v0.4s, v0.4s, v0.4s // horizontal reduce v0: 4 lanes -> 2 partial sums
|
||||||
|
faddp v0.2s, v0.2s, v0.2s // 2 partial sums -> total in lane 0
|
||||||
|
st1 {v0.s}[0], [x0], #4 // sum[0] = total, advance output pointer
|
||||||
|
faddp v1.4s, v1.4s, v1.4s // horizontal reduce v1 the same way
|
||||||
|
faddp v1.2s, v1.2s, v1.2s
|
||||||
|
st1 {v1.s}[0], [x0], #4 // sum[1] = total
|
||||||
|
faddp v2.4s, v2.4s, v2.4s // horizontal reduce v2
|
||||||
|
faddp v2.2s, v2.2s, v2.2s
|
||||||
|
st1 {v2.s}[0], [x0], #4 // sum[2] = total
|
||||||
|
faddp v3.4s, v3.4s, v3.4s // horizontal reduce v3
|
||||||
|
faddp v3.2s, v3.2s, v3.2s
|
||||||
|
st1 {v3.s}[0], [x0] // sum[3] = total (last store, no post-increment)
|
||||||
|
ret
|
||||||
|
endfunc
|
||||||
|
@ -165,6 +165,31 @@ static void check_ac3_sum_square_butterfly_int32(AC3DSPContext *c) {
|
|||||||
report("ac3_sum_square_butterfly_int32");
|
report("ac3_sum_square_butterfly_int32");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * checkasm test for AC3DSPContext.sum_square_butterfly_float.
 *
 * Fills two ELEMS-long float buffers with random data, runs the reference
 * and the implementation under test on the same input, and fails if any of
 * the four output sums differs beyond the tolerance passed to
 * float_near_ulp_array (10, presumably max ULPs). The implementation under
 * test is then benchmarked.
 */
static void check_ac3_sum_square_butterfly_float(AC3DSPContext *c) {
    /* Input channels; 32-byte aligned local buffers. */
    LOCAL_ALIGNED_32(float, lt, [ELEMS]);
    LOCAL_ALIGNED_32(float, rt, [ELEMS]);
    /* Output sums: v1 from the reference, v2 from the tested version. */
    LOCAL_ALIGNED_16(float, v1, [4]);
    LOCAL_ALIGNED_16(float, v2, [4]);

    declare_func(void, float[4], const float *, const float *, int);

    randomize_float(lt, ELEMS);
    randomize_float(rt, ELEMS);

    /*
     * The name passed to check_func is the one the test is registered
     * under; keep it spelled like the report() name below ("butterfly").
     */
    if (check_func(c->sum_square_butterfly_float,
                   "ac3_sum_square_butterfly_float")) {
        call_ref(v1, lt, rt, ELEMS);
        call_new(v2, lt, rt, ELEMS);

        /* Float accumulation order may differ, so compare with a tolerance. */
        if (!float_near_ulp_array(v1, v2, 10, 4))
            fail();

        bench_new(v2, lt, rt, ELEMS);
    }

    report("ac3_sum_square_butterfly_float");
}
|
||||||
|
|
||||||
void checkasm_check_ac3dsp(void)
|
void checkasm_check_ac3dsp(void)
|
||||||
{
|
{
|
||||||
AC3DSPContext c;
|
AC3DSPContext c;
|
||||||
@ -174,4 +199,5 @@ void checkasm_check_ac3dsp(void)
|
|||||||
check_ac3_extract_exponents(&c);
|
check_ac3_extract_exponents(&c);
|
||||||
check_float_to_fixed24(&c);
|
check_float_to_fixed24(&c);
|
||||||
check_ac3_sum_square_butterfly_int32(&c);
|
check_ac3_sum_square_butterfly_int32(&c);
|
||||||
|
check_ac3_sum_square_butterfly_float(&c);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user