mirror of
https://github.com/librempeg/librempeg
synced 2024-11-22 00:51:37 +00:00
avcodec/ac3: Implement ac3_exponent_min for aarch64 NEON
Signed-off-by: Geoff Hill <geoff@geoffhill.org> Signed-off-by: Martin Storsjö <martin@martin.st> Signed-off-by: Paul B Mahol <onemda@gmail.com>
This commit is contained in:
parent
3ebb49a3d3
commit
0d2fcaf176
@ -25,6 +25,7 @@
|
||||
#include "libavcodec/ac3dsp.h"
|
||||
#include "config.h"
|
||||
|
||||
/* NEON routines; ff_ac3dsp_init_aarch64() stores their addresses in the
 * AC3DSPContext function pointers of the same base name. */
void ff_ac3_exponent_min_neon(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
|
||||
void ff_float_to_fixed24_neon(int32_t *dst, const float *src, size_t len);
|
||||
|
||||
/**
 * Point the AC3DSPContext function pointers at the AArch64 NEON routines.
 *
 * The context is left untouched when the flags returned by
 * av_get_cpu_flags() do not satisfy have_neon().
 *
 * @param c context whose ac3_exponent_min and float_to_fixed24 members
 *          are overwritten
 */
av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
|
||||
@ -32,5 +33,6 @@ av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
if (!have_neon(cpu_flags)) return; /* no NEON: keep the pointers c already holds */
|
||||
|
||||
c->ac3_exponent_min = ff_ac3_exponent_min_neon;
|
||||
c->float_to_fixed24 = ff_float_to_fixed24_neon;
|
||||
}
|
||||
|
@ -21,6 +21,22 @@
|
||||
|
||||
#include "libavutil/aarch64/asm.S"
|
||||
|
||||
// void ff_ac3_exponent_min_neon(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
//
// Going by the C prototype and the AAPCS64 argument order:
//   x0 = exp, w1 = num_reuse_blocks, w2 = nb_coefs.
//
// For each group of 16 bytes at exp, takes the unsigned byte-wise minimum of
// that group and the groups at the same offset in the following
// num_reuse_blocks rows (rows are 256 bytes apart), and writes the result
// back over the first row.
//
// Both loops test their counter after the body, so when num_reuse_blocks is
// non-zero at least one 16-byte group is processed, and nb_coefs is
// effectively rounded up to a multiple of 16.
// Scratch registers used: w3, x4, v0, v1.
function ff_ac3_exponent_min_neon, export=1
|
||||
cbz w1, 3f                      // num_reuse_blocks == 0: nothing to do
|
||||
1: ld1 {v0.16b}, [x0]           // running minimum starts as 16 bytes of the first row
|
||||
mov w3, w1                      // inner counter = num_reuse_blocks
|
||||
add x4, x0, #256                // x4 -> same 16 bytes, one row (256 bytes) further on
|
||||
2: ld1 {v1.16b}, [x4]           // load the 16 bytes of the next row
|
||||
umin v0.16b, v0.16b, v1.16b     // per-byte unsigned minimum
|
||||
add x4, x4, #256                // step to the following row
|
||||
subs w3, w3, #1                 // one row consumed; sets flags for b.gt
|
||||
b.gt 2b                         // repeat while rows remain
|
||||
st1 {v0.16b}, [x0], #16         // store the minima over the first row, advance exp by 16
|
||||
subs w2, w2, #16                // 16 coefficients done; sets flags for b.gt
|
||||
b.gt 1b                         // repeat while coefficients remain
|
||||
3: ret
|
||||
endfunc
|
||||
|
||||
function ff_float_to_fixed24_neon, export=1
|
||||
1: ld1 {v0.4s, v1.4s}, [x1], #32
|
||||
fcvtzs v0.4s, v0.4s, #24
|
||||
|
@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Institute of Software Chinese Academy of Sciences (ISCAS).
|
||||
* Copyright (c) 2024 Geoff Hill <geoff@geoffhill.org>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
@ -27,6 +28,14 @@
|
||||
|
||||
#include "checkasm.h"
|
||||
|
||||
/*
 * Fill the first len elements of buf with values from rnd(), each truncated
 * to uint8_t.
 *
 * Both parameters are parenthesized in the expansion so that expression
 * arguments (e.g. `ptr + off`, `cond ? a : b`) keep their intended grouping.
 * The expansion declares a local `int i`, so the arguments must not refer to
 * a caller variable named `i`.
 */
#define randomize_exp(buf, len)        \
    do {                               \
        int i;                         \
        for (i = 0; i < (len); i++) {  \
            (buf)[i] = (uint8_t)rnd(); \
        }                              \
    } while (0)
|
||||
|
||||
#define randomize_float(buf, len) \
|
||||
do { \
|
||||
int i; \
|
||||
@ -36,6 +45,38 @@
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/*
 * Exercise c->ac3_exponent_min once per reuse-block count in
 * [0, MAX_CTXT): run call_ref and call_new on identical random exponent
 * buffers, fail if the resulting buffers differ, then benchmark the new
 * implementation.
 */
static void check_ac3_exponent_min(AC3DSPContext *c) {
#define MAX_COEFS 256
#define MAX_CTXT 6
#define EXP_SIZE (MAX_CTXT * MAX_COEFS)

    LOCAL_ALIGNED_16(uint8_t, seed, [EXP_SIZE]);
    LOCAL_ALIGNED_16(uint8_t, expect, [EXP_SIZE]);
    LOCAL_ALIGNED_16(uint8_t, actual, [EXP_SIZE]);
    int reuse;

    declare_func(void, uint8_t *, int, int);

    for (reuse = 0; reuse < MAX_CTXT; reuse++) {
        /* Nothing to test for this variant. */
        if (!check_func(c->ac3_exponent_min, "ac3_exponent_min_reuse%d", reuse))
            continue;

        /* Both implementations work in place, so give each its own copy
         * of the same random input. */
        randomize_exp(seed, EXP_SIZE);
        memcpy(expect, seed, EXP_SIZE);
        memcpy(actual, seed, EXP_SIZE);

        call_ref(expect, reuse, MAX_COEFS);
        call_new(actual, reuse, MAX_COEFS);

        /* Compare the whole buffer, not just the first row. */
        if (memcmp(expect, actual, EXP_SIZE))
            fail();

        bench_new(actual, reuse, MAX_COEFS);
    }

    report("ac3_exponent_min");
}
|
||||
|
||||
static void check_float_to_fixed24(AC3DSPContext *c) {
|
||||
#define BUF_SIZE 1024
|
||||
LOCAL_ALIGNED_32(float, src, [BUF_SIZE]);
|
||||
@ -66,5 +107,6 @@ void checkasm_check_ac3dsp(void)
|
||||
AC3DSPContext c;
|
||||
ff_ac3dsp_init(&c);
|
||||
|
||||
check_ac3_exponent_min(&c);
|
||||
check_float_to_fixed24(&c);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user