mirror of
https://github.com/librempeg/librempeg
synced 2024-11-22 18:49:58 +00:00
avcodec/jpeg2000dsp: add ff_rct_int_{sse2,avx2}
Reviewed-by: Michael Niedermayer <michaelni@gmx.at> Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
parent
7912a6830d
commit
9f815bc2c2
@ -221,6 +221,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
|
||||
if (!comp->f_data)
|
||||
return AVERROR(ENOMEM);
|
||||
} else {
|
||||
csize += FF_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->i_data);
|
||||
comp->f_data = NULL;
|
||||
comp->i_data = av_mallocz_array(csize, sizeof(*comp->i_data));
|
||||
if (!comp->i_data)
|
||||
|
@ -106,3 +106,39 @@ INIT_XMM sse
|
||||
ICT_FLOAT 10
|
||||
INIT_YMM avx
|
||||
ICT_FLOAT 9
|
||||
|
||||
;***************************************************************************
|
||||
; ff_rct_int_<opt>(int32_t *src0, int32_t *src1, int32_t *src2, int csize)
|
||||
;***************************************************************************
|
||||
; RCT_INT: emits ff_rct_int_<opt> for the currently selected instruction set.
; The function rewrites three int32 buffers in place, one vector (mmsize
; bytes) of each per iteration. Per 32-bit lane, with a/b/c the values
; loaded from src0/src1/src2:
;     t    = a - ((b + c) >> 2)
;     src0 = c + t
;     src1 = t
;     src2 = b + t
; NOTE(review): the loop body runs before the bound is tested and always
; handles whole vectors, so it touches memory up to the next mmsize boundary
; past csize elements (and one full vector even when csize is 0). mova is
; presumably the aligned move, so the buffers are assumed to be aligned and
; padded by the caller -- confirm against the allocation code.
%macro RCT_INT 0
|
||||
; 4 arguments, 4 general-purpose registers, 4 vector registers (m0-m3)
cglobal rct_int, 4, 4, 4, src0, src1, src2, csize
|
||||
; element count -> byte count (4 bytes per int32)
shl csized, 2
|
||||
; point each pointer at the end of its buffer ...
add src0q, csizeq
|
||||
add src1q, csizeq
|
||||
add src2q, csizeq
|
||||
; ... and index with a negative offset that counts up towards zero
neg csizeq
|
||||
|
||||
align 16
|
||||
.loop:
|
||||
; m1 = b (src1), m2 = c (src2), m0 = a (src0)
mova m1, [src1q+csizeq]
|
||||
mova m2, [src2q+csizeq]
|
||||
mova m0, [src0q+csizeq]
|
||||
; m3 = (b + c) >> 2, arithmetic (sign-preserving) shift
paddd m3, m1, m2
|
||||
psrad m3, 2
|
||||
; m0 = t = a - m3
psubd m0, m3
|
||||
; m1 = b + t, m2 = c + t
paddd m1, m0
|
||||
paddd m2, m0
|
||||
; store: src1 <- t, src2 <- b + t, src0 <- c + t
mova [src1q+csizeq], m0
|
||||
mova [src2q+csizeq], m1
|
||||
mova [src0q+csizeq], m2
|
||||
; advance by one vector; keep looping while the offset is still negative
add csizeq, mmsize
|
||||
jl .loop
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
||||
; Instantiate the macro once per instruction set: the SSE2 version is always
; assembled, the AVX2 version only when HAVE_AVX2_EXTERNAL is set.
INIT_XMM sse2
|
||||
RCT_INT
|
||||
%if HAVE_AVX2_EXTERNAL
|
||||
INIT_YMM avx2
|
||||
RCT_INT
|
||||
%endif
|
||||
|
@ -26,6 +26,8 @@
|
||||
|
||||
/* Entry points implemented in x86 assembly. ff_jpeg2000dsp_init_x86() installs
 * them into Jpeg2000DSPContext.mct_decode[]: the ict_float variants for
 * FF_DWT97 and the rct_int variants for FF_DWT53. Each takes three buffers
 * and a count, csize. */
void ff_ict_float_sse(void *src0, void *src1, void *src2, int csize);
|
||||
void ff_ict_float_avx(void *src0, void *src1, void *src2, int csize);
|
||||
void ff_rct_int_sse2 (void *src0, void *src1, void *src2, int csize);
|
||||
void ff_rct_int_avx2 (void *src0, void *src1, void *src2, int csize);
|
||||
|
||||
av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c)
|
||||
{
|
||||
@ -34,7 +36,15 @@ av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c)
|
||||
c->mct_decode[FF_DWT97] = ff_ict_float_sse;
|
||||
}
|
||||
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
c->mct_decode[FF_DWT53] = ff_rct_int_sse2;
|
||||
}
|
||||
|
||||
if (EXTERNAL_AVX_FAST(cpu_flags)) {
|
||||
c->mct_decode[FF_DWT97] = ff_ict_float_avx;
|
||||
}
|
||||
|
||||
if (EXTERNAL_AVX2(cpu_flags)) {
|
||||
c->mct_decode[FF_DWT53] = ff_rct_int_avx2;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user