mirror of
https://github.com/librempeg/librempeg
synced 2024-11-23 03:28:27 +00:00
avcodec/x86/idctdsp: Remove obsolete MMX(EXT) functions
x64 always has MMX, MMXEXT, SSE and SSE2, which means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions on x64 (unless one e.g. explicitly disables SSE2). Given that the only systems that benefit from these functions are truly ancient 32-bit x86s, they are removed. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
parent
b2437a45af
commit
bfb28b5ce8
@ -65,9 +65,6 @@ static const struct algo fdct_tab_arch[] = {
|
||||
};
|
||||
|
||||
static const struct algo idct_tab_arch[] = {
|
||||
#if HAVE_MMX_EXTERNAL
|
||||
{ "SIMPLE-MMX", ff_simple_idct_mmx, FF_IDCT_PERM_SIMPLE, AV_CPU_FLAG_MMX },
|
||||
#endif
|
||||
#if CONFIG_MPEG4_DECODER && HAVE_X86ASM
|
||||
#if HAVE_SSE2_EXTERNAL
|
||||
{ "XVID-SSE2", ff_xvid_idct_sse2, FF_IDCT_PERM_SSE2, AV_CPU_FLAG_SSE2, 1 },
|
||||
|
@ -37,47 +37,24 @@ SECTION .text
|
||||
%macro PUT_SIGNED_PIXELS_CLAMPED_HALF 1
|
||||
mova m1, [blockq+mmsize*0+%1]
|
||||
mova m2, [blockq+mmsize*2+%1]
|
||||
%if mmsize == 8
|
||||
mova m3, [blockq+mmsize*4+%1]
|
||||
mova m4, [blockq+mmsize*6+%1]
|
||||
%endif
|
||||
packsswb m1, [blockq+mmsize*1+%1]
|
||||
packsswb m2, [blockq+mmsize*3+%1]
|
||||
%if mmsize == 8
|
||||
packsswb m3, [blockq+mmsize*5+%1]
|
||||
packsswb m4, [blockq+mmsize*7+%1]
|
||||
%endif
|
||||
paddb m1, m0
|
||||
paddb m2, m0
|
||||
%if mmsize == 8
|
||||
paddb m3, m0
|
||||
paddb m4, m0
|
||||
movq [pixelsq+lsizeq*0], m1
|
||||
movq [pixelsq+lsizeq*1], m2
|
||||
movq [pixelsq+lsizeq*2], m3
|
||||
movq [pixelsq+lsize3q ], m4
|
||||
%else
|
||||
movq [pixelsq+lsizeq*0], m1
|
||||
movhps [pixelsq+lsizeq*1], m1
|
||||
movq [pixelsq+lsizeq*2], m2
|
||||
movhps [pixelsq+lsize3q ], m2
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
%macro PUT_SIGNED_PIXELS_CLAMPED 1
|
||||
cglobal put_signed_pixels_clamped, 3, 4, %1, block, pixels, lsize, lsize3
|
||||
INIT_XMM sse2
|
||||
cglobal put_signed_pixels_clamped, 3, 4, 3, block, pixels, lsize, lsize3
|
||||
mova m0, [pb_80]
|
||||
lea lsize3q, [lsizeq*3]
|
||||
PUT_SIGNED_PIXELS_CLAMPED_HALF 0
|
||||
lea pixelsq, [pixelsq+lsizeq*4]
|
||||
PUT_SIGNED_PIXELS_CLAMPED_HALF 64
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
INIT_MMX mmx
|
||||
PUT_SIGNED_PIXELS_CLAMPED 0
|
||||
INIT_XMM sse2
|
||||
PUT_SIGNED_PIXELS_CLAMPED 3
|
||||
|
||||
;--------------------------------------------------------------------------
|
||||
; void ff_put_pixels_clamped(const int16_t *block, uint8_t *pixels,
|
||||
@ -87,40 +64,21 @@ PUT_SIGNED_PIXELS_CLAMPED 3
|
||||
%macro PUT_PIXELS_CLAMPED_HALF 1
|
||||
mova m0, [blockq+mmsize*0+%1]
|
||||
mova m1, [blockq+mmsize*2+%1]
|
||||
%if mmsize == 8
|
||||
mova m2, [blockq+mmsize*4+%1]
|
||||
mova m3, [blockq+mmsize*6+%1]
|
||||
%endif
|
||||
packuswb m0, [blockq+mmsize*1+%1]
|
||||
packuswb m1, [blockq+mmsize*3+%1]
|
||||
%if mmsize == 8
|
||||
packuswb m2, [blockq+mmsize*5+%1]
|
||||
packuswb m3, [blockq+mmsize*7+%1]
|
||||
movq [pixelsq], m0
|
||||
movq [lsizeq+pixelsq], m1
|
||||
movq [2*lsizeq+pixelsq], m2
|
||||
movq [lsize3q+pixelsq], m3
|
||||
%else
|
||||
movq [pixelsq], m0
|
||||
movhps [lsizeq+pixelsq], m0
|
||||
movq [2*lsizeq+pixelsq], m1
|
||||
movhps [lsize3q+pixelsq], m1
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
%macro PUT_PIXELS_CLAMPED 0
|
||||
INIT_XMM sse2
|
||||
cglobal put_pixels_clamped, 3, 4, 2, block, pixels, lsize, lsize3
|
||||
lea lsize3q, [lsizeq*3]
|
||||
PUT_PIXELS_CLAMPED_HALF 0
|
||||
lea pixelsq, [pixelsq+lsizeq*4]
|
||||
PUT_PIXELS_CLAMPED_HALF 64
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
INIT_MMX mmx
|
||||
PUT_PIXELS_CLAMPED
|
||||
INIT_XMM sse2
|
||||
PUT_PIXELS_CLAMPED
|
||||
|
||||
;--------------------------------------------------------------------------
|
||||
; void ff_add_pixels_clamped(const int16_t *block, uint8_t *pixels,
|
||||
@ -130,41 +88,18 @@ PUT_PIXELS_CLAMPED
|
||||
%macro ADD_PIXELS_CLAMPED 1
|
||||
mova m0, [blockq+mmsize*0+%1]
|
||||
mova m1, [blockq+mmsize*1+%1]
|
||||
%if mmsize == 8
|
||||
mova m5, [blockq+mmsize*2+%1]
|
||||
mova m6, [blockq+mmsize*3+%1]
|
||||
%endif
|
||||
movq m2, [pixelsq]
|
||||
movq m3, [pixelsq+lsizeq]
|
||||
%if mmsize == 8
|
||||
mova m7, m2
|
||||
punpcklbw m2, m4
|
||||
punpckhbw m7, m4
|
||||
paddsw m0, m2
|
||||
paddsw m1, m7
|
||||
mova m7, m3
|
||||
punpcklbw m3, m4
|
||||
punpckhbw m7, m4
|
||||
paddsw m5, m3
|
||||
paddsw m6, m7
|
||||
%else
|
||||
punpcklbw m2, m4
|
||||
punpcklbw m3, m4
|
||||
paddsw m0, m2
|
||||
paddsw m1, m3
|
||||
%endif
|
||||
packuswb m0, m1
|
||||
%if mmsize == 8
|
||||
packuswb m5, m6
|
||||
movq [pixelsq], m0
|
||||
movq [pixelsq+lsizeq], m5
|
||||
%else
|
||||
movq [pixelsq], m0
|
||||
movhps [pixelsq+lsizeq], m0
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
%macro ADD_PIXELS_CLAMPED 0
|
||||
INIT_XMM sse2
|
||||
cglobal add_pixels_clamped, 3, 3, 5, block, pixels, lsize
|
||||
pxor m4, m4
|
||||
ADD_PIXELS_CLAMPED 0
|
||||
@ -175,9 +110,3 @@ cglobal add_pixels_clamped, 3, 3, 5, block, pixels, lsize
|
||||
lea pixelsq, [pixelsq+lsizeq*2]
|
||||
ADD_PIXELS_CLAMPED 96
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
INIT_MMX mmx
|
||||
ADD_PIXELS_CLAMPED
|
||||
INIT_XMM sse2
|
||||
ADD_PIXELS_CLAMPED
|
||||
|
@ -22,16 +22,10 @@
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
|
||||
ptrdiff_t line_size);
|
||||
void ff_add_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels,
|
||||
ptrdiff_t line_size);
|
||||
void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
|
||||
ptrdiff_t line_size);
|
||||
void ff_put_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels,
|
||||
ptrdiff_t line_size);
|
||||
void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
|
||||
ptrdiff_t line_size);
|
||||
void ff_put_signed_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels,
|
||||
ptrdiff_t line_size);
|
||||
|
||||
|
@ -63,28 +63,24 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
#if ARCH_X86_32
|
||||
if (EXTERNAL_MMX(cpu_flags)) {
|
||||
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
|
||||
c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
|
||||
c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
|
||||
|
||||
if (!high_bit_depth &&
|
||||
avctx->lowres == 0 &&
|
||||
(avctx->idct_algo == FF_IDCT_AUTO ||
|
||||
avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
|
||||
avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
|
||||
c->idct_put = ff_simple_idct_put_mmx;
|
||||
c->idct_add = ff_simple_idct_add_mmx;
|
||||
c->idct = ff_simple_idct_mmx;
|
||||
c->perm_type = FF_IDCT_PERM_SIMPLE;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2;
|
||||
c->put_pixels_clamped = ff_put_pixels_clamped_sse2;
|
||||
c->add_pixels_clamped = ff_add_pixels_clamped_sse2;
|
||||
|
||||
#if ARCH_X86_32
|
||||
if (!high_bit_depth &&
|
||||
avctx->lowres == 0 &&
|
||||
(avctx->idct_algo == FF_IDCT_AUTO ||
|
||||
@ -94,6 +90,7 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
|
||||
c->idct_add = ff_simple_idct_add_sse2;
|
||||
c->perm_type = FF_IDCT_PERM_SIMPLE;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (ARCH_X86_64 &&
|
||||
!high_bit_depth &&
|
||||
|
@ -25,6 +25,7 @@
|
||||
|
||||
%include "libavutil/x86/x86util.asm"
|
||||
|
||||
%if ARCH_X86_32
|
||||
SECTION_RODATA
|
||||
|
||||
cextern pb_80
|
||||
@ -846,26 +847,6 @@ cglobal simple_idct, 1, 2, 8, 128, block, t0
|
||||
IDCT
|
||||
RET
|
||||
|
||||
cglobal simple_idct_put, 3, 5, 8, 128, pixels, lsize, block, lsize3, t0
|
||||
IDCT
|
||||
lea lsize3q, [lsizeq*3]
|
||||
PUT_PIXELS_CLAMPED_HALF 0
|
||||
lea pixelsq, [pixelsq+lsizeq*4]
|
||||
PUT_PIXELS_CLAMPED_HALF 64
|
||||
RET
|
||||
|
||||
cglobal simple_idct_add, 3, 4, 8, 128, pixels, lsize, block, t0
|
||||
IDCT
|
||||
pxor m4, m4
|
||||
ADD_PIXELS_CLAMPED 0
|
||||
lea pixelsq, [pixelsq+lsizeq*2]
|
||||
ADD_PIXELS_CLAMPED 32
|
||||
lea pixelsq, [pixelsq+lsizeq*2]
|
||||
ADD_PIXELS_CLAMPED 64
|
||||
lea pixelsq, [pixelsq+lsizeq*2]
|
||||
ADD_PIXELS_CLAMPED 96
|
||||
RET
|
||||
|
||||
INIT_XMM sse2
|
||||
|
||||
cglobal simple_idct_put, 3, 5, 8, 128, pixels, lsize, block, lsize3, t0
|
||||
@ -887,3 +868,4 @@ cglobal simple_idct_add, 3, 4, 8, 128, pixels, lsize, block, t0
|
||||
lea pixelsq, [pixelsq+lsizeq*2]
|
||||
ADD_PIXELS_CLAMPED 96
|
||||
RET
|
||||
%endif
|
||||
|
Loading…
Reference in New Issue
Block a user