avcodec/x86/idctdsp: Remove obsolete MMX(EXT) functions

x64 always has MMX, MMXEXT, SSE and SSE2, which means that
on x64 some functions for MMX, MMXEXT and 3DNow! are always
overridden by other functions (unless one e.g. explicitly
disables SSE2). Given that the only systems that benefit
from these functions are truly ancient 32-bit x86s, they
are removed.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
Andreas Rheinhardt 2022-06-10 20:28:06 +02:00
parent b2437a45af
commit bfb28b5ce8
5 changed files with 10 additions and 111 deletions

View File

@ -65,9 +65,6 @@ static const struct algo fdct_tab_arch[] = {
};
static const struct algo idct_tab_arch[] = {
#if HAVE_MMX_EXTERNAL
{ "SIMPLE-MMX", ff_simple_idct_mmx, FF_IDCT_PERM_SIMPLE, AV_CPU_FLAG_MMX },
#endif
#if CONFIG_MPEG4_DECODER && HAVE_X86ASM
#if HAVE_SSE2_EXTERNAL
{ "XVID-SSE2", ff_xvid_idct_sse2, FF_IDCT_PERM_SSE2, AV_CPU_FLAG_SSE2, 1 },

View File

@ -37,47 +37,24 @@ SECTION .text
%macro PUT_SIGNED_PIXELS_CLAMPED_HALF 1
mova m1, [blockq+mmsize*0+%1]
mova m2, [blockq+mmsize*2+%1]
%if mmsize == 8
mova m3, [blockq+mmsize*4+%1]
mova m4, [blockq+mmsize*6+%1]
%endif
packsswb m1, [blockq+mmsize*1+%1]
packsswb m2, [blockq+mmsize*3+%1]
%if mmsize == 8
packsswb m3, [blockq+mmsize*5+%1]
packsswb m4, [blockq+mmsize*7+%1]
%endif
paddb m1, m0
paddb m2, m0
%if mmsize == 8
paddb m3, m0
paddb m4, m0
movq [pixelsq+lsizeq*0], m1
movq [pixelsq+lsizeq*1], m2
movq [pixelsq+lsizeq*2], m3
movq [pixelsq+lsize3q ], m4
%else
movq [pixelsq+lsizeq*0], m1
movhps [pixelsq+lsizeq*1], m1
movq [pixelsq+lsizeq*2], m2
movhps [pixelsq+lsize3q ], m2
%endif
%endmacro
%macro PUT_SIGNED_PIXELS_CLAMPED 1
cglobal put_signed_pixels_clamped, 3, 4, %1, block, pixels, lsize, lsize3
INIT_XMM sse2
cglobal put_signed_pixels_clamped, 3, 4, 3, block, pixels, lsize, lsize3
mova m0, [pb_80]
lea lsize3q, [lsizeq*3]
PUT_SIGNED_PIXELS_CLAMPED_HALF 0
lea pixelsq, [pixelsq+lsizeq*4]
PUT_SIGNED_PIXELS_CLAMPED_HALF 64
RET
%endmacro
INIT_MMX mmx
PUT_SIGNED_PIXELS_CLAMPED 0
INIT_XMM sse2
PUT_SIGNED_PIXELS_CLAMPED 3
;--------------------------------------------------------------------------
; void ff_put_pixels_clamped(const int16_t *block, uint8_t *pixels,
@ -87,40 +64,21 @@ PUT_SIGNED_PIXELS_CLAMPED 3
%macro PUT_PIXELS_CLAMPED_HALF 1
mova m0, [blockq+mmsize*0+%1]
mova m1, [blockq+mmsize*2+%1]
%if mmsize == 8
mova m2, [blockq+mmsize*4+%1]
mova m3, [blockq+mmsize*6+%1]
%endif
packuswb m0, [blockq+mmsize*1+%1]
packuswb m1, [blockq+mmsize*3+%1]
%if mmsize == 8
packuswb m2, [blockq+mmsize*5+%1]
packuswb m3, [blockq+mmsize*7+%1]
movq [pixelsq], m0
movq [lsizeq+pixelsq], m1
movq [2*lsizeq+pixelsq], m2
movq [lsize3q+pixelsq], m3
%else
movq [pixelsq], m0
movhps [lsizeq+pixelsq], m0
movq [2*lsizeq+pixelsq], m1
movhps [lsize3q+pixelsq], m1
%endif
%endmacro
%macro PUT_PIXELS_CLAMPED 0
INIT_XMM sse2
cglobal put_pixels_clamped, 3, 4, 2, block, pixels, lsize, lsize3
lea lsize3q, [lsizeq*3]
PUT_PIXELS_CLAMPED_HALF 0
lea pixelsq, [pixelsq+lsizeq*4]
PUT_PIXELS_CLAMPED_HALF 64
RET
%endmacro
INIT_MMX mmx
PUT_PIXELS_CLAMPED
INIT_XMM sse2
PUT_PIXELS_CLAMPED
;--------------------------------------------------------------------------
; void ff_add_pixels_clamped(const int16_t *block, uint8_t *pixels,
@ -130,41 +88,18 @@ PUT_PIXELS_CLAMPED
%macro ADD_PIXELS_CLAMPED 1
mova m0, [blockq+mmsize*0+%1]
mova m1, [blockq+mmsize*1+%1]
%if mmsize == 8
mova m5, [blockq+mmsize*2+%1]
mova m6, [blockq+mmsize*3+%1]
%endif
movq m2, [pixelsq]
movq m3, [pixelsq+lsizeq]
%if mmsize == 8
mova m7, m2
punpcklbw m2, m4
punpckhbw m7, m4
paddsw m0, m2
paddsw m1, m7
mova m7, m3
punpcklbw m3, m4
punpckhbw m7, m4
paddsw m5, m3
paddsw m6, m7
%else
punpcklbw m2, m4
punpcklbw m3, m4
paddsw m0, m2
paddsw m1, m3
%endif
packuswb m0, m1
%if mmsize == 8
packuswb m5, m6
movq [pixelsq], m0
movq [pixelsq+lsizeq], m5
%else
movq [pixelsq], m0
movhps [pixelsq+lsizeq], m0
%endif
%endmacro
%macro ADD_PIXELS_CLAMPED 0
INIT_XMM sse2
cglobal add_pixels_clamped, 3, 3, 5, block, pixels, lsize
pxor m4, m4
ADD_PIXELS_CLAMPED 0
@ -175,9 +110,3 @@ cglobal add_pixels_clamped, 3, 3, 5, block, pixels, lsize
lea pixelsq, [pixelsq+lsizeq*2]
ADD_PIXELS_CLAMPED 96
RET
%endmacro
INIT_MMX mmx
ADD_PIXELS_CLAMPED
INIT_XMM sse2
ADD_PIXELS_CLAMPED

View File

@ -22,16 +22,10 @@
#include <stddef.h>
#include <stdint.h>
void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
ptrdiff_t line_size);
void ff_add_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels,
ptrdiff_t line_size);
void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
ptrdiff_t line_size);
void ff_put_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels,
ptrdiff_t line_size);
void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
ptrdiff_t line_size);
void ff_put_signed_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels,
ptrdiff_t line_size);

View File

@ -63,28 +63,24 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
{
int cpu_flags = av_get_cpu_flags();
#if ARCH_X86_32
if (EXTERNAL_MMX(cpu_flags)) {
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
if (!high_bit_depth &&
avctx->lowres == 0 &&
(avctx->idct_algo == FF_IDCT_AUTO ||
avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
c->idct_put = ff_simple_idct_put_mmx;
c->idct_add = ff_simple_idct_add_mmx;
c->idct = ff_simple_idct_mmx;
c->perm_type = FF_IDCT_PERM_SIMPLE;
}
}
#endif
if (EXTERNAL_SSE2(cpu_flags)) {
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2;
c->put_pixels_clamped = ff_put_pixels_clamped_sse2;
c->add_pixels_clamped = ff_add_pixels_clamped_sse2;
#if ARCH_X86_32
if (!high_bit_depth &&
avctx->lowres == 0 &&
(avctx->idct_algo == FF_IDCT_AUTO ||
@ -94,6 +90,7 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
c->idct_add = ff_simple_idct_add_sse2;
c->perm_type = FF_IDCT_PERM_SIMPLE;
}
#endif
if (ARCH_X86_64 &&
!high_bit_depth &&

View File

@ -25,6 +25,7 @@
%include "libavutil/x86/x86util.asm"
%if ARCH_X86_32
SECTION_RODATA
cextern pb_80
@ -846,26 +847,6 @@ cglobal simple_idct, 1, 2, 8, 128, block, t0
IDCT
RET
cglobal simple_idct_put, 3, 5, 8, 128, pixels, lsize, block, lsize3, t0
IDCT
lea lsize3q, [lsizeq*3]
PUT_PIXELS_CLAMPED_HALF 0
lea pixelsq, [pixelsq+lsizeq*4]
PUT_PIXELS_CLAMPED_HALF 64
RET
cglobal simple_idct_add, 3, 4, 8, 128, pixels, lsize, block, t0
IDCT
pxor m4, m4
ADD_PIXELS_CLAMPED 0
lea pixelsq, [pixelsq+lsizeq*2]
ADD_PIXELS_CLAMPED 32
lea pixelsq, [pixelsq+lsizeq*2]
ADD_PIXELS_CLAMPED 64
lea pixelsq, [pixelsq+lsizeq*2]
ADD_PIXELS_CLAMPED 96
RET
INIT_XMM sse2
cglobal simple_idct_put, 3, 5, 8, 128, pixels, lsize, block, lsize3, t0
@ -887,3 +868,4 @@ cglobal simple_idct_add, 3, 4, 8, 128, pixels, lsize, block, t0
lea pixelsq, [pixelsq+lsizeq*2]
ADD_PIXELS_CLAMPED 96
RET
%endif