From 2b1bf296e2ae4abd64fe8e62fa3e1ea5821b2dd7 Mon Sep 17 00:00:00 2001 From: Ramiro Polla Date: Thu, 4 Jul 2024 11:40:18 +0200 Subject: [PATCH] swscale/x86/yuv2rgb: Detemplatize Every function in yuv2rgb_template.c is only compiled exactly once, so detemplatize it. Signed-off-by: Paul B Mahol --- libswscale/x86/yuv2rgb.c | 167 +++++++++++++++++++++++++- libswscale/x86/yuv2rgb_template.c | 188 ------------------------------ 2 files changed, 162 insertions(+), 193 deletions(-) delete mode 100644 libswscale/x86/yuv2rgb_template.c diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index ddc7cca2c8..68e903c6ad 100644 --- a/libswscale/x86/yuv2rgb.c +++ b/libswscale/x86/yuv2rgb.c @@ -1,7 +1,8 @@ /* * software YUV to RGB converter * - * Copyright (C) 2009 Konstantin Shishkov + * Copyright (C) 2001-2007 Michael Niedermayer + * Copyright (C) 2009-2010 Konstantin Shishkov * * MMX/MMXEXT template stuff (needed for fast movntq support), * 1,4,8bpp support and context / deglobalize stuff @@ -39,10 +40,166 @@ #if HAVE_X86ASM -//SSSE3 versions -#undef RENAME -#define RENAME(a) a ## _ssse3 -#include "yuv2rgb_template.c" +#define YUV2RGB_LOOP(depth) \ + h_size = (c->dstW + 7) & ~7; \ + if (h_size * depth > FFABS(dstStride[0])) \ + h_size -= 8; \ + \ + vshift = c->srcFormat != AV_PIX_FMT_YUV422P; \ + \ + for (y = 0; y < srcSliceH; y++) { \ + uint8_t *image = dst[0] + (y + srcSliceY) * dstStride[0]; \ + const uint8_t *py = src[0] + y * srcStride[0]; \ + const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \ + const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \ + x86_reg index = -h_size / 2; \ + +extern void ff_yuv_420_rgb24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_bgr24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); + 
+extern void ff_yuv_420_rgb15_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_rgb16_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuva_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index, const uint8_t *pa_2index); +extern void ff_yuva_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index, const uint8_t *pa_2index); + +static inline int yuv420_rgb15_ssse3(SwsContext *c, const uint8_t *src[], + int srcStride[], + int srcSliceY, int srcSliceH, + uint8_t *dst[], int dstStride[]) +{ + int y, h_size, vshift; + + YUV2RGB_LOOP(2) + + c->blueDither = ff_dither8[y & 1]; + c->greenDither = ff_dither8[y & 1]; + c->redDither = ff_dither8[(y + 1) & 1]; + + ff_yuv_420_rgb15_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); + } + return srcSliceH; +} + +static inline int yuv420_rgb16_ssse3(SwsContext *c, const uint8_t *src[], + int srcStride[], + int srcSliceY, int srcSliceH, + uint8_t *dst[], int dstStride[]) +{ + int y, h_size, vshift; + + YUV2RGB_LOOP(2) + + c->blueDither = ff_dither8[y & 1]; + c->greenDither = ff_dither4[y & 1]; + c->redDither = ff_dither8[(y + 1) & 1]; + + ff_yuv_420_rgb16_ssse3(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index); + } + return srcSliceH; +} + +static inline int yuv420_rgb32_ssse3(SwsContext *c, const uint8_t *src[], + int srcStride[], + int srcSliceY, int srcSliceH, + uint8_t *dst[], int dstStride[]) +{ + int y, h_size, vshift; + + YUV2RGB_LOOP(4) + + ff_yuv_420_rgb32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); + } + return srcSliceH; +} + +static inline int yuv420_bgr32_ssse3(SwsContext *c, const uint8_t *src[], + int srcStride[], + int srcSliceY, int srcSliceH, + uint8_t *dst[], int dstStride[]) +{ + int y, h_size, vshift; + + YUV2RGB_LOOP(4) + + ff_yuv_420_bgr32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); + } + return srcSliceH; +} + +static inline int yuva420_rgb32_ssse3(SwsContext *c, const uint8_t *src[], + int srcStride[], + int srcSliceY, int srcSliceH, + uint8_t *dst[], int dstStride[]) +{ + int y, h_size, vshift; + YUV2RGB_LOOP(4) + + const uint8_t *pa = src[3] + y * srcStride[3]; + ff_yuva_420_rgb32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index); + } + return srcSliceH; +} + +static inline int yuva420_bgr32_ssse3(SwsContext *c, const uint8_t *src[], + int srcStride[], + int srcSliceY, int srcSliceH, + uint8_t *dst[], int dstStride[]) +{ + int y, h_size, vshift; + + YUV2RGB_LOOP(4) + + const uint8_t *pa = src[3] + y * srcStride[3]; + ff_yuva_420_bgr32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index); + } + return srcSliceH; +} + +static inline int yuv420_rgb24_ssse3(SwsContext *c, const uint8_t *src[], + int srcStride[], + int srcSliceY, int srcSliceH, + uint8_t *dst[], int dstStride[]) +{ + int y, h_size, vshift; + + YUV2RGB_LOOP(3) + + ff_yuv_420_rgb24_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); + } + return srcSliceH; +} + +static inline int yuv420_bgr24_ssse3(SwsContext *c, const uint8_t *src[], + int srcStride[], + int srcSliceY, int 
srcSliceH, + uint8_t *dst[], int dstStride[]) +{ + int y, h_size, vshift; + + YUV2RGB_LOOP(3) + + ff_yuv_420_bgr24_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); + } + return srcSliceH; +} #endif /* HAVE_X86ASM */ diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c deleted file mode 100644 index abaf80eec2..0000000000 --- a/libswscale/x86/yuv2rgb_template.c +++ /dev/null @@ -1,188 +0,0 @@ -/* - * software YUV to RGB converter - * - * Copyright (C) 2001-2007 Michael Niedermayer - * (c) 2010 Konstantin Shishkov - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <stdint.h> - -#include "libavutil/x86/asm.h" -#include "libswscale/swscale_internal.h" - -#define YUV2RGB_LOOP(depth) \ - h_size = (c->dstW + 7) & ~7; \ - if (h_size * depth > FFABS(dstStride[0])) \ - h_size -= 8; \ - \ - vshift = c->srcFormat != AV_PIX_FMT_YUV422P; \ - \ - for (y = 0; y < srcSliceH; y++) { \ - uint8_t *image = dst[0] + (y + srcSliceY) * dstStride[0]; \ - const uint8_t *py = src[0] + y * srcStride[0]; \ - const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \ - const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \ - x86_reg index = -h_size / 2; \ - -extern void RENAME(ff_yuv_420_rgb24)(x86_reg index, uint8_t *image, const uint8_t *pu_index, - const uint8_t *pv_index, const uint64_t *pointer_c_dither, - const uint8_t *py_2index); -extern void RENAME(ff_yuv_420_bgr24)(x86_reg index, uint8_t *image, const uint8_t *pu_index, - const uint8_t *pv_index, const uint64_t *pointer_c_dither, - const uint8_t *py_2index); - -extern void RENAME(ff_yuv_420_rgb15)(x86_reg index, uint8_t *image, const uint8_t *pu_index, - const uint8_t *pv_index, const uint64_t *pointer_c_dither, - const uint8_t *py_2index); -extern void RENAME(ff_yuv_420_rgb16)(x86_reg index, uint8_t *image, const uint8_t *pu_index, - const uint8_t *pv_index, const uint64_t *pointer_c_dither, - const uint8_t *py_2index); -extern void RENAME(ff_yuv_420_rgb32)(x86_reg index, uint8_t *image, const uint8_t *pu_index, - const uint8_t *pv_index, const uint64_t *pointer_c_dither, - const uint8_t *py_2index); -extern void RENAME(ff_yuv_420_bgr32)(x86_reg index, uint8_t *image, const uint8_t *pu_index, - const uint8_t *pv_index, const uint64_t *pointer_c_dither, - const uint8_t *py_2index); -extern void RENAME(ff_yuva_420_rgb32)(x86_reg index, uint8_t *image, const 
uint8_t *pu_index, - const uint8_t *pv_index, const uint64_t *pointer_c_dither, - const uint8_t *py_2index, const uint8_t *pa_2index); -extern void RENAME(ff_yuva_420_bgr32)(x86_reg index, uint8_t *image, const uint8_t *pu_index, - const uint8_t *pv_index, const uint64_t *pointer_c_dither, - const uint8_t *py_2index, const uint8_t *pa_2index); - -static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[], - int srcStride[], - int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - int y, h_size, vshift; - - YUV2RGB_LOOP(2) - - c->blueDither = ff_dither8[y & 1]; - c->greenDither = ff_dither8[y & 1]; - c->redDither = ff_dither8[(y + 1) & 1]; - - RENAME(ff_yuv_420_rgb15)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); - } - return srcSliceH; -} - -static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[], - int srcStride[], - int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - int y, h_size, vshift; - - YUV2RGB_LOOP(2) - - c->blueDither = ff_dither8[y & 1]; - c->greenDither = ff_dither4[y & 1]; - c->redDither = ff_dither8[(y + 1) & 1]; - - RENAME(ff_yuv_420_rgb16)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); - } - return srcSliceH; -} - -static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[], - int srcStride[], - int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - int y, h_size, vshift; - - YUV2RGB_LOOP(4) - - RENAME(ff_yuv_420_rgb32)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); - } - return srcSliceH; -} - -static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[], - int srcStride[], - int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - int y, h_size, vshift; - - YUV2RGB_LOOP(4) - - RENAME(ff_yuv_420_bgr32)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); - } - return srcSliceH; -} - -static inline int 
RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[], - int srcStride[], - int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - int y, h_size, vshift; - YUV2RGB_LOOP(4) - - const uint8_t *pa = src[3] + y * srcStride[3]; - RENAME(ff_yuva_420_rgb32)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index); - } - return srcSliceH; -} - -static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[], - int srcStride[], - int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - int y, h_size, vshift; - - YUV2RGB_LOOP(4) - - const uint8_t *pa = src[3] + y * srcStride[3]; - RENAME(ff_yuva_420_bgr32)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index); - } - return srcSliceH; -} - -static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[], - int srcStride[], - int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - int y, h_size, vshift; - - YUV2RGB_LOOP(3) - - RENAME(ff_yuv_420_rgb24)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); - } - return srcSliceH; -} - -static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[], - int srcStride[], - int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - int y, h_size, vshift; - - YUV2RGB_LOOP(3) - - RENAME(ff_yuv_420_bgr24)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); - } - return srcSliceH; -}