/* * software YUV to RGB converter * * Copyright (C) 2001-2007 Michael Niedermayer * Copyright (C) 2009-2010 Konstantin Shishkov * * MMX/MMXEXT template stuff (needed for fast movntq support), * 1,4,8bpp support and context / deglobalize stuff * by Michael Niedermayer (michaelni@gmx.at) * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include "config.h" #include "libswscale/rgb2rgb.h" #include "libswscale/swscale.h" #include "libswscale/swscale_internal.h" #include "libavutil/attributes.h" #include "libavutil/x86/asm.h" #include "libavutil/x86/cpu.h" #include "libavutil/cpu.h" #if HAVE_X86ASM #define YUV2RGB_LOOP(depth) \ h_size = (c->dstW + 7) & ~7; \ if (h_size * depth > FFABS(dstStride[0])) \ h_size -= 8; \ \ vshift = c->srcFormat != AV_PIX_FMT_YUV422P; \ \ for (y = 0; y < srcSliceH; y++) { \ uint8_t *image = dst[0] + (y + srcSliceY) * dstStride[0]; \ const uint8_t *py = src[0] + y * srcStride[0]; \ const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \ const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \ x86_reg index = -h_size / 2; \ extern void ff_yuv_420_rgb24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, const uint8_t *pv_index, const uint64_t *pointer_c_dither, const uint8_t *py_2index); extern void ff_yuv_420_bgr24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, const uint8_t *pv_index, const uint64_t *pointer_c_dither, const uint8_t *py_2index); extern void ff_yuv_420_rgb15_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, const uint8_t *pv_index, const uint64_t *pointer_c_dither, const uint8_t *py_2index); extern void ff_yuv_420_rgb16_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, const uint8_t *pv_index, const uint64_t *pointer_c_dither, const uint8_t *py_2index); extern void ff_yuv_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, const uint8_t *pv_index, const uint64_t *pointer_c_dither, const uint8_t *py_2index); extern void ff_yuv_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, const uint8_t *pv_index, const uint64_t *pointer_c_dither, const uint8_t *py_2index); extern void ff_yuva_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, const uint8_t *pv_index, const uint64_t *pointer_c_dither, const uint8_t *py_2index, const uint8_t *pa_2index); extern void ff_yuva_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, const uint8_t *pv_index, const uint64_t *pointer_c_dither, const uint8_t *py_2index, const uint8_t *pa_2index); #if ARCH_X86_64 extern void ff_yuv_420_gbrp24_ssse3(x86_reg index, uint8_t *image, uint8_t *dst_b, uint8_t *dst_r, const uint8_t *pu_index, const uint8_t *pv_index, const uint64_t *pointer_c_dither, const uint8_t *py_2index); #endif static inline int yuv420_rgb15_ssse3(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { int y, h_size, vshift; YUV2RGB_LOOP(2) c->blueDither = ff_dither8[y & 1]; c->greenDither = ff_dither8[y & 1]; c->redDither = ff_dither8[(y + 1) & 1]; ff_yuv_420_rgb15_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); } return srcSliceH; } static inline int yuv420_rgb16_ssse3(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { int y, h_size, vshift; YUV2RGB_LOOP(2) c->blueDither = ff_dither8[y & 1]; c->greenDither = ff_dither4[y & 1]; c->redDither = ff_dither8[(y + 1) & 1]; ff_yuv_420_rgb16_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); } return srcSliceH; } static inline int yuv420_rgb32_ssse3(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { int y, h_size, vshift; YUV2RGB_LOOP(4) ff_yuv_420_rgb32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); } return srcSliceH; } static inline int yuv420_bgr32_ssse3(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { int y, h_size, vshift; YUV2RGB_LOOP(4) ff_yuv_420_bgr32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); } return srcSliceH; } static inline int yuva420_rgb32_ssse3(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { int y, h_size, vshift; YUV2RGB_LOOP(4) const uint8_t *pa = src[3] + y * srcStride[3]; ff_yuva_420_rgb32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index); } return srcSliceH; } static inline int yuva420_bgr32_ssse3(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { int y, h_size, vshift; YUV2RGB_LOOP(4) const uint8_t *pa = src[3] + y * srcStride[3]; ff_yuva_420_bgr32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index); } return srcSliceH; } static inline int yuv420_rgb24_ssse3(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { int y, h_size, vshift; YUV2RGB_LOOP(3) ff_yuv_420_rgb24_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); } return srcSliceH; } static inline int yuv420_bgr24_ssse3(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { int y, h_size, vshift; YUV2RGB_LOOP(3) ff_yuv_420_bgr24_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); } return srcSliceH; } #if ARCH_X86_64 static inline int yuv420_gbrp_ssse3(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { int y, h_size, vshift; h_size = (c->dstW + 7) & ~7; if (h_size * 3 > FFABS(dstStride[0])) h_size -= 8; vshift = c->srcFormat != AV_PIX_FMT_YUV422P; for (y = 0; y < srcSliceH; y++) { uint8_t *dst_g = dst[0] + (y + srcSliceY) * dstStride[0]; uint8_t *dst_b = dst[1] + (y + srcSliceY) * dstStride[1]; uint8_t *dst_r = dst[2] + (y + srcSliceY) * dstStride[2]; const uint8_t *py = src[0] + y * srcStride[0]; const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; x86_reg index = -h_size / 2; ff_yuv_420_gbrp24_ssse3(index, dst_g, dst_b, dst_r, pu - index, pv - index, &(c->redDither), py - 2 * index); } return srcSliceH; } #endif #endif /* HAVE_X86ASM */ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) { #if HAVE_X86ASM int cpu_flags = av_get_cpu_flags(); if (EXTERNAL_SSSE3(cpu_flags)) { switch (c->dstFormat) { case AV_PIX_FMT_RGB32: if (c->srcFormat == AV_PIX_FMT_YUVA420P) { #if CONFIG_SWSCALE_ALPHA return yuva420_rgb32_ssse3; #endif break; } else return yuv420_rgb32_ssse3; case AV_PIX_FMT_BGR32: if (c->srcFormat == AV_PIX_FMT_YUVA420P) { #if CONFIG_SWSCALE_ALPHA return yuva420_bgr32_ssse3; #endif break; } else return yuv420_bgr32_ssse3; case AV_PIX_FMT_RGB24: return yuv420_rgb24_ssse3; case AV_PIX_FMT_BGR24: return yuv420_bgr24_ssse3; case AV_PIX_FMT_RGB565: return yuv420_rgb16_ssse3; case AV_PIX_FMT_RGB555: return yuv420_rgb15_ssse3; #if ARCH_X86_64 case AV_PIX_FMT_GBRP: return yuv420_gbrp_ssse3; #endif } } #endif /* HAVE_X86ASM */ return NULL; }