librempeg/libavfilter/gblur.h
Ruiling Song 83f9da7768 avfilter/vf_gblur: add x86 SIMD optimizations
The horizontal pass get ~2x performance with the patch
under single thread.

Tested overall performance using the command(avx2 enabled):
./ffmpeg -i 1080p.mp4 -vf gblur -f null /dev/null
./ffmpeg -i 1080p.mp4 -vf gblur=threads=1 -f null /dev/null
For single thread, the fps improves from 43 to 60, about 40%.
For multi-thread, the fps improves from 110 to 130, about 20%.

Signed-off-by: Ruiling Song <ruiling.song@intel.com>
2019-06-12 08:53:11 +08:00

56 lines
1.9 KiB
C

/*
* Copyright (c) 2011 Pascal Getreuer
* Copyright (c) 2016 Paul B Mahol
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef AVFILTER_GBLUR_H
#define AVFILTER_GBLUR_H
#include "avfilter.h"
typedef struct GBlurContext {
const AVClass *class;
float sigma;
float sigmaV;
int steps;
int planes;
int depth;
int planewidth[4];
int planeheight[4];
float *buffer;
float boundaryscale;
float boundaryscaleV;
float postscale;
float postscaleV;
float nu;
float nuV;
int nb_planes;
void (*horiz_slice)(float *buffer, int width, int height, int steps, float nu, float bscale);
} GBlurContext;
void ff_gblur_init(GBlurContext *s);
void ff_gblur_init_x86(GBlurContext *s);
#endif