diff --git a/Changelog b/Changelog index 6a520d21d7..4687cb872e 100644 --- a/Changelog +++ b/Changelog @@ -4,6 +4,7 @@ releases are sorted from youngest to oldest. version : - VDPAU hardware acceleration through normal hwaccel - SRTP support +- Error diffusion dither in Swscale version 1.1: diff --git a/libswscale/output.c b/libswscale/output.c index 8c200685a9..d2244c21cc 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -333,6 +333,7 @@ yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter, const uint8_t * const d128=dither_8x8_220[y&7]; int i; unsigned acc = 0; + int err = 0; for (i = 0; i < dstW; i += 2) { int j; @@ -349,12 +350,25 @@ yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter, Y1 = av_clip_uint8(Y1); Y2 = av_clip_uint8(Y2); } - accumulate_bit(acc, Y1 + d128[(i + 0) & 7]); - accumulate_bit(acc, Y2 + d128[(i + 1) & 7]); + if (c->flags & SWS_ERROR_DIFFUSION) { + Y1 += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2])>>4; + c->dither_error[0][i] = err; + acc = 2*acc + (Y1 >= 128); + Y1 -= 220*(acc&1); + + err = Y2 + ((7*Y1 + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3])>>4); + c->dither_error[0][i+1] = Y1; + acc = 2*acc + (err >= 128); + err -= 220*(acc&1); + } else { + accumulate_bit(acc, Y1 + d128[(i + 0) & 7]); + accumulate_bit(acc, Y2 + d128[(i + 1) & 7]); + } if ((i & 7) == 6) { output_pixel(*dest++, acc); } } + c->dither_error[0][i] = err; if (i & 6) { output_pixel(*dest, acc); @@ -373,6 +387,29 @@ yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2], int yalpha1 = 4096 - yalpha; int i; + if (c->flags & SWS_ERROR_DIFFUSION) { + int err = 0; + int acc = 0; + for (i = 0; i < dstW; i +=2) { + int Y; + + Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19; + Y += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2])>>4; + c->dither_error[0][i] = err; + acc = 2*acc + (Y >= 128); + Y -= 220*(acc&1); + + 
err = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19; + err += (7*Y + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3])>>4; + c->dither_error[0][i+1] = Y; + acc = 2*acc + (err >= 128); + err -= 220*(acc&1); + + if ((i & 7) == 6) + output_pixel(*dest++, acc); + } + c->dither_error[0][i] = err; + } else { for (i = 0; i < dstW; i += 8) { int Y, acc = 0; @@ -395,6 +432,7 @@ yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2], output_pixel(*dest++, acc); } + } } static av_always_inline void @@ -406,9 +444,31 @@ yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0, const uint8_t * const d128 = dither_8x8_220[y & 7]; int i; + if (c->flags & SWS_ERROR_DIFFUSION) { + int err = 0; + int acc = 0; + for (i = 0; i < dstW; i +=2) { + int Y; + + Y = ((buf0[i + 0] + 64) >> 7); + Y += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2])>>4; + c->dither_error[0][i] = err; + acc = 2*acc + (Y >= 128); + Y -= 220*(acc&1); + + err = ((buf0[i + 1] + 64) >> 7); + err += (7*Y + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3])>>4; + c->dither_error[0][i+1] = Y; + acc = 2*acc + (err >= 128); + err -= 220*(acc&1); + + if ((i & 7) == 6) + output_pixel(*dest++, acc); + } + c->dither_error[0][i] = err; + } else { for (i = 0; i < dstW; i += 8) { int acc = 0; - accumulate_bit(acc, ((buf0[i + 0] + 64) >> 7) + d128[0]); accumulate_bit(acc, ((buf0[i + 1] + 64) >> 7) + d128[1]); accumulate_bit(acc, ((buf0[i + 2] + 64) >> 7) + d128[2]); @@ -420,6 +480,7 @@ yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0, output_pixel(*dest++, acc); } + } } #undef output_pixel diff --git a/libswscale/swscale.c b/libswscale/swscale.c index 2c9e3da86e..dbe61c7a97 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -910,6 +910,11 @@ int attribute_align_arg sws_scale(struct SwsContext *c, src2[0] = base; } + if (!srcSliceY && (c->flags & SWS_ERROR_DIFFUSION)) + for (i = 0; i < 4; i++) 
+ memset(c->dither_error[i], 0, sizeof(c->dither_error[0][0]) * (c->dstW+3)); + + // copy strides, so they can safely be modified if (c->sliceDir == 1) { // slices go from top to bottom diff --git a/libswscale/swscale.h b/libswscale/swscale.h index ac789412a1..ace7c01839 100644 --- a/libswscale/swscale.h +++ b/libswscale/swscale.h @@ -80,6 +80,7 @@ const char *swscale_license(void); //input subsampling info #define SWS_FULL_CHR_H_INP 0x4000 #define SWS_DIRECT_BGR 0x8000 +#define SWS_ERROR_DIFFUSION 0x20000 #define SWS_ACCURATE_RND 0x40000 #define SWS_BITEXACT 0x80000 diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index 6a942d6799..d1f9539246 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -327,6 +327,8 @@ typedef struct SwsContext { int table_gV[256 + 2*YUVRGB_TABLE_HEADROOM]; uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM]; + int *dither_error[4]; + //Colorspace stuff int contrast, brightness, saturation; // for sws_getColorspaceDetails int srcColorspaceTable[4]; diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c index f35d1ba352..01396c2217 100644 --- a/libswscale/swscale_unscaled.c +++ b/libswscale/swscale_unscaled.c @@ -936,7 +936,7 @@ void ff_get_unscaled_swscale(SwsContext *c) /* yuv2bgr */ if ((srcFormat == AV_PIX_FMT_YUV420P || srcFormat == AV_PIX_FMT_YUV422P || srcFormat == AV_PIX_FMT_YUVA420P) && isAnyRGB(dstFormat) && - !(flags & SWS_ACCURATE_RND) && !(dstH & 1)) { + !(flags & (SWS_ACCURATE_RND|SWS_ERROR_DIFFUSION)) && !(dstH & 1)) { c->swScale = ff_yuv2rgb_get_func_ptr(c); } diff --git a/libswscale/utils.c b/libswscale/utils.c index 89bf0da59b..53f4fb8121 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -1247,6 +1247,9 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, c->vChrFilterPos[chrI]; } + for (i = 0; i < 4; i++) + FF_ALLOCZ_OR_GOTO(c, c->dither_error[i], (c->dstW+3) * sizeof(int), fail); /* dstW+3, not dstW+2: the error-diffusion kernels read dither_error[0][i+3] with i stepping by 2 up to dstW-1 when dstW is odd */ + /* Allocate pixbufs (we use dynamic 
allocation because otherwise we would * need to allocate several megabytes to handle all possible cases) */ FF_ALLOC_OR_GOTO(c, c->lumPixBuf, c->vLumBufSize * 3 * sizeof(int16_t *), fail); @@ -1740,6 +1743,9 @@ void sws_freeContext(SwsContext *c) av_freep(&c->alpPixBuf); } + for (i = 0; i < 4; i++) + av_freep(&c->dither_error[i]); + av_freep(&c->vLumFilter); av_freep(&c->vChrFilter); av_freep(&c->hLumFilter);