diff --git a/libavcodec/x86/ac3dsp_init.c b/libavcodec/x86/ac3dsp_init.c index 23f0162558..89044f4088 100644 --- a/libavcodec/x86/ac3dsp_init.c +++ b/libavcodec/x86/ac3dsp_init.c @@ -223,16 +223,19 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) c->float_to_fixed24 = ff_float_to_fixed24_sse2; c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2; c->extract_exponents = ff_ac3_extract_exponents_sse2; - if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW)) { - c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2; - c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2; - } if (bit_exact) { c->apply_window_int16 = ff_apply_window_int16_sse2; - } else if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW)) { + } + } + + if (EXTERNAL_SSE2_FAST(cpu_flags)) { + c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2; + c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2; + if (!bit_exact) { c->apply_window_int16 = ff_apply_window_int16_round_sse2; } } + if (EXTERNAL_SSSE3(cpu_flags)) { c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3; if (cpu_flags & AV_CPU_FLAG_ATOM) { diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c index 541ebb2a04..43e150c3c5 100644 --- a/libavcodec/x86/h264_qpel.c +++ b/libavcodec/x86/h264_qpel.c @@ -566,11 +566,6 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) } if (EXTERNAL_SSE2(cpu_flags)) { - if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) { - // these functions are slower than mmx on AMD, but faster on Intel - H264_QPEL_FUNCS(0, 0, sse2); - } - if (!high_bit_depth) { H264_QPEL_FUNCS(0, 1, sse2); H264_QPEL_FUNCS(0, 2, sse2); @@ -597,6 +592,12 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) } } + if (EXTERNAL_SSE2_FAST(cpu_flags)) { + if (!high_bit_depth) { + H264_QPEL_FUNCS(0, 0, sse2); + } + } + if (EXTERNAL_SSSE3(cpu_flags)) { if (!high_bit_depth) { H264_QPEL_FUNCS(1, 0, ssse3); diff --git a/libavcodec/x86/lpc.c b/libavcodec/x86/lpc.c index ea5d2eab56..e8cce42af4 100644 --- a/libavcodec/x86/lpc.c +++ b/libavcodec/x86/lpc.c @@ -152,7 +152,7 @@ av_cold void ff_lpc_init_x86(LPCContext *c) #if HAVE_SSE2_INLINE int cpu_flags = av_get_cpu_flags(); - if (INLINE_SSE2(cpu_flags) && (cpu_flags & AV_CPU_FLAG_SSE2SLOW)) { + if (INLINE_SSE2_SLOW(cpu_flags)) { c->lpc_apply_welch_window = lpc_apply_welch_window_sse2; c->lpc_compute_autocorr = lpc_compute_autocorr_sse2; } diff --git a/libavcodec/x86/vp8dsp_init.c b/libavcodec/x86/vp8dsp_init.c index 8f6d43ce16..3e84bed424 100644 --- a/libavcodec/x86/vp8dsp_init.c +++ b/libavcodec/x86/vp8dsp_init.c @@ -346,7 +346,7 @@ av_cold void ff_vp78dsp_init_x86(VP8DSPContext *c) c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse; } - if (EXTERNAL_SSE2(cpu_flags) && (cpu_flags & AV_CPU_FLAG_SSE2SLOW)) { + if (EXTERNAL_SSE2_SLOW(cpu_flags)) { VP8_LUMA_MC_FUNC(0, 16, sse2); VP8_MC_FUNC(1, 8, sse2); VP8_BILINEAR_MC_FUNC(0, 16, sse2); @@ -416,7 +416,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext *c) c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse; } - if (EXTERNAL_SSE2(cpu_flags) && (cpu_flags & AV_CPU_FLAG_SSE2SLOW)) { + if (EXTERNAL_SSE2_SLOW(cpu_flags)) { c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;