00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "libswresample/swresample_internal.h"
00022 #include "libswresample/audioconvert.h"
00023
/*
 * Prototype generators for the SIMD conversion routines referenced below.
 * The routines themselves are not defined in this file.
 *
 * PROTO expands to a single declaration of the form
 *     void ff<pre>_<in>_to_<out>_a_<cap>(uint8_t **dst, const uint8_t **src, int len);
 * PROTO2 repeats PROTO for the input types int16, int32 and float,
 * PROTO3 repeats PROTO2 for the output types int16, int32 and float, and
 * PROTO4 repeats PROTO3 for the suffixes mmx, sse, sse2, ssse3, sse4 and avx.
 * Every combination is declared, whether or not this file refers to it.
 */
#define PROTO(prefix, src_type, dst_type, isa) \
    void ff ## prefix ## _ ## src_type ## _to_ ## dst_type ## _a_ ## isa( \
        uint8_t **dst, const uint8_t **src, int len);

#define PROTO2(prefix, dst_type, isa) \
    PROTO(prefix, int16, dst_type, isa) \
    PROTO(prefix, int32, dst_type, isa) \
    PROTO(prefix, float, dst_type, isa)

#define PROTO3(prefix, isa) \
    PROTO2(prefix, int16, isa) \
    PROTO2(prefix, int32, isa) \
    PROTO2(prefix, float, isa)

#define PROTO4(prefix) \
    PROTO3(prefix, mmx)   \
    PROTO3(prefix, sse)   \
    PROTO3(prefix, sse2)  \
    PROTO3(prefix, ssse3) \
    PROTO3(prefix, sse4)  \
    PROTO3(prefix, avx)

/* An empty prefix yields the plain ff_<in>_to_<out>_a_<cap> names. */
PROTO4()
PROTO4(_pack_2ch)
PROTO4(_pack_6ch)
PROTO4(_unpack_2ch)
00032
00033 av_cold void swri_audio_convert_init_x86(struct AudioConvert *ac,
00034 enum AVSampleFormat out_fmt,
00035 enum AVSampleFormat in_fmt,
00036 int channels){
00037 int mm_flags = av_get_cpu_flags();
00038
00039 ac->simd_f= NULL;
00040
00041
00042
00043 #define MULTI_CAPS_FUNC(flag, cap) \
00044 if (mm_flags & flag) {\
00045 if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S16 || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S16P)\
00046 ac->simd_f = ff_int16_to_int32_a_ ## cap;\
00047 if( out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_S32P)\
00048 ac->simd_f = ff_int32_to_int16_a_ ## cap;\
00049 }
00050
00051 MULTI_CAPS_FUNC(AV_CPU_FLAG_MMX, mmx)
00052 MULTI_CAPS_FUNC(AV_CPU_FLAG_SSE2, sse2)
00053
00054 if(mm_flags & AV_CPU_FLAG_MMX) {
00055 if(channels == 6) {
00056 if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
00057 ac->simd_f = ff_pack_6ch_float_to_float_a_mmx;
00058 }
00059 }
00060
00061 if(mm_flags & AV_CPU_FLAG_SSE2) {
00062 if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
00063 ac->simd_f = ff_int32_to_float_a_sse2;
00064 if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S16 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S16P)
00065 ac->simd_f = ff_int16_to_float_a_sse2;
00066 if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLTP)
00067 ac->simd_f = ff_float_to_int32_a_sse2;
00068 if( out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLTP)
00069 ac->simd_f = ff_float_to_int16_a_sse2;
00070
00071 if(channels == 2) {
00072 if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
00073 ac->simd_f = ff_pack_2ch_int32_to_int32_a_sse2;
00074 if( out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_S16P)
00075 ac->simd_f = ff_pack_2ch_int16_to_int16_a_sse2;
00076 if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S16P)
00077 ac->simd_f = ff_pack_2ch_int16_to_int32_a_sse2;
00078 if( out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_S32P)
00079 ac->simd_f = ff_pack_2ch_int32_to_int16_a_sse2;
00080
00081 if( out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S32)
00082 ac->simd_f = ff_unpack_2ch_int32_to_int32_a_sse2;
00083 if( out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_S16)
00084 ac->simd_f = ff_unpack_2ch_int16_to_int16_a_sse2;
00085 if( out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S16)
00086 ac->simd_f = ff_unpack_2ch_int16_to_int32_a_sse2;
00087 if( out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_S32)
00088 ac->simd_f = ff_unpack_2ch_int32_to_int16_a_sse2;
00089
00090 if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32P)
00091 ac->simd_f = ff_pack_2ch_int32_to_float_a_sse2;
00092 if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_FLTP)
00093 ac->simd_f = ff_pack_2ch_float_to_int32_a_sse2;
00094 if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S16P)
00095 ac->simd_f = ff_pack_2ch_int16_to_float_a_sse2;
00096 if( out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLTP)
00097 ac->simd_f = ff_pack_2ch_float_to_int16_a_sse2;
00098 if( out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32)
00099 ac->simd_f = ff_unpack_2ch_int32_to_float_a_sse2;
00100 if( out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLT)
00101 ac->simd_f = ff_unpack_2ch_float_to_int32_a_sse2;
00102 if( out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S16)
00103 ac->simd_f = ff_unpack_2ch_int16_to_float_a_sse2;
00104 if( out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLT)
00105 ac->simd_f = ff_unpack_2ch_float_to_int16_a_sse2;
00106 }
00107 }
00108 if(mm_flags & AV_CPU_FLAG_SSSE3) {
00109 if(channels == 2) {
00110 if( out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_S16)
00111 ac->simd_f = ff_unpack_2ch_int16_to_int16_a_ssse3;
00112 if( out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S16)
00113 ac->simd_f = ff_unpack_2ch_int16_to_int32_a_ssse3;
00114 if( out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S16)
00115 ac->simd_f = ff_unpack_2ch_int16_to_float_a_ssse3;
00116 }
00117 }
00118 if(mm_flags & AV_CPU_FLAG_SSE4) {
00119 if(channels == 6) {
00120 if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
00121 ac->simd_f = ff_pack_6ch_float_to_float_a_sse4;
00122 if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32P)
00123 ac->simd_f = ff_pack_6ch_int32_to_float_a_sse4;
00124 if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_FLTP)
00125 ac->simd_f = ff_pack_6ch_float_to_int32_a_sse4;
00126 }
00127 }
00128 if(HAVE_AVX_EXTERNAL && mm_flags & AV_CPU_FLAG_AVX) {
00129 if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
00130 ac->simd_f = ff_int32_to_float_a_avx;
00131 if(channels == 6) {
00132 if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
00133 ac->simd_f = ff_pack_6ch_float_to_float_a_avx;
00134 if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32P)
00135 ac->simd_f = ff_pack_6ch_int32_to_float_a_avx;
00136 if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_FLTP)
00137 ac->simd_f = ff_pack_6ch_float_to_int32_a_avx;
00138 }
00139 }
00140 }
00141
00142 #define D(type, simd) \
00143 mix_1_1_func_type ff_mix_1_1_a_## type ## _ ## simd;\
00144 mix_2_1_func_type ff_mix_2_1_a_## type ## _ ## simd;
00145
00146 D(float, sse)
00147 D(float, avx)
00148 D(int16, mmx)
00149 D(int16, sse2)
00150
00151
00152 av_cold void swri_rematrix_init_x86(struct SwrContext *s){
00153 int mm_flags = av_get_cpu_flags();
00154 int nb_in = av_get_channel_layout_nb_channels(s->in_ch_layout);
00155 int nb_out = av_get_channel_layout_nb_channels(s->out_ch_layout);
00156 int num = nb_in * nb_out;
00157 int i,j;
00158
00159 s->mix_1_1_simd = NULL;
00160 s->mix_2_1_simd = NULL;
00161
00162 if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){
00163 if(mm_flags & AV_CPU_FLAG_MMX) {
00164 s->mix_1_1_simd = ff_mix_1_1_a_int16_mmx;
00165 s->mix_2_1_simd = ff_mix_2_1_a_int16_mmx;
00166 }
00167 if(mm_flags & AV_CPU_FLAG_SSE2) {
00168 s->mix_1_1_simd = ff_mix_1_1_a_int16_sse2;
00169 s->mix_2_1_simd = ff_mix_2_1_a_int16_sse2;
00170 }
00171 s->native_simd_matrix = av_mallocz(2 * num * sizeof(int16_t));
00172 for(i=0; i<nb_out; i++){
00173 int sh = 0;
00174 for(j=0; j<nb_in; j++)
00175 sh = FFMAX(sh, FFABS(((int*)s->native_matrix)[i * nb_in + j]));
00176 sh = FFMAX(av_log2(sh) - 14, 0);
00177 for(j=0; j<nb_in; j++) {
00178 ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)+1] = 15 - sh;
00179 ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)] =
00180 ((((int*)s->native_matrix)[i * nb_in + j]) + (1<<sh>>1)) >> sh;
00181 }
00182 }
00183 } else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){
00184 if(mm_flags & AV_CPU_FLAG_SSE) {
00185 s->mix_1_1_simd = ff_mix_1_1_a_float_sse;
00186 s->mix_2_1_simd = ff_mix_2_1_a_float_sse;
00187 }
00188 if(HAVE_AVX_EXTERNAL && mm_flags & AV_CPU_FLAG_AVX) {
00189 s->mix_1_1_simd = ff_mix_1_1_a_float_avx;
00190 s->mix_2_1_simd = ff_mix_2_1_a_float_avx;
00191 }
00192 s->native_simd_matrix = av_mallocz(num * sizeof(float));
00193 memcpy(s->native_simd_matrix, s->native_matrix, num * sizeof(float));
00194 }
00195 }