00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00026 #include "libavutil/mathematics.h"
00027 #include "libavutil/lfg.h"
00028 #include "libavutil/log.h"
00029 #include "fft.h"
00030 #if CONFIG_FFT_FLOAT
00031 #include "dct.h"
00032 #include "rdft.h"
00033 #endif
00034 #include <math.h>
00035 #include <unistd.h>
00036 #include <sys/time.h>
00037 #include <stdlib.h>
00038 #include <string.h>
00039
00040 #undef exit
00041
00042
00043
00044 #define MUL16(a,b) ((a) * (b))
00045
00046 #define CMAC(pre, pim, are, aim, bre, bim) \
00047 {\
00048 pre += (MUL16(are, bre) - MUL16(aim, bim));\
00049 pim += (MUL16(are, bim) + MUL16(bre, aim));\
00050 }
00051
00052 #if CONFIG_FFT_FLOAT
00053 # define RANGE 1.0
00054 # define REF_SCALE(x, bits) (x)
00055 # define FMT "%10.6f"
00056 #else
00057 # define RANGE 16384
00058 # define REF_SCALE(x, bits) ((x) / (1<<(bits)))
00059 # define FMT "%6d"
00060 #endif
00061
00062 struct {
00063 float re, im;
00064 } *exptab;
00065
00066 static void fft_ref_init(int nbits, int inverse)
00067 {
00068 int n, i;
00069 double c1, s1, alpha;
00070
00071 n = 1 << nbits;
00072 exptab = av_malloc((n / 2) * sizeof(*exptab));
00073
00074 for (i = 0; i < (n/2); i++) {
00075 alpha = 2 * M_PI * (float)i / (float)n;
00076 c1 = cos(alpha);
00077 s1 = sin(alpha);
00078 if (!inverse)
00079 s1 = -s1;
00080 exptab[i].re = c1;
00081 exptab[i].im = s1;
00082 }
00083 }
00084
00085 static void fft_ref(FFTComplex *tabr, FFTComplex *tab, int nbits)
00086 {
00087 int n, i, j, k, n2;
00088 double tmp_re, tmp_im, s, c;
00089 FFTComplex *q;
00090
00091 n = 1 << nbits;
00092 n2 = n >> 1;
00093 for (i = 0; i < n; i++) {
00094 tmp_re = 0;
00095 tmp_im = 0;
00096 q = tab;
00097 for (j = 0; j < n; j++) {
00098 k = (i * j) & (n - 1);
00099 if (k >= n2) {
00100 c = -exptab[k - n2].re;
00101 s = -exptab[k - n2].im;
00102 } else {
00103 c = exptab[k].re;
00104 s = exptab[k].im;
00105 }
00106 CMAC(tmp_re, tmp_im, c, s, q->re, q->im);
00107 q++;
00108 }
00109 tabr[i].re = REF_SCALE(tmp_re, nbits);
00110 tabr[i].im = REF_SCALE(tmp_im, nbits);
00111 }
00112 }
00113
00114 static void imdct_ref(FFTSample *out, FFTSample *in, int nbits)
00115 {
00116 int n = 1<<nbits;
00117 int k, i, a;
00118 double sum, f;
00119
00120 for (i = 0; i < n; i++) {
00121 sum = 0;
00122 for (k = 0; k < n/2; k++) {
00123 a = (2 * i + 1 + (n / 2)) * (2 * k + 1);
00124 f = cos(M_PI * a / (double)(2 * n));
00125 sum += f * in[k];
00126 }
00127 out[i] = REF_SCALE(-sum, nbits - 2);
00128 }
00129 }
00130
00131
00132 static void mdct_ref(FFTSample *output, FFTSample *input, int nbits)
00133 {
00134 int n = 1<<nbits;
00135 int k, i;
00136 double a, s;
00137
00138
00139 for (k = 0; k < n/2; k++) {
00140 s = 0;
00141 for (i = 0; i < n; i++) {
00142 a = (2*M_PI*(2*i+1+n/2)*(2*k+1) / (4 * n));
00143 s += input[i] * cos(a);
00144 }
00145 output[k] = REF_SCALE(s, nbits - 1);
00146 }
00147 }
00148
00149 #if CONFIG_FFT_FLOAT
00150 static void idct_ref(float *output, float *input, int nbits)
00151 {
00152 int n = 1<<nbits;
00153 int k, i;
00154 double a, s;
00155
00156
00157 for (i = 0; i < n; i++) {
00158 s = 0.5 * input[0];
00159 for (k = 1; k < n; k++) {
00160 a = M_PI*k*(i+0.5) / n;
00161 s += input[k] * cos(a);
00162 }
00163 output[i] = 2 * s / n;
00164 }
00165 }
00166 static void dct_ref(float *output, float *input, int nbits)
00167 {
00168 int n = 1<<nbits;
00169 int k, i;
00170 double a, s;
00171
00172
00173 for (k = 0; k < n; k++) {
00174 s = 0;
00175 for (i = 0; i < n; i++) {
00176 a = M_PI*k*(i+0.5) / n;
00177 s += input[i] * cos(a);
00178 }
00179 output[k] = s;
00180 }
00181 }
00182 #endif
00183
00184
00185 static FFTSample frandom(AVLFG *prng)
00186 {
00187 return (int16_t)av_lfg_get(prng) / 32768.0 * RANGE;
00188 }
00189
00190 static int64_t gettime(void)
00191 {
00192 struct timeval tv;
00193 gettimeofday(&tv,NULL);
00194 return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
00195 }
00196
00197 static int check_diff(FFTSample *tab1, FFTSample *tab2, int n, double scale)
00198 {
00199 int i;
00200 double max= 0;
00201 double error= 0;
00202 int err = 0;
00203
00204 for (i = 0; i < n; i++) {
00205 double e = fabsf(tab1[i] - (tab2[i] / scale)) / RANGE;
00206 if (e >= 1e-3) {
00207 av_log(NULL, AV_LOG_ERROR, "ERROR %5d: "FMT" "FMT"\n",
00208 i, tab1[i], tab2[i]);
00209 err = 1;
00210 }
00211 error+= e*e;
00212 if(e>max) max= e;
00213 }
00214 av_log(NULL, AV_LOG_INFO, "max:%f e:%g\n", max, sqrt(error)/n);
00215 return err;
00216 }
00217
00218
00219 static void help(void)
00220 {
00221 av_log(NULL, AV_LOG_INFO,"usage: fft-test [-h] [-s] [-i] [-n b]\n"
00222 "-h print this help\n"
00223 "-s speed test\n"
00224 "-m (I)MDCT test\n"
00225 "-d (I)DCT test\n"
00226 "-r (I)RDFT test\n"
00227 "-i inverse transform test\n"
00228 "-n b set the transform size to 2^b\n"
00229 "-f x set scale factor for output data of (I)MDCT to x\n"
00230 );
00231 exit(1);
00232 }
00233
00234 enum tf_transform {
00235 TRANSFORM_FFT,
00236 TRANSFORM_MDCT,
00237 TRANSFORM_RDFT,
00238 TRANSFORM_DCT,
00239 };
00240
00241 int main(int argc, char **argv)
00242 {
00243 FFTComplex *tab, *tab1, *tab_ref;
00244 FFTSample *tab2;
00245 int it, i, c;
00246 int do_speed = 0;
00247 int err = 1;
00248 enum tf_transform transform = TRANSFORM_FFT;
00249 int do_inverse = 0;
00250 FFTContext s1, *s = &s1;
00251 FFTContext m1, *m = &m1;
00252 #if CONFIG_FFT_FLOAT
00253 RDFTContext r1, *r = &r1;
00254 DCTContext d1, *d = &d1;
00255 #endif
00256 int fft_nbits, fft_size, fft_size_2;
00257 double scale = 1.0;
00258 AVLFG prng;
00259 av_lfg_init(&prng, 1);
00260
00261 fft_nbits = 9;
00262 for(;;) {
00263 c = getopt(argc, argv, "hsimrdn:f:");
00264 if (c == -1)
00265 break;
00266 switch(c) {
00267 case 'h':
00268 help();
00269 break;
00270 case 's':
00271 do_speed = 1;
00272 break;
00273 case 'i':
00274 do_inverse = 1;
00275 break;
00276 case 'm':
00277 transform = TRANSFORM_MDCT;
00278 break;
00279 case 'r':
00280 transform = TRANSFORM_RDFT;
00281 break;
00282 case 'd':
00283 transform = TRANSFORM_DCT;
00284 break;
00285 case 'n':
00286 fft_nbits = atoi(optarg);
00287 break;
00288 case 'f':
00289 scale = atof(optarg);
00290 break;
00291 }
00292 }
00293
00294 fft_size = 1 << fft_nbits;
00295 fft_size_2 = fft_size >> 1;
00296 tab = av_malloc(fft_size * sizeof(FFTComplex));
00297 tab1 = av_malloc(fft_size * sizeof(FFTComplex));
00298 tab_ref = av_malloc(fft_size * sizeof(FFTComplex));
00299 tab2 = av_malloc(fft_size * sizeof(FFTSample));
00300
00301 switch (transform) {
00302 case TRANSFORM_MDCT:
00303 av_log(NULL, AV_LOG_INFO,"Scale factor is set to %f\n", scale);
00304 if (do_inverse)
00305 av_log(NULL, AV_LOG_INFO,"IMDCT");
00306 else
00307 av_log(NULL, AV_LOG_INFO,"MDCT");
00308 ff_mdct_init(m, fft_nbits, do_inverse, scale);
00309 break;
00310 case TRANSFORM_FFT:
00311 if (do_inverse)
00312 av_log(NULL, AV_LOG_INFO,"IFFT");
00313 else
00314 av_log(NULL, AV_LOG_INFO,"FFT");
00315 ff_fft_init(s, fft_nbits, do_inverse);
00316 fft_ref_init(fft_nbits, do_inverse);
00317 break;
00318 #if CONFIG_FFT_FLOAT
00319 case TRANSFORM_RDFT:
00320 if (do_inverse)
00321 av_log(NULL, AV_LOG_INFO,"IDFT_C2R");
00322 else
00323 av_log(NULL, AV_LOG_INFO,"DFT_R2C");
00324 ff_rdft_init(r, fft_nbits, do_inverse ? IDFT_C2R : DFT_R2C);
00325 fft_ref_init(fft_nbits, do_inverse);
00326 break;
00327 case TRANSFORM_DCT:
00328 if (do_inverse)
00329 av_log(NULL, AV_LOG_INFO,"DCT_III");
00330 else
00331 av_log(NULL, AV_LOG_INFO,"DCT_II");
00332 ff_dct_init(d, fft_nbits, do_inverse ? DCT_III : DCT_II);
00333 break;
00334 #endif
00335 default:
00336 av_log(NULL, AV_LOG_ERROR, "Requested transform not supported\n");
00337 return 1;
00338 }
00339 av_log(NULL, AV_LOG_INFO," %d test\n", fft_size);
00340
00341
00342
00343 for (i = 0; i < fft_size; i++) {
00344 tab1[i].re = frandom(&prng);
00345 tab1[i].im = frandom(&prng);
00346 }
00347
00348
00349 av_log(NULL, AV_LOG_INFO,"Checking...\n");
00350
00351 switch (transform) {
00352 case TRANSFORM_MDCT:
00353 if (do_inverse) {
00354 imdct_ref((FFTSample *)tab_ref, (FFTSample *)tab1, fft_nbits);
00355 m->imdct_calc(m, tab2, (FFTSample *)tab1);
00356 err = check_diff((FFTSample *)tab_ref, tab2, fft_size, scale);
00357 } else {
00358 mdct_ref((FFTSample *)tab_ref, (FFTSample *)tab1, fft_nbits);
00359
00360 m->mdct_calc(m, tab2, (FFTSample *)tab1);
00361
00362 err = check_diff((FFTSample *)tab_ref, tab2, fft_size / 2, scale);
00363 }
00364 break;
00365 case TRANSFORM_FFT:
00366 memcpy(tab, tab1, fft_size * sizeof(FFTComplex));
00367 s->fft_permute(s, tab);
00368 s->fft_calc(s, tab);
00369
00370 fft_ref(tab_ref, tab1, fft_nbits);
00371 err = check_diff((FFTSample *)tab_ref, (FFTSample *)tab, fft_size * 2, 1.0);
00372 break;
00373 #if CONFIG_FFT_FLOAT
00374 case TRANSFORM_RDFT:
00375 if (do_inverse) {
00376 tab1[ 0].im = 0;
00377 tab1[fft_size_2].im = 0;
00378 for (i = 1; i < fft_size_2; i++) {
00379 tab1[fft_size_2+i].re = tab1[fft_size_2-i].re;
00380 tab1[fft_size_2+i].im = -tab1[fft_size_2-i].im;
00381 }
00382
00383 memcpy(tab2, tab1, fft_size * sizeof(FFTSample));
00384 tab2[1] = tab1[fft_size_2].re;
00385
00386 r->rdft_calc(r, tab2);
00387 fft_ref(tab_ref, tab1, fft_nbits);
00388 for (i = 0; i < fft_size; i++) {
00389 tab[i].re = tab2[i];
00390 tab[i].im = 0;
00391 }
00392 err = check_diff((float *)tab_ref, (float *)tab, fft_size * 2, 0.5);
00393 } else {
00394 for (i = 0; i < fft_size; i++) {
00395 tab2[i] = tab1[i].re;
00396 tab1[i].im = 0;
00397 }
00398 r->rdft_calc(r, tab2);
00399 fft_ref(tab_ref, tab1, fft_nbits);
00400 tab_ref[0].im = tab_ref[fft_size_2].re;
00401 err = check_diff((float *)tab_ref, (float *)tab2, fft_size, 1.0);
00402 }
00403 break;
00404 case TRANSFORM_DCT:
00405 memcpy(tab, tab1, fft_size * sizeof(FFTComplex));
00406 d->dct_calc(d, tab);
00407 if (do_inverse) {
00408 idct_ref(tab_ref, tab1, fft_nbits);
00409 } else {
00410 dct_ref(tab_ref, tab1, fft_nbits);
00411 }
00412 err = check_diff((float *)tab_ref, (float *)tab, fft_size, 1.0);
00413 break;
00414 #endif
00415 }
00416
00417
00418
00419 if (do_speed) {
00420 int64_t time_start, duration;
00421 int nb_its;
00422
00423 av_log(NULL, AV_LOG_INFO,"Speed test...\n");
00424
00425 nb_its = 1;
00426 for(;;) {
00427 time_start = gettime();
00428 for (it = 0; it < nb_its; it++) {
00429 switch (transform) {
00430 case TRANSFORM_MDCT:
00431 if (do_inverse) {
00432 m->imdct_calc(m, (FFTSample *)tab, (FFTSample *)tab1);
00433 } else {
00434 m->mdct_calc(m, (FFTSample *)tab, (FFTSample *)tab1);
00435 }
00436 break;
00437 case TRANSFORM_FFT:
00438 memcpy(tab, tab1, fft_size * sizeof(FFTComplex));
00439 s->fft_calc(s, tab);
00440 break;
00441 #if CONFIG_FFT_FLOAT
00442 case TRANSFORM_RDFT:
00443 memcpy(tab2, tab1, fft_size * sizeof(FFTSample));
00444 r->rdft_calc(r, tab2);
00445 break;
00446 case TRANSFORM_DCT:
00447 memcpy(tab2, tab1, fft_size * sizeof(FFTSample));
00448 d->dct_calc(d, tab2);
00449 break;
00450 #endif
00451 }
00452 }
00453 duration = gettime() - time_start;
00454 if (duration >= 1000000)
00455 break;
00456 nb_its *= 2;
00457 }
00458 av_log(NULL, AV_LOG_INFO,"time: %0.1f us/transform [total time=%0.2f s its=%d]\n",
00459 (double)duration / nb_its,
00460 (double)duration / 1000000.0,
00461 nb_its);
00462 }
00463
00464 switch (transform) {
00465 case TRANSFORM_MDCT:
00466 ff_mdct_end(m);
00467 break;
00468 case TRANSFORM_FFT:
00469 ff_fft_end(s);
00470 break;
00471 #if CONFIG_FFT_FLOAT
00472 case TRANSFORM_RDFT:
00473 ff_rdft_end(r);
00474 break;
00475 case TRANSFORM_DCT:
00476 ff_dct_end(d);
00477 break;
00478 #endif
00479 }
00480
00481 av_free(tab);
00482 av_free(tab1);
00483 av_free(tab2);
00484 av_free(tab_ref);
00485 av_free(exptab);
00486
00487 return err;
00488 }