84 #if HAVE_MMXEXT_INLINE
110 for(i=0; i<64; i++) {
114 #if ARCH_X86_64 && HAVE_MMX && HAVE_YASM
116 int16_t *
block, int16_t *qmat);
118 static void ff_prores_idct_put_10_sse2_wrap(int16_t *dst){
129 for(i=0; i<64; i++) {
146 #if HAVE_MMXEXT_INLINE
151 #if ARCH_X86_64 && HAVE_YASM
170 #if HAVE_NEON && ARCH_ARM
177 #define AANSCALE_BITS 12
180 #define NB_ITS_SPEED 50000
185 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
186 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
187 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
188 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
189 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
190 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
191 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
192 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
202 for (i = 0; i < 64; i++) {
203 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
214 memset(block, 0, 64 *
sizeof(*block));
218 for (i = 0; i < 64; i++)
219 block[i] = (
av_lfg_get(prng) % (2*vals)) -vals;
222 for (i = 0; i < 64; i++)
228 for (i = 0; i < j; i++) {
230 block[idx] =
av_lfg_get(prng) % (2*vals) -vals;
234 block[ 0] =
av_lfg_get(prng) % (16*vals) - (8*vals);
235 block[63] = (block[0] & 1) ^ 1;
245 for (i = 0; i < 64; i++)
246 dst[idct_mmx_perm[i]] = src[i];
248 for (i = 0; i < 64; i++)
249 dst[idct_simple_mmx_perm[i]] = src[i];
251 for (i = 0; i < 64; i++)
252 dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
254 for (i = 0; i < 64; i++)
255 dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
257 for (i = 0; i < 64; i++)
258 dst[(i>>3) | ((i<<3)&0x38)] = src[i];
260 for (i = 0; i < 64; i++)
270 int64_t err2, ti, ti1, it1, err_sum = 0;
271 int64_t sysErr[64], sysErrMax = 0;
273 int blockSumErrMax = 0, blockSumErr;
275 const int vals=1<<
bits;
283 for (i = 0; i < 64; i++)
285 for (it = 0; it <
NB_ITS; it++) {
293 for (i = 0; i < 64; i++) {
300 if (!strcmp(dct->
name,
"PR-SSE2"))
301 for (i = 0; i < 64; i++)
305 for (i = 0; i < 64; i++) {
312 sysErr[i] +=
block[i] - block1[i];
314 if (abs(
block[i]) > maxout)
315 maxout = abs(
block[i]);
317 if (blockSumErrMax < blockSumErr)
318 blockSumErrMax = blockSumErr;
320 for (i = 0; i < 64; i++)
321 sysErrMax =
FFMAX(sysErrMax,
FFABS(sysErr[i]));
323 for (i = 0; i < 64; i++) {
326 printf(
"%7d ", (
int) sysErr[i]);
330 omse = (double) err2 / NB_ITS / 64;
331 ome = (double) err_sum / NB_ITS / 64;
333 spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
335 printf(
"%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
336 is_idct ?
"IDCT" :
"DCT", dct->
name, err_inf,
337 omse, ome, (
double) sysErrMax / NB_ITS,
338 maxout, blockSumErrMax);
361 }
while (ti1 < 1000000);
363 printf(
"%s %s: %0.1f kdct/s\n", is_idct ?
"IDCT" :
"DCT", dct->
name,
364 (
double) it1 * 1000.0 / (
double) ti1);
375 static double c8[8][8];
376 static double c4[4][4];
377 double block1[64], block2[64], block3[64];
384 for (i = 0; i < 8; i++) {
386 for (j = 0; j < 8; j++) {
387 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
388 c8[i][j] = s * cos(
M_PI * i * (j + 0.5) / 8.0);
389 sum += c8[i][j] * c8[i][j];
393 for (i = 0; i < 4; i++) {
395 for (j = 0; j < 4; j++) {
396 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
397 c4[i][j] = s * cos(
M_PI * i * (j + 0.5) / 4.0);
398 sum += c4[i][j] * c4[i][j];
405 for (i = 0; i < 4; i++) {
406 for (j = 0; j < 8; j++) {
407 block1[8 * (2 * i) + j] =
408 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) *
s;
409 block1[8 * (2 * i + 1) + j] =
410 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) *
s;
415 for (i = 0; i < 8; i++) {
416 for (j = 0; j < 8; j++) {
418 for (k = 0; k < 8; k++)
419 sum += c8[k][j] * block1[8 * i + k];
420 block2[8 * i + j] = sum;
425 for (i = 0; i < 8; i++) {
426 for (j = 0; j < 4; j++) {
429 for (k = 0; k < 4; k++)
430 sum += c4[k][j] * block2[8 * (2 * k) + i];
431 block3[8 * (2 * j) + i] = sum;
435 for (k = 0; k < 4; k++)
436 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
437 block3[8 * (2 * j + 1) + i] = sum;
442 for (i = 0; i < 8; i++) {
443 for (j = 0; j < 8; j++) {
444 v = block3[8 * i + j];
446 else if (v > 255) v = 255;
447 dest[i * linesize + j] = (int)
rint(v);
453 void (*idct248_put)(
uint8_t *dest,
int line_size,
457 int it, i, it1, ti, ti1, err_max,
v;
465 for (it = 0; it <
NB_ITS; it++) {
467 for (i = 0; i < 64; i++)
471 for (i = 0; i < 64; i++)
475 for (i = 0; i < 64; i++)
479 for (i = 0; i < 64; i++) {
506 printf(
"%s %s: err_inf=%d\n", 1 ?
"IDCT248" :
"DCT248", name, err_max);
515 for (i = 0; i < 64; i++)
522 }
while (ti1 < 1000000);
524 printf(
"%s %s: %0.1f kdct/s\n", 1 ?
"IDCT248" :
"DCT248", name,
525 (
double) it1 * 1000.0 / (
double) ti1);
530 printf(
"dct-test [-i] [<test-number>] [<bits>]\n"
531 "test-number 0 -> test with random matrixes\n"
532 " 1 -> test with random sparse matrixes\n"
533 " 2 -> do 3. test from mpeg4 std\n"
534 "bits Number of time domain bits to use, 8 is default\n"
535 "-i test IDCT implementations\n"
536 "-4 test IDCT248 implementations\n"
544 int main(
int argc,
char **argv)
546 int test_idct = 0, test_248_dct = 0;
559 c =
getopt(argc, argv,
"ih4t");
580 test = atoi(argv[
optind]);
581 if(optind+1 < argc) bits= atoi(argv[optind+1]);
583 printf(
"ffmpeg DCT/IDCT test\n");
588 const struct algo *algos = test_idct ? idct_tab :
fdct_tab;
589 for (i = 0; algos[i].
name; i++)
591 err |=
dct_error(&algos[i], test, test_idct, speed, bits);
596 printf(
"Error: %d.\n", err);