#define TABLE_DEF(name, size) \
    DECLARE_ALIGNED(32, TXSample, TX_TAB(ff_tx_tab_ ##name))[size]
#define SR_POW2_TABLES \
    SR_TABLE(8)        \
    /* ... one entry per power of two, up to ... */ \
    SR_TABLE(131072)

#define SR_TABLE(len) \
    TABLE_DEF(len, len/4 + 1);
SR_POW2_TABLES
#undef SR_TABLE
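/* For reference, a sketch of what the pair above expands to for len = 8 in
 * the float template (assuming TXSample is float and TX_TAB() appends a
 * _float suffix; both macros are defined outside this file):
 *
 *     DECLARE_ALIGNED(32, float, ff_tx_tab_8_float)[8/4 + 1];
 */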
#define SR_TABLE(len)                                    \
static av_cold void TX_TAB(ff_tx_init_tab_ ##len)(void) \
{                                                        \
    double freq = 2*M_PI/len;                            \
    TXSample *tab = TX_TAB(ff_tx_tab_ ##len);            \
    for (int i = 0; i < len/4; i++)                      \
        *tab++ = RESCALE(cos(i*freq));                   \
    *tab = 0;                                            \
}
SR_POW2_TABLES
#undef SR_TABLE
static void (*const sr_tabs_init_funcs[])(void) = {
#define SR_TABLE(len) TX_TAB(ff_tx_init_tab_ ##len),
    SR_POW2_TABLES
#undef SR_TABLE
};
static AVOnce sr_tabs_init_once[] = {
#define SR_TABLE(len) AV_ONCE_INIT,
    SR_POW2_TABLES
#undef SR_TABLE
};
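/* The same SR_POW2_TABLES list is expanded several times with different
 * SR_TABLE() definitions: once to declare the tables, once to define one
 * init function per table, and twice more to build the parallel arrays of
 * init-function pointers and AVOnce guards that ff_tx_init_tabs() walks.
 * Keeping a single list guarantees all of them stay in sync. */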
static av_cold void TX_TAB(ff_tx_init_tab_53)(void)
{
    /* 5pt twiddles, each duplicated so vector loads need no lane shuffles */
    TX_TAB(ff_tx_tab_53)[0] = RESCALE(cos(2 * M_PI /  5));
    TX_TAB(ff_tx_tab_53)[1] = RESCALE(cos(2 * M_PI /  5));
    TX_TAB(ff_tx_tab_53)[2] = RESCALE(cos(2 * M_PI / 10));
    TX_TAB(ff_tx_tab_53)[3] = RESCALE(cos(2 * M_PI / 10));
    TX_TAB(ff_tx_tab_53)[4] = RESCALE(sin(2 * M_PI /  5));
    TX_TAB(ff_tx_tab_53)[5] = RESCALE(sin(2 * M_PI /  5));
    TX_TAB(ff_tx_tab_53)[6] = RESCALE(sin(2 * M_PI / 10));
    TX_TAB(ff_tx_tab_53)[7] = RESCALE(sin(2 * M_PI / 10));

    /* 3pt twiddles */
    TX_TAB(ff_tx_tab_53)[ 8] = RESCALE(cos(2 * M_PI / 12));
    TX_TAB(ff_tx_tab_53)[ 9] = RESCALE(cos(2 * M_PI / 12));
    TX_TAB(ff_tx_tab_53)[10] = RESCALE(cos(2 * M_PI /  6));
    TX_TAB(ff_tx_tab_53)[11] = RESCALE(cos(8 * M_PI /  6));
}
static av_cold void TX_TAB(ff_tx_init_tab_7)(void)
{
    TX_TAB(ff_tx_tab_7)[0] = RESCALE(cos(2 * M_PI /  7));
    TX_TAB(ff_tx_tab_7)[1] = RESCALE(sin(2 * M_PI /  7));
    TX_TAB(ff_tx_tab_7)[2] = RESCALE(sin(2 * M_PI / 28));
    TX_TAB(ff_tx_tab_7)[3] = RESCALE(cos(2 * M_PI / 28));
    TX_TAB(ff_tx_tab_7)[4] = RESCALE(cos(2 * M_PI / 14));
    TX_TAB(ff_tx_tab_7)[5] = RESCALE(sin(2 * M_PI / 14));
}
static av_cold void TX_TAB(ff_tx_init_tab_9)(void)
{
    TX_TAB(ff_tx_tab_9)[0] = RESCALE(cos(2 * M_PI /  3));
    TX_TAB(ff_tx_tab_9)[1] = RESCALE(sin(2 * M_PI /  3));
    TX_TAB(ff_tx_tab_9)[2] = RESCALE(cos(2 * M_PI /  9));
    TX_TAB(ff_tx_tab_9)[3] = RESCALE(sin(2 * M_PI /  9));
    TX_TAB(ff_tx_tab_9)[4] = RESCALE(cos(2 * M_PI / 36));
    TX_TAB(ff_tx_tab_9)[5] = RESCALE(sin(2 * M_PI / 36));
    TX_TAB(ff_tx_tab_9)[6] = TX_TAB(ff_tx_tab_9)[2] + TX_TAB(ff_tx_tab_9)[5];
    TX_TAB(ff_tx_tab_9)[7] = TX_TAB(ff_tx_tab_9)[3] - TX_TAB(ff_tx_tab_9)[4];
}
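/* Entries [6] and [7] are combinations of other entries rather than fresh
 * cos()/sin() evaluations: fft9() below consumes those combined twiddles in
 * its butterflies, so precomputing them here saves work per transform call. */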
/* In ff_tx_init_tabs(): run the once-guarded init for every power-of-two
 * table the length needs (factor_2 being the number of factors of 2) */
    int idx = factor_2 - 3;
    for (int i = 0; i <= idx; i++)
        ff_thread_once(&sr_tabs_init_once[i],
                       sr_tabs_init_funcs[i]);
static av_always_inline void fft3(TXComplex *out, TXComplex *in,
                                  ptrdiff_t stride)
{
    TXComplex tmp[3];
    const TXSample *tab = TX_TAB(ff_tx_tab_53);
#ifdef TX_INT32
    int64_t mtmp[4];
#endif

    tmp[0] = in[0];
    BF(tmp[1].re, tmp[2].im, in[1].im, in[2].im);
    BF(tmp[1].im, tmp[2].re, in[1].re, in[2].re);

    out[0*stride].re = tmp[0].re + tmp[2].re;
    out[0*stride].im = tmp[0].im + tmp[2].im;

#ifdef TX_INT32
    /* ... mtmp[0..3] are 64-bit products of tmp[1]/tmp[2] with the 3pt
     * twiddles tab[8..10] ... */
    out[1*stride].re = tmp[0].re - (mtmp[2] + mtmp[0] + 0x40000000 >> 31);
    out[1*stride].im = tmp[0].im - (mtmp[3] - mtmp[1] + 0x40000000 >> 31);
    out[2*stride].re = tmp[0].re - (mtmp[2] - mtmp[0] + 0x40000000 >> 31);
    out[2*stride].im = tmp[0].im - (mtmp[3] + mtmp[1] + 0x40000000 >> 31);
#else
    /* ... float path: the same expressions with direct multiplies ... */
#endif
}
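/* The helpers used throughout come from outside this template; for the float
 * case they are assumed to behave roughly like this (a sketch, not the
 * authoritative definitions):
 *
 *     #define BF(x, y, a, b)  do { x = (a) - (b); y = (a) + (b); } while (0)
 *     #define CMUL(dre, dim, are, aim, bre, bim)     \
 *         do {                                       \
 *             (dre) = (are)*(bre) - (aim)*(bim);     \
 *             (dim) = (are)*(bim) + (aim)*(bre);     \
 *         } while (0)
 *
 * i.e. BF() is a subtract/add pair and CMUL() a complex multiply. The
 * 0x40000000 >> 31 idiom above is round-to-nearest for Q31 fixed point. */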
#define DECL_FFT5(NAME, D0, D1, D2, D3, D4)                         \
static av_always_inline void NAME(TXComplex *out, TXComplex *in,   \
                                  ptrdiff_t stride)                 \
{                                                                   \
    TXComplex dc, z0[4], t[6];                                      \
    const TXSample *tab = TX_TAB(ff_tx_tab_53);                     \
                                                                    \
    dc = in[0];                                                     \
    BF(t[1].im, t[0].re, in[1].re, in[4].re);                       \
    BF(t[1].re, t[0].im, in[1].im, in[4].im);                       \
    BF(t[3].im, t[2].re, in[2].re, in[3].re);                       \
    BF(t[3].re, t[2].im, in[2].im, in[3].im);                       \
                                                                    \
    out[D0*stride].re = dc.re + (TXUSample)t[0].re + t[2].re;       \
    out[D0*stride].im = dc.im + (TXUSample)t[0].im + t[2].im;       \
                                                                    \
    SMUL(t[4].re, t[0].re, tab[0], tab[2], t[2].re, t[0].re);       \
    SMUL(t[4].im, t[0].im, tab[0], tab[2], t[2].im, t[0].im);       \
    CMUL(t[5].re, t[1].re, tab[4], tab[6], t[3].re, t[1].re);       \
    CMUL(t[5].im, t[1].im, tab[4], tab[6], t[3].im, t[1].im);       \
                                                                    \
    BF(z0[0].re, z0[3].re, t[0].re, t[1].re);                       \
    BF(z0[0].im, z0[3].im, t[0].im, t[1].im);                       \
    BF(z0[2].re, z0[1].re, t[4].re, t[5].re);                       \
    BF(z0[2].im, z0[1].im, t[4].im, t[5].im);                       \
                                                                    \
    out[D1*stride].re = dc.re + (TXUSample)z0[3].re;                \
    out[D1*stride].im = dc.im + (TXUSample)z0[0].im;                \
    out[D2*stride].re = dc.re + (TXUSample)z0[2].re;                \
    out[D2*stride].im = dc.im + (TXUSample)z0[1].im;                \
    out[D3*stride].re = dc.re + (TXUSample)z0[1].re;                \
    out[D3*stride].im = dc.im + (TXUSample)z0[2].im;                \
    out[D4*stride].re = dc.re + (TXUSample)z0[0].re;                \
    out[D4*stride].im = dc.im + (TXUSample)z0[3].im;                \
}
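/* The D0..D4 parameters let the same body emit differently-permuted 5-point
 * transforms: the file is believed to instantiate a plain fft5 plus three
 * rotated variants (fft5_m1/m2/m3) whose output index sets implement the
 * prime-factor output permutation fft15() needs, e.g.:
 *
 *     DECL_FFT5(fft5, 0, 1, 2, 3, 4)
 *
 * (the exact index sets of the _m variants are not reproduced here). */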
static av_always_inline void fft7(TXComplex *out, TXComplex *in,
                                  ptrdiff_t stride)
{
    TXComplex dc, t[6], z[3];
    const TXComplex *tab = (const TXComplex *)TX_TAB(ff_tx_tab_7);
#ifdef TX_INT32
    int64_t mtmp[12];
#endif

    dc = in[0];
    BF(t[1].re, t[0].re, in[1].re, in[6].re);
    BF(t[1].im, t[0].im, in[1].im, in[6].im);
    BF(t[3].re, t[2].re, in[2].re, in[5].re);
    BF(t[3].im, t[2].im, in[2].im, in[5].im);
    BF(t[5].re, t[4].re, in[3].re, in[4].re);
    BF(t[5].im, t[4].im, in[3].im, in[4].im);

    out[0*stride].re = dc.re + t[0].re + t[2].re + t[4].re;
    out[0*stride].im = dc.im + t[0].im + t[2].im + t[4].im;
    z[0].re = tab[0].re*t[0].re - tab[2].re*t[4].re - tab[1].re*t[2].re;
    z[1].re = tab[0].re*t[4].re - tab[1].re*t[0].re - tab[2].re*t[2].re;
    z[2].re = tab[0].re*t[2].re - tab[2].re*t[0].re - tab[1].re*t[4].re;
    z[0].im = tab[0].re*t[0].im - tab[1].re*t[2].im - tab[2].re*t[4].im;
    z[1].im = tab[0].re*t[4].im - tab[1].re*t[0].im - tab[2].re*t[2].im;
    z[2].im = tab[0].re*t[2].im - tab[2].re*t[0].im - tab[1].re*t[4].im;
    t[0].re = tab[2].im*t[1].im + tab[1].im*t[5].im - tab[0].im*t[3].im;
    t[2].re = tab[0].im*t[5].im + tab[2].im*t[3].im - tab[1].im*t[1].im;
    t[4].re = tab[2].im*t[5].im + tab[1].im*t[3].im + tab[0].im*t[1].im;
    t[0].im = tab[0].im*t[1].re + tab[1].im*t[3].re + tab[2].im*t[5].re;
    t[2].im = tab[2].im*t[3].re + tab[0].im*t[5].re - tab[1].im*t[1].re;
    t[4].im = tab[2].im*t[1].re + tab[1].im*t[5].re - tab[0].im*t[3].re;
    BF(t[1].re, z[0].re, z[0].re, t[4].re);
    BF(t[3].re, z[1].re, z[1].re, t[2].re);
    BF(t[5].re, z[2].re, z[2].re, t[0].re);
    BF(t[1].im, z[0].im, z[0].im, t[0].im);
    BF(t[3].im, z[1].im, z[1].im, t[2].im);
    BF(t[5].im, z[2].im, z[2].im, t[4].im);
static av_always_inline void fft9(TXComplex *out, TXComplex *in,
                                  ptrdiff_t stride)
{
    const TXComplex *tab = (const TXComplex *)TX_TAB(ff_tx_tab_9);
    TXComplex dc, t[16], w[4], x[5], y[5], z[2];
#ifdef TX_INT32
    int64_t mtmp[12];
#endif

    dc = in[0];
    BF(t[1].re, t[0].re, in[1].re, in[8].re);
    BF(t[1].im, t[0].im, in[1].im, in[8].im);
    BF(t[3].re, t[2].re, in[2].re, in[7].re);
    BF(t[3].im, t[2].im, in[2].im, in[7].im);
    BF(t[5].re, t[4].re, in[3].re, in[6].re);
    BF(t[5].im, t[4].im, in[3].im, in[6].im);
    BF(t[7].re, t[6].re, in[4].re, in[5].re);
    BF(t[7].im, t[6].im, in[4].im, in[5].im);

    w[0].re = t[0].re - t[6].re;
    w[0].im = t[0].im - t[6].im;
    w[1].re = t[2].re - t[6].re;
    w[1].im = t[2].im - t[6].im;
    w[2].re = t[1].re - t[7].re;
    w[2].im = t[1].im - t[7].im;
    w[3].re = t[3].re + t[7].re;
    w[3].im = t[3].im + t[7].im;

    z[0].re = dc.re + t[4].re;
    z[0].im = dc.im + t[4].im;

    z[1].re = t[0].re + t[2].re + t[6].re;
    z[1].im = t[0].im + t[2].im + t[6].im;
#ifdef TX_INT32
    mtmp[0] = t[1].re - t[3].re + t[7].re;
    mtmp[1] = t[1].im - t[3].im + t[7].im;

    /* ... mtmp[0..3] are then recomputed as rounded 64-bit products with the
     * tab[] twiddles ... */

    x[3].re = z[0].re  + (int32_t)mtmp[0];
    x[3].im = z[0].im  + (int32_t)mtmp[1];
    z[0].re = in[0].re + (int32_t)mtmp[2];
    z[0].im = in[0].im + (int32_t)mtmp[3];

    /* ... the remaining x[], y[] terms are computed the same way with 64-bit
     * products and rounded downshifts ... */
#else
    y[3].re = tab[0].im*(t[1].re - t[3].re + t[7].re);
    y[3].im = tab[0].im*(t[1].im - t[3].im + t[7].im);

    x[3].re = z[0].re + tab[0].re*z[1].re;
    x[3].im = z[0].im + tab[0].re*z[1].im;
    z[0].re = dc.re + tab[0].re*t[4].re;
    z[0].im = dc.im + tab[0].re*t[4].im;
    x[1].re = tab[1].re*w[0].re + tab[2].im*w[1].re;
    x[1].im = tab[1].re*w[0].im + tab[2].im*w[1].im;
    x[2].re = tab[2].im*w[0].re - tab[3].re*w[1].re;
    x[2].im = tab[2].im*w[0].im - tab[3].re*w[1].im;
    y[1].re = tab[1].im*w[2].re + tab[2].re*w[3].re;
    y[1].im = tab[1].im*w[2].im + tab[2].re*w[3].im;
    y[2].re = tab[2].re*w[2].re - tab[3].im*w[3].re;
    y[2].im = tab[2].re*w[2].im - tab[3].im*w[3].im;

    y[0].re = tab[0].im*t[5].re;
    y[0].im = tab[0].im*t[5].im;
#endif
    x[4].re = x[1].re + x[2].re;
    x[4].im = x[1].im + x[2].im;

    y[4].re = y[1].re - y[2].re;
    y[4].im = y[1].im - y[2].im;
    x[1].re = z[0].re + x[1].re;
    x[1].im = z[0].im + x[1].im;
    y[1].re = y[0].re + y[1].re;
    y[1].im = y[0].im + y[1].im;
    x[2].re = z[0].re + x[2].re;
    x[2].im = z[0].im + x[2].im;
    y[2].re = y[2].re - y[0].re;
    y[2].im = y[2].im - y[0].im;
    x[4].re = z[0].re - x[4].re;
    x[4].im = z[0].im - x[4].im;
    y[4].re = y[0].re - y[4].re;
    y[4].im = y[0].im - y[4].im;
/* In fft15(): five interleaved 3-point transforms feed the rotated 5-point
 * transforms declared by DECL_FFT5() above */
    for (int i = 0; i < 5; i++)
        fft3(tmp + i, in + i*3, 5);
#define DECL_FACTOR_S(n)                                                     \
static void TX_NAME(ff_tx_fft##n)(AVTXContext *s, void *dst,                 \
                                  void *src, ptrdiff_t stride)               \
{                                                                            \
    fft##n((TXComplex *)dst, (TXComplex *)src, stride / sizeof(TXComplex));  \
}                                                                            \
static const FFTXCodelet TX_NAME(ff_tx_fft##n##_ns_def) = {                  \
    .name       = TX_NAME_STR("fft" #n "_ns"),                               \
    .function   = TX_NAME(ff_tx_fft##n),                                     \
    .type       = TX_TYPE(FFT),                                              \
    .flags      = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE |                       \
                  AV_TX_UNALIGNED | FF_TX_PRESHUFFLE,                        \
    .factors[0] = n,                                                         \
    .nb_factors = 1,                                                         \
    .min_len    = n,                                                         \
    .max_len    = n,                                                         \
    .init       = TX_NAME(ff_tx_fft_factor_init),                            \
    .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                                       \
    .prio       = FF_TX_PRIO_BASE,                                           \
};
#define DECL_FACTOR_F(n)                                                     \
DECL_FACTOR_S(n)                                                             \
static const FFTXCodelet TX_NAME(ff_tx_fft##n##_fwd_def) = {                 \
    .name       = TX_NAME_STR("fft" #n "_fwd"),                              \
    .function   = TX_NAME(ff_tx_fft##n),                                     \
    .type       = TX_TYPE(FFT),                                              \
    .flags      = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE |                       \
                  AV_TX_UNALIGNED | FF_TX_FORWARD_ONLY,                      \
    .factors[0] = n,                                                         \
    .nb_factors = 1,                                                         \
    .min_len    = n,                                                         \
    .max_len    = n,                                                         \
    .init       = TX_NAME(ff_tx_fft_factor_init),                            \
    .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                                       \
    .prio       = FF_TX_PRIO_BASE,                                           \
};
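/* These two macros are presumably expanded once per supported small factor
 * (3, 5, 7 and 9 as forward + preshuffled pairs, 15 as preshuffled only),
 * registering the single-factor codelets the naive and PFA paths build on. */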
#define BUTTERFLIES(a0, a1, a2, a3)     \
    do {                                \
        r0 = a0.re;                     \
        i0 = a0.im;                     \
        r1 = a1.re;                     \
        i1 = a1.im;                     \
        BF(t3, t5, t5, t1);             \
        BF(a2.re, a0.re, r0, t5);       \
        BF(a3.im, a1.im, i1, t3);       \
        BF(t4, t6, t2, t6);             \
        BF(a3.re, a1.re, r1, t4);       \
        BF(a2.im, a0.im, i0, t6);       \
    } while (0)
#define TRANSFORM(a0, a1, a2, a3, wre, wim)     \
    do {                                        \
        CMUL(t1, t2, a2.re, a2.im, wre, -wim);  \
        CMUL(t5, t6, a3.re, a3.im, wre,  wim);  \
        BUTTERFLIES(a0, a1, a2, a3);            \
    } while (0)
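/* TRANSFORM() is one radix-4 step of the split-radix recursion: a2 and a3
 * are rotated by a conjugate pair of twiddles built from (wre, wim), then
 * BUTTERFLIES() forms the sums and differences that scatter the results
 * back into a0..a3. One complex twiddle per pair is all split-radix needs,
 * which is why the combine loop below reads the cos table forwards (cos[])
 * and backwards (wim[]) at the same time. */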
static void TX_NAME(ff_tx_fft_sr_combine)(TXComplex *z,
                                          const TXSample *cos, int len)
{
    int o1 = 2*len;
    int o2 = 4*len;
    int o3 = 6*len;
    const TXSample *wim = cos + o1 - 7;
    TXUSample t1, t2, t3, t4, t5, t6, r0, i0, r1, i1;

    for (int i = 0; i < len; i += 4) {
        TRANSFORM(z[0], z[o1 + 0], z[o2 + 0], z[o3 + 0], cos[0], wim[7]);
        TRANSFORM(z[2], z[o1 + 2], z[o2 + 2], z[o3 + 2], cos[2], wim[5]);
        TRANSFORM(z[4], z[o1 + 4], z[o2 + 4], z[o3 + 4], cos[4], wim[3]);
        TRANSFORM(z[6], z[o1 + 6], z[o2 + 6], z[o3 + 6], cos[6], wim[1]);

        TRANSFORM(z[1], z[o1 + 1], z[o2 + 1], z[o3 + 1], cos[1], wim[6]);
        TRANSFORM(z[3], z[o1 + 3], z[o2 + 3], z[o3 + 3], cos[3], wim[4]);
        TRANSFORM(z[5], z[o1 + 5], z[o2 + 5], z[o3 + 5], cos[5], wim[2]);
        TRANSFORM(z[7], z[o1 + 7], z[o2 + 7], z[o3 + 7], cos[7], wim[0]);

        z   += 2*4;
        cos += 2*4;
        wim -= 2*4;
    }
}
#define DECL_SR_CODELET_DEF(n)                              \
static const FFTXCodelet TX_NAME(ff_tx_fft##n##_ns_def) = { \
    .name       = TX_NAME_STR("fft" #n "_ns"),              \
    .function   = TX_NAME(ff_tx_fft##n##_ns),               \
    .type       = TX_TYPE(FFT),                             \
    .flags      = FF_TX_OUT_OF_PLACE | AV_TX_INPLACE |      \
                  AV_TX_UNALIGNED | FF_TX_PRESHUFFLE,       \
    .factors[0] = 2,                                        \
    .nb_factors = 1,                                        \
    .min_len    = n,                                        \
    .max_len    = n,                                        \
    .init       = TX_NAME(ff_tx_fft_sr_codelet_init),       \
    .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                      \
    .prio       = FF_TX_PRIO_BASE,                          \
};
#define DECL_SR_CODELET(n, n2, n4)                                    \
static void TX_NAME(ff_tx_fft##n##_ns)(AVTXContext *s, void *_dst,   \
                                       void *_src, ptrdiff_t stride) \
{                                                                     \
    TXComplex *src = _src;                                            \
    TXComplex *dst = _dst;                                            \
    const TXSample *cos = TX_TAB(ff_tx_tab_##n);                      \
                                                                      \
    TX_NAME(ff_tx_fft##n2##_ns)(s, dst,        src,        stride);   \
    TX_NAME(ff_tx_fft##n4##_ns)(s, dst + n4*2, src + n4*2, stride);   \
    TX_NAME(ff_tx_fft##n4##_ns)(s, dst + n4*3, src + n4*3, stride);   \
    TX_NAME(ff_tx_fft_sr_combine)(dst, cos, n4 >> 1);                 \
}                                                                     \
                                                                      \
DECL_SR_CODELET_DEF(n)
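/* For example, an assumed instantiation matching the split-radix pattern and
 * the codelet table near the end of the file:
 *
 *     DECL_SR_CODELET(32, 16, 8)
 *
 * defines ff_tx_fft32_ns() as one 16-point transform on the first half plus
 * two 8-point transforms on the quarters, finished by a combine pass over
 * n4 >> 1 = 4 twiddle groups. The recursion bottoms out in the hand-written
 * fft2/4/8/16 codelets below. */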
/* In ff_tx_fft4_ns(): */
    TXSample t1, t2, t3, t4, t5, t6, t7, t8;

/* In ff_tx_fft8_ns(): */
    TXUSample t1, t2, t3, t4, t5, t6, r0, i0, r1, i1;
    const TXSample cos = TX_TAB(ff_tx_tab_8)[1];

/* In ff_tx_fft16_ns(): */
    const TXSample *cos = TX_TAB(ff_tx_tab_16);
    TXUSample t1, t2, t3, t4, t5, t6, r0, i0, r1, i1;
    TXSample cos_16_1 = cos[1];
    TXSample cos_16_2 = cos[2];
    TXSample cos_16_3 = cos[3];
static void TX_NAME(ff_tx_fft)(AVTXContext *s, void *_dst,
                               void *_src, ptrdiff_t stride)
{
    TXComplex *src = _src;
    TXComplex *dst1 = s->flags & AV_TX_INPLACE ? s->tmp : _dst;
    TXComplex *dst2 = _dst;
    int *map = s->sub[0].map;
    int len = s->len;

    for (int i = 0; i < len; i++)
        dst1[i] = src[map[i]];

    s->fn[0](&s->sub[0], dst2, dst1, stride);
}
static void TX_NAME(ff_tx_fft_inplace)(AVTXContext *s, void *_dst,
                                       void *_src, ptrdiff_t stride)
{
    TXComplex *src = _src;
    TXComplex *dst = _dst;
    TXComplex tmp;
    const int *map = s->sub->map;
    const int *inplace_idx = s->map;
    int src_idx, dst_idx;

    src_idx = *inplace_idx++;
    do {
        tmp = src[src_idx];
        dst_idx = map[src_idx];
        do {
            FFSWAP(TXComplex, tmp, src[dst_idx]);
            dst_idx = map[dst_idx];
        } while (dst_idx != src_idx);
        src[dst_idx] = tmp;
    } while ((src_idx = *inplace_idx++));

    s->fn[0](&s->sub[0], dst, src, stride);
}
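/* The shuffle is applied by walking each cycle of the permutation exactly
 * once: s->map (built by ff_tx_gen_inplace_map()) stores one starting index
 * per cycle, terminated by 0, and FFSWAP() rotates the cycle's elements in
 * place. This keeps the permutation O(len) with O(1) scratch, at the cost of
 * a data-dependent access pattern. */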
static const FFTXCodelet TX_NAME(ff_tx_fft_def) = {
    .name = TX_NAME_STR("fft"),
    /* ... */
};

static const FFTXCodelet TX_NAME(ff_tx_fft_inplace_small_def) = {
    .name = TX_NAME_STR("fft_inplace_small"),
    /* ... */
};

static const FFTXCodelet TX_NAME(ff_tx_fft_inplace_def) = {
    .name = TX_NAME_STR("fft_inplace"),
    /* ... */
};
/* In ff_tx_fft_init_naive_small(): precompute the full DFT matrix */
    for (int i = 0; i < len; i++) {
        for (int j = 0; j < len; j++) {
            const double factor = phase*i*j;
            /* ... s->exp[i*len + j] is set from cos(factor)/sin(factor) ... */
        }
    }
static void TX_NAME(ff_tx_fft_naive)(AVTXContext *s, void *_dst, void *_src,
                                     ptrdiff_t stride)
{
    TXComplex *src = _src;
    TXComplex *dst = _dst;
    const int n = s->len;
    double phase = s->inv ? 2.0*M_PI/n : -2.0*M_PI/n;

    stride /= sizeof(*dst);

    for (int i = 0; i < n; i++) {
        TXComplex tmp = { 0 };
        for (int j = 0; j < n; j++) {
            const double factor = phase*i*j;
            /* ... accumulate src[j] rotated by (cos(factor), sin(factor)) ... */
        }
        dst[i*stride] = tmp;
    }
}
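/* This is the textbook O(n^2) DFT, X[k] = sum_m x[m] * exp(I*phase*k*m) with
 * phase = -2*pi/n forward and +2*pi/n inverse. It exists as a correctness
 * fallback for prime and otherwise unfactorable lengths; every faster path
 * in this file must agree with it. */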
static void TX_NAME(ff_tx_fft_naive_small)(AVTXContext *s, void *_dst,
                                           void *_src, ptrdiff_t stride)
{
    const int n = s->len;
    /* ... */
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            /* ... the same accumulation, with the twiddles read from the
             * table precomputed at init time ... */
        }
    }
}
static const FFTXCodelet TX_NAME(ff_tx_fft_naive_small_def) = {
    .name = TX_NAME_STR("fft_naive_small"),
    /* ... */
};

static const FFTXCodelet TX_NAME(ff_tx_fft_naive_def) = {
    .name = TX_NAME_STR("fft_naive"),
    /* ... */
};
static av_cold int TX_NAME(ff_tx_fft_pfa_init)(AVTXContext *s,
                                               const FFTXCodelet *cd,
                                               uint64_t flags,
                                               FFTXCodeletOptions *opts,
                                               int len, int inv,
                                               const void *scale)
{
    int ret;
    int *tmp;
    size_t extra_tmp_len = 0;
    int len_list[TX_MAX_DECOMPOSITIONS];

    /* ... ret = ff_tx_decompose_length(len_list, TX_TYPE(FFT), len, inv),
     * producing candidate coprime (len1, len2) splits ... */
    for (int i = 0; i < ret; i++) {
        int len1 = len_list[i];
        int len2 = len / len1;

        if (len2 & (len2 - 1))
            /* ... non-power-of-two second factor ... */

        /* ... try to set up both sub-transforms; every attempt ends in the
         * same retry arm, which advances to the next candidate split on
         * ENOSYS and propagates real errors: ... */
        } else if (ret < 0) {
            return ret;
        }
    }

    if ((ret = ff_tx_gen_compound_mapping(s, opts, s->inv,
                                          s->sub[0].len, s->sub[1].len)))
        return ret;

    if (!(s->tmp = av_malloc(len*sizeof(*s->tmp))))
        return AVERROR(ENOMEM);

    /* Regroup the input map so each first-stage sub-transform reads its
     * inputs contiguously */
    tmp = (int *)s->tmp;
    for (int k = 0; k < len; k += s->sub[0].len) {
        memcpy(tmp, &s->map[k], s->sub[0].len*sizeof(*tmp));
        for (int i = 0; i < s->sub[0].len; i++)
            s->map[k + i] = tmp[s->sub[0].map[i]];
    }

    /* ... */
        extra_tmp_len = len;
    /* ... */
        extra_tmp_len = s->sub[0].len;

    if (extra_tmp_len && !(s->exp = av_malloc(extra_tmp_len*sizeof(*s->exp))))
        return AVERROR(ENOMEM);

    return 0;
}
static void TX_NAME(ff_tx_fft_pfa)(AVTXContext *s, void *_out,
                                   void *_in, ptrdiff_t stride)
{
    const int n = s->sub[0].len, m = s->sub[1].len, l = s->len;
    const int *in_map = s->map, *out_map = in_map + l;
    const int *sub_map = s->sub[1].map;
    TXComplex *tmp1 = s->flags & AV_TX_INPLACE ? s->tmp : _out;
    TXComplex *in = _in, *out = _out;

    stride /= sizeof(*out);

    for (int i = 0; i < m; i++) {
        for (int j = 0; j < n; j++)
            s->exp[j] = in[in_map[i*n + j]];
        s->fn[0](&s->sub[0], &s->tmp[sub_map[i]], s->exp, m*sizeof(TXComplex));
    }

    for (int i = 0; i < n; i++)
        s->fn[1](&s->sub[1], &tmp1[m*i], &s->tmp[m*i], sizeof(TXComplex));

    for (int i = 0; i < l; i++)
        out[i*stride] = tmp1[out_map[i]];
}
static void TX_NAME(ff_tx_fft_pfa_ns)(AVTXContext *s, void *_out,
                                      void *_in, ptrdiff_t stride)
{
    const int n = s->sub[0].len, m = s->sub[1].len, l = s->len;
    const int *in_map = s->map, *out_map = in_map + l;
    const int *sub_map = s->sub[1].map;
    TXComplex *tmp1 = s->flags & AV_TX_INPLACE ? s->tmp : _out;
    TXComplex *in = _in, *out = _out;

    stride /= sizeof(*out);

    for (int i = 0; i < m; i++)
        s->fn[0](&s->sub[0], &s->tmp[sub_map[i]], &in[i*n], m*sizeof(TXComplex));

    for (int i = 0; i < n; i++)
        s->fn[1](&s->sub[1], &tmp1[m*i], &s->tmp[m*i], sizeof(TXComplex));

    for (int i = 0; i < l; i++)
        out[i*stride] = tmp1[out_map[i]];
}
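/* Prime-factor algorithm in a nutshell: for l = n*m with gcd(n, m) = 1, the
 * Chinese remainder theorem turns a length-l DFT into an n x m grid of
 * smaller DFTs with *no* twiddle multiplies between the stages; the cost is
 * the scatter/gather maps (in_map/out_map, built at init time). The _ns
 * variant skips the input gather because its input was already pre-shuffled
 * by the caller. */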
static const FFTXCodelet TX_NAME(ff_tx_fft_pfa_def) = {
    .name = TX_NAME_STR("fft_pfa"),
    /* ... */
};

static const FFTXCodelet TX_NAME(ff_tx_fft_pfa_ns_def) = {
    .name = TX_NAME_STR("fft_pfa_ns"),
    /* ... */
};
static av_cold int TX_NAME(ff_tx_mdct_naive_init)(AVTXContext *s,
                                                  const FFTXCodelet *cd,
                                                  uint64_t flags,
                                                  FFTXCodeletOptions *opts,
                                                  int len, int inv,
                                                  const void *scale)
{
    s->scale_d = *((SCALE_TYPE *)scale);
    s->scale_f = s->scale_d;
    return 0;
}

static void TX_NAME(ff_tx_mdct_naive_fwd)(AVTXContext *s, void *_dst,
                                          void *_src, ptrdiff_t stride)
{
    TXSample *src = _src, *dst = _dst;
    double scale = s->scale_d;
    int len = s->len;
    const double phase = M_PI/(4.0*len);

    stride /= sizeof(*dst);

    for (int i = 0; i < len; i++) {
        double sum = 0.0;
        for (int j = 0; j < len*2; j++) {
            int a = (2*j + 1 + len) * (2*i + 1);
            sum += UNSCALE(src[j]) * cos(a * phase);
        }
        dst[i*stride] = RESCALE(sum*scale);
    }
}
static void TX_NAME(ff_tx_mdct_naive_inv)(AVTXContext *s, void *_dst,
                                          void *_src, ptrdiff_t stride)
{
    TXSample *src = _src, *dst = _dst;
    double scale = s->scale_d;
    int len = s->len >> 1;
    int len2 = len*2;
    const double phase = M_PI/(4.0*len2);

    stride /= sizeof(*src);

    for (int i = 0; i < len; i++) {
        double sum_d = 0.0;
        double sum_u = 0.0;
        double i_d = phase * (4*len  - 2*i - 1);
        double i_u = phase * (3*len2 + 2*i + 1);
        for (int j = 0; j < len2; j++) {
            double a = (2 * j + 1);
            double a_d = cos(a * i_d);
            double a_u = cos(a * i_u);
            /* ... accumulate a_d/a_u times the unscaled input sample ... */
        }
        dst[i +   0] = RESCALE( sum_d*scale);
        dst[i + len] = RESCALE(-sum_u*scale);
    }
}
static const FFTXCodelet TX_NAME(ff_tx_mdct_naive_fwd_def) = {
    .name = TX_NAME_STR("mdct_naive_fwd"),
    /* ... */
};

static const FFTXCodelet TX_NAME(ff_tx_mdct_naive_inv_def) = {
    .name = TX_NAME_STR("mdct_naive_inv"),
    /* ... */
};
static av_cold int TX_NAME(ff_tx_mdct_init)(AVTXContext *s,
                                            const FFTXCodelet *cd,
                                            uint64_t flags,
                                            FFTXCodeletOptions *opts,
                                            int len, int inv,
                                            const void *scale)
{
    s->scale_d = *((SCALE_TYPE *)scale);
    s->scale_f = s->scale_d;

    /* ... create the half-length complex FFT sub-transform ... */

    /* If the sub-transform is preshuffled, copy its map; otherwise the map
     * is the identity */
    memcpy(s->map, s->sub->map, (len >> 1)*sizeof(*s->map));
    /* ... or ... */
    for (int i = 0; i < len >> 1; i++)
        s->map[i] = i;

    /* ... generate the exp[] twiddles; for inverse transforms the map
     * entries are pre-doubled to save arithmetic in the hot loop: ... */
    for (int i = 0; i < (s->len >> 1); i++)
        s->map[i] <<= 1;

    return 0;
}
static void TX_NAME(ff_tx_mdct_fwd)(AVTXContext *s, void *_dst, void *_src,
                                    ptrdiff_t stride)
{
    TXSample *src = _src, *dst = _dst;
    TXComplex *exp = s->exp, tmp, *z = _dst;
    const int len2 = s->len >> 1;
    const int len4 = s->len >> 2;
    const int len3 = len2 * 3;
    const int *sub_map = s->map;

    stride /= sizeof(*dst);

    /* Fold the 2N real inputs into N/2 complex values, pre-rotated and
     * pre-reindexed for the sub-transform */
    for (int i = 0; i < len2; i++) {
        const int k = 2*i;
        const int idx = sub_map[i];
        if (k < len2) {
            tmp.re = FOLD(-src[ len2 + k],  src[1*len2 - 1 - k]);
            tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]);
        } else {
            tmp.re = FOLD(-src[ len2 + k], -src[5*len2 - 1 - k]);
            tmp.im = FOLD( src[-len2 + k], -src[1*len3 - 1 - k]);
        }
        CMUL(z[idx].im, z[idx].re, tmp.re, tmp.im, exp[i].re, exp[i].im);
    }

    s->fn[0](&s->sub[0], z, z, sizeof(TXComplex));

    for (int i = 0; i < len4; i++) {
        const int i0 = len4 + i, i1 = len4 - i - 1;
        /* ... post-rotate z[i0]/z[i1] by exp[] and interleave into dst ... */
    }
}
static void TX_NAME(ff_tx_mdct_inv)(AVTXContext *s, void *_dst, void *_src,
                                    ptrdiff_t stride)
{
    TXComplex *z = _dst, *exp = s->exp;
    const TXSample *src = _src, *in1, *in2;
    const int len2 = s->len >> 1;
    const int len4 = s->len >> 2;
    const int *sub_map = s->map;

    stride /= sizeof(*src);
    in1 = src;
    in2 = src + ((len2*2) - 1) * stride;

    for (int i = 0; i < len2; i++) {
        /* ... gather the pre-shuffled input pair via sub_map[i] and
         * pre-rotate it by exp[i] ... */
    }

    s->fn[0](&s->sub[0], z, z, sizeof(TXComplex));

    for (int i = 0; i < len4; i++) {
        const int i0 = len4 + i, i1 = len4 - i - 1;
        TXComplex src1 = { z[i1].im, z[i1].re };
        TXComplex src0 = { z[i0].im, z[i0].re };

        CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re);
        CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re);
    }
}
static const FFTXCodelet TX_NAME(ff_tx_mdct_fwd_def) = {
    .name = TX_NAME_STR("mdct_fwd"),
    /* ... */
};

static const FFTXCodelet TX_NAME(ff_tx_mdct_inv_def) = {
    .name = TX_NAME_STR("mdct_inv"),
    /* ... */
};
static av_cold int TX_NAME(ff_tx_mdct_inv_full_init)(AVTXContext *s,
                                                     const FFTXCodelet *cd,
                                                     uint64_t flags,
                                                     FFTXCodeletOptions *opts,
                                                     int len, int inv,
                                                     const void *scale)
{
    s->scale_d = *((SCALE_TYPE *)scale);
    s->scale_f = s->scale_d;
    /* ... set up the half-output inverse MDCT sub-transform ... */
    return 0;
}

static void TX_NAME(ff_tx_mdct_inv_full)(AVTXContext *s, void *_dst,
                                         void *_src, ptrdiff_t stride)
{
    int len  = s->len << 1;
    int len2 = len >> 1;
    int len4 = len >> 2;
    TXSample *dst = _dst;

    /* ... run the half-output inverse MDCT, then mirror its result into a
     * full 2N-sample frame using the MDCT's symmetries: ... */
    for (int i = 0; i < len4; i++) {
        /* ... */
    }
}

static const FFTXCodelet TX_NAME(ff_tx_mdct_inv_full_def) = {
    .name = TX_NAME_STR("mdct_inv_full"),
    /* ... */
};
static av_cold int TX_NAME(ff_tx_mdct_pfa_init)(AVTXContext *s,
                                                const FFTXCodelet *cd,
                                                uint64_t flags,
                                                FFTXCodeletOptions *opts,
                                                int len, int inv,
                                                const void *scale)
{
    int ret, sub_len;

    sub_len = len / cd->factors[0];

    s->scale_d = *((SCALE_TYPE *)scale);
    s->scale_f = s->scale_d;

    /* ... */
    if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts,
                                sub_len, inv, scale)))
        return ret;

    /* ... generate the PFA input map ... */
    if (cd->factors[0] == 15)
        /* ... the 15-point case embeds a 3x5 PFA map of its own ... */

    /* ... */
    for (int i = 0; i < len; i++)
        /* ... */

    return 0;
}
#define DECL_COMP_IMDCT(N)                                                     \
static void TX_NAME(ff_tx_mdct_pfa_##N##xM_inv)(AVTXContext *s, void *_dst,   \
                                                void *_src, ptrdiff_t stride) \
{                                                                              \
    TXComplex fft##N##in[N];                                                   \
    TXComplex *z = _dst, *exp = s->exp;                                        \
    const TXSample *src = _src, *in1, *in2;                                    \
    const int len4 = s->len >> 2;                                              \
    const int len2 = s->len >> 1;                                              \
    const int m = s->sub->len;                                                 \
    const int *in_map = s->map, *out_map = in_map + N*m;                       \
    const int *sub_map = s->sub->map;                                          \
                                                                               \
    stride /= sizeof(*src);                                                    \
    in1 = src;                                                                 \
    in2 = src + ((N*m*2) - 1) * stride;                                        \
                                                                               \
    for (int i = 0; i < len2; i += N) {                                        \
        for (int j = 0; j < N; j++) {                                          \
            const int k = in_map[j];                                           \
            TXComplex tmp = { in2[-k*stride], in1[k*stride] };                 \
            CMUL3(fft##N##in[j], tmp, exp[j]);                                 \
        }                                                                      \
        fft##N(s->tmp + *(sub_map++), fft##N##in, m);                          \
        exp    += N;                                                           \
        in_map += N;                                                           \
    }                                                                          \
                                                                               \
    for (int i = 0; i < N; i++)                                                \
        s->fn[0](&s->sub[0], s->tmp + m*i, s->tmp + m*i, sizeof(TXComplex));   \
                                                                               \
    for (int i = 0; i < len4; i++) {                                           \
        const int i0 = len4 + i, i1 = len4 - i - 1;                            \
        const int s0 = out_map[i0], s1 = out_map[i1];                          \
        TXComplex src1 = { s->tmp[s1].im, s->tmp[s1].re };                     \
        TXComplex src0 = { s->tmp[s0].im, s->tmp[s0].re };                     \
                                                                               \
        CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re);    \
        CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re);    \
    }                                                                          \
}                                                                              \
                                                                               \
static const FFTXCodelet TX_NAME(ff_tx_mdct_pfa_##N##xM_inv_def) = {           \
    .name       = TX_NAME_STR("mdct_pfa_" #N "xM_inv"),                        \
    .function   = TX_NAME(ff_tx_mdct_pfa_##N##xM_inv),                         \
    .type       = TX_TYPE(MDCT),                                               \
    .flags      = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,   \
    .factors    = { N, TX_FACTOR_ANY },                                        \
    .nb_factors = 2,                                                           \
    .min_len    = N*2,                                                         \
    .max_len    = TX_LEN_UNLIMITED,                                            \
    .init       = TX_NAME(ff_tx_mdct_pfa_init),                                \
    .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                                         \
    .prio       = FF_TX_PRIO_BASE,                                             \
};
#define DECL_COMP_MDCT(N)                                                      \
static void TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd)(AVTXContext *s, void *_dst,   \
                                                void *_src, ptrdiff_t stride) \
{                                                                              \
    TXComplex fft##N##in[N];                                                   \
    TXSample *src = _src, *dst = _dst;                                         \
    TXComplex *exp = s->exp, tmp;                                              \
    const int m = s->sub->len;                                                 \
    const int len4 = N*m;                                                      \
    const int len3 = len4 * 3;                                                 \
    const int len8 = s->len >> 2;                                              \
    const int *in_map = s->map, *out_map = in_map + N*m;                       \
    const int *sub_map = s->sub->map;                                          \
                                                                               \
    stride /= sizeof(*dst);                                                    \
                                                                               \
    for (int i = 0; i < m; i++) {                                              \
        for (int j = 0; j < N; j++) {                                          \
            const int k = in_map[i*N + j];                                     \
            if (k < len4) {                                                    \
                tmp.re = FOLD(-src[ len4 + k],  src[1*len4 - 1 - k]);          \
                tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]);          \
            } else {                                                           \
                tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]);          \
                tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]);          \
            }                                                                  \
            CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im,           \
                 exp[k >> 1].re, exp[k >> 1].im);                              \
        }                                                                      \
        fft##N(s->tmp + sub_map[i], fft##N##in, m);                            \
    }                                                                          \
                                                                               \
    for (int i = 0; i < N; i++)                                                \
        s->fn[0](&s->sub[0], s->tmp + m*i, s->tmp + m*i, sizeof(TXComplex));   \
                                                                               \
    for (int i = 0; i < len8; i++) {                                           \
        const int i0 = len8 + i, i1 = len8 - i - 1;                            \
        const int s0 = out_map[i0], s1 = out_map[i1];                          \
        TXComplex src1 = { s->tmp[s1].re, s->tmp[s1].im };                     \
        TXComplex src0 = { s->tmp[s0].re, s->tmp[s0].im };                     \
                                                                               \
        CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im,    \
             exp[i0].im, exp[i0].re);                                          \
        CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im,    \
             exp[i1].im, exp[i1].re);                                          \
    }                                                                          \
}                                                                              \
                                                                               \
static const FFTXCodelet TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd_def) = {           \
    .name       = TX_NAME_STR("mdct_pfa_" #N "xM_fwd"),                        \
    .function   = TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd),                         \
    .type       = TX_TYPE(MDCT),                                               \
    .flags      = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,   \
    .factors    = { N, TX_FACTOR_ANY },                                        \
    .nb_factors = 2,                                                           \
    .min_len    = N*2,                                                         \
    .max_len    = TX_LEN_UNLIMITED,                                            \
    .init       = TX_NAME(ff_tx_mdct_pfa_init),                                \
    .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                                         \
    .prio       = FF_TX_PRIO_BASE,                                             \
};
static av_cold int TX_NAME(ff_tx_rdft_init)(AVTXContext *s,
                                            const FFTXCodelet *cd,
                                            uint64_t flags,
                                            FFTXCodeletOptions *opts,
                                            int len, int inv,
                                            const void *scale)
{
    double f, m;
    TXSample *tab;
    uint64_t r2r = flags & AV_TX_REAL_TO_REAL;
    int len4 = FFALIGN(len, 4) / 4;

    s->scale_d = *((SCALE_TYPE *)scale);
    s->scale_f = s->scale_d;

    /* ... create the half-length complex FFT sub-transform ... */

    if (!(s->exp = av_mallocz((8 + 2*len4)*sizeof(*s->exp))))
        return AVERROR(ENOMEM);

    tab = (TXSample *)s->exp;

    f = 2*M_PI/len;

    m = (inv ? 2*s->scale_d : s->scale_d);

    *tab++ = RESCALE((inv ? 0.5 : 1.0) * m);
    *tab++ = RESCALE(inv ? 0.5*m : 1.0*m);
    *tab++ = RESCALE( m);
    *tab++ = RESCALE(-m);

    *tab++ = RESCALE( (0.5 - 0.0) * m);
    if (r2r)
        *tab++ = 1 / s->scale_f;
    else
        *tab++ = RESCALE( (0.0 - 0.5) * m);
    *tab++ = RESCALE( (0.5 - inv) * m);
    *tab++ = RESCALE(-(0.5 - inv) * m);

    for (int i = 0; i < len4; i++)
        *tab++ = RESCALE(cos(i*f));

    tab = ((TXSample *)s->exp) + len4 + 8;

    for (int i = 0; i < len4; i++)
        *tab++ = RESCALE(cos(((len - i*4)/4.0)*f)) * (inv ? 1 : -1);

    return 0;
}
#define DECL_RDFT(n, inv)                                                \
static void TX_NAME(ff_tx_rdft_ ##n)(AVTXContext *s, void *_dst,        \
                                     void *_src, ptrdiff_t stride)      \
{                                                                        \
    const int len2 = s->len >> 1;                                        \
    const int len4 = s->len >> 2;                                        \
    const TXSample *fact = (void *)s->exp;                               \
    const TXSample *tcos = fact + 8;                                     \
    const TXSample *tsin = tcos + len4;                                  \
    TXComplex *data = inv ? _src : _dst;                                 \
    TXComplex t[3];                                                      \
                                                                         \
    if (!inv)                                                            \
        s->fn[0](&s->sub[0], data, _src, sizeof(TXComplex));             \
    else                                                                 \
        data[0].im = data[len2].re;                                      \
                                                                         \
    /* The DC and Nyquist bins are both real; fold them into the         \
     * first complex element */                                          \
    t[0].re = data[0].re;                                                \
    data[0].re = t[0].re + data[0].im;                                   \
    data[0].im = t[0].re - data[0].im;                                   \
    data[   0].re = MULT(fact[0], data[   0].re);                        \
    data[   0].im = MULT(fact[1], data[   0].im);                        \
    data[len4].re = MULT(fact[2], data[len4].re);                        \
    data[len4].im = MULT(fact[3], data[len4].im);                        \
                                                                         \
    for (int i = 1; i < len4; i++) {                                     \
        /* Separate even and odd FFTs */                                 \
        t[0].re = MULT(fact[4], (data[i].re + data[len2 - i].re));       \
        t[0].im = MULT(fact[5], (data[i].im - data[len2 - i].im));       \
        t[1].re = MULT(fact[6], (data[i].im + data[len2 - i].im));       \
        t[1].im = MULT(fact[7], (data[i].re - data[len2 - i].re));       \
                                                                         \
        /* Apply the twiddle to the odd FFT and recombine */             \
        CMUL(t[2].re, t[2].im, t[1].re, t[1].im, tcos[i], tsin[i]);      \
                                                                         \
        data[       i].re = t[0].re + t[2].re;                           \
        data[       i].im = t[2].im - t[0].im;                           \
        data[len2 - i].re = t[0].re - t[2].re;                           \
        data[len2 - i].im = t[2].im + t[0].im;                           \
    }                                                                    \
                                                                         \
    if (inv) {                                                           \
        s->fn[0](&s->sub[0], _dst, data, sizeof(TXComplex));             \
    } else {                                                             \
        /* Move [0].im to the last position, as convention requires */   \
        data[len2].re = data[0].im;                                      \
        data[   0].im = data[len2].im = 0;                               \
    }                                                                    \
}                                                                        \
                                                                         \
static const FFTXCodelet TX_NAME(ff_tx_rdft_ ##n## _def) = {             \
    .name       = TX_NAME_STR("rdft_" #n),                               \
    .function   = TX_NAME(ff_tx_rdft_ ##n),                              \
    .type       = TX_TYPE(RDFT),                                         \
    .flags      = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | \
                  (inv ? FF_TX_INVERSE_ONLY : FF_TX_FORWARD_ONLY),       \
    .factors    = { 4, TX_FACTOR_ANY },                                  \
    .nb_factors = 2,                                                     \
    .min_len    = 4,                                                     \
    .max_len    = TX_LEN_UNLIMITED,                                      \
    .init       = TX_NAME(ff_tx_rdft_init),                              \
    .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                                   \
    .prio       = FF_TX_PRIO_BASE,                                       \
};
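/* Both RDFT flavours implement the classic "real FFT via half-length complex
 * FFT" trick: a length-N real signal is viewed as N/2 complex samples, one
 * half-length complex FFT is run, and the loop above untangles the even/odd
 * spectra using the conjugate symmetry X[N-k] = conj(X[k]) of real-input
 * spectra, at the cost of one complex multiply per output bin. The fact[]
 * constants fold the API scale factor into the same pass. */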
#define DECL_RDFT_HALF(n, mode, mod2)                                    \
static void TX_NAME(ff_tx_rdft_ ##n)(AVTXContext *s, void *_dst,        \
                                     void *_src, ptrdiff_t stride)      \
{                                                                        \
    const int len = s->len;                                              \
    const int len2 = len >> 1;                                           \
    const int len4 = len >> 2;                                           \
    const int aligned_len4 = FFALIGN(len, 4)/4;                          \
    const TXSample *fact = (void *)s->exp;                               \
    const TXSample *tcos = fact + 8;                                     \
    const TXSample *tsin = tcos + aligned_len4;                          \
    TXComplex *data = _dst;                                              \
    TXSample *out = _dst;                                                \
    TXSample tmp_dc;                                                     \
    av_unused TXSample tmp_mid;                                          \
    TXSample tmp[4];                                                     \
    TXComplex sf, sl;                                                    \
                                                                         \
    s->fn[0](&s->sub[0], _dst, _src, sizeof(TXComplex));                 \
                                                                         \
    tmp_dc = data[0].re;                                                 \
    data[ 0].re = tmp_dc + data[0].im;                                   \
    tmp_dc      = tmp_dc - data[0].im;                                   \
                                                                         \
    data[   0].re = MULT(fact[0], data[   0].re);                        \
    tmp_dc        = MULT(fact[1], tmp_dc);                               \
    data[len4].re = MULT(fact[2], data[len4].re);                        \
                                                                         \
    if (!mod2) {                                                         \
        data[len4].im = MULT(fact[3], data[len4].im);                    \
    } else {                                                             \
        sf = data[len4];                                                 \
        sl = data[len4 + 1];                                             \
        if (mode == AV_TX_REAL_TO_REAL)                                  \
            tmp[0] = MULT(fact[4], (sf.re + sl.re));                     \
        else                                                             \
            tmp[0] = MULT(fact[5], (sf.im - sl.im));                     \
        tmp[1] = MULT(fact[6], (sf.im + sl.im));                         \
        tmp[2] = MULT(fact[7], (sf.re - sl.re));                         \
                                                                         \
        if (mode == AV_TX_REAL_TO_REAL) {                                \
            tmp[3]  = tmp[1]*tcos[len4] - tmp[2]*tsin[len4];             \
            tmp_mid = (tmp[0] - tmp[3]);                                 \
        } else {                                                         \
            tmp[3]  = tmp[1]*tsin[len4] + tmp[2]*tcos[len4];             \
            tmp_mid = (tmp[0] + tmp[3]);                                 \
        }                                                                \
    }                                                                    \
                                                                         \
    for (int i = 1; i <= len4; i++) {                                    \
        TXComplex sf = data[i];                                          \
        TXComplex sl = data[len2 - i];                                   \
                                                                         \
        if (mode == AV_TX_REAL_TO_REAL)                                  \
            tmp[0] = MULT(fact[4], (sf.re + sl.re));                     \
        else                                                             \
            tmp[0] = MULT(fact[5], (sf.im - sl.im));                     \
                                                                         \
        tmp[1] = MULT(fact[6], (sf.im + sl.im));                         \
        tmp[2] = MULT(fact[7], (sf.re - sl.re));                         \
                                                                         \
        if (mode == AV_TX_REAL_TO_REAL) {                                \
            tmp[3] = tmp[1]*tcos[i] - tmp[2]*tsin[i];                    \
            out[i]       = (tmp[0] + tmp[3]);                            \
            out[len - i] = (tmp[0] - tmp[3]);                            \
        } else {                                                         \
            tmp[3] = tmp[1]*tsin[i] + tmp[2]*tcos[i];                    \
            out[i - 1]       = (tmp[3] - tmp[0]);                        \
            out[len - i - 1] = (tmp[0] + tmp[3]);                        \
        }                                                                \
    }                                                                    \
                                                                         \
    for (int i = 1; i < (len4 + (mode == AV_TX_REAL_TO_IMAGINARY)); i++) \
        out[len2 - i] = out[len - i];                                    \
                                                                         \
    if (mode == AV_TX_REAL_TO_REAL) {                                    \
        out[len2] = tmp_dc;                                              \
        if (mod2)                                                        \
            out[len4 + 1] = tmp_mid * fact[5];                           \
    } else if (mod2) {                                                   \
        out[len4] = tmp_mid;                                             \
    }                                                                    \
}                                                                        \
                                                                         \
static const FFTXCodelet TX_NAME(ff_tx_rdft_ ##n## _def) = {             \
    .name       = TX_NAME_STR("rdft_" #n),                               \
    .function   = TX_NAME(ff_tx_rdft_ ##n),                              \
    .type       = TX_TYPE(RDFT),                                         \
    .flags      = AV_TX_UNALIGNED | AV_TX_INPLACE | mode |               \
                  FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,               \
    .factors    = { 2 + 2*(!mod2), TX_FACTOR_ANY },                      \
    .nb_factors = 2,                                                     \
    .min_len    = 2 + 2*(!mod2),                                         \
    .max_len    = TX_LEN_UNLIMITED,                                      \
    .init       = TX_NAME(ff_tx_rdft_init),                              \
    .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                                   \
    .prio       = FF_TX_PRIO_BASE,                                       \
};
static av_cold int TX_NAME(ff_tx_dct_init)(AVTXContext *s,
                                           const FFTXCodelet *cd,
                                           uint64_t flags,
                                           FFTXCodeletOptions *opts,
                                           int len, int inv,
                                           const void *scale)
{
    double freq;
    TXSample *tab;
    SCALE_TYPE rsc = *((SCALE_TYPE *)scale);

    /* ... create the RDFT sub-transform and allocate s->exp ... */

    tab = (TXSample *)s->exp;

    freq = M_PI/(len*2);

    for (int i = 0; i < len; i++)
        tab[i] = RESCALE(cos(i*freq)*(!inv + 1));

    if (inv) {
        for (int i = 0; i < len/2; i++)
            tab[len + i] = RESCALE(0.5 / sin((2*i + 1)*freq));
    } else {
        for (int i = 0; i < len/2; i++)
            tab[len + i] = RESCALE(cos((len - 2*i - 1)*freq));
    }

    return 0;
}
static void TX_NAME(ff_tx_dctII)(AVTXContext *s, void *_dst,
                                 void *_src, ptrdiff_t stride)
{
    TXSample *dst = _dst;
    TXSample *src = _src;
    const int len = s->len;
    const int len2 = len >> 1;
    const TXSample *exp = (void *)s->exp;
#ifdef TX_INT32
    int64_t  tmp1, tmp2;
#else
    TXSample tmp1, tmp2;
#endif

    for (int i = 0; i < len2; i++) {
        TXSample in1 = src[i];
        TXSample in2 = src[len - i - 1];
        TXSample s   = exp[len + i];

#ifdef TX_INT32
        /* ... 64-bit sum/difference and product, rounded down to Q31: ... */
        tmp2 = (tmp2 + 0x40000000) >> 31;
#else
        tmp1 = (in1 + in2)*0.5;
        tmp2 = (in1 - in2)*s;
#endif

        src[i]           = tmp1 + tmp2;
        src[len - i - 1] = tmp1 - tmp2;
    }

    /* ... half-length RDFT over the folded signal ... */

    for (int i = len - 2; i > 0; i -= 2) {
        /* ... recurrence producing the interleaved outputs ... */
    }

#ifdef TX_INT32
    dst[0] = (tmp1 + 0x40000000) >> 31;
#endif
}
static void TX_NAME(ff_tx_dctIII)(AVTXContext *s, void *_dst,
                                  void *_src, ptrdiff_t stride)
{
    TXSample *dst = _dst;
    TXSample *src = _src;
    const int len = s->len;
    const int len2 = len >> 1;
    const TXSample *exp = (void *)s->exp;
#ifdef TX_INT32
    int64_t  tmp1, tmp2 = src[len - 1];
    tmp2 = (2*tmp2 + 0x40000000) >> 31;
#else
    TXSample tmp1, tmp2 = 2*src[len - 1];
#endif

    /* ... */

    for (int i = len - 2; i >= 2; i -= 2) {
        TXSample val1 = src[i - 0];
        TXSample val2 = src[i - 1] - src[i + 1];
        /* ... rotate (val1, val2) by the exp[] twiddle ... */
    }

    /* ... half-length RDFT ... */

    for (int i = 0; i < len2; i++) {
        TXSample in1 = dst[i];
        TXSample in2 = dst[len - i - 1];
#ifdef TX_INT32
        /* ... 64-bit products, rounded down to Q31: ... */
        tmp2 = (tmp2 + 0x40000000) >> 31;
#endif
        /* ... */
        dst[i]           = tmp1 + tmp2;
        dst[len - i - 1] = tmp1 - tmp2;
    }
}
static const FFTXCodelet TX_NAME(ff_tx_dctII_def) = {
    .name = TX_NAME_STR("dctII"),
    /* ... */
};

static const FFTXCodelet TX_NAME(ff_tx_dctIII_def) = {
    .name = TX_NAME_STR("dctIII"),
    /* ... */
};
/* In ff_tx_dcstI_init(): the sub-transform is an RDFT over the mirrored
 * signal */
    SCALE_TYPE rsc = *((SCALE_TYPE *)scale);
    /* ... */
    if ((ret = ff_tx_init_subtx(s, TX_TYPE(RDFT), flags, NULL,
                                (len - 1 + 2*(cd->type == TX_TYPE(DST_I)))*2,
                                inv, &rsc)))
        return ret;
static void TX_NAME(ff_tx_dctI)(AVTXContext *s, void *_dst, void *_src,
                                ptrdiff_t stride)
{
    TXSample *dst = _dst;
    TXSample *src = _src;
    const int len = s->len - 1;
    TXSample *tmp = (TXSample *)s->tmp;

    stride /= sizeof(TXSample);

    /* Mirror the input into an even extension of length 2*len */
    for (int i = 0; i < len; i++)
        tmp[i] = tmp[2*len - i] = src[i * stride];

    tmp[len] = src[len * stride]; /* Middle */

    s->fn[0](&s->sub[0], dst, tmp, sizeof(TXSample));
}
static void TX_NAME(ff_tx_dstI)(AVTXContext *s, void *_dst, void *_src,
                                ptrdiff_t stride)
{
    TXSample *dst = _dst;
    TXSample *src = _src;
    const int len = s->len + 1;
    TXSample *tmp = (void *)s->tmp;

    stride /= sizeof(TXSample);

    tmp[0] = 0;

    /* Mirror the input into an odd extension of length 2*len */
    for (int i = 1; i < len; i++) {
        /* ... copy src[(i - 1)*stride] into tmp[i] and tmp[2*len - i]
         * with opposite signs ... */
    }

    s->fn[0](&s->sub[0], dst, tmp, sizeof(TXSample));
}
static const FFTXCodelet TX_NAME(ff_tx_dctI_def) = {
    .name = TX_NAME_STR("dctI"),
    /* ... */
};

static const FFTXCodelet TX_NAME(ff_tx_dstI_def) = {
    .name = TX_NAME_STR("dstI"),
    /* ... */
};
int TX_TAB(ff_tx_mdct_gen_exp)(AVTXContext *s, int *pre_tab)
{
    int off = 0;
    int len4 = s->len >> 1;
    double scale = s->scale_d;
    const double theta = (scale < 0 ? len4 : 0) + 1.0/8.0;
    size_t alloc = pre_tab ? 2*len4 : len4;

    if (!(s->exp = av_malloc_array(alloc, sizeof(*s->exp))))
        return AVERROR(ENOMEM);

    scale = sqrt(fabs(scale));

    if (pre_tab)
        off = len4;

    for (int i = 0; i < len4; i++) {
        const double alpha = M_PI_2 * (i + theta) / len4;
        s->exp[off + i] = (TXComplex){ RESCALE(cos(alpha) * scale),
                                       RESCALE(sin(alpha) * scale) };
    }

    if (pre_tab)
        for (int i = 0; i < len4; i++)
            s->exp[i] = s->exp[len4 + pre_tab[i]];

    return 0;
}
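/* The 1/8 in theta is the canonical MDCT phase offset: with twiddles of
 * angle (pi/2)*(i + 1/8)/len4, pre-rotating the folded input and
 * post-rotating the FFT output turns one half-length complex FFT into an
 * exact MDCT. A negative scale requests the phase-inverted variant,
 * implemented by offsetting theta by len4 (a 90-degree rotation of every
 * twiddle), and sqrt(fabs(scale)) splits the caller's scale factor evenly
 * between the pre- and post-rotation passes. */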
/* The codelet table for this template's precision; the init code picks from
 * it by transform type, length, flags and priority */
const FFTXCodelet * const TX_NAME(ff_tx_codelet_list)[] = {
    /* Split-radix codelets (fft2_ns through fft64_ns elided) */
    &TX_NAME(ff_tx_fft128_ns_def),
    &TX_NAME(ff_tx_fft256_ns_def),
    &TX_NAME(ff_tx_fft512_ns_def),
    &TX_NAME(ff_tx_fft1024_ns_def),
    &TX_NAME(ff_tx_fft2048_ns_def),
    &TX_NAME(ff_tx_fft4096_ns_def),
    &TX_NAME(ff_tx_fft8192_ns_def),
    &TX_NAME(ff_tx_fft16384_ns_def),
    &TX_NAME(ff_tx_fft32768_ns_def),
    &TX_NAME(ff_tx_fft65536_ns_def),
    &TX_NAME(ff_tx_fft131072_ns_def),

    /* Standalone transforms (single-factor codelets elided) */
    &TX_NAME(ff_tx_fft_inplace_def),
    &TX_NAME(ff_tx_fft_inplace_small_def),
    /* ... */
    &TX_NAME(ff_tx_fft_pfa_ns_def),
    &TX_NAME(ff_tx_fft_naive_def),
    &TX_NAME(ff_tx_fft_naive_small_def),

    /* MDCTs */
    &TX_NAME(ff_tx_mdct_pfa_3xM_fwd_def),
    &TX_NAME(ff_tx_mdct_pfa_5xM_fwd_def),
    &TX_NAME(ff_tx_mdct_pfa_7xM_fwd_def),
    &TX_NAME(ff_tx_mdct_pfa_9xM_fwd_def),
    &TX_NAME(ff_tx_mdct_pfa_15xM_fwd_def),
    &TX_NAME(ff_tx_mdct_pfa_3xM_inv_def),
    &TX_NAME(ff_tx_mdct_pfa_5xM_inv_def),
    &TX_NAME(ff_tx_mdct_pfa_7xM_inv_def),
    &TX_NAME(ff_tx_mdct_pfa_9xM_inv_def),
    &TX_NAME(ff_tx_mdct_pfa_15xM_inv_def),
    &TX_NAME(ff_tx_mdct_naive_fwd_def),
    &TX_NAME(ff_tx_mdct_naive_inv_def),
    &TX_NAME(ff_tx_mdct_inv_full_def),

    /* RDFTs */
    /* ... */
    &TX_NAME(ff_tx_rdft_r2r_mod2_def),
    /* ... */
    &TX_NAME(ff_tx_rdft_r2i_mod2_def),

    /* ... DCT/DST codelets elided ... */

    NULL,
};