FFmpeg
tx_template.c
1 /*
2  * Copyright (c) Lynne
3  *
4  * Power of two FFT:
5  * Copyright (c) Lynne
6  * Copyright (c) 2008 Loren Merritt
7  * Copyright (c) 2002 Fabrice Bellard
8  * Partly based on libdjbfft by D. J. Bernstein
9  *
10  * This file is part of FFmpeg.
11  *
12  * FFmpeg is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public
14  * License as published by the Free Software Foundation; either
15  * version 2.1 of the License, or (at your option) any later version.
16  *
17  * FFmpeg is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with FFmpeg; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25  */
26 
27 #include "mem.h"
28 
29 #define TABLE_DEF(name, size) \
30  DECLARE_ALIGNED(32, TXSample, TX_TAB(ff_tx_tab_ ##name))[size]
31 
32 #define SR_POW2_TABLES \
33  SR_TABLE(8) \
34  SR_TABLE(16) \
35  SR_TABLE(32) \
36  SR_TABLE(64) \
37  SR_TABLE(128) \
38  SR_TABLE(256) \
39  SR_TABLE(512) \
40  SR_TABLE(1024) \
41  SR_TABLE(2048) \
42  SR_TABLE(4096) \
43  SR_TABLE(8192) \
44  SR_TABLE(16384) \
45  SR_TABLE(32768) \
46  SR_TABLE(65536) \
47  SR_TABLE(131072) \
48 
49 #define SR_TABLE(len) \
50  TABLE_DEF(len, len/4 + 1);
51 /* Power of two tables */
52 SR_POW2_TABLES
53 #undef SR_TABLE
54 
55 /* Other factors' tables */
56 TABLE_DEF(53, 12);
57 TABLE_DEF( 7, 6);
58 TABLE_DEF( 9, 8);
59 
60 typedef struct FFTabInitData {
61  void (*func)(void);
62  int factors[TX_MAX_SUB]; /* Must be sorted high -> low */
63 } FFTabInitData;
64 
65 #define SR_TABLE(len) \
66 static av_cold void TX_TAB(ff_tx_init_tab_ ##len)(void) \
67 { \
68  double freq = 2*M_PI/len; \
69  TXSample *tab = TX_TAB(ff_tx_tab_ ##len); \
70  \
71  for (int i = 0; i < len/4; i++) \
72  *tab++ = RESCALE(cos(i*freq)); \
73  \
74  *tab = 0; \
75 }
76 SR_POW2_TABLES
77 #undef SR_TABLE
78 
79 static void (*const sr_tabs_init_funcs[])(void) = {
80 #define SR_TABLE(len) TX_TAB(ff_tx_init_tab_ ##len),
81  SR_POW2_TABLES
82 #undef SR_TABLE
83 };
84 
85 static AVOnce sr_tabs_init_once[] = {
86 #define SR_TABLE(len) AV_ONCE_INIT,
87  SR_POW2_TABLES
88 #undef SR_TABLE
89 };
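/* Each power-of-two table above holds a quarter wave of cosines:
 * tab[i] = cos(2*pi*i/len) for i in [0, len/4], with the final entry forced
 * to exactly 0. The matching AVOnce array lets ff_tx_init_tabs() below fill
 * each table at most once, thread-safely, through ff_thread_once(). */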
90 
91 static av_cold void TX_TAB(ff_tx_init_tab_53)(void)
92 {
93  /* 5pt, doubled to eliminate AVX lane shuffles */
94  TX_TAB(ff_tx_tab_53)[0] = RESCALE(cos(2 * M_PI / 5));
95  TX_TAB(ff_tx_tab_53)[1] = RESCALE(cos(2 * M_PI / 5));
96  TX_TAB(ff_tx_tab_53)[2] = RESCALE(cos(2 * M_PI / 10));
97  TX_TAB(ff_tx_tab_53)[3] = RESCALE(cos(2 * M_PI / 10));
98  TX_TAB(ff_tx_tab_53)[4] = RESCALE(sin(2 * M_PI / 5));
99  TX_TAB(ff_tx_tab_53)[5] = RESCALE(sin(2 * M_PI / 5));
100  TX_TAB(ff_tx_tab_53)[6] = RESCALE(sin(2 * M_PI / 10));
101  TX_TAB(ff_tx_tab_53)[7] = RESCALE(sin(2 * M_PI / 10));
102 
103  /* 3pt */
104  TX_TAB(ff_tx_tab_53)[ 8] = RESCALE(cos(2 * M_PI / 12));
105  TX_TAB(ff_tx_tab_53)[ 9] = RESCALE(cos(2 * M_PI / 12));
106  TX_TAB(ff_tx_tab_53)[10] = RESCALE(cos(2 * M_PI / 6));
107  TX_TAB(ff_tx_tab_53)[11] = RESCALE(cos(8 * M_PI / 6));
108 }
109 
110 static av_cold void TX_TAB(ff_tx_init_tab_7)(void)
111 {
112  TX_TAB(ff_tx_tab_7)[0] = RESCALE(cos(2 * M_PI / 7));
113  TX_TAB(ff_tx_tab_7)[1] = RESCALE(sin(2 * M_PI / 7));
114  TX_TAB(ff_tx_tab_7)[2] = RESCALE(sin(2 * M_PI / 28));
115  TX_TAB(ff_tx_tab_7)[3] = RESCALE(cos(2 * M_PI / 28));
116  TX_TAB(ff_tx_tab_7)[4] = RESCALE(cos(2 * M_PI / 14));
117  TX_TAB(ff_tx_tab_7)[5] = RESCALE(sin(2 * M_PI / 14));
118 }
119 
120 static av_cold void TX_TAB(ff_tx_init_tab_9)(void)
121 {
122  TX_TAB(ff_tx_tab_9)[0] = RESCALE(cos(2 * M_PI / 3));
123  TX_TAB(ff_tx_tab_9)[1] = RESCALE(sin(2 * M_PI / 3));
124  TX_TAB(ff_tx_tab_9)[2] = RESCALE(cos(2 * M_PI / 9));
125  TX_TAB(ff_tx_tab_9)[3] = RESCALE(sin(2 * M_PI / 9));
126  TX_TAB(ff_tx_tab_9)[4] = RESCALE(cos(2 * M_PI / 36));
127  TX_TAB(ff_tx_tab_9)[5] = RESCALE(sin(2 * M_PI / 36));
128  TX_TAB(ff_tx_tab_9)[6] = TX_TAB(ff_tx_tab_9)[2] + TX_TAB(ff_tx_tab_9)[5];
129  TX_TAB(ff_tx_tab_9)[7] = TX_TAB(ff_tx_tab_9)[3] - TX_TAB(ff_tx_tab_9)[4];
130 }
131 
132 static const FFTabInitData nptwo_tabs_init_data[] = {
133  { TX_TAB(ff_tx_init_tab_53), { 15, 5, 3 } },
134  { TX_TAB(ff_tx_init_tab_9), { 9 } },
135  { TX_TAB(ff_tx_init_tab_7), { 7 } },
136 };
137 
138 static AVOnce nptwo_tabs_init_once[] = {
139  AV_ONCE_INIT,
140  AV_ONCE_INIT,
141  AV_ONCE_INIT,
142 };
143 
144 av_cold void TX_TAB(ff_tx_init_tabs)(int len)
145 {
146  int factor_2 = ff_ctz(len);
147  if (factor_2) {
148  int idx = factor_2 - 3;
149  for (int i = 0; i <= idx; i++)
150  ff_thread_once(&sr_tabs_init_once[i],
151  sr_tabs_init_funcs[i]);
152  len >>= factor_2;
153  }
154 
155  for (int i = 0; i < FF_ARRAY_ELEMS(nptwo_tabs_init_data); i++) {
156  int f, f_idx = 0;
157 
158  if (len <= 1)
159  return;
160 
161  while ((f = nptwo_tabs_init_data[i].factors[f_idx++])) {
162  if (f % len)
163  continue;
164 
165  ff_thread_once(&nptwo_tabs_init_once[i],
166  nptwo_tabs_init_data[i].func);
167  len /= f;
168  break;
169  }
170  }
171 }
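/* Worked example: for len = 96 = 2^5 * 3, ff_ctz(96) = 5, so the first loop
 * runs ff_thread_once() for indices 0..2, i.e. the 8-, 16- and 32-point
 * tables. The remaining factor is len = 3, which matches the { 15, 5, 3 }
 * entry (15 % 3 == 0), so ff_tx_init_tab_53() is run and len becomes 0,
 * ending the search. */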
172 
173 static av_always_inline void fft3(TXComplex *out, TXComplex *in,
174  ptrdiff_t stride)
175 {
176  TXComplex tmp[3];
177  const TXSample *tab = TX_TAB(ff_tx_tab_53);
178 #ifdef TX_INT32
179  int64_t mtmp[4];
180 #endif
181 
182  tmp[0] = in[0];
183  BF(tmp[1].re, tmp[2].im, in[1].im, in[2].im);
184  BF(tmp[1].im, tmp[2].re, in[1].re, in[2].re);
185 
186 #ifdef TX_INT32
187  out[0*stride].re = (int64_t)tmp[0].re + tmp[2].re;
188  out[0*stride].im = (int64_t)tmp[0].im + tmp[2].im;
189  mtmp[0] = (int64_t)tab[ 8] * tmp[1].re;
190  mtmp[1] = (int64_t)tab[ 9] * tmp[1].im;
191  mtmp[2] = (int64_t)tab[10] * tmp[2].re;
192  mtmp[3] = (int64_t)tab[10] * tmp[2].im;
193  out[1*stride].re = tmp[0].re - (mtmp[2] + mtmp[0] + 0x40000000 >> 31);
194  out[1*stride].im = tmp[0].im - (mtmp[3] - mtmp[1] + 0x40000000 >> 31);
195  out[2*stride].re = tmp[0].re - (mtmp[2] - mtmp[0] + 0x40000000 >> 31);
196  out[2*stride].im = tmp[0].im - (mtmp[3] + mtmp[1] + 0x40000000 >> 31);
197 #else
198  out[0*stride].re = tmp[0].re + tmp[2].re;
199  out[0*stride].im = tmp[0].im + tmp[2].im;
200  tmp[1].re = tab[ 8] * tmp[1].re;
201  tmp[1].im = tab[ 9] * tmp[1].im;
202  tmp[2].re = tab[10] * tmp[2].re;
203  tmp[2].im = tab[10] * tmp[2].im;
204  out[1*stride].re = tmp[0].re - tmp[2].re + tmp[1].re;
205  out[1*stride].im = tmp[0].im - tmp[2].im - tmp[1].im;
206  out[2*stride].re = tmp[0].re - tmp[2].re - tmp[1].re;
207  out[2*stride].im = tmp[0].im - tmp[2].im + tmp[1].im;
208 #endif
209 }
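/* In the TX_INT32 path above the twiddles are stored as Q31 fixed point
 * (RESCALE scales by 2^31), so each 64-bit product carries 31 fractional
 * bits; adding 0x40000000 and shifting right by 31 rounds to nearest while
 * dropping them again. The expressions rely on '+' binding tighter than
 * '>>', i.e. (a + b + 0x40000000) >> 31. */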
210 
211 #define DECL_FFT5(NAME, D0, D1, D2, D3, D4) \
212 static av_always_inline void NAME(TXComplex *out, TXComplex *in, \
213  ptrdiff_t stride) \
214 { \
215  TXComplex dc, z0[4], t[6]; \
216  const TXSample *tab = TX_TAB(ff_tx_tab_53); \
217  \
218  dc = in[0]; \
219  BF(t[1].im, t[0].re, in[1].re, in[4].re); \
220  BF(t[1].re, t[0].im, in[1].im, in[4].im); \
221  BF(t[3].im, t[2].re, in[2].re, in[3].re); \
222  BF(t[3].re, t[2].im, in[2].im, in[3].im); \
223  \
224  out[D0*stride].re = dc.re + (TXUSample)t[0].re + t[2].re; \
225  out[D0*stride].im = dc.im + (TXUSample)t[0].im + t[2].im; \
226  \
227  SMUL(t[4].re, t[0].re, tab[0], tab[2], t[2].re, t[0].re); \
228  SMUL(t[4].im, t[0].im, tab[0], tab[2], t[2].im, t[0].im); \
229  CMUL(t[5].re, t[1].re, tab[4], tab[6], t[3].re, t[1].re); \
230  CMUL(t[5].im, t[1].im, tab[4], tab[6], t[3].im, t[1].im); \
231  \
232  BF(z0[0].re, z0[3].re, t[0].re, t[1].re); \
233  BF(z0[0].im, z0[3].im, t[0].im, t[1].im); \
234  BF(z0[2].re, z0[1].re, t[4].re, t[5].re); \
235  BF(z0[2].im, z0[1].im, t[4].im, t[5].im); \
236  \
237  out[D1*stride].re = dc.re + (TXUSample)z0[3].re; \
238  out[D1*stride].im = dc.im + (TXUSample)z0[0].im; \
239  out[D2*stride].re = dc.re + (TXUSample)z0[2].re; \
240  out[D2*stride].im = dc.im + (TXUSample)z0[1].im; \
241  out[D3*stride].re = dc.re + (TXUSample)z0[1].re; \
242  out[D3*stride].im = dc.im + (TXUSample)z0[2].im; \
243  out[D4*stride].re = dc.re + (TXUSample)z0[0].re; \
244  out[D4*stride].im = dc.im + (TXUSample)z0[3].im; \
245 }
246 
247 DECL_FFT5(fft5, 0, 1, 2, 3, 4)
248 DECL_FFT5(fft5_m1, 0, 6, 12, 3, 9)
249 DECL_FFT5(fft5_m2, 10, 1, 7, 13, 4)
250 DECL_FFT5(fft5_m3, 5, 11, 2, 8, 14)
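/* fft5() writes its outputs contiguously; the _m1/_m2/_m3 variants write to
 * output indices { 0, 6, 12, 3, 9 }, { 10, 1, 7, 13, 4 } and
 * { 5, 11, 2, 8, 14 } (times stride). These are the output positions needed
 * by the 15-point PFA transform below. */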
251 
252 static av_always_inline void fft7(TXComplex *out, TXComplex *in,
253  ptrdiff_t stride)
254 {
255  TXComplex dc, t[6], z[3];
256  const TXComplex *tab = (const TXComplex *)TX_TAB(ff_tx_tab_7);
257 #ifdef TX_INT32
258  int64_t mtmp[12];
259 #endif
260 
261  dc = in[0];
262  BF(t[1].re, t[0].re, in[1].re, in[6].re);
263  BF(t[1].im, t[0].im, in[1].im, in[6].im);
264  BF(t[3].re, t[2].re, in[2].re, in[5].re);
265  BF(t[3].im, t[2].im, in[2].im, in[5].im);
266  BF(t[5].re, t[4].re, in[3].re, in[4].re);
267  BF(t[5].im, t[4].im, in[3].im, in[4].im);
268 
269  out[0*stride].re = dc.re + t[0].re + t[2].re + t[4].re;
270  out[0*stride].im = dc.im + t[0].im + t[2].im + t[4].im;
271 
272 #ifdef TX_INT32 /* NOTE: it's possible to do this with 16 mults but 72 adds */
273  mtmp[ 0] = ((int64_t)tab[0].re)*t[0].re - ((int64_t)tab[2].re)*t[4].re;
274  mtmp[ 1] = ((int64_t)tab[0].re)*t[4].re - ((int64_t)tab[1].re)*t[0].re;
275  mtmp[ 2] = ((int64_t)tab[0].re)*t[2].re - ((int64_t)tab[2].re)*t[0].re;
276  mtmp[ 3] = ((int64_t)tab[0].re)*t[0].im - ((int64_t)tab[1].re)*t[2].im;
277  mtmp[ 4] = ((int64_t)tab[0].re)*t[4].im - ((int64_t)tab[1].re)*t[0].im;
278  mtmp[ 5] = ((int64_t)tab[0].re)*t[2].im - ((int64_t)tab[2].re)*t[0].im;
279 
280  mtmp[ 6] = ((int64_t)tab[2].im)*t[1].im + ((int64_t)tab[1].im)*t[5].im;
281  mtmp[ 7] = ((int64_t)tab[0].im)*t[5].im + ((int64_t)tab[2].im)*t[3].im;
282  mtmp[ 8] = ((int64_t)tab[2].im)*t[5].im + ((int64_t)tab[1].im)*t[3].im;
283  mtmp[ 9] = ((int64_t)tab[0].im)*t[1].re + ((int64_t)tab[1].im)*t[3].re;
284  mtmp[10] = ((int64_t)tab[2].im)*t[3].re + ((int64_t)tab[0].im)*t[5].re;
285  mtmp[11] = ((int64_t)tab[2].im)*t[1].re + ((int64_t)tab[1].im)*t[5].re;
286 
287  z[0].re = (int32_t)(mtmp[ 0] - ((int64_t)tab[1].re)*t[2].re + 0x40000000 >> 31);
288  z[1].re = (int32_t)(mtmp[ 1] - ((int64_t)tab[2].re)*t[2].re + 0x40000000 >> 31);
289  z[2].re = (int32_t)(mtmp[ 2] - ((int64_t)tab[1].re)*t[4].re + 0x40000000 >> 31);
290  z[0].im = (int32_t)(mtmp[ 3] - ((int64_t)tab[2].re)*t[4].im + 0x40000000 >> 31);
291  z[1].im = (int32_t)(mtmp[ 4] - ((int64_t)tab[2].re)*t[2].im + 0x40000000 >> 31);
292  z[2].im = (int32_t)(mtmp[ 5] - ((int64_t)tab[1].re)*t[4].im + 0x40000000 >> 31);
293 
294  t[0].re = (int32_t)(mtmp[ 6] - ((int64_t)tab[0].im)*t[3].im + 0x40000000 >> 31);
295  t[2].re = (int32_t)(mtmp[ 7] - ((int64_t)tab[1].im)*t[1].im + 0x40000000 >> 31);
296  t[4].re = (int32_t)(mtmp[ 8] + ((int64_t)tab[0].im)*t[1].im + 0x40000000 >> 31);
297  t[0].im = (int32_t)(mtmp[ 9] + ((int64_t)tab[2].im)*t[5].re + 0x40000000 >> 31);
298  t[2].im = (int32_t)(mtmp[10] - ((int64_t)tab[1].im)*t[1].re + 0x40000000 >> 31);
299  t[4].im = (int32_t)(mtmp[11] - ((int64_t)tab[0].im)*t[3].re + 0x40000000 >> 31);
300 #else
301  z[0].re = tab[0].re*t[0].re - tab[2].re*t[4].re - tab[1].re*t[2].re;
302  z[1].re = tab[0].re*t[4].re - tab[1].re*t[0].re - tab[2].re*t[2].re;
303  z[2].re = tab[0].re*t[2].re - tab[2].re*t[0].re - tab[1].re*t[4].re;
304  z[0].im = tab[0].re*t[0].im - tab[1].re*t[2].im - tab[2].re*t[4].im;
305  z[1].im = tab[0].re*t[4].im - tab[1].re*t[0].im - tab[2].re*t[2].im;
306  z[2].im = tab[0].re*t[2].im - tab[2].re*t[0].im - tab[1].re*t[4].im;
307 
308  /* It's possible to do t[4].re and t[0].im with 2 multiplies only by
309  * multiplying the sum of all with the average of the twiddles */
310 
311  t[0].re = tab[2].im*t[1].im + tab[1].im*t[5].im - tab[0].im*t[3].im;
312  t[2].re = tab[0].im*t[5].im + tab[2].im*t[3].im - tab[1].im*t[1].im;
313  t[4].re = tab[2].im*t[5].im + tab[1].im*t[3].im + tab[0].im*t[1].im;
314  t[0].im = tab[0].im*t[1].re + tab[1].im*t[3].re + tab[2].im*t[5].re;
315  t[2].im = tab[2].im*t[3].re + tab[0].im*t[5].re - tab[1].im*t[1].re;
316  t[4].im = tab[2].im*t[1].re + tab[1].im*t[5].re - tab[0].im*t[3].re;
317 #endif
318 
319  BF(t[1].re, z[0].re, z[0].re, t[4].re);
320  BF(t[3].re, z[1].re, z[1].re, t[2].re);
321  BF(t[5].re, z[2].re, z[2].re, t[0].re);
322  BF(t[1].im, z[0].im, z[0].im, t[0].im);
323  BF(t[3].im, z[1].im, z[1].im, t[2].im);
324  BF(t[5].im, z[2].im, z[2].im, t[4].im);
325 
326  out[1*stride].re = dc.re + z[0].re;
327  out[1*stride].im = dc.im + t[1].im;
328  out[2*stride].re = dc.re + t[3].re;
329  out[2*stride].im = dc.im + z[1].im;
330  out[3*stride].re = dc.re + z[2].re;
331  out[3*stride].im = dc.im + t[5].im;
332  out[4*stride].re = dc.re + t[5].re;
333  out[4*stride].im = dc.im + z[2].im;
334  out[5*stride].re = dc.re + z[1].re;
335  out[5*stride].im = dc.im + t[3].im;
336  out[6*stride].re = dc.re + t[1].re;
337  out[6*stride].im = dc.im + z[0].im;
338 }
339 
340 static av_always_inline void fft9(TXComplex *out, TXComplex *in,
341  ptrdiff_t stride)
342 {
343  const TXComplex *tab = (const TXComplex *)TX_TAB(ff_tx_tab_9);
344  TXComplex dc, t[16], w[4], x[5], y[5], z[2];
345 #ifdef TX_INT32
346  int64_t mtmp[12];
347 #endif
348 
349  dc = in[0];
350  BF(t[1].re, t[0].re, in[1].re, in[8].re);
351  BF(t[1].im, t[0].im, in[1].im, in[8].im);
352  BF(t[3].re, t[2].re, in[2].re, in[7].re);
353  BF(t[3].im, t[2].im, in[2].im, in[7].im);
354  BF(t[5].re, t[4].re, in[3].re, in[6].re);
355  BF(t[5].im, t[4].im, in[3].im, in[6].im);
356  BF(t[7].re, t[6].re, in[4].re, in[5].re);
357  BF(t[7].im, t[6].im, in[4].im, in[5].im);
358 
359  w[0].re = t[0].re - t[6].re;
360  w[0].im = t[0].im - t[6].im;
361  w[1].re = t[2].re - t[6].re;
362  w[1].im = t[2].im - t[6].im;
363  w[2].re = t[1].re - t[7].re;
364  w[2].im = t[1].im - t[7].im;
365  w[3].re = t[3].re + t[7].re;
366  w[3].im = t[3].im + t[7].im;
367 
368  z[0].re = dc.re + t[4].re;
369  z[0].im = dc.im + t[4].im;
370 
371  z[1].re = t[0].re + t[2].re + t[6].re;
372  z[1].im = t[0].im + t[2].im + t[6].im;
373 
374  out[0*stride].re = z[0].re + z[1].re;
375  out[0*stride].im = z[0].im + z[1].im;
376 
377 #ifdef TX_INT32
378  mtmp[0] = t[1].re - t[3].re + t[7].re;
379  mtmp[1] = t[1].im - t[3].im + t[7].im;
380 
381  y[3].re = (int32_t)(((int64_t)tab[0].im)*mtmp[0] + 0x40000000 >> 31);
382  y[3].im = (int32_t)(((int64_t)tab[0].im)*mtmp[1] + 0x40000000 >> 31);
383 
384  mtmp[0] = (int32_t)(((int64_t)tab[0].re)*z[1].re + 0x40000000 >> 31);
385  mtmp[1] = (int32_t)(((int64_t)tab[0].re)*z[1].im + 0x40000000 >> 31);
386  mtmp[2] = (int32_t)(((int64_t)tab[0].re)*t[4].re + 0x40000000 >> 31);
387  mtmp[3] = (int32_t)(((int64_t)tab[0].re)*t[4].im + 0x40000000 >> 31);
388 
389  x[3].re = z[0].re + (int32_t)mtmp[0];
390  x[3].im = z[0].im + (int32_t)mtmp[1];
391  z[0].re = in[0].re + (int32_t)mtmp[2];
392  z[0].im = in[0].im + (int32_t)mtmp[3];
393 
394  mtmp[0] = ((int64_t)tab[1].re)*w[0].re;
395  mtmp[1] = ((int64_t)tab[1].re)*w[0].im;
396  mtmp[2] = ((int64_t)tab[2].im)*w[0].re;
397  mtmp[3] = ((int64_t)tab[2].im)*w[0].im;
398  mtmp[4] = ((int64_t)tab[1].im)*w[2].re;
399  mtmp[5] = ((int64_t)tab[1].im)*w[2].im;
400  mtmp[6] = ((int64_t)tab[2].re)*w[2].re;
401  mtmp[7] = ((int64_t)tab[2].re)*w[2].im;
402 
403  x[1].re = (int32_t)(mtmp[0] + ((int64_t)tab[2].im)*w[1].re + 0x40000000 >> 31);
404  x[1].im = (int32_t)(mtmp[1] + ((int64_t)tab[2].im)*w[1].im + 0x40000000 >> 31);
405  x[2].re = (int32_t)(mtmp[2] - ((int64_t)tab[3].re)*w[1].re + 0x40000000 >> 31);
406  x[2].im = (int32_t)(mtmp[3] - ((int64_t)tab[3].re)*w[1].im + 0x40000000 >> 31);
407  y[1].re = (int32_t)(mtmp[4] + ((int64_t)tab[2].re)*w[3].re + 0x40000000 >> 31);
408  y[1].im = (int32_t)(mtmp[5] + ((int64_t)tab[2].re)*w[3].im + 0x40000000 >> 31);
409  y[2].re = (int32_t)(mtmp[6] - ((int64_t)tab[3].im)*w[3].re + 0x40000000 >> 31);
410  y[2].im = (int32_t)(mtmp[7] - ((int64_t)tab[3].im)*w[3].im + 0x40000000 >> 31);
411 
412  y[0].re = (int32_t)(((int64_t)tab[0].im)*t[5].re + 0x40000000 >> 31);
413  y[0].im = (int32_t)(((int64_t)tab[0].im)*t[5].im + 0x40000000 >> 31);
414 
415 #else
416  y[3].re = tab[0].im*(t[1].re - t[3].re + t[7].re);
417  y[3].im = tab[0].im*(t[1].im - t[3].im + t[7].im);
418 
419  x[3].re = z[0].re + tab[0].re*z[1].re;
420  x[3].im = z[0].im + tab[0].re*z[1].im;
421  z[0].re = dc.re + tab[0].re*t[4].re;
422  z[0].im = dc.im + tab[0].re*t[4].im;
423 
424  x[1].re = tab[1].re*w[0].re + tab[2].im*w[1].re;
425  x[1].im = tab[1].re*w[0].im + tab[2].im*w[1].im;
426  x[2].re = tab[2].im*w[0].re - tab[3].re*w[1].re;
427  x[2].im = tab[2].im*w[0].im - tab[3].re*w[1].im;
428  y[1].re = tab[1].im*w[2].re + tab[2].re*w[3].re;
429  y[1].im = tab[1].im*w[2].im + tab[2].re*w[3].im;
430  y[2].re = tab[2].re*w[2].re - tab[3].im*w[3].re;
431  y[2].im = tab[2].re*w[2].im - tab[3].im*w[3].im;
432 
433  y[0].re = tab[0].im*t[5].re;
434  y[0].im = tab[0].im*t[5].im;
435 #endif
436 
437  x[4].re = x[1].re + x[2].re;
438  x[4].im = x[1].im + x[2].im;
439 
440  y[4].re = y[1].re - y[2].re;
441  y[4].im = y[1].im - y[2].im;
442  x[1].re = z[0].re + x[1].re;
443  x[1].im = z[0].im + x[1].im;
444  y[1].re = y[0].re + y[1].re;
445  y[1].im = y[0].im + y[1].im;
446  x[2].re = z[0].re + x[2].re;
447  x[2].im = z[0].im + x[2].im;
448  y[2].re = y[2].re - y[0].re;
449  y[2].im = y[2].im - y[0].im;
450  x[4].re = z[0].re - x[4].re;
451  x[4].im = z[0].im - x[4].im;
452  y[4].re = y[0].re - y[4].re;
453  y[4].im = y[0].im - y[4].im;
454 
455  out[1*stride] = (TXComplex){ x[1].re + y[1].im, x[1].im - y[1].re };
456  out[2*stride] = (TXComplex){ x[2].re + y[2].im, x[2].im - y[2].re };
457  out[3*stride] = (TXComplex){ x[3].re + y[3].im, x[3].im - y[3].re };
458  out[4*stride] = (TXComplex){ x[4].re + y[4].im, x[4].im - y[4].re };
459  out[5*stride] = (TXComplex){ x[4].re - y[4].im, x[4].im + y[4].re };
460  out[6*stride] = (TXComplex){ x[3].re - y[3].im, x[3].im + y[3].re };
461  out[7*stride] = (TXComplex){ x[2].re - y[2].im, x[2].im + y[2].re };
462  out[8*stride] = (TXComplex){ x[1].re - y[1].im, x[1].im + y[1].re };
463 }
464 
465 static av_always_inline void fft15(TXComplex *out, TXComplex *in,
466  ptrdiff_t stride)
467 {
468  TXComplex tmp[15];
469 
470  for (int i = 0; i < 5; i++)
471  fft3(tmp + i, in + i*3, 5);
472 
473  fft5_m1(out, tmp + 0, stride);
474  fft5_m2(out, tmp + 5, stride);
475  fft5_m3(out, tmp + 10, stride);
476 }
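/* 15-point PFA: five 3-point transforms over the input (input stride 3,
 * results interleaved into tmp with stride 5), followed by three 5-point
 * transforms whose outputs land directly in the right places through the
 * remapped fft5_m1/m2/m3 variants above. */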
477 
478 static av_cold int TX_NAME(ff_tx_fft_factor_init)(AVTXContext *s,
479  const FFTXCodelet *cd,
480  uint64_t flags,
481  FFTXCodeletOptions *opts,
482  int len, int inv,
483  const void *scale)
484 {
485  int ret = 0;
486  TX_TAB(ff_tx_init_tabs)(len);
487 
488  if (len == 15)
489  ret = ff_tx_gen_pfa_input_map(s, opts, 3, 5);
490  else if (flags & FF_TX_PRESHUFFLE)
491  ret = ff_tx_gen_default_map(s, opts);
492 
493  return ret;
494 }
495 
496 #define DECL_FACTOR_S(n) \
497 static void TX_NAME(ff_tx_fft##n)(AVTXContext *s, void *dst, \
498  void *src, ptrdiff_t stride) \
499 { \
500  fft##n((TXComplex *)dst, (TXComplex *)src, stride / sizeof(TXComplex)); \
501 } \
502 static const FFTXCodelet TX_NAME(ff_tx_fft##n##_ns_def) = { \
503  .name = TX_NAME_STR("fft" #n "_ns"), \
504  .function = TX_NAME(ff_tx_fft##n), \
505  .type = TX_TYPE(FFT), \
506  .flags = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | \
507  AV_TX_UNALIGNED | FF_TX_PRESHUFFLE, \
508  .factors[0] = n, \
509  .nb_factors = 1, \
510  .min_len = n, \
511  .max_len = n, \
512  .init = TX_NAME(ff_tx_fft_factor_init), \
513  .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
514  .prio = FF_TX_PRIO_BASE, \
515 };
516 
517 #define DECL_FACTOR_F(n) \
518 DECL_FACTOR_S(n) \
519 static const FFTXCodelet TX_NAME(ff_tx_fft##n##_fwd_def) = { \
520  .name = TX_NAME_STR("fft" #n "_fwd"), \
521  .function = TX_NAME(ff_tx_fft##n), \
522  .type = TX_TYPE(FFT), \
523  .flags = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | \
524  AV_TX_UNALIGNED | FF_TX_FORWARD_ONLY, \
525  .factors[0] = n, \
526  .nb_factors = 1, \
527  .min_len = n, \
528  .max_len = n, \
529  .init = TX_NAME(ff_tx_fft_factor_init), \
530  .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
531  .prio = FF_TX_PRIO_BASE, \
532 };
533 
534 DECL_FACTOR_F(3)
535 DECL_FACTOR_F(5)
536 DECL_FACTOR_F(7)
537 DECL_FACTOR_F(9)
538 DECL_FACTOR_S(15)
539 
540 #define BUTTERFLIES(a0, a1, a2, a3) \
541  do { \
542  r0=a0.re; \
543  i0=a0.im; \
544  r1=a1.re; \
545  i1=a1.im; \
546  BF(t3, t5, t5, t1); \
547  BF(a2.re, a0.re, r0, t5); \
548  BF(a3.im, a1.im, i1, t3); \
549  BF(t4, t6, t2, t6); \
550  BF(a3.re, a1.re, r1, t4); \
551  BF(a2.im, a0.im, i0, t6); \
552  } while (0)
553 
554 #define TRANSFORM(a0, a1, a2, a3, wre, wim) \
555  do { \
556  CMUL(t1, t2, a2.re, a2.im, wre, -wim); \
557  CMUL(t5, t6, a3.re, a3.im, wre, wim); \
558  BUTTERFLIES(a0, a1, a2, a3); \
559  } while (0)
560 
561 /* z[0...8n-1], w[1...2n-1] */
562 static inline void TX_NAME(ff_tx_fft_sr_combine)(TXComplex *z,
563  const TXSample *cos, int len)
564 {
565  int o1 = 2*len;
566  int o2 = 4*len;
567  int o3 = 6*len;
568  const TXSample *wim = cos + o1 - 7;
569  TXUSample t1, t2, t3, t4, t5, t6, r0, i0, r1, i1;
570 
571  for (int i = 0; i < len; i += 4) {
572  TRANSFORM(z[0], z[o1 + 0], z[o2 + 0], z[o3 + 0], cos[0], wim[7]);
573  TRANSFORM(z[2], z[o1 + 2], z[o2 + 2], z[o3 + 2], cos[2], wim[5]);
574  TRANSFORM(z[4], z[o1 + 4], z[o2 + 4], z[o3 + 4], cos[4], wim[3]);
575  TRANSFORM(z[6], z[o1 + 6], z[o2 + 6], z[o3 + 6], cos[6], wim[1]);
576 
577  TRANSFORM(z[1], z[o1 + 1], z[o2 + 1], z[o3 + 1], cos[1], wim[6]);
578  TRANSFORM(z[3], z[o1 + 3], z[o2 + 3], z[o3 + 3], cos[3], wim[4]);
579  TRANSFORM(z[5], z[o1 + 5], z[o2 + 5], z[o3 + 5], cos[5], wim[2]);
580  TRANSFORM(z[7], z[o1 + 7], z[o2 + 7], z[o3 + 7], cos[7], wim[0]);
581 
582  z += 2*4;
583  cos += 2*4;
584  wim -= 2*4;
585  }
586 }
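/* Split-radix combine: z[] holds a half-length sub-transform in its first
 * two quarters and two quarter-length ones in the last two (o1, o2, o3 mark
 * the quarter boundaries). Each TRANSFORM() applies a conjugate pair of
 * twiddles (wre, -/+wim) to the quarter transforms and butterflies the
 * result into all four blocks; cosines are read forwards and the matching
 * sines backwards from the same quarter-wave table. Eight complex values
 * are handled per loop iteration. */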
587 
588 static av_cold int TX_NAME(ff_tx_fft_sr_codelet_init)(AVTXContext *s,
589  const FFTXCodelet *cd,
590  uint64_t flags,
591  FFTXCodeletOptions *opts,
592  int len, int inv,
593  const void *scale)
594 {
595  TX_TAB(ff_tx_init_tabs)(len);
596  return ff_tx_gen_ptwo_revtab(s, opts);
597 }
598 
599 #define DECL_SR_CODELET_DEF(n) \
600 static const FFTXCodelet TX_NAME(ff_tx_fft##n##_ns_def) = { \
601  .name = TX_NAME_STR("fft" #n "_ns"), \
602  .function = TX_NAME(ff_tx_fft##n##_ns), \
603  .type = TX_TYPE(FFT), \
604  .flags = FF_TX_OUT_OF_PLACE | AV_TX_INPLACE | \
605  AV_TX_UNALIGNED | FF_TX_PRESHUFFLE, \
606  .factors[0] = 2, \
607  .nb_factors = 1, \
608  .min_len = n, \
609  .max_len = n, \
610  .init = TX_NAME(ff_tx_fft_sr_codelet_init), \
611  .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
612  .prio = FF_TX_PRIO_BASE, \
613 };
614 
615 #define DECL_SR_CODELET(n, n2, n4) \
616 static void TX_NAME(ff_tx_fft##n##_ns)(AVTXContext *s, void *_dst, \
617  void *_src, ptrdiff_t stride) \
618 { \
619  TXComplex *src = _src; \
620  TXComplex *dst = _dst; \
621  const TXSample *cos = TX_TAB(ff_tx_tab_##n); \
622  \
623  TX_NAME(ff_tx_fft##n2##_ns)(s, dst, src, stride); \
624  TX_NAME(ff_tx_fft##n4##_ns)(s, dst + n4*2, src + n4*2, stride); \
625  TX_NAME(ff_tx_fft##n4##_ns)(s, dst + n4*3, src + n4*3, stride); \
626  TX_NAME(ff_tx_fft_sr_combine)(dst, cos, n4 >> 1); \
627 } \
628  \
629 DECL_SR_CODELET_DEF(n)
630 
631 static void TX_NAME(ff_tx_fft2_ns)(AVTXContext *s, void *_dst,
632  void *_src, ptrdiff_t stride)
633 {
634  TXComplex *src = _src;
635  TXComplex *dst = _dst;
636  TXComplex tmp;
637 
638  BF(tmp.re, dst[0].re, src[0].re, src[1].re);
639  BF(tmp.im, dst[0].im, src[0].im, src[1].im);
640  dst[1] = tmp;
641 }
642 
643 static void TX_NAME(ff_tx_fft4_ns)(AVTXContext *s, void *_dst,
644  void *_src, ptrdiff_t stride)
645 {
646  TXComplex *src = _src;
647  TXComplex *dst = _dst;
648  TXSample t1, t2, t3, t4, t5, t6, t7, t8;
649 
650  BF(t3, t1, src[0].re, src[1].re);
651  BF(t8, t6, src[3].re, src[2].re);
652  BF(dst[2].re, dst[0].re, t1, t6);
653  BF(t4, t2, src[0].im, src[1].im);
654  BF(t7, t5, src[2].im, src[3].im);
655  BF(dst[3].im, dst[1].im, t4, t8);
656  BF(dst[3].re, dst[1].re, t3, t7);
657  BF(dst[2].im, dst[0].im, t2, t5);
658 }
659 
660 static void TX_NAME(ff_tx_fft8_ns)(AVTXContext *s, void *_dst,
661  void *_src, ptrdiff_t stride)
662 {
663  TXComplex *src = _src;
664  TXComplex *dst = _dst;
665  TXUSample t1, t2, t3, t4, t5, t6, r0, i0, r1, i1;
666  const TXSample cos = TX_TAB(ff_tx_tab_8)[1];
667 
668  TX_NAME(ff_tx_fft4_ns)(s, dst, src, stride);
669 
670  BF(t1, dst[5].re, src[4].re, -src[5].re);
671  BF(t2, dst[5].im, src[4].im, -src[5].im);
672  BF(t5, dst[7].re, src[6].re, -src[7].re);
673  BF(t6, dst[7].im, src[6].im, -src[7].im);
674 
675  BUTTERFLIES(dst[0], dst[2], dst[4], dst[6]);
676  TRANSFORM(dst[1], dst[3], dst[5], dst[7], cos, cos);
677 }
678 
679 static void TX_NAME(ff_tx_fft16_ns)(AVTXContext *s, void *_dst,
680  void *_src, ptrdiff_t stride)
681 {
682  TXComplex *src = _src;
683  TXComplex *dst = _dst;
684  const TXSample *cos = TX_TAB(ff_tx_tab_16);
685 
686  TXUSample t1, t2, t3, t4, t5, t6, r0, i0, r1, i1;
687  TXSample cos_16_1 = cos[1];
688  TXSample cos_16_2 = cos[2];
689  TXSample cos_16_3 = cos[3];
690 
691  TX_NAME(ff_tx_fft8_ns)(s, dst + 0, src + 0, stride);
692  TX_NAME(ff_tx_fft4_ns)(s, dst + 8, src + 8, stride);
693  TX_NAME(ff_tx_fft4_ns)(s, dst + 12, src + 12, stride);
694 
695  t1 = dst[ 8].re;
696  t2 = dst[ 8].im;
697  t5 = dst[12].re;
698  t6 = dst[12].im;
699  BUTTERFLIES(dst[0], dst[4], dst[8], dst[12]);
700 
701  TRANSFORM(dst[ 2], dst[ 6], dst[10], dst[14], cos_16_2, cos_16_2);
702  TRANSFORM(dst[ 1], dst[ 5], dst[ 9], dst[13], cos_16_1, cos_16_3);
703  TRANSFORM(dst[ 3], dst[ 7], dst[11], dst[15], cos_16_3, cos_16_1);
704 }
705 
706 DECL_SR_CODELET_DEF(2)
707 DECL_SR_CODELET_DEF(4)
708 DECL_SR_CODELET_DEF(8)
709 DECL_SR_CODELET_DEF(16)
710 DECL_SR_CODELET(32,16,8)
711 DECL_SR_CODELET(64,32,16)
712 DECL_SR_CODELET(128,64,32)
713 DECL_SR_CODELET(256,128,64)
714 DECL_SR_CODELET(512,256,128)
715 DECL_SR_CODELET(1024,512,256)
716 DECL_SR_CODELET(2048,1024,512)
717 DECL_SR_CODELET(4096,2048,1024)
718 DECL_SR_CODELET(8192,4096,2048)
719 DECL_SR_CODELET(16384,8192,4096)
720 DECL_SR_CODELET(32768,16384,8192)
721 DECL_SR_CODELET(65536,32768,16384)
722 DECL_SR_CODELET(131072,65536,32768)
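/* For illustration only (not part of this file): these codelets are normally
 * reached through the public API in libavutil/tx.h. A single-precision
 * 1024-point complex FFT would typically end up on one of the split-radix
 * codelets declared above:
 *
 *   #include <libavutil/tx.h>
 *
 *   AVTXContext *ctx = NULL;
 *   av_tx_fn fn;
 *   float scale = 1.0f;
 *   AVComplexFloat in[1024], out[1024];
 *   if (av_tx_init(&ctx, &fn, AV_TX_FLOAT_FFT, 0, 1024, &scale, 0) >= 0)
 *       fn(ctx, out, in, sizeof(AVComplexFloat));
 *   av_tx_uninit(&ctx);
 */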
723 
724 static av_cold int TX_NAME(ff_tx_fft_init)(AVTXContext *s,
725  const FFTXCodelet *cd,
726  uint64_t flags,
727  FFTXCodeletOptions *opts,
728  int len, int inv,
729  const void *scale)
730 {
731  int ret;
732  int is_inplace = !!(flags & AV_TX_INPLACE);
733  FFTXCodeletOptions sub_opts = {
734  .map_dir = is_inplace ? FF_TX_MAP_SCATTER : FF_TX_MAP_GATHER,
735  };
736 
737  flags &= ~FF_TX_OUT_OF_PLACE; /* We want the subtransform to be */
738  flags |= AV_TX_INPLACE; /* in-place */
739  flags |= FF_TX_PRESHUFFLE; /* This function handles the permute step */
740 
741  if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts, len, inv, scale)))
742  return ret;
743 
744  if (is_inplace && (ret = ff_tx_gen_inplace_map(s, len)))
745  return ret;
746 
747  return 0;
748 }
749 
750 static av_cold int TX_NAME(ff_tx_fft_inplace_small_init)(AVTXContext *s,
751  const FFTXCodelet *cd,
752  uint64_t flags,
753  FFTXCodeletOptions *opts,
754  int len, int inv,
755  const void *scale)
756 {
757  if (!(s->tmp = av_malloc(len*sizeof(*s->tmp))))
758  return AVERROR(ENOMEM);
759  flags &= ~AV_TX_INPLACE;
760  return TX_NAME(ff_tx_fft_init)(s, cd, flags, opts, len, inv, scale);
761 }
762 
763 static void TX_NAME(ff_tx_fft)(AVTXContext *s, void *_dst,
764  void *_src, ptrdiff_t stride)
765 {
766  TXComplex *src = _src;
767  TXComplex *dst1 = s->flags & AV_TX_INPLACE ? s->tmp : _dst;
768  TXComplex *dst2 = _dst;
769  int *map = s->sub[0].map;
770  int len = s->len;
771 
772  /* Compilers can't vectorize this anyway without assuming AVX2, which they
773  * generally don't, at least without -march=native -mtune=native */
774  for (int i = 0; i < len; i++)
775  dst1[i] = src[map[i]];
776 
777  s->fn[0](&s->sub[0], dst2, dst1, stride);
778 }
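/* s->sub[0].map is a gather map here, so the copy above performs the full
 * input permutation and the "_ns" (no-shuffle) subtransform then runs on
 * naturally ordered data. With AV_TX_INPLACE the permuted copy goes through
 * s->tmp first. */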
779 
780 static void TX_NAME(ff_tx_fft_inplace)(AVTXContext *s, void *_dst,
781  void *_src, ptrdiff_t stride)
782 {
783  TXComplex *src = _src;
784  TXComplex *dst = _dst;
785  TXComplex tmp;
786  const int *map = s->sub->map;
787  const int *inplace_idx = s->map;
788  int src_idx, dst_idx;
789 
790  src_idx = *inplace_idx++;
791  do {
792  tmp = src[src_idx];
793  dst_idx = map[src_idx];
794  do {
795  FFSWAP(TXComplex, tmp, src[dst_idx]);
796  dst_idx = map[dst_idx];
797  } while (dst_idx != src_idx); /* Can be > as well, but was less predictable */
798  src[dst_idx] = tmp;
799  } while ((src_idx = *inplace_idx++));
800 
801  s->fn[0](&s->sub[0], dst, src, stride);
802 }
803 
804 static const FFTXCodelet TX_NAME(ff_tx_fft_def) = {
805  .name = TX_NAME_STR("fft"),
806  .function = TX_NAME(ff_tx_fft),
807  .type = TX_TYPE(FFT),
808  .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE,
809  .factors[0] = TX_FACTOR_ANY,
810  .nb_factors = 1,
811  .min_len = 2,
812  .max_len = TX_LEN_UNLIMITED,
813  .init = TX_NAME(ff_tx_fft_init),
814  .cpu_flags = FF_TX_CPU_FLAGS_ALL,
815  .prio = FF_TX_PRIO_BASE,
816 };
817 
818 static const FFTXCodelet TX_NAME(ff_tx_fft_inplace_small_def) = {
819  .name = TX_NAME_STR("fft_inplace_small"),
820  .function = TX_NAME(ff_tx_fft),
821  .type = TX_TYPE(FFT),
822  .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE,
823  .factors[0] = TX_FACTOR_ANY,
824  .nb_factors = 1,
825  .min_len = 2,
826  .max_len = 65536,
827  .init = TX_NAME(ff_tx_fft_inplace_small_init),
828  .cpu_flags = FF_TX_CPU_FLAGS_ALL,
829  .prio = FF_TX_PRIO_BASE - 256,
830 };
831 
832 static const FFTXCodelet TX_NAME(ff_tx_fft_inplace_def) = {
833  .name = TX_NAME_STR("fft_inplace"),
834  .function = TX_NAME(ff_tx_fft_inplace),
835  .type = TX_TYPE(FFT),
836  .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE,
837  .factors[0] = TX_FACTOR_ANY,
838  .nb_factors = 1,
839  .min_len = 2,
840  .max_len = TX_LEN_UNLIMITED,
841  .init = TX_NAME(ff_tx_fft_init),
842  .cpu_flags = FF_TX_CPU_FLAGS_ALL,
843  .prio = FF_TX_PRIO_BASE - 512,
844 };
845 
846 static av_cold int TX_NAME(ff_tx_fft_init_naive_small)(AVTXContext *s,
847  const FFTXCodelet *cd,
848  uint64_t flags,
849  FFTXCodeletOptions *opts,
850  int len, int inv,
851  const void *scale)
852 {
853  const double phase = s->inv ? 2.0*M_PI/len : -2.0*M_PI/len;
854 
855  if (!(s->exp = av_malloc(len*len*sizeof(*s->exp))))
856  return AVERROR(ENOMEM);
857 
858  for (int i = 0; i < len; i++) {
859  for (int j = 0; j < len; j++) {
860  const double factor = phase*i*j;
861  s->exp[i*j] = (TXComplex){
862  RESCALE(cos(factor)),
863  RESCALE(sin(factor)),
864  };
865  }
866  }
867 
868  return 0;
869 }
870 
871 static void TX_NAME(ff_tx_fft_naive)(AVTXContext *s, void *_dst, void *_src,
872  ptrdiff_t stride)
873 {
874  TXComplex *src = _src;
875  TXComplex *dst = _dst;
876  const int n = s->len;
877  double phase = s->inv ? 2.0*M_PI/n : -2.0*M_PI/n;
878 
879  stride /= sizeof(*dst);
880 
881  for (int i = 0; i < n; i++) {
882  TXComplex tmp = { 0 };
883  for (int j = 0; j < n; j++) {
884  const double factor = phase*i*j;
885  const TXComplex mult = {
886  RESCALE(cos(factor)),
887  RESCALE(sin(factor)),
888  };
889  TXComplex res;
890  CMUL3(res, src[j], mult);
891  tmp.re += res.re;
892  tmp.im += res.im;
893  }
894  dst[i*stride] = tmp;
895  }
896 }
897 
898 static void TX_NAME(ff_tx_fft_naive_small)(AVTXContext *s, void *_dst, void *_src,
899  ptrdiff_t stride)
900 {
901  TXComplex *src = _src;
902  TXComplex *dst = _dst;
903  const int n = s->len;
904 
905  stride /= sizeof(*dst);
906 
907  for (int i = 0; i < n; i++) {
908  TXComplex tmp = { 0 };
909  for (int j = 0; j < n; j++) {
910  TXComplex res;
911  const TXComplex mult = s->exp[i*j];
912  CMUL3(res, src[j], mult);
913  tmp.re += res.re;
914  tmp.im += res.im;
915  }
916  dst[i*stride] = tmp;
917  }
918 }
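/* Both naive versions compute the DFT directly from its definition,
 *   X[k] = sum_{n=0..N-1} x[n] * e^(-+ 2*pi*i*k*n/N)
 * (sign depending on s->inv), in O(N^2) operations. The "small" variant only
 * differs in reading the precomputed table s->exp[k*n] filled by its init
 * function instead of evaluating cos()/sin() per sample. */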
919 
920 static const FFTXCodelet TX_NAME(ff_tx_fft_naive_small_def) = {
921  .name = TX_NAME_STR("fft_naive_small"),
922  .function = TX_NAME(ff_tx_fft_naive_small),
923  .type = TX_TYPE(FFT),
924  .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE,
925  .factors[0] = TX_FACTOR_ANY,
926  .nb_factors = 1,
927  .min_len = 2,
928  .max_len = 1024,
929  .init = TX_NAME(ff_tx_fft_init_naive_small),
930  .cpu_flags = FF_TX_CPU_FLAGS_ALL,
931  .prio = FF_TX_PRIO_MIN/2,
932 };
933 
934 static const FFTXCodelet TX_NAME(ff_tx_fft_naive_def) = {
935  .name = TX_NAME_STR("fft_naive"),
936  .function = TX_NAME(ff_tx_fft_naive),
937  .type = TX_TYPE(FFT),
938  .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE,
939  .factors[0] = TX_FACTOR_ANY,
940  .nb_factors = 1,
941  .min_len = 2,
942  .max_len = TX_LEN_UNLIMITED,
943  .init = NULL,
944  .cpu_flags = FF_TX_CPU_FLAGS_ALL,
945  .prio = FF_TX_PRIO_MIN,
946 };
947 
948 static av_cold int TX_NAME(ff_tx_fft_pfa_init)(AVTXContext *s,
949  const FFTXCodelet *cd,
950  uint64_t flags,
951  FFTXCodeletOptions *opts,
952  int len, int inv,
953  const void *scale)
954 {
955  int ret, *tmp, ps = flags & FF_TX_PRESHUFFLE;
956  FFTXCodeletOptions sub_opts = { .map_dir = FF_TX_MAP_GATHER };
957  size_t extra_tmp_len = 0;
958  int len_list[TX_MAX_DECOMPOSITIONS];
959 
960  if ((ret = ff_tx_decompose_length(len_list, TX_TYPE(FFT), len, inv)) < 0)
961  return ret;
962 
963  /* Two iterations to test both orderings. */
964  for (int i = 0; i < ret; i++) {
965  int len1 = len_list[i];
966  int len2 = len / len1;
967 
968  /* Our ptwo transforms don't support striding the output. */
969  if (len2 & (len2 - 1))
970  FFSWAP(int, len1, len2);
971 
972  ff_tx_clear_ctx(s);
973 
974  /* First transform */
975  sub_opts.map_dir = FF_TX_MAP_GATHER;
976  flags &= ~AV_TX_INPLACE;
977  flags |= FF_TX_OUT_OF_PLACE;
978  flags |= FF_TX_PRESHUFFLE; /* This function handles the permute step */
979  ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts,
980  len1, inv, scale);
981 
982  if (ret == AVERROR(ENOMEM)) {
983  return ret;
984  } else if (ret < 0) { /* Try again without a preshuffle flag */
985  flags &= ~FF_TX_PRESHUFFLE;
986  ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts,
987  len1, inv, scale);
988  if (ret == AVERROR(ENOMEM))
989  return ret;
990  else if (ret < 0)
991  continue;
992  }
993 
994  /* Second transform. */
995  sub_opts.map_dir = FF_TX_MAP_SCATTER;
997 retry:
998  flags &= ~FF_TX_OUT_OF_PLACE;
999  flags |= AV_TX_INPLACE;
1000  ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts,
1001  len2, inv, scale);
1002 
1003  if (ret == AVERROR(ENOMEM)) {
1004  return ret;
1005  } else if (ret < 0) { /* Try again with an out-of-place transform */
1006  flags |= FF_TX_OUT_OF_PLACE;
1007  flags &= ~AV_TX_INPLACE;
1008  ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts,
1009  len2, inv, scale);
1010  if (ret == AVERROR(ENOMEM)) {
1011  return ret;
1012  } else if (ret < 0) {
1013  if (flags & FF_TX_PRESHUFFLE) { /* Retry again without a preshuf flag */
1014  flags &= ~FF_TX_PRESHUFFLE;
1015  goto retry;
1016  } else {
1017  continue;
1018  }
1019  }
1020  }
1021 
1022  /* Success */
1023  break;
1024  }
1025 
1026  /* If nothing was successful, error out */
1027  if (ret < 0)
1028  return ret;
1029 
1030  /* Generate PFA map */
1031  if ((ret = ff_tx_gen_compound_mapping(s, opts, 0,
1032  s->sub[0].len, s->sub[1].len)))
1033  return ret;
1034 
1035  if (!(s->tmp = av_malloc(len*sizeof(*s->tmp))))
1036  return AVERROR(ENOMEM);
1037 
1038  /* Flatten input map */
1039  tmp = (int *)s->tmp;
1040  for (int k = 0; k < len; k += s->sub[0].len) {
1041  memcpy(tmp, &s->map[k], s->sub[0].len*sizeof(*tmp));
1042  for (int i = 0; i < s->sub[0].len; i++)
1043  s->map[k + i] = tmp[s->sub[0].map[i]];
1044  }
1045 
1046  /* Only allocate extra temporary memory if we need it */
1047  if (!(s->sub[1].flags & AV_TX_INPLACE))
1048  extra_tmp_len = len;
1049  else if (!ps)
1050  extra_tmp_len = s->sub[0].len;
1051 
1052  if (extra_tmp_len && !(s->exp = av_malloc(extra_tmp_len*sizeof(*s->exp))))
1053  return AVERROR(ENOMEM);
1054 
1055  return 0;
1056 }
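/* Roughly: the loop above tries each decomposition returned by
 * ff_tx_decompose_length(); the first stage is set up as an out-of-place,
 * gather-mapped, preshuffled transform and the second as a scatter-mapped
 * in-place one, and the flags are relaxed step by step (drop the preshuffle,
 * then the in-place requirement) before the next candidate split is tried. */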
1057 
1058 static void TX_NAME(ff_tx_fft_pfa)(AVTXContext *s, void *_out,
1059  void *_in, ptrdiff_t stride)
1060 {
1061  const int n = s->sub[0].len, m = s->sub[1].len, l = s->len;
1062  const int *in_map = s->map, *out_map = in_map + l;
1063  const int *sub_map = s->sub[1].map;
1064  TXComplex *tmp1 = s->sub[1].flags & AV_TX_INPLACE ? s->tmp : s->exp;
1065  TXComplex *in = _in, *out = _out;
1066 
1067  stride /= sizeof(*out);
1068 
1069  for (int i = 0; i < m; i++) {
1070  for (int j = 0; j < n; j++)
1071  s->exp[j] = in[in_map[i*n + j]];
1072  s->fn[0](&s->sub[0], &s->tmp[sub_map[i]], s->exp, m*sizeof(TXComplex));
1073  }
1074 
1075  for (int i = 0; i < n; i++)
1076  s->fn[1](&s->sub[1], &tmp1[m*i], &s->tmp[m*i], sizeof(TXComplex));
1077 
1078  for (int i = 0; i < l; i++)
1079  out[i*stride] = tmp1[out_map[i]];
1080 }
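/* PFA layout: l = n*m with n and m coprime. The first stage runs m n-point
 * sub-transforms on inputs gathered through in_map, scattering their results
 * into s->tmp (stride m); the second stage runs n m-point sub-transforms in
 * place (or into s->exp when the subtransform is not in-place capable), and
 * out_map applies the final reordering to the output. */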
1081 
1082 static void TX_NAME(ff_tx_fft_pfa_ns)(AVTXContext *s, void *_out,
1083  void *_in, ptrdiff_t stride)
1084 {
1085  const int n = s->sub[0].len, m = s->sub[1].len, l = s->len;
1086  const int *in_map = s->map, *out_map = in_map + l;
1087  const int *sub_map = s->sub[1].map;
1088  TXComplex *tmp1 = s->sub[1].flags & AV_TX_INPLACE ? s->tmp : s->exp;
1089  TXComplex *in = _in, *out = _out;
1090 
1091  stride /= sizeof(*out);
1092 
1093  for (int i = 0; i < m; i++)
1094  s->fn[0](&s->sub[0], &s->tmp[sub_map[i]], &in[i*n], m*sizeof(TXComplex));
1095 
1096  for (int i = 0; i < n; i++)
1097  s->fn[1](&s->sub[1], &tmp1[m*i], &s->tmp[m*i], sizeof(TXComplex));
1098 
1099  for (int i = 0; i < l; i++)
1100  out[i*stride] = tmp1[out_map[i]];
1101 }
1102 
1103 static const FFTXCodelet TX_NAME(ff_tx_fft_pfa_def) = {
1104  .name = TX_NAME_STR("fft_pfa"),
1105  .function = TX_NAME(ff_tx_fft_pfa),
1106  .type = TX_TYPE(FFT),
1107  .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE,
1108  .factors = { 7, 5, 3, 2, TX_FACTOR_ANY },
1109  .nb_factors = 2,
1110  .min_len = 2*3,
1111  .max_len = TX_LEN_UNLIMITED,
1112  .init = TX_NAME(ff_tx_fft_pfa_init),
1113  .cpu_flags = FF_TX_CPU_FLAGS_ALL,
1114  .prio = FF_TX_PRIO_BASE,
1115 };
1116 
1117 static const FFTXCodelet TX_NAME(ff_tx_fft_pfa_ns_def) = {
1118  .name = TX_NAME_STR("fft_pfa_ns"),
1119  .function = TX_NAME(ff_tx_fft_pfa_ns),
1120  .type = TX_TYPE(FFT),
1121  .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | AV_TX_INPLACE |
1122  FF_TX_PRESHUFFLE,
1123  .factors = { 7, 5, 3, 2, TX_FACTOR_ANY },
1124  .nb_factors = 2,
1125  .min_len = 2*3,
1126  .max_len = TX_LEN_UNLIMITED,
1127  .init = TX_NAME(ff_tx_fft_pfa_init),
1128  .cpu_flags = FF_TX_CPU_FLAGS_ALL,
1129  .prio = FF_TX_PRIO_BASE,
1130 };
1131 
1132 static av_cold int TX_NAME(ff_tx_mdct_naive_init)(AVTXContext *s,
1133  const FFTXCodelet *cd,
1134  uint64_t flags,
1135  FFTXCodeletOptions *opts,
1136  int len, int inv,
1137  const void *scale)
1138 {
1139  s->scale_d = *((SCALE_TYPE *)scale);
1140  s->scale_f = s->scale_d;
1141  return 0;
1142 }
1143 
1144 static void TX_NAME(ff_tx_mdct_naive_fwd)(AVTXContext *s, void *_dst,
1145  void *_src, ptrdiff_t stride)
1146 {
1147  TXSample *src = _src;
1148  TXSample *dst = _dst;
1149  double scale = s->scale_d;
1150  int len = s->len;
1151  const double phase = M_PI/(4.0*len);
1152 
1153  stride /= sizeof(*dst);
1154 
1155  for (int i = 0; i < len; i++) {
1156  double sum = 0.0;
1157  for (int j = 0; j < len*2; j++) {
1158  int a = (2*j + 1 + len) * (2*i + 1);
1159  sum += UNSCALE(src[j]) * cos(a * phase);
1160  }
1161  dst[i*stride] = RESCALE(sum*scale);
1162  }
1163 }
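/* This is the forward MDCT evaluated directly from its definition,
 *   X[k] = sum_{n=0..2N-1} x[n] * cos( pi/(4N) * (2n + 1 + N) * (2k + 1) ),
 * with N = s->len output coefficients for 2N input samples, times the
 * requested scale. */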
1164 
1165 static void TX_NAME(ff_tx_mdct_naive_inv)(AVTXContext *s, void *_dst,
1166  void *_src, ptrdiff_t stride)
1167 {
1168  TXSample *src = _src;
1169  TXSample *dst = _dst;
1170  double scale = s->scale_d;
1171  int len = s->len >> 1;
1172  int len2 = len*2;
1173  const double phase = M_PI/(4.0*len2);
1174 
1175  stride /= sizeof(*src);
1176 
1177  for (int i = 0; i < len; i++) {
1178  double sum_d = 0.0;
1179  double sum_u = 0.0;
1180  double i_d = phase * (4*len - 2*i - 1);
1181  double i_u = phase * (3*len2 + 2*i + 1);
1182  for (int j = 0; j < len2; j++) {
1183  double a = (2 * j + 1);
1184  double a_d = cos(a * i_d);
1185  double a_u = cos(a * i_u);
1186  double val = UNSCALE(src[j*stride]);
1187  sum_d += a_d * val;
1188  sum_u += a_u * val;
1189  }
1190  dst[i + 0] = RESCALE( sum_d*scale);
1191  dst[i + len] = RESCALE(-sum_u*scale);
1192  }
1193 }
1194 
1195 static const FFTXCodelet TX_NAME(ff_tx_mdct_naive_fwd_def) = {
1196  .name = TX_NAME_STR("mdct_naive_fwd"),
1197  .function = TX_NAME(ff_tx_mdct_naive_fwd),
1198  .type = TX_TYPE(MDCT),
1199  .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,
1200  .factors = { 2, TX_FACTOR_ANY }, /* MDCTs need an even length */
1201  .nb_factors = 2,
1202  .min_len = 2,
1203  .max_len = TX_LEN_UNLIMITED,
1204  .init = TX_NAME(ff_tx_mdct_naive_init),
1205  .cpu_flags = FF_TX_CPU_FLAGS_ALL,
1206  .prio = FF_TX_PRIO_MIN,
1207 };
1208 
1209 static const FFTXCodelet TX_NAME(ff_tx_mdct_naive_inv_def) = {
1210  .name = TX_NAME_STR("mdct_naive_inv"),
1211  .function = TX_NAME(ff_tx_mdct_naive_inv),
1212  .type = TX_TYPE(MDCT),
1213  .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,
1214  .factors = { 2, TX_FACTOR_ANY },
1215  .nb_factors = 2,
1216  .min_len = 2,
1217  .max_len = TX_LEN_UNLIMITED,
1218  .init = TX_NAME(ff_tx_mdct_naive_init),
1219  .cpu_flags = FF_TX_CPU_FLAGS_ALL,
1220  .prio = FF_TX_PRIO_MIN,
1221 };
1222 
1223 static av_cold int TX_NAME(ff_tx_mdct_init)(AVTXContext *s,
1224  const FFTXCodelet *cd,
1225  uint64_t flags,
1226  FFTXCodeletOptions *opts,
1227  int len, int inv,
1228  const void *scale)
1229 {
1230  int ret;
1231  FFTXCodeletOptions sub_opts = {
1232  .map_dir = FF_TX_MAP_SCATTER,
1233  };
1234 
1235  s->scale_d = *((SCALE_TYPE *)scale);
1236  s->scale_f = s->scale_d;
1237 
1238  flags &= ~FF_TX_OUT_OF_PLACE; /* We want the subtransform to be */
1239  flags |= AV_TX_INPLACE; /* in-place */
1240  flags |= FF_TX_PRESHUFFLE; /* First try with an in-place transform */
1241 
1242  if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts, len >> 1,
1243  inv, scale))) {
1244  flags &= ~FF_TX_PRESHUFFLE; /* Now try with a generic FFT */
1245  if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts, len >> 1,
1246  inv, scale)))
1247  return ret;
1248  }
1249 
1250  s->map = av_malloc((len >> 1)*sizeof(*s->map));
1251  if (!s->map)
1252  return AVERROR(ENOMEM);
1253 
1254  /* If we need to preshuffle copy the map from the subcontext */
1255  if (s->sub[0].flags & FF_TX_PRESHUFFLE) {
1256  memcpy(s->map, s->sub->map, (len >> 1)*sizeof(*s->map));
1257  } else {
1258  for (int i = 0; i < len >> 1; i++)
1259  s->map[i] = i;
1260  }
1261 
1262  if ((ret = TX_TAB(ff_tx_mdct_gen_exp)(s, inv ? s->map : NULL)))
1263  return ret;
1264 
1265  /* Saves a multiply in a hot path. */
1266  if (inv)
1267  for (int i = 0; i < (s->len >> 1); i++)
1268  s->map[i] <<= 1;
1269 
1270  return 0;
1271 }
1272 
1273 static void TX_NAME(ff_tx_mdct_fwd)(AVTXContext *s, void *_dst, void *_src,
1274  ptrdiff_t stride)
1275 {
1276  TXSample *src = _src, *dst = _dst;
1277  TXComplex *exp = s->exp, tmp, *z = _dst;
1278  const int len2 = s->len >> 1;
1279  const int len4 = s->len >> 2;
1280  const int len3 = len2 * 3;
1281  const int *sub_map = s->map;
1282 
1283  stride /= sizeof(*dst);
1284 
1285  for (int i = 0; i < len2; i++) { /* Folding and pre-reindexing */
1286  const int k = 2*i;
1287  const int idx = sub_map[i];
1288  if (k < len2) {
1289  tmp.re = FOLD(-src[ len2 + k], src[1*len2 - 1 - k]);
1290  tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]);
1291  } else {
1292  tmp.re = FOLD(-src[ len2 + k], -src[5*len2 - 1 - k]);
1293  tmp.im = FOLD( src[-len2 + k], -src[1*len3 - 1 - k]);
1294  }
1295  CMUL(z[idx].im, z[idx].re, tmp.re, tmp.im, exp[i].re, exp[i].im);
1296  }
1297 
1298  s->fn[0](&s->sub[0], z, z, sizeof(TXComplex));
1299 
1300  for (int i = 0; i < len4; i++) {
1301  const int i0 = len4 + i, i1 = len4 - i - 1;
1302  TXComplex src1 = { z[i1].re, z[i1].im };
1303  TXComplex src0 = { z[i0].re, z[i0].im };
1304 
1305  CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im,
1306  exp[i0].im, exp[i0].re);
1307  CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im,
1308  exp[i1].im, exp[i1].re);
1309  }
1310 }
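/* Standard MDCT-via-FFT: the 2N real inputs are folded into N/2 complex
 * values (the two branches above pick which quarters get combined),
 * pre-rotated by exp[], transformed with the N/2-point complex FFT, and
 * post-rotated; the final loop writes output pairs mirrored around the
 * centre, using the caller-supplied stride. */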
1311 
1312 static void TX_NAME(ff_tx_mdct_inv)(AVTXContext *s, void *_dst, void *_src,
1313  ptrdiff_t stride)
1314 {
1315  TXComplex *z = _dst, *exp = s->exp;
1316  const TXSample *src = _src, *in1, *in2;
1317  const int len2 = s->len >> 1;
1318  const int len4 = s->len >> 2;
1319  const int *sub_map = s->map;
1320 
1321  stride /= sizeof(*src);
1322  in1 = src;
1323  in2 = src + ((len2*2) - 1) * stride;
1324 
1325  for (int i = 0; i < len2; i++) {
1326  int k = sub_map[i];
1327  TXComplex tmp = { in2[-k*stride], in1[k*stride] };
1328  CMUL3(z[i], tmp, exp[i]);
1329  }
1330 
1331  s->fn[0](&s->sub[0], z, z, sizeof(TXComplex));
1332 
1333  exp += len2;
1334  for (int i = 0; i < len4; i++) {
1335  const int i0 = len4 + i, i1 = len4 - i - 1;
1336  TXComplex src1 = { z[i1].im, z[i1].re };
1337  TXComplex src0 = { z[i0].im, z[i0].re };
1338 
1339  CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re);
1340  CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re);
1341  }
1342 }
1343 
1344 static const FFTXCodelet TX_NAME(ff_tx_mdct_fwd_def) = {
1345  .name = TX_NAME_STR("mdct_fwd"),
1346  .function = TX_NAME(ff_tx_mdct_fwd),
1347  .type = TX_TYPE(MDCT),
1348  .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,
1349  .factors = { 2, TX_FACTOR_ANY },
1350  .nb_factors = 2,
1351  .min_len = 2,
1352  .max_len = TX_LEN_UNLIMITED,
1353  .init = TX_NAME(ff_tx_mdct_init),
1354  .cpu_flags = FF_TX_CPU_FLAGS_ALL,
1355  .prio = FF_TX_PRIO_BASE,
1356 };
1357 
1358 static const FFTXCodelet TX_NAME(ff_tx_mdct_inv_def) = {
1359  .name = TX_NAME_STR("mdct_inv"),
1360  .function = TX_NAME(ff_tx_mdct_inv),
1361  .type = TX_TYPE(MDCT),
1362  .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,
1363  .factors = { 2, TX_FACTOR_ANY },
1364  .nb_factors = 2,
1365  .min_len = 2,
1366  .max_len = TX_LEN_UNLIMITED,
1367  .init = TX_NAME(ff_tx_mdct_init),
1368  .cpu_flags = FF_TX_CPU_FLAGS_ALL,
1369  .prio = FF_TX_PRIO_BASE,
1370 };
1371 
1372 static av_cold int TX_NAME(ff_tx_mdct_inv_full_init)(AVTXContext *s,
1373  const FFTXCodelet *cd,
1374  uint64_t flags,
1375  FFTXCodeletOptions *opts,
1376  int len, int inv,
1377  const void *scale)
1378 {
1379  int ret;
1380 
1381  s->scale_d = *((SCALE_TYPE *)scale);
1382  s->scale_f = s->scale_d;
1383 
1384  flags &= ~AV_TX_FULL_IMDCT;
1385 
1386  if ((ret = ff_tx_init_subtx(s, TX_TYPE(MDCT), flags, NULL, len, 1, scale)))
1387  return ret;
1388 
1389  return 0;
1390 }
1391 
1392 static void TX_NAME(ff_tx_mdct_inv_full)(AVTXContext *s, void *_dst,
1393  void *_src, ptrdiff_t stride)
1394 {
1395  int len = s->len << 1;
1396  int len2 = len >> 1;
1397  int len4 = len >> 2;
1398  TXSample *dst = _dst;
1399 
1400  s->fn[0](&s->sub[0], dst + len4, _src, stride);
1401 
1402  stride /= sizeof(*dst);
1403 
1404  for (int i = 0; i < len4; i++) {
1405  dst[ i*stride] = -dst[(len2 - i - 1)*stride];
1406  dst[(len - i - 1)*stride] = dst[(len2 + i + 0)*stride];
1407  }
1408 }
1409 
1410 static const FFTXCodelet TX_NAME(ff_tx_mdct_inv_full_def) = {
1411  .name = TX_NAME_STR("mdct_inv_full"),
1412  .function = TX_NAME(ff_tx_mdct_inv_full),
1413  .type = TX_TYPE(MDCT),
1414  .flags = AV_TX_UNALIGNED | AV_TX_INPLACE |
1415  FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,
1416  .factors = { 2, TX_FACTOR_ANY },
1417  .nb_factors = 2,
1418  .min_len = 2,
1419  .max_len = TX_LEN_UNLIMITED,
1420  .init = TX_NAME(ff_tx_mdct_inv_full_init),
1421  .cpu_flags = FF_TX_CPU_FLAGS_ALL,
1422  .prio = FF_TX_PRIO_BASE,
1423 };
1424 
1425 static av_cold int TX_NAME(ff_tx_mdct_pfa_init)(AVTXContext *s,
1426  const FFTXCodelet *cd,
1427  uint64_t flags,
1428  FFTXCodeletOptions *opts,
1429  int len, int inv,
1430  const void *scale)
1431 {
1432  int ret, sub_len;
1433  FFTXCodeletOptions sub_opts = { .map_dir = FF_TX_MAP_SCATTER };
1434 
1435  len >>= 1;
1436  sub_len = len / cd->factors[0];
1437 
1438  s->scale_d = *((SCALE_TYPE *)scale);
1439  s->scale_f = s->scale_d;
1440 
1441  flags &= ~FF_TX_OUT_OF_PLACE; /* We want the subtransform to be */
1442  flags |= AV_TX_INPLACE; /* in-place */
1443  flags |= FF_TX_PRESHUFFLE; /* This function handles the permute step */
1444 
1445  if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts,
1446  sub_len, inv, scale)))
1447  return ret;
1448 
1449  if ((ret = ff_tx_gen_compound_mapping(s, opts, s->inv, cd->factors[0], sub_len)))
1450  return ret;
1451 
1452  /* Our 15-point transform is also a compound one, so embed its input map */
1453  if (cd->factors[0] == 15)
1454  TX_EMBED_INPUT_PFA_MAP(s->map, len, 3, 5);
1455 
1456  if ((ret = TX_TAB(ff_tx_mdct_gen_exp)(s, inv ? s->map : NULL)))
1457  return ret;
1458 
1459  /* Saves multiplies in loops. */
1460  for (int i = 0; i < len; i++)
1461  s->map[i] <<= 1;
1462 
1463  if (!(s->tmp = av_malloc(len*sizeof(*s->tmp))))
1464  return AVERROR(ENOMEM);
1465 
1466  TX_TAB(ff_tx_init_tabs)(len / sub_len);
1467 
1468  return 0;
1469 }
1470 
1471 #define DECL_COMP_IMDCT(N) \
1472 static void TX_NAME(ff_tx_mdct_pfa_##N##xM_inv)(AVTXContext *s, void *_dst, \
1473  void *_src, ptrdiff_t stride) \
1474 { \
1475  TXComplex fft##N##in[N]; \
1476  TXComplex *z = _dst, *exp = s->exp; \
1477  const TXSample *src = _src, *in1, *in2; \
1478  const int len4 = s->len >> 2; \
1479  const int len2 = s->len >> 1; \
1480  const int m = s->sub->len; \
1481  const int *in_map = s->map, *out_map = in_map + N*m; \
1482  const int *sub_map = s->sub->map; \
1483  \
1484  stride /= sizeof(*src); /* To convert it from bytes */ \
1485  in1 = src; \
1486  in2 = src + ((N*m*2) - 1) * stride; \
1487  \
1488  for (int i = 0; i < len2; i += N) { \
1489  for (int j = 0; j < N; j++) { \
1490  const int k = in_map[j]; \
1491  TXComplex tmp = { in2[-k*stride], in1[k*stride] }; \
1492  CMUL3(fft##N##in[j], tmp, exp[j]); \
1493  } \
1494  fft##N(s->tmp + *(sub_map++), fft##N##in, m); \
1495  exp += N; \
1496  in_map += N; \
1497  } \
1498  \
1499  for (int i = 0; i < N; i++) \
1500  s->fn[0](&s->sub[0], s->tmp + m*i, s->tmp + m*i, sizeof(TXComplex)); \
1501  \
1502  for (int i = 0; i < len4; i++) { \
1503  const int i0 = len4 + i, i1 = len4 - i - 1; \
1504  const int s0 = out_map[i0], s1 = out_map[i1]; \
1505  TXComplex src1 = { s->tmp[s1].im, s->tmp[s1].re }; \
1506  TXComplex src0 = { s->tmp[s0].im, s->tmp[s0].re }; \
1507  \
1508  CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re); \
1509  CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re); \
1510  } \
1511 } \
1512  \
1513 static const FFTXCodelet TX_NAME(ff_tx_mdct_pfa_##N##xM_inv_def) = { \
1514  .name = TX_NAME_STR("mdct_pfa_" #N "xM_inv"), \
1515  .function = TX_NAME(ff_tx_mdct_pfa_##N##xM_inv), \
1516  .type = TX_TYPE(MDCT), \
1517  .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY, \
1518  .factors = { N, TX_FACTOR_ANY }, \
1519  .nb_factors = 2, \
1520  .min_len = N*2, \
1521  .max_len = TX_LEN_UNLIMITED, \
1522  .init = TX_NAME(ff_tx_mdct_pfa_init), \
1523  .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
1524  .prio = FF_TX_PRIO_BASE, \
1525 };
1526 
1527 DECL_COMP_IMDCT(3)
1528 DECL_COMP_IMDCT(5)
1529 DECL_COMP_IMDCT(7)
1530 DECL_COMP_IMDCT(9)
1531 DECL_COMP_IMDCT(15)
1532 
1533 #define DECL_COMP_MDCT(N) \
1534 static void TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd)(AVTXContext *s, void *_dst, \
1535  void *_src, ptrdiff_t stride) \
1536 { \
1537  TXComplex fft##N##in[N]; \
1538  TXSample *src = _src, *dst = _dst; \
1539  TXComplex *exp = s->exp, tmp; \
1540  const int m = s->sub->len; \
1541  const int len4 = N*m; \
1542  const int len3 = len4 * 3; \
1543  const int len8 = s->len >> 2; \
1544  const int *in_map = s->map, *out_map = in_map + N*m; \
1545  const int *sub_map = s->sub->map; \
1546  \
1547  stride /= sizeof(*dst); \
1548  \
1549  for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */ \
1550  for (int j = 0; j < N; j++) { \
1551  const int k = in_map[i*N + j]; \
1552  if (k < len4) { \
1553  tmp.re = FOLD(-src[ len4 + k], src[1*len4 - 1 - k]); \
1554  tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]); \
1555  } else { \
1556  tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]); \
1557  tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]); \
1558  } \
1559  CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im, \
1560  exp[k >> 1].re, exp[k >> 1].im); \
1561  } \
1562  fft##N(s->tmp + sub_map[i], fft##N##in, m); \
1563  } \
1564  \
1565  for (int i = 0; i < N; i++) \
1566  s->fn[0](&s->sub[0], s->tmp + m*i, s->tmp + m*i, sizeof(TXComplex)); \
1567  \
1568  for (int i = 0; i < len8; i++) { \
1569  const int i0 = len8 + i, i1 = len8 - i - 1; \
1570  const int s0 = out_map[i0], s1 = out_map[i1]; \
1571  TXComplex src1 = { s->tmp[s1].re, s->tmp[s1].im }; \
1572  TXComplex src0 = { s->tmp[s0].re, s->tmp[s0].im }; \
1573  \
1574  CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im, \
1575  exp[i0].im, exp[i0].re); \
1576  CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im, \
1577  exp[i1].im, exp[i1].re); \
1578  } \
1579 } \
1580  \
1581 static const FFTXCodelet TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd_def) = { \
1582  .name = TX_NAME_STR("mdct_pfa_" #N "xM_fwd"), \
1583  .function = TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd), \
1584  .type = TX_TYPE(MDCT), \
1585  .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY, \
1586  .factors = { N, TX_FACTOR_ANY }, \
1587  .nb_factors = 2, \
1588  .min_len = N*2, \
1589  .max_len = TX_LEN_UNLIMITED, \
1590  .init = TX_NAME(ff_tx_mdct_pfa_init), \
1591  .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
1592  .prio = FF_TX_PRIO_BASE, \
1593 };
1594 
1595 DECL_COMP_MDCT(3)
1596 DECL_COMP_MDCT(5)
1597 DECL_COMP_MDCT(7)
1598 DECL_COMP_MDCT(9)
1599 DECL_COMP_MDCT(15)
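/* The compound MDCT codelets above cover total lengths 2*N*m (N in
 * {3, 5, 7, 9, 15}, m the length of the FFT subtransform): the N-point FFT
 * runs inline on the folded inputs while the m-point stage goes through
 * s->fn[0], with in_map/out_map produced by ff_tx_gen_compound_mapping(). */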
1600 
1601 static av_cold int TX_NAME(ff_tx_rdft_init)(AVTXContext *s,
1602  const FFTXCodelet *cd,
1603  uint64_t flags,
1604  FFTXCodeletOptions *opts,
1605  int len, int inv,
1606  const void *scale)
1607 {
1608  int ret;
1609  double f, m;
1610  TXSample *tab;
1611  uint64_t r2r = flags & AV_TX_REAL_TO_REAL;
1612  int len4 = FFALIGN(len, 4) / 4;
1613 
1614  s->scale_d = *((SCALE_TYPE *)scale);
1615  s->scale_f = s->scale_d;
1616 
1617  flags &= ~(AV_TX_REAL_TO_REAL | AV_TX_REAL_TO_IMAGINARY);
1618 
1619  if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, NULL, len >> 1, inv, scale)))
1620  return ret;
1621 
1622  if (!(s->exp = av_mallocz((8 + 2*len4)*sizeof(*s->exp))))
1623  return AVERROR(ENOMEM);
1624 
1625  tab = (TXSample *)s->exp;
1626 
1627  f = 2*M_PI/len;
1628 
1629  m = (inv ? 2*s->scale_d : s->scale_d);
1630 
1631  *tab++ = RESCALE((inv ? 0.5 : 1.0) * m);
1632  *tab++ = RESCALE(inv ? 0.5*m : 1.0*m);
1633  *tab++ = RESCALE( m);
1634  *tab++ = RESCALE(-m);
1635 
1636  *tab++ = RESCALE( (0.5 - 0.0) * m);
1637  if (r2r)
1638  *tab++ = 1 / s->scale_f;
1639  else
1640  *tab++ = RESCALE( (0.0 - 0.5) * m);
1641  *tab++ = RESCALE( (0.5 - inv) * m);
1642  *tab++ = RESCALE(-(0.5 - inv) * m);
1643 
1644  for (int i = 0; i < len4; i++)
1645  *tab++ = RESCALE(cos(i*f));
1646 
1647  tab = ((TXSample *)s->exp) + len4 + 8;
1648 
1649  for (int i = 0; i < len4; i++)
1650  *tab++ = RESCALE(cos(((len - i*4)/4.0)*f)) * (inv ? 1 : -1);
1651 
1652  return 0;
1653 }
1654 
1655 #define DECL_RDFT(n, inv) \
1656 static void TX_NAME(ff_tx_rdft_ ##n)(AVTXContext *s, void *_dst, \
1657  void *_src, ptrdiff_t stride) \
1658 { \
1659  const int len2 = s->len >> 1; \
1660  const int len4 = s->len >> 2; \
1661  const TXSample *fact = (void *)s->exp; \
1662  const TXSample *tcos = fact + 8; \
1663  const TXSample *tsin = tcos + len4; \
1664  TXComplex *data = inv ? _src : _dst; \
1665  TXComplex t[3]; \
1666  \
1667  if (!inv) \
1668  s->fn[0](&s->sub[0], data, _src, sizeof(TXComplex)); \
1669  else \
1670  data[0].im = data[len2].re; \
1671  \
1672  /* The DC value's both components are real, but we need to change them \
1673  * into complex values. Also, the middle of the array is special-cased. \
1674  * These operations can be done before or after the loop. */ \
1675  t[0].re = data[0].re; \
1676  data[0].re = t[0].re + data[0].im; \
1677  data[0].im = t[0].re - data[0].im; \
1678  data[ 0].re = MULT(fact[0], data[ 0].re); \
1679  data[ 0].im = MULT(fact[1], data[ 0].im); \
1680  data[len4].re = MULT(fact[2], data[len4].re); \
1681  data[len4].im = MULT(fact[3], data[len4].im); \
1682  \
1683  for (int i = 1; i < len4; i++) { \
1684  /* Separate even and odd FFTs */ \
1685  t[0].re = MULT(fact[4], (data[i].re + data[len2 - i].re)); \
1686  t[0].im = MULT(fact[5], (data[i].im - data[len2 - i].im)); \
1687  t[1].re = MULT(fact[6], (data[i].im + data[len2 - i].im)); \
1688  t[1].im = MULT(fact[7], (data[i].re - data[len2 - i].re)); \
1689  \
1690  /* Apply twiddle factors to the odd FFT and add to the even FFT */ \
1691  CMUL(t[2].re, t[2].im, t[1].re, t[1].im, tcos[i], tsin[i]); \
1692  \
1693  data[ i].re = t[0].re + t[2].re; \
1694  data[ i].im = t[2].im - t[0].im; \
1695  data[len2 - i].re = t[0].re - t[2].re; \
1696  data[len2 - i].im = t[2].im + t[0].im; \
1697  } \
1698  \
1699  if (inv) { \
1700  s->fn[0](&s->sub[0], _dst, data, sizeof(TXComplex)); \
1701  } else { \
1702  /* Move [0].im to the last position, as convention requires */ \
1703  data[len2].re = data[0].im; \
1704  data[ 0].im = data[len2].im = 0; \
1705  } \
1706 } \
1707  \
1708 static const FFTXCodelet TX_NAME(ff_tx_rdft_ ##n## _def) = { \
1709  .name = TX_NAME_STR("rdft_" #n), \
1710  .function = TX_NAME(ff_tx_rdft_ ##n), \
1711  .type = TX_TYPE(RDFT), \
1712  .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | \
1713  (inv ? FF_TX_INVERSE_ONLY : FF_TX_FORWARD_ONLY), \
1714  .factors = { 4, TX_FACTOR_ANY }, \
1715  .nb_factors = 2, \
1716  .min_len = 4, \
1717  .max_len = TX_LEN_UNLIMITED, \
1718  .init = TX_NAME(ff_tx_rdft_init), \
1719  .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
1720  .prio = FF_TX_PRIO_BASE, \
1721 };
1722 
1723 DECL_RDFT(r2c, 0)
1724 DECL_RDFT(c2r, 1)
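/* r2c packs a length-N real DFT into an N/2-point complex FFT (forward);
 * c2r is the matching inverse. As the macro notes, the DC and Nyquist bins
 * are purely real: the forward transform stores them in data[0].re and
 * data[len2].re and zeroes the corresponding imaginary slots. */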
1725 
1726 #define DECL_RDFT_HALF(n, mode, mod2) \
1727 static void TX_NAME(ff_tx_rdft_ ##n)(AVTXContext *s, void *_dst, \
1728  void *_src, ptrdiff_t stride) \
1729 { \
1730  const int len = s->len; \
1731  const int len2 = len >> 1; \
1732  const int len4 = len >> 2; \
1733  const int aligned_len4 = FFALIGN(len, 4)/4; \
1734  const TXSample *fact = (void *)s->exp; \
1735  const TXSample *tcos = fact + 8; \
1736  const TXSample *tsin = tcos + aligned_len4; \
1737  TXComplex *data = _dst; \
1738  TXSample *out = _dst; /* Half-complex is forward-only */ \
1739  TXSample tmp_dc; \
1740  av_unused TXSample tmp_mid; \
1741  TXSample tmp[4]; \
1742  TXComplex sf, sl; \
1743  \
1744  s->fn[0](&s->sub[0], _dst, _src, sizeof(TXComplex)); \
1745  \
1746  tmp_dc = data[0].re; \
1747  data[ 0].re = tmp_dc + data[0].im; \
1748  tmp_dc = tmp_dc - data[0].im; \
1749  \
1750  data[ 0].re = MULT(fact[0], data[ 0].re); \
1751  tmp_dc = MULT(fact[1], tmp_dc); \
1752  data[len4].re = MULT(fact[2], data[len4].re); \
1753  \
1754  if (!mod2) { \
1755  data[len4].im = MULT(fact[3], data[len4].im); \
1756  } else { \
1757  sf = data[len4]; \
1758  sl = data[len4 + 1]; \
1759  if (mode == AV_TX_REAL_TO_REAL) \
1760  tmp[0] = MULT(fact[4], (sf.re + sl.re)); \
1761  else \
1762  tmp[0] = MULT(fact[5], (sf.im - sl.im)); \
1763  tmp[1] = MULT(fact[6], (sf.im + sl.im)); \
1764  tmp[2] = MULT(fact[7], (sf.re - sl.re)); \
1765  \
1766  if (mode == AV_TX_REAL_TO_REAL) { \
1767  tmp[3] = tmp[1]*tcos[len4] - tmp[2]*tsin[len4]; \
1768  tmp_mid = (tmp[0] - tmp[3]); \
1769  } else { \
1770  tmp[3] = tmp[1]*tsin[len4] + tmp[2]*tcos[len4]; \
1771  tmp_mid = (tmp[0] + tmp[3]); \
1772  } \
1773  } \
1774  \
1775  /* NOTE: unrolling this breaks non-mod8 lengths */ \
1776  for (int i = 1; i <= len4; i++) { \
1777  TXSample tmp[4]; \
1778  TXComplex sf = data[i]; \
1779  TXComplex sl = data[len2 - i]; \
1780  \
1781  if (mode == AV_TX_REAL_TO_REAL) \
1782  tmp[0] = MULT(fact[4], (sf.re + sl.re)); \
1783  else \
1784  tmp[0] = MULT(fact[5], (sf.im - sl.im)); \
1785  \
1786  tmp[1] = MULT(fact[6], (sf.im + sl.im)); \
1787  tmp[2] = MULT(fact[7], (sf.re - sl.re)); \
1788  \
1789  if (mode == AV_TX_REAL_TO_REAL) { \
1790  tmp[3] = tmp[1]*tcos[i] - tmp[2]*tsin[i]; \
1791  out[i] = (tmp[0] + tmp[3]); \
1792  out[len - i] = (tmp[0] - tmp[3]); \
1793  } else { \
1794  tmp[3] = tmp[1]*tsin[i] + tmp[2]*tcos[i]; \
1795  out[i - 1] = (tmp[3] - tmp[0]); \
1796  out[len - i - 1] = (tmp[0] + tmp[3]); \
1797  } \
1798  } \
1799  \
1800  for (int i = 1; i < (len4 + (mode == AV_TX_REAL_TO_IMAGINARY)); i++) \
1801  out[len2 - i] = out[len - i]; \
1802  \
1803  if (mode == AV_TX_REAL_TO_REAL) { \
1804  out[len2] = tmp_dc; \
1805  if (mod2) \
1806  out[len4 + 1] = tmp_mid * fact[5]; \
1807  } else if (mod2) { \
1808  out[len4] = tmp_mid; \
1809  } \
1810 } \
1811  \
1812 static const FFTXCodelet TX_NAME(ff_tx_rdft_ ##n## _def) = { \
1813  .name = TX_NAME_STR("rdft_" #n), \
1814  .function = TX_NAME(ff_tx_rdft_ ##n), \
1815  .type = TX_TYPE(RDFT), \
1816  .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | mode | \
1817  FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY, \
1818  .factors = { 2 + 2*(!mod2), TX_FACTOR_ANY }, \
1819  .nb_factors = 2, \
1820  .min_len = 2 + 2*(!mod2), \
1821  .max_len = TX_LEN_UNLIMITED, \
1822  .init = TX_NAME(ff_tx_rdft_init), \
1823  .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
1824  .prio = FF_TX_PRIO_BASE, \
1825 };
1826 
1827 DECL_RDFT_HALF(r2r, AV_TX_REAL_TO_REAL, 0)
1828 DECL_RDFT_HALF(r2r_mod2, AV_TX_REAL_TO_REAL, 1)
1829 DECL_RDFT_HALF(r2i, AV_TX_REAL_TO_IMAGINARY, 0)
1830 DECL_RDFT_HALF(r2i_mod2, AV_TX_REAL_TO_IMAGINARY, 1)
1831 
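The four instantiations above cover the "real to real" and "real to imaginary" half-output RDFT variants, with separate mod-2 versions for lengths that (judging by the .factors/.min_len fields) are not divisible by 4. From the caller's side these are selected purely via flags; a hedged, init-only sketch assuming the documented AV_TX_REAL_TO_REAL/AV_TX_REAL_TO_IMAGINARY flags from libavutil/tx.h could look like:

/* Init-only sketch, not part of this file: request a forward RDFT that
 * outputs only the real (cosine) half of the spectrum. */
#include <libavutil/tx.h>

static int make_r2r_rdft(AVTXContext **ctx, av_tx_fn *fn, int len)
{
    float scale = 1.0f;
    /* AV_TX_REAL_TO_REAL keeps only the real coefficients; use
     * AV_TX_REAL_TO_IMAGINARY for the sine half instead. Both are
     * forward-only, matching FF_TX_FORWARD_ONLY in the codelets above. */
    return av_tx_init(ctx, fn, AV_TX_FLOAT_RDFT, 0 /* forward */, len,
                      &scale, AV_TX_REAL_TO_REAL);
}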
1832 static av_cold int TX_NAME(ff_tx_dct_init)(AVTXContext *s,
1833  const FFTXCodelet *cd,
1834  uint64_t flags,
1835  FFTXCodeletOptions *opts,
1836  int len, int inv,
1837  const void *scale)
1838 {
1839  int ret;
1840  double freq;
1841  TXSample *tab;
1842  SCALE_TYPE rsc = *((SCALE_TYPE *)scale);
1843 
1844  if (inv) {
1845  len *= 2;
1846  s->len *= 2;
1847  rsc *= 0.5;
1848  }
1849 
1850  if ((ret = ff_tx_init_subtx(s, TX_TYPE(RDFT), flags, NULL, len, inv, &rsc)))
1851  return ret;
1852 
1853  s->exp = av_malloc((len/2)*3*sizeof(TXSample));
1854  if (!s->exp)
1855  return AVERROR(ENOMEM);
1856 
1857  tab = (TXSample *)s->exp;
1858 
1859  freq = M_PI/(len*2);
1860 
1861  for (int i = 0; i < len; i++)
1862  tab[i] = RESCALE(cos(i*freq)*(!inv + 1));
1863 
1864  if (inv) {
1865  for (int i = 0; i < len/2; i++)
1866  tab[len + i] = RESCALE(0.5 / sin((2*i + 1)*freq));
1867  } else {
1868  for (int i = 0; i < len/2; i++)
1869  tab[len + i] = RESCALE(cos((len - 2*i - 1)*freq));
1870  }
1871 
1872  return 0;
1873 }
1874 
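ff_tx_dct_init above backs both DCT codelets: the forward (DCT-II) setup fills the table with cosines, while the inverse setup doubles the length, halves the scale, and switches the trailing factors to 0.5/sin() terms. Through the public API this pair is exposed as a single transform type; a hedged, init-only sketch assuming AV_TX_FLOAT_DCT from libavutil/tx.h (whose forward transform is documented as a DCT-II and whose inverse is a DCT-III) would be:

/* Init-only sketch, not part of this file. */
#include <libavutil/tx.h>

static int make_dct(AVTXContext **ctx, av_tx_fn *fn, int len, int inverse)
{
    float scale = 1.0f;
    /* inverse = 0 selects the forward DCT-II path (dctII codelet above),
     * inverse = 1 selects the DCT-III path (dctIII codelet). */
    return av_tx_init(ctx, fn, AV_TX_FLOAT_DCT, inverse, len, &scale, 0);
}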
1875 static void TX_NAME(ff_tx_dctII)(AVTXContext *s, void *_dst,
1876  void *_src, ptrdiff_t stride)
1877 {
1878  TXSample *dst = _dst;
1879  TXSample *src = _src;
1880  const int len = s->len;
1881  const int len2 = len >> 1;
1882  const TXSample *exp = (void *)s->exp;
1883  TXSample next;
1884 #ifdef TX_INT32
1885  int64_t tmp1, tmp2;
1886 #else
1887  TXSample tmp1, tmp2;
1888 #endif
1889 
1890  for (int i = 0; i < len2; i++) {
1891  TXSample in1 = src[i];
1892  TXSample in2 = src[len - i - 1];
1893  TXSample s = exp[len + i];
1894 
1895 #ifdef TX_INT32
1896  tmp1 = in1 + in2;
1897  tmp2 = in1 - in2;
1898 
1899  tmp1 >>= 1;
1900  tmp2 *= s;
1901 
1902  tmp2 = (tmp2 + 0x40000000) >> 31;
1903 #else
1904  tmp1 = (in1 + in2)*0.5;
1905  tmp2 = (in1 - in2)*s;
1906 #endif
1907 
1908  src[i] = tmp1 + tmp2;
1909  src[len - i - 1] = tmp1 - tmp2;
1910  }
1911 
1912  s->fn[0](&s->sub[0], dst, src, sizeof(TXComplex));
1913 
1914  next = dst[len];
1915 
1916  for (int i = len - 2; i > 0; i -= 2) {
1917  TXSample tmp;
1918 
1919  CMUL(tmp, dst[i], exp[len - i], exp[i], dst[i + 0], dst[i + 1]);
1920 
1921  dst[i + 1] = next;
1922 
1923  next += tmp;
1924  }
1925 
1926 #ifdef TX_INT32
1927  tmp1 = ((int64_t)exp[0]) * ((int64_t)dst[0]);
1928  dst[0] = (tmp1 + 0x40000000) >> 31;
1929 #else
1930  dst[0] = exp[0] * dst[0];
1931 #endif
1932  dst[1] = next;
1933 }
1934 
1935 static void TX_NAME(ff_tx_dctIII)(AVTXContext *s, void *_dst,
1936  void *_src, ptrdiff_t stride)
1937 {
1938  TXSample *dst = _dst;
1939  TXSample *src = _src;
1940  const int len = s->len;
1941  const int len2 = len >> 1;
1942  const TXSample *exp = (void *)s->exp;
1943 #ifdef TX_INT32
1944  int64_t tmp1, tmp2 = src[len - 1];
1945  tmp2 = (2*tmp2 + 0x40000000) >> 31;
1946 #else
1947  TXSample tmp1, tmp2 = 2*src[len - 1];
1948 #endif
1949 
1950  src[len] = tmp2;
1951 
1952  for (int i = len - 2; i >= 2; i -= 2) {
1953  TXSample val1 = src[i - 0];
1954  TXSample val2 = src[i - 1] - src[i + 1];
1955 
1956  CMUL(src[i + 1], src[i], exp[len - i], exp[i], val1, val2);
1957  }
1958 
1959  s->fn[0](&s->sub[0], dst, src, sizeof(float));
1960 
1961  for (int i = 0; i < len2; i++) {
1962  TXSample in1 = dst[i];
1963  TXSample in2 = dst[len - i - 1];
1964  TXSample c = exp[len + i];
1965 
1966  tmp1 = in1 + in2;
1967  tmp2 = in1 - in2;
1968  tmp2 *= c;
1969 #ifdef TX_INT32
1970  tmp2 = (tmp2 + 0x40000000) >> 31;
1971 #endif
1972 
1973  dst[i] = tmp1 + tmp2;
1974  dst[len - i - 1] = tmp1 - tmp2;
1975  }
1976 }
1977 
1978 static const FFTXCodelet TX_NAME(ff_tx_dctII_def) = {
1979  .name = TX_NAME_STR("dctII"),
1980  .function = TX_NAME(ff_tx_dctII),
1981  .type = TX_TYPE(DCT),
1982  .flags = AV_TX_UNALIGNED | AV_TX_INPLACE |
1983  FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,
1984  .factors = { 2, TX_FACTOR_ANY },
1985  .min_len = 2,
1986  .max_len = TX_LEN_UNLIMITED,
1987  .init = TX_NAME(ff_tx_dct_init),
1988  .cpu_flags = FF_TX_CPU_FLAGS_ALL,
1989  .prio = FF_TX_PRIO_BASE,
1990 };
1991 
1992 static const FFTXCodelet TX_NAME(ff_tx_dctIII_def) = {
1993  .name = TX_NAME_STR("dctIII"),
1994  .function = TX_NAME(ff_tx_dctIII),
1995  .type = TX_TYPE(DCT),
1996  .flags = AV_TX_UNALIGNED | AV_TX_INPLACE |
1997  FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,
1998  .factors = { 2, TX_FACTOR_ANY },
1999  .min_len = 2,
2000  .max_len = TX_LEN_UNLIMITED,
2001  .init = TX_NAME(ff_tx_dct_init),
2002  .cpu_flags = FF_TX_CPU_FLAGS_ALL,
2003  .prio = FF_TX_PRIO_BASE,
2004 };
2005 
2006 static av_cold int TX_NAME(ff_tx_dcstI_init)(AVTXContext *s,
2007  const FFTXCodelet *cd,
2008  uint64_t flags,
2009  FFTXCodeletOptions *opts,
2010  int len, int inv,
2011  const void *scale)
2012 {
2013  int ret;
2014  SCALE_TYPE rsc = *((SCALE_TYPE *)scale);
2015 
2016  if (inv) {
2017  len *= 2;
2018  s->len *= 2;
2019  rsc *= 0.5;
2020  }
2021 
2022  /* We want a half-complex RDFT */
2023  flags |= cd->type == TX_TYPE(DCT_I) ? AV_TX_REAL_TO_REAL :
2024  AV_TX_REAL_TO_IMAGINARY;
2025 
2026  if ((ret = ff_tx_init_subtx(s, TX_TYPE(RDFT), flags, NULL,
2027  (len - 1 + 2*(cd->type == TX_TYPE(DST_I)))*2,
2028  0, &rsc)))
2029  return ret;
2030 
2031  s->tmp = av_mallocz((len + 1)*2*sizeof(TXSample));
2032  if (!s->tmp)
2033  return AVERROR(ENOMEM);
2034 
2035  return 0;
2036 }
2037 
2038 static void TX_NAME(ff_tx_dctI)(AVTXContext *s, void *_dst,
2039  void *_src, ptrdiff_t stride)
2040 {
2041  TXSample *dst = _dst;
2042  TXSample *src = _src;
2043  const int len = s->len - 1;
2044  TXSample *tmp = (TXSample *)s->tmp;
2045 
2046  stride /= sizeof(TXSample);
2047 
2048  for (int i = 0; i < len; i++)
2049  tmp[i] = tmp[2*len - i] = src[i * stride];
2050 
2051  tmp[len] = src[len * stride]; /* Middle */
2052 
2053  s->fn[0](&s->sub[0], dst, tmp, sizeof(TXSample));
2054 }
2055 
2056 static void TX_NAME(ff_tx_dstI)(AVTXContext *s, void *_dst,
2057  void *_src, ptrdiff_t stride)
2058 {
2059  TXSample *dst = _dst;
2060  TXSample *src = _src;
2061  const int len = s->len + 1;
2062  TXSample *tmp = (void *)s->tmp;
2063 
2064  stride /= sizeof(TXSample);
2065 
2066  tmp[0] = 0;
2067 
2068  for (int i = 1; i < len; i++) {
2069  TXSample a = src[(i - 1) * stride];
2070  tmp[i] = -a;
2071  tmp[2*len - i] = a;
2072  }
2073 
2074  tmp[len] = 0; /* i == n, Nyquist */
2075 
2076  s->fn[0](&s->sub[0], dst, tmp, sizeof(float));
2077 }
2078 
2079 static const FFTXCodelet TX_NAME(ff_tx_dctI_def) = {
2080  .name = TX_NAME_STR("dctI"),
2081  .function = TX_NAME(ff_tx_dctI),
2082  .type = TX_TYPE(DCT_I),
2083  .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE,
2084  .factors = { 2, TX_FACTOR_ANY },
2085  .nb_factors = 2,
2086  .min_len = 2,
2087  .max_len = TX_LEN_UNLIMITED,
2088  .init = TX_NAME(ff_tx_dcstI_init),
2089  .cpu_flags = FF_TX_CPU_FLAGS_ALL,
2090  .prio = FF_TX_PRIO_BASE,
2091 };
2092 
2093 static const FFTXCodelet TX_NAME(ff_tx_dstI_def) = {
2094  .name = TX_NAME_STR("dstI"),
2095  .function = TX_NAME(ff_tx_dstI),
2096  .type = TX_TYPE(DST_I),
2097  .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE,
2098  .factors = { 2, TX_FACTOR_ANY },
2099  .nb_factors = 2,
2100  .min_len = 2,
2101  .max_len = TX_LEN_UNLIMITED,
2102  .init = TX_NAME(ff_tx_dcstI_init),
2103  .cpu_flags = FF_TX_CPU_FLAGS_ALL,
2104  .prio = FF_TX_PRIO_BASE,
2105 };
2106 
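The DCT-I and DST-I codelets above both work by mirroring the input into a larger temporary buffer and running a half-complex RDFT over it (real-to-real for DCT-I, real-to-imaginary for DST-I, as arranged in ff_tx_dcstI_init). A hedged, init-only sketch of requesting them through the public API, assuming the AV_TX_FLOAT_DCT_I and AV_TX_FLOAT_DST_I type identifiers from libavutil/tx.h, could be:

/* Init-only sketch, not part of this file. */
#include <libavutil/tx.h>

static int make_dcst_i(AVTXContext **ctx, av_tx_fn *fn,
                       int len, int inverse, int is_dst)
{
    float scale = 1.0f;
    /* Mathematically, DCT-I and DST-I are their own inverses up to a
     * constant scale, so the same codelets above serve both directions. */
    return av_tx_init(ctx, fn, is_dst ? AV_TX_FLOAT_DST_I : AV_TX_FLOAT_DCT_I,
                      inverse, len, &scale, 0);
}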
2107 int TX_TAB(ff_tx_mdct_gen_exp)(AVTXContext *s, int *pre_tab)
2108 {
2109  int off = 0;
2110  int len4 = s->len >> 1;
2111  double scale = s->scale_d;
2112  const double theta = (scale < 0 ? len4 : 0) + 1.0/8.0;
2113  size_t alloc = pre_tab ? 2*len4 : len4;
2114 
2115  if (!(s->exp = av_malloc_array(alloc, sizeof(*s->exp))))
2116  return AVERROR(ENOMEM);
2117 
2118  scale = sqrt(fabs(scale));
2119 
2120  if (pre_tab)
2121  off = len4;
2122 
2123  for (int i = 0; i < len4; i++) {
2124  const double alpha = M_PI_2 * (i + theta) / len4;
2125  s->exp[off + i] = (TXComplex){ RESCALE(cos(alpha) * scale),
2126  RESCALE(sin(alpha) * scale) };
2127  }
2128 
2129  if (pre_tab)
2130  for (int i = 0; i < len4; i++)
2131  s->exp[i] = s->exp[len4 + pre_tab[i]];
2132 
2133  return 0;
2134 }
2135 
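For reference, the twiddle layout generated by ff_tx_mdct_gen_exp above reduces to exp[i] = (cos(a)*amp, sin(a)*amp) with a = (pi/2)*(i + theta)/len4, theta = 1/8 (or len4 + 1/8 when the scale is negative) and amp = sqrt(|scale|). A minimal float-only sketch of the same table generation, using a hypothetical plain struct "cf" in place of TXComplex and ignoring the pre_tab permutation, might look like:

/* Hedged, float-only sketch of the twiddle generation above;
 * "cf" and gen_mdct_exp_float() are illustrative names, not FFmpeg API. */
#include <math.h>
#include <stdlib.h>

#ifndef M_PI_2
#define M_PI_2 1.57079632679489661923
#endif

typedef struct { float re, im; } cf;

static cf *gen_mdct_exp_float(int len4, double scale)
{
    const double theta = (scale < 0 ? len4 : 0) + 1.0/8.0;
    const double amp   = sqrt(fabs(scale));
    cf *exp_tab = malloc(len4 * sizeof(*exp_tab));

    if (!exp_tab)
        return NULL;

    for (int i = 0; i < len4; i++) {
        const double alpha = M_PI_2 * (i + theta) / len4;
        exp_tab[i].re = (float)(cos(alpha) * amp);
        exp_tab[i].im = (float)(sin(alpha) * amp);
    }
    return exp_tab;
}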
2136 const FFTXCodelet * const TX_NAME(ff_tx_codelet_list)[] = {
2137  /* Split-Radix codelets */
2138  &TX_NAME(ff_tx_fft2_ns_def),
2139  &TX_NAME(ff_tx_fft4_ns_def),
2140  &TX_NAME(ff_tx_fft8_ns_def),
2141  &TX_NAME(ff_tx_fft16_ns_def),
2142  &TX_NAME(ff_tx_fft32_ns_def),
2143  &TX_NAME(ff_tx_fft64_ns_def),
2144  &TX_NAME(ff_tx_fft128_ns_def),
2145  &TX_NAME(ff_tx_fft256_ns_def),
2146  &TX_NAME(ff_tx_fft512_ns_def),
2147  &TX_NAME(ff_tx_fft1024_ns_def),
2148  &TX_NAME(ff_tx_fft2048_ns_def),
2149  &TX_NAME(ff_tx_fft4096_ns_def),
2150  &TX_NAME(ff_tx_fft8192_ns_def),
2151  &TX_NAME(ff_tx_fft16384_ns_def),
2152  &TX_NAME(ff_tx_fft32768_ns_def),
2153  &TX_NAME(ff_tx_fft65536_ns_def),
2154  &TX_NAME(ff_tx_fft131072_ns_def),
2155 
2156  /* Prime factor codelets */
2157  &TX_NAME(ff_tx_fft3_ns_def),
2158  &TX_NAME(ff_tx_fft5_ns_def),
2159  &TX_NAME(ff_tx_fft7_ns_def),
2160  &TX_NAME(ff_tx_fft9_ns_def),
2161  &TX_NAME(ff_tx_fft15_ns_def),
2162 
2163  /* We get these for free */
2164  &TX_NAME(ff_tx_fft3_fwd_def),
2165  &TX_NAME(ff_tx_fft5_fwd_def),
2166  &TX_NAME(ff_tx_fft7_fwd_def),
2167  &TX_NAME(ff_tx_fft9_fwd_def),
2168 
2169  /* Standalone transforms */
2170  &TX_NAME(ff_tx_fft_def),
2171  &TX_NAME(ff_tx_fft_inplace_def),
2172  &TX_NAME(ff_tx_fft_inplace_small_def),
2173  &TX_NAME(ff_tx_fft_pfa_def),
2174  &TX_NAME(ff_tx_fft_pfa_ns_def),
2175  &TX_NAME(ff_tx_fft_naive_def),
2176  &TX_NAME(ff_tx_fft_naive_small_def),
2177  &TX_NAME(ff_tx_mdct_fwd_def),
2178  &TX_NAME(ff_tx_mdct_inv_def),
2179  &TX_NAME(ff_tx_mdct_pfa_3xM_fwd_def),
2180  &TX_NAME(ff_tx_mdct_pfa_5xM_fwd_def),
2181  &TX_NAME(ff_tx_mdct_pfa_7xM_fwd_def),
2182  &TX_NAME(ff_tx_mdct_pfa_9xM_fwd_def),
2183  &TX_NAME(ff_tx_mdct_pfa_15xM_fwd_def),
2184  &TX_NAME(ff_tx_mdct_pfa_3xM_inv_def),
2185  &TX_NAME(ff_tx_mdct_pfa_5xM_inv_def),
2186  &TX_NAME(ff_tx_mdct_pfa_7xM_inv_def),
2187  &TX_NAME(ff_tx_mdct_pfa_9xM_inv_def),
2188  &TX_NAME(ff_tx_mdct_pfa_15xM_inv_def),
2189  &TX_NAME(ff_tx_mdct_naive_fwd_def),
2190  &TX_NAME(ff_tx_mdct_naive_inv_def),
2191  &TX_NAME(ff_tx_mdct_inv_full_def),
2192  &TX_NAME(ff_tx_rdft_r2c_def),
2193  &TX_NAME(ff_tx_rdft_r2r_def),
2194  &TX_NAME(ff_tx_rdft_r2r_mod2_def),
2195  &TX_NAME(ff_tx_rdft_r2i_def),
2196  &TX_NAME(ff_tx_rdft_r2i_mod2_def),
2197  &TX_NAME(ff_tx_rdft_c2r_def),
2198  &TX_NAME(ff_tx_dctII_def),
2199  &TX_NAME(ff_tx_dctIII_def),
2200  &TX_NAME(ff_tx_dctI_def),
2201  &TX_NAME(ff_tx_dstI_def),
2202 
2203  NULL,
2204 };
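The table above is what the generic dispatcher in tx.c searches when a transform is requested; from the caller's side all of it is hidden behind av_tx_init(). A minimal, hedged end-to-end example for the complex FFT case, assuming only the public libavutil/tx.h interface, might look like:

/* Illustrative usage sketch only -- not part of tx_template.c. */
#include <stdio.h>
#include <libavutil/tx.h>
#include <libavutil/mem.h>

int main(void)
{
    const int len = 1024;
    float scale = 1.0f;
    AVTXContext *ctx = NULL;
    av_tx_fn fn = NULL;
    AVComplexFloat *in  = av_calloc(len, sizeof(*in));
    AVComplexFloat *out = av_calloc(len, sizeof(*out));

    if (!in || !out)
        return 1;

    in[1].re = 1.0f;                     /* single impulse at bin 1 */

    if (av_tx_init(&ctx, &fn, AV_TX_FLOAT_FFT, 0 /* forward */, len,
                   &scale, 0) < 0)
        return 1;

    /* stride = size of one complex sample, in bytes */
    fn(ctx, out, in, sizeof(AVComplexFloat));

    printf("out[0] = %f%+fi\n", out[0].re, out[0].im);

    av_tx_uninit(&ctx);
    av_freep(&in);
    av_freep(&out);
    return 0;
}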