FFmpeg
vf_fspp.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3  * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
4  * Copyright (c) 2014 Arwa Arif <arwaarif1994@gmail.com>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License along
19  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21  */
22 
23 /**
24  * @file
25  * Fast Simple Post-processing filter
26  * This implementation is based on an algorithm described in
27  * "Aria Nosratinia Embedded Post-Processing for
28  * Enhancement of Compressed Images (1999)"
29  * (http://www.utdallas.edu/~aria/papers/vlsisp99.pdf)
30  * Further, with splitting (I)DCT into horizontal/vertical passes, one of
31  * them can be performed once per block, not per pixel. This allows for much
32  * higher speed.
33  *
34  * Originally written by Michael Niedermayer and Nikolaj for the MPlayer
35  * project, and ported by Arwa Arif for FFmpeg.
36  */
37 
38 #include "libavutil/imgutils.h"
39 #include "libavutil/mem_internal.h"
40 #include "libavutil/opt.h"
41 #include "libavutil/pixdesc.h"
42 #include "internal.h"
43 #include "qp_table.h"
44 #include "vf_fspp.h"
45 
46 #define OFFSET(x) offsetof(FSPPContext, x)
47 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
48 static const AVOption fspp_options[] = {
49  { "quality", "set quality", OFFSET(log2_count), AV_OPT_TYPE_INT, {.i64 = 4}, 4, MAX_LEVEL, FLAGS },
50  { "qp", "force a constant quantizer parameter", OFFSET(qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 64, FLAGS },
51  { "strength", "set filter strength", OFFSET(strength), AV_OPT_TYPE_INT, {.i64 = 0}, -15, 32, FLAGS },
52  { "use_bframe_qp", "use B-frames' QP", OFFSET(use_bframe_qp), AV_OPT_TYPE_BOOL,{.i64 = 0}, 0, 1, FLAGS },
53  { NULL }
54 };
55 
57 
58 DECLARE_ALIGNED(32, static const uint8_t, dither)[8][8] = {
59  { 0, 48, 12, 60, 3, 51, 15, 63, },
60  { 32, 16, 44, 28, 35, 19, 47, 31, },
61  { 8, 56, 4, 52, 11, 59, 7, 55, },
62  { 40, 24, 36, 20, 43, 27, 39, 23, },
63  { 2, 50, 14, 62, 1, 49, 13, 61, },
64  { 34, 18, 46, 30, 33, 17, 45, 29, },
65  { 10, 58, 6, 54, 9, 57, 5, 53, },
66  { 42, 26, 38, 22, 41, 25, 37, 21, },
67 };
68 
/*
 * Base threshold for each of the 64 coefficients, before scaling by the
 * filter strength and the quantizer.
 *
 * The entries (the largest is 296) must not be too high: that makes the
 * result depend too strongly on the quantizer, or may overflow (to be
 * checked), which shows up as flashing.
 */
static const short custom_threshold[64] = {
     71, 296, 295, 237,  71,  40,  38,  19,
    245, 193, 185, 121, 102,  73,  53,  27,
    158, 129, 141, 107,  97,  73,  50,  26,
    102, 116, 109,  98,  82,  66,  45,  23,
     71,  94,  95,  81,  70,  56,  38,  20,
     56,  77,  74,  66,  56,  44,  30,  15,
     38,  53,  50,  45,  38,  30,  21,  11,
     20,  27,  26,  23,  20,  15,  11,   5
};
82 
83 //This func reads from 1 slice, 1 and clears 0 & 1
84 static void store_slice_c(uint8_t *dst, int16_t *src,
85  ptrdiff_t dst_stride, ptrdiff_t src_stride,
86  ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
87 {
88  int y, x;
89 #define STORE(pos) \
90  temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
91  src[x + pos] = src[x + pos - 8 * src_stride] = 0; \
92  if (temp & 0x100) temp = ~(temp >> 31); \
93  dst[x + pos] = temp;
94 
95  for (y = 0; y < height; y++) {
96  const uint8_t *d = dither[y];
97  for (x = 0; x < width; x += 8) {
98  int temp;
99  STORE(0);
100  STORE(1);
101  STORE(2);
102  STORE(3);
103  STORE(4);
104  STORE(5);
105  STORE(6);
106  STORE(7);
107  }
108  src += src_stride;
109  dst += dst_stride;
110  }
111 }
112 
113 //This func reads from 2 slices, 0 & 2 and clears 2-nd
114 static void store_slice2_c(uint8_t *dst, int16_t *src,
115  ptrdiff_t dst_stride, ptrdiff_t src_stride,
116  ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
117 {
118  int y, x;
119 #define STORE2(pos) \
120  temp = (src[x + pos] + src[x + pos + 16 * src_stride] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
121  src[x + pos + 16 * src_stride] = 0; \
122  if (temp & 0x100) temp = ~(temp >> 31); \
123  dst[x + pos] = temp;
124 
125  for (y = 0; y < height; y++) {
126  const uint8_t *d = dither[y];
127  for (x = 0; x < width; x += 8) {
128  int temp;
129  STORE2(0);
130  STORE2(1);
131  STORE2(2);
132  STORE2(3);
133  STORE2(4);
134  STORE2(5);
135  STORE2(6);
136  STORE2(7);
137  }
138  src += src_stride;
139  dst += dst_stride;
140  }
141 }
142 
/**
 * Scale a 64-entry threshold matrix by a quantizer value.
 *
 * @param thr_adr_noq unscaled thresholds (64 entries, read only)
 * @param thr_adr     receives q * thr_adr_noq[i] for each of the 64 entries,
 *                    stored back as int16_t
 * @param q           multiplier applied to every entry
 */
static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
{
    const int16_t *in   = thr_adr_noq;
    int16_t       *out  = thr_adr;
    int16_t *const stop = thr_adr + 64;

    while (out != stop) {
        *out = q * *in;
        out++;
        in++;
    }
}
149 
/**
 * Run the post-processing filter over one plane.
 *
 * The plane is first copied into p->src with an 8-pixel mirrored border on
 * every side. The main loop then walks down the padded image in steps of
 * (6 - p->log2_count) rows: row_fdct fills the coefficient buffer "block",
 * column_fidct thresholds it into "block3", and row_idct accumulates the
 * result into p->temp. Each time eight more output rows are complete they are
 * written to dst through store_slice or store_slice2.
 *
 * @param p          filter context; p->src and p->temp are used as scratch
 * @param dst        destination plane (nothing is done if NULL)
 * @param src        source plane (nothing is done if NULL)
 * @param dst_stride distance between destination rows, in bytes
 * @param src_stride distance between source rows, in bytes
 * @param width      plane width in pixels
 * @param height     plane height in pixels
 * @param qp_store   QP table; only read when p->qp is 0
 * @param qp_stride  distance between rows of qp_store
 * @param is_luma    non-zero for the luma plane, zero for chroma planes
 */
150 static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src,
151  int dst_stride, int src_stride,
152  int width, int height,
153  uint8_t *qp_store, int qp_stride, int is_luma)
154 {
155  int x, x0, y, es, qy, t;
156 
    /* row pitch of p->src and p->temp for this plane */
157  const int stride = is_luma ? p->temp_stride : (width + 16);
    /* vertical step between processed rows */
158  const int step = 6 - p->log2_count;
    /* shifts from pixel coordinates to QP table coordinates: 4 for luma,
     * reduced by the chroma subsampling shifts for chroma planes */
159  const int qpsh = 4 - p->hsub * !is_luma;
160  const int qpsv = 4 - p->vsub * !is_luma;
161 
    /* one aligned stack buffer split in two equal halves, both accessed as
     * int16_t: "block" (first half) and "block3" (second half) */
162  DECLARE_ALIGNED(32, int32_t, block_align)[4 * 8 * BLOCKSZ + 4 * 8 * BLOCKSZ];
163  int16_t *block = (int16_t *)block_align;
164  int16_t *block3 = (int16_t *)(block_align + 4 * 8 * BLOCKSZ);
165 
    /* NOTE(review): the length is a byte count, so this zeroes only the first
     * 4 * 8 * BLOCKSZ bytes of block3, not the whole half — presumably enough
     * to cover the entries column_fidct accumulates into before it overwrites
     * them; confirm. */
166  memset(block3, 0, 4 * 8 * BLOCKSZ);
167 
168  if (!src || !dst) return;
169 
    /* copy every source row into p->src, leaving an 8-pixel margin on the
     * left and 8 rows on top, and mirror 8 pixels outwards on both sides */
170  for (y = 0; y < height; y++) {
171  int index = 8 + 8 * stride + y * stride;
172  memcpy(p->src + index, src + y * src_stride, width);
173  for (x = 0; x < 8; x++) {
174  p->src[index - x - 1] = p->src[index + x ];
175  p->src[index + width + x ] = p->src[index + width - x - 1];
176  }
177  }
178 
    /* mirror the first and last 8 image rows into the top and bottom margins */
179  for (y = 0; y < 8; y++) {
180  memcpy(p->src + ( 7 - y ) * stride, p->src + ( y + 8 ) * stride, stride);
181  memcpy(p->src + (height + 8 + y) * stride, p->src + (height - y + 7) * stride, stride);
182  }
183  //FIXME (try edge emu)
184 
    /* clear rows 8..23 of the accumulation buffer (width samples each,
     * starting at column 8) */
185  for (y = 8; y < 24; y++)
186  memset(p->temp + 8 + y * stride, 0, width * sizeof(int16_t));
187 
188  for (y = step; y < height + 8; y += step) { //step= 1,2
189  const int y1 = y - 8 + step; //l5-7 l4-6;
    /* row of the QP table for this y: clamp y - 4 to the image, then scale */
190  qy = y - 4;
191 
192  if (qy > height - 1) qy = height - 1;
193  if (qy < 0) qy = 0;
194 
195  qy = (qy >> qpsv) * qp_stride;
    /* prime the first 8 entries-by-8 of block for this row */
196  p->row_fdct(block, p->src + y * stride + 2 - (y&1), stride, 2);
197 
    /* full-size chunks of 8 * (BLOCKSZ - 1) columns */
198  for (x0 = 0; x0 < width + 8 - 8 * (BLOCKSZ - 1); x0 += 8 * (BLOCKSZ - 1)) {
199  p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y&1), stride, 2 * (BLOCKSZ - 1));
200 
    /* constant user QP: threshold matrix is already scaled, do the whole
     * chunk in one call */
201  if (p->qp)
202  p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + 0 * 8, block3 + 0 * 8, 8 * (BLOCKSZ - 1)); //yes, this is a HOTSPOT
203  else
    /* per-block QP: look it up every 8 columns and rescale the
     * threshold matrix whenever it differs from the previous one */
204  for (x = 0; x < 8 * (BLOCKSZ - 1); x += 8) {
205  t = x + x0 - 2; //correct t=x+x0-2-(y&1), but its the same
206 
207  if (t < 0) t = 0; //t always < width-2
208 
209  t = qp_store[qy + (t >> qpsh)];
210  t = ff_norm_qscale(t, p->qscale_type);
211 
212  if (t != p->prev_q) p->prev_q = t, p->mul_thrmat((int16_t *)(&p->threshold_mtx_noq[0]), (int16_t *)(&p->threshold_mtx[0]), t);
213  p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + x * 8, block3 + x * 8, 8); //yes, this is a HOTSPOT
214  }
    /* accumulate the chunk into one of 16 rotating rows of p->temp */
215  p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, 2 * (BLOCKSZ - 1));
    /* move the tail of both buffers to the front for the next chunk */
216  memmove(block, block + (BLOCKSZ - 1) * 64, 8 * 8 * sizeof(int16_t)); //cycling
217  memmove(block3, block3 + (BLOCKSZ - 1) * 64, 6 * 8 * sizeof(int16_t));
218  }
219 
    /* remaining columns that did not fill a whole chunk */
220  es = width + 8 - x0; // 8, ...
221  if (es > 8)
222  p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y & 1), stride, (es - 4) >> 2);
223 
224  p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block, block3, es&(~1));
225  if (es > 3)
226  p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, es >> 2);
227 
    /* every time y1 reaches a non-zero multiple of 8, write 8 finished rows;
     * bit 3 of y1 selects which store routine / slice of p->temp is used */
228  if (!(y1 & 7) && y1) {
229  if (y1 & 8)
230  p->store_slice(dst + (y1 - 8) * dst_stride, p->temp + 8 + 8 * stride,
231  dst_stride, stride, width, 8, 5 - p->log2_count);
232  else
233  p->store_slice2(dst + (y1 - 8) * dst_stride, p->temp + 8 + 0 * stride,
234  dst_stride, stride, width, 8, 5 - p->log2_count);
235  }
236  }
237 
    /* write the last (y & 7) rows that did not complete a group of 8 */
238  if (y & 7) { // height % 8 != 0
239  if (y & 8)
240  p->store_slice(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 8 * stride,
241  dst_stride, stride, width, y&7, 5 - p->log2_count);
242  else
243  p->store_slice2(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 0 * stride,
244  dst_stride, stride, width, y&7, 5 - p->log2_count);
245  }
246 }
247 
/**
 * Column pass: forward transform, threshold and inverse transform in one go.
 *
 * The outer loop runs once per pair of cnt units. Each run handles DCTSIZE
 * adjacent columns; for every column it reads eight inputs spaced DCTSIZE
 * apart, transforms them, passes each coefficient through THRESHOLD with the
 * matching threshold entry, transforms back and merges the eight results into
 * output at the same eight offsets. Both pointers then skip a further 8
 * entries, so consecutive runs start 2 * DCTSIZE entries apart.
 *
 * THRESHOLD and MULTIPLY16H come from vf_fspp.h, which is not visible here;
 * presumably THRESHOLD zeroes coefficients that are small relative to the
 * threshold — confirm against the header.
 *
 * @param thr_adr threshold matrix; entry [k * 8 + column] is used for
 *                coefficient k of the given column
 * @param data    input coefficients (read only by this function)
 * @param output  buffer the results are merged into
 * @param cnt     amount of work; the loop consumes it two at a time
 */
248 static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
249 {
250  int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
251  int_simd16_t tmp10, tmp11, tmp12, tmp13;
252  int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
253  int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;
254 
255  int16_t *dataptr;
256  int16_t *wsptr;
257  int16_t *threshold;
258  int ctr;
259 
260  dataptr = data;
261  wsptr = output;
262 
263  for (; cnt > 0; cnt -= 2) { //start positions
    /* restart at the first threshold column for every start position */
264  threshold = (int16_t *)thr_adr;//threshold_mtx
265  for (ctr = DCTSIZE; ctr > 0; ctr--) {
266  // Process columns from input, add to output.
267  tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
268  tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
269 
270  tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
271  tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
272 
273  tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
274  tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
275 
276  tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
277  tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
278 
279  // Even part of FDCT
280 
281  tmp10 = tmp0 + tmp3;
282  tmp13 = tmp0 - tmp3;
283  tmp11 = tmp1 + tmp2;
284  tmp12 = tmp1 - tmp2;
285 
286  d0 = tmp10 + tmp11;
287  d4 = tmp10 - tmp11;
288 
289  z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
290  d2 = tmp13 + z1;
291  d6 = tmp13 - z1;
292 
293  // Even part of IDCT
294 
    /* threshold the even coefficients d0, d2, d4, d6 */
295  THRESHOLD(tmp0, d0, threshold[0 * 8]);
296  THRESHOLD(tmp1, d2, threshold[2 * 8]);
297  THRESHOLD(tmp2, d4, threshold[4 * 8]);
298  THRESHOLD(tmp3, d6, threshold[6 * 8]);
    /* bias added before the >> 2 below */
299  tmp0 += 2;
300  tmp10 = (tmp0 + tmp2) >> 2;
301  tmp11 = (tmp0 - tmp2) >> 2;
302 
303  tmp13 = (tmp1 + tmp3) >>2; //+2 ! (psnr decides)
304  tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2
305 
306  tmp0 = tmp10 + tmp13; //->temps
307  tmp3 = tmp10 - tmp13; //->temps
308  tmp1 = tmp11 + tmp12; //->temps
309  tmp2 = tmp11 - tmp12; //->temps
310 
311  // Odd part of FDCT
312 
313  tmp10 = tmp4 + tmp5;
314  tmp11 = tmp5 + tmp6;
315  tmp12 = tmp6 + tmp7;
316 
317  z5 = MULTIPLY16H((tmp10 - tmp12) << 2, FIX_0_382683433);
318  z2 = MULTIPLY16H(tmp10 << 2, FIX_0_541196100) + z5;
319  z4 = MULTIPLY16H(tmp12 << 2, FIX_1_306562965) + z5;
320  z3 = MULTIPLY16H(tmp11 << 2, FIX_0_707106781);
321 
322  z11 = tmp7 + z3;
323  z13 = tmp7 - z3;
324 
325  d5 = z13 + z2;
326  d3 = z13 - z2;
327  d1 = z11 + z4;
328  d7 = z11 - z4;
329 
330  // Odd part of IDCT
331 
    /* threshold the odd coefficients d1, d3, d5, d7 */
332  THRESHOLD(tmp4, d1, threshold[1 * 8]);
333  THRESHOLD(tmp5, d3, threshold[3 * 8]);
334  THRESHOLD(tmp6, d5, threshold[5 * 8]);
335  THRESHOLD(tmp7, d7, threshold[7 * 8]);
336 
337  //Simd version uses here a shortcut for the tmp5,tmp6,tmp7 == 0
338  z13 = tmp6 + tmp5;
339  z10 = (tmp6 - tmp5) << 1;
340  z11 = tmp4 + tmp7;
341  z12 = (tmp4 - tmp7) << 1;
342 
343  tmp7 = (z11 + z13) >> 2; //+2 !
344  tmp11 = MULTIPLY16H((z11 - z13) << 1, FIX_1_414213562);
345  z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
346  tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
347  tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !!
348 
349  tmp6 = tmp12 - tmp7;
350  tmp5 = tmp11 - tmp6;
351  tmp4 = tmp10 + tmp5;
352 
    /* Offsets 0..5 are accumulated, offsets 6 and 7 are assigned. Within one
     * call each start position begins 2 * DCTSIZE entries after the previous
     * one, so offsets 6 and 7 are the first write to those entries. */
353  wsptr[DCTSIZE * 0] += (tmp0 + tmp7);
354  wsptr[DCTSIZE * 1] += (tmp1 + tmp6);
355  wsptr[DCTSIZE * 2] += (tmp2 + tmp5);
356  wsptr[DCTSIZE * 3] += (tmp3 - tmp4);
357  wsptr[DCTSIZE * 4] += (tmp3 + tmp4);
358  wsptr[DCTSIZE * 5] += (tmp2 - tmp5);
359  wsptr[DCTSIZE * 6] = (tmp1 - tmp6);
360  wsptr[DCTSIZE * 7] = (tmp0 - tmp7);
361  //
362  dataptr++; //next column
363  wsptr++;
364  threshold++;
365  }
366  dataptr += 8; //skip each second start pos
367  wsptr += 8;
368  }
369 }
370 
/**
 * Row pass of the inverse transform, accumulating into the output buffer.
 *
 * Runs cnt * 4 iterations. Each iteration reads eight consecutive values from
 * the workspace (in the permuted order produced by row_fdct_c: entries 0..3
 * feed the even part, 4..7 the odd part), transforms them, and adds the eight
 * descaled results to output entries spaced output_stride apart. The output
 * pointer then moves one entry to the right and the workspace pointer
 * DCTSIZE entries forward.
 *
 * @param workspace     thresholded coefficients, DCTSIZE values per iteration
 * @param output_adr    first entry to accumulate into
 * @param output_stride distance between the eight written entries, in
 *                      int16_t elements
 * @param cnt           iteration count divided by four
 */
371 static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
372 {
373  int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
374  int_simd16_t tmp10, tmp11, tmp12, tmp13;
375  int_simd16_t z5, z10, z11, z12, z13;
376  int16_t *outptr;
377  int16_t *wsptr;
378 
379  cnt *= 4;
380  wsptr = workspace;
381  outptr = output_adr;
382  for (; cnt > 0; cnt--) {
383  // Even part
384  //Simd version reads 4x4 block and transposes it
385  tmp10 = wsptr[2] + wsptr[3];
386  tmp11 = wsptr[2] - wsptr[3];
387 
388  tmp13 = wsptr[0] + wsptr[1];
389  tmp12 = (MULTIPLY16H(wsptr[0] - wsptr[1], FIX_1_414213562_A) << 2) - tmp13;//this shift order to avoid overflow
390 
391  tmp0 = tmp10 + tmp13; //->temps
392  tmp3 = tmp10 - tmp13; //->temps
393  tmp1 = tmp11 + tmp12;
394  tmp2 = tmp11 - tmp12;
395 
396  // Odd part
397  //Also transpose, with previous:
398  // ---- ---- ||||
399  // ---- ---- idct ||||
400  // ---- ---- ---> ||||
401  // ---- ---- ||||
402  z13 = wsptr[4] + wsptr[5];
403  z10 = wsptr[4] - wsptr[5];
404  z11 = wsptr[6] + wsptr[7];
405  z12 = wsptr[6] - wsptr[7];
406 
407  tmp7 = z11 + z13;
408  tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);
409 
410  z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
411  tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
412  tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_
413 
    /* the products are scaled up by 8 here; DESCALE(..., 3) below undoes it */
414  tmp6 = (tmp12 << 3) - tmp7;
415  tmp5 = (tmp11 << 3) - tmp6;
416  tmp4 = (tmp10 << 3) + tmp5;
417 
418  // Final output stage: descale and write column
    /* all eight results are added to what the output already holds */
419  outptr[0 * output_stride] += DESCALE(tmp0 + tmp7, 3);
420  outptr[1 * output_stride] += DESCALE(tmp1 + tmp6, 3);
421  outptr[2 * output_stride] += DESCALE(tmp2 + tmp5, 3);
422  outptr[3 * output_stride] += DESCALE(tmp3 - tmp4, 3);
423  outptr[4 * output_stride] += DESCALE(tmp3 + tmp4, 3);
424  outptr[5 * output_stride] += DESCALE(tmp2 - tmp5, 3);
425  outptr[6 * output_stride] += DESCALE(tmp1 - tmp6, 3); //no += ?
426  outptr[7 * output_stride] += DESCALE(tmp0 - tmp7, 3); //no += ?
427  outptr++;
428 
429  wsptr += DCTSIZE; // advance pointer to next row
430  }
431 }
432 
/**
 * Row pass of the forward transform.
 *
 * Runs cnt * 4 iterations. Each iteration reads eight pixels spaced line_size
 * bytes apart, transforms them and stores eight coefficients at data[0..7]:
 * the even part goes to entries 2, 3, 0, 1 and the odd part to entries
 * 4, 5, 6, 7. The pixel pointer then moves one byte forward and the data
 * pointer DCTSIZE entries forward.
 *
 * @param data      receives DCTSIZE coefficients per iteration
 * @param pixels    first of the eight pixels of the first iteration
 * @param line_size distance between the eight pixels, in bytes
 * @param cnt       iteration count divided by four
 */
433 static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
434 {
435  int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
436  int_simd16_t tmp10, tmp11, tmp12, tmp13;
437  int_simd16_t z1, z2, z3, z4, z5, z11, z13;
438  int16_t *dataptr;
439 
440  cnt *= 4;
441  // Pass 1: process rows.
442 
443  dataptr = data;
444  for (; cnt > 0; cnt--) {
    /* butterflies over the eight input pixels */
445  tmp0 = pixels[line_size * 0] + pixels[line_size * 7];
446  tmp7 = pixels[line_size * 0] - pixels[line_size * 7];
447  tmp1 = pixels[line_size * 1] + pixels[line_size * 6];
448  tmp6 = pixels[line_size * 1] - pixels[line_size * 6];
449  tmp2 = pixels[line_size * 2] + pixels[line_size * 5];
450  tmp5 = pixels[line_size * 2] - pixels[line_size * 5];
451  tmp3 = pixels[line_size * 3] + pixels[line_size * 4];
452  tmp4 = pixels[line_size * 3] - pixels[line_size * 4];
453 
454  // Even part
455 
456  tmp10 = tmp0 + tmp3;
457  tmp13 = tmp0 - tmp3;
458  tmp11 = tmp1 + tmp2;
459  tmp12 = tmp1 - tmp2;
460  //Even columns are written first, this leads to a different order of columns
461  //in column_fidct(), but they are processed independently, so all is ok.
462  //row_idct() later reads the columns in the same order.
463  dataptr[2] = tmp10 + tmp11;
464  dataptr[3] = tmp10 - tmp11;
465 
466  z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
467  dataptr[0] = tmp13 + z1;
468  dataptr[1] = tmp13 - z1;
469 
470  // Odd part
471 
    /* scaled up by 4 before the MULTIPLY16H products below */
472  tmp10 = (tmp4 + tmp5) << 2;
473  tmp11 = (tmp5 + tmp6) << 2;
474  tmp12 = (tmp6 + tmp7) << 2;
475 
476  z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433);
477  z2 = MULTIPLY16H(tmp10, FIX_0_541196100) + z5;
478  z4 = MULTIPLY16H(tmp12, FIX_1_306562965) + z5;
479  z3 = MULTIPLY16H(tmp11, FIX_0_707106781);
480 
481  z11 = tmp7 + z3;
482  z13 = tmp7 - z3;
483 
484  dataptr[4] = z13 + z2;
485  dataptr[5] = z13 - z2;
486  dataptr[6] = z11 + z4;
487  dataptr[7] = z11 - z4;
488 
489  pixels++; // advance pointer to next column
490  dataptr += DCTSIZE;
491  }
492 }
493 
494 static const enum AVPixelFormat pix_fmts[] = {
502 };
503 
505 {
506  AVFilterContext *ctx = inlink->dst;
507  FSPPContext *fspp = ctx->priv;
508  const int h = FFALIGN(inlink->h + 16, 16);
510 
511  fspp->hsub = desc->log2_chroma_w;
512  fspp->vsub = desc->log2_chroma_h;
513 
514  fspp->temp_stride = FFALIGN(inlink->w + 16, 16);
515  fspp->temp = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->temp));
516  fspp->src = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->src));
517 
518  if (!fspp->temp || !fspp->src)
519  return AVERROR(ENOMEM);
520 
521  fspp->store_slice = store_slice_c;
523  fspp->mul_thrmat = mul_thrmat_c;
525  fspp->row_idct = row_idct_c;
526  fspp->row_fdct = row_fdct_c;
527 
528  if (ARCH_X86)
529  ff_fspp_init_x86(fspp);
530 
531  return 0;
532 }
533 
535 {
536  AVFilterContext *ctx = inlink->dst;
537  FSPPContext *fspp = ctx->priv;
538  AVFilterLink *outlink = ctx->outputs[0];
539  AVFrame *out = in;
540 
541  int qp_stride = 0;
542  int8_t *qp_table = NULL;
543  int i, bias;
544  int ret = 0;
545  int custom_threshold_m[64];
546 
547  bias = (1 << 4) + fspp->strength;
548 
549  for (i = 0; i < 64; i++) //FIXME: tune custom_threshold[] and remove this !
550  custom_threshold_m[i] = (int)(custom_threshold[i] * (bias / 71.0) + 0.5);
551 
552  for (i = 0; i < 8; i++) {
553  fspp->threshold_mtx_noq[2 * i] = (uint64_t)custom_threshold_m[i * 8 + 2]
554  |(((uint64_t)custom_threshold_m[i * 8 + 6]) << 16)
555  |(((uint64_t)custom_threshold_m[i * 8 + 0]) << 32)
556  |(((uint64_t)custom_threshold_m[i * 8 + 4]) << 48);
557 
558  fspp->threshold_mtx_noq[2 * i + 1] = (uint64_t)custom_threshold_m[i * 8 + 5]
559  |(((uint64_t)custom_threshold_m[i * 8 + 3]) << 16)
560  |(((uint64_t)custom_threshold_m[i * 8 + 1]) << 32)
561  |(((uint64_t)custom_threshold_m[i * 8 + 7]) << 48);
562  }
563 
564  if (fspp->qp)
565  fspp->prev_q = fspp->qp, fspp->mul_thrmat((int16_t *)(&fspp->threshold_mtx_noq[0]), (int16_t *)(&fspp->threshold_mtx[0]), fspp->qp);
566 
567  /* if we are not in a constant user quantizer mode and we don't want to use
568  * the quantizers from the B-frames (B-frames often have a higher QP), we
569  * need to save the qp table from the last non B-frame; this is what the
570  * following code block does */
571  if (!fspp->qp && (fspp->use_bframe_qp || in->pict_type != AV_PICTURE_TYPE_B)) {
572  ret = ff_qp_table_extract(in, &qp_table, &qp_stride, NULL, &fspp->qscale_type);
573  if (ret < 0) {
574  av_frame_free(&in);
575  return ret;
576  }
577 
578  if (!fspp->use_bframe_qp && in->pict_type != AV_PICTURE_TYPE_B) {
579  av_freep(&fspp->non_b_qp_table);
580  fspp->non_b_qp_table = qp_table;
581  fspp->non_b_qp_stride = qp_stride;
582  }
583  }
584 
585  if (fspp->log2_count && !ctx->is_disabled) {
586  if (!fspp->use_bframe_qp && fspp->non_b_qp_table) {
587  qp_table = fspp->non_b_qp_table;
588  qp_stride = fspp->non_b_qp_stride;
589  }
590 
591  if (qp_table || fspp->qp) {
592  const int cw = AV_CEIL_RSHIFT(inlink->w, fspp->hsub);
593  const int ch = AV_CEIL_RSHIFT(inlink->h, fspp->vsub);
594 
595  /* get a new frame if in-place is not possible or if the dimensions
596  * are not multiple of 8 */
597  if (!av_frame_is_writable(in) || (inlink->w & 7) || (inlink->h & 7)) {
598  const int aligned_w = FFALIGN(inlink->w, 8);
599  const int aligned_h = FFALIGN(inlink->h, 8);
600 
601  out = ff_get_video_buffer(outlink, aligned_w, aligned_h);
602  if (!out) {
603  av_frame_free(&in);
604  ret = AVERROR(ENOMEM);
605  goto finish;
606  }
608  out->width = in->width;
609  out->height = in->height;
610  }
611 
612  filter(fspp, out->data[0], in->data[0], out->linesize[0], in->linesize[0],
613  inlink->w, inlink->h, qp_table, qp_stride, 1);
614  filter(fspp, out->data[1], in->data[1], out->linesize[1], in->linesize[1],
615  cw, ch, qp_table, qp_stride, 0);
616  filter(fspp, out->data[2], in->data[2], out->linesize[2], in->linesize[2],
617  cw, ch, qp_table, qp_stride, 0);
618  emms_c();
619  }
620  }
621 
622  if (in != out) {
623  if (in->data[3])
624  av_image_copy_plane(out->data[3], out->linesize[3],
625  in ->data[3], in ->linesize[3],
626  inlink->w, inlink->h);
627  av_frame_free(&in);
628  }
629  ret = ff_filter_frame(outlink, out);
630 finish:
631  if (qp_table != fspp->non_b_qp_table)
632  av_freep(&qp_table);
633  return ret;
634 }
635 
637 {
638  FSPPContext *fspp = ctx->priv;
639  av_freep(&fspp->temp);
640  av_freep(&fspp->src);
641  av_freep(&fspp->non_b_qp_table);
642 }
643 
644 static const AVFilterPad fspp_inputs[] = {
645  {
646  .name = "default",
647  .type = AVMEDIA_TYPE_VIDEO,
648  .config_props = config_input,
649  .filter_frame = filter_frame,
650  },
651 };
652 
653 static const AVFilterPad fspp_outputs[] = {
654  {
655  .name = "default",
656  .type = AVMEDIA_TYPE_VIDEO,
657  },
658 };
659 
661  .name = "fspp",
662  .description = NULL_IF_CONFIG_SMALL("Apply Fast Simple Post-processing filter."),
663  .priv_size = sizeof(FSPPContext),
664  .uninit = uninit,
668  .priv_class = &fspp_class,
670 };
ff_get_video_buffer
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:98
MULTIPLY16H
#define MULTIPLY16H(x, k)
Definition: vf_fspp.h:36
stride
int stride
Definition: mace.c:144
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
FIX_0_707106781
#define FIX_0_707106781
Definition: jfdctfst.c:118
FIX_0_541196100
#define FIX_0_541196100
Definition: jfdctfst.c:117
FSPPContext::column_fidct
void(* column_fidct)(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
Definition: vf_fspp.h:82
store_slice2_c
static void store_slice2_c(uint8_t *dst, int16_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
Definition: vf_fspp.c:114
vf_fspp.h
qp_table.h
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
FSPPContext::hsub
int hsub
Definition: vf_fspp.h:60
STORE
#define STORE(pos)
mem_internal.h
out
FILE * out
Definition: movenc.c:54
FSPPContext::threshold_mtx_noq
uint64_t threshold_mtx_noq[8 *2]
Definition: vf_fspp.h:55
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1018
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2660
FILTER_PIXFMTS_ARRAY
#define FILTER_PIXFMTS_ARRAY(array)
Definition: internal.h:171
output
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
Definition: filter_design.txt:225
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
FLAGS
#define FLAGS
Definition: vf_fspp.c:47
FSPPContext::store_slice
void(* store_slice)(uint8_t *dst, int16_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
Definition: vf_fspp.h:72
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:109
FSPPContext::vsub
int vsub
Definition: vf_fspp.h:61
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:317
pixdesc.h
step
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step
Definition: rate_distortion.txt:58
index
fg index
Definition: ffmpeg_filter.c:167
AVFrame::width
int width
Definition: frame.h:389
AVOption
AVOption.
Definition: opt.h:247
data
const char data[16]
Definition: mxf.c:143
AV_PIX_FMT_YUV440P
@ AV_PIX_FMT_YUV440P
planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
Definition: pixfmt.h:99
FSPPContext::src
uint8_t * src
Definition: vf_fspp.h:66
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:169
FIX_1_082392200
#define FIX_1_082392200
Definition: 4xm.c:158
FSPPContext::row_idct
void(* row_idct)(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
Definition: vf_fspp.h:85
AVFrame::data
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:338
av_image_copy_plane
void av_image_copy_plane(uint8_t *dst, int dst_linesize, const uint8_t *src, int src_linesize, int bytewidth, int height)
Copy image plane from src to dst.
Definition: imgutils.c:374
FIX_2_613125930
#define FIX_2_613125930
Definition: 4xm.c:161
row_fdct_c
static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
Definition: vf_fspp.c:433
finish
static void finish(void)
Definition: movenc.c:342
BLOCKSZ
#define BLOCKSZ
Definition: vf_fspp.h:28
FIX_0_382683433
#define FIX_0_382683433
Definition: jfdctfst.c:116
ff_norm_qscale
static int ff_norm_qscale(int qscale, int type)
Normalize the qscale factor FIXME the H264 qscale is a log based scale, mpeg1/2 is not,...
Definition: qp_table.h:39
fspp_inputs
static const AVFilterPad fspp_inputs[]
Definition: vf_fspp.c:644
custom_threshold
static const short custom_threshold[64]
Definition: vf_fspp.c:69
mul_thrmat_c
static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
Definition: vf_fspp.c:143
AVFilterPad
A filter pad used for either input or output.
Definition: internal.h:50
av_cold
#define av_cold
Definition: attributes.h:90
ff_vf_fspp
const AVFilter ff_vf_fspp
Definition: vf_fspp.c:660
column_fidct_c
static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
Definition: vf_fspp.c:248
AV_PIX_FMT_YUVJ422P
@ AV_PIX_FMT_YUVJ422P
planar YUV 4:2:2, 16bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV422P and setting col...
Definition: pixfmt.h:79
width
#define width
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:51
FSPPContext::row_fdct
void(* row_fdct)(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
Definition: vf_fspp.h:88
DCTSIZE
#define DCTSIZE
Definition: jfdctfst.c:74
FSPPContext::non_b_qp_table
int8_t * non_b_qp_table
Definition: vf_fspp.h:68
FSPPContext::non_b_qp_stride
int non_b_qp_stride
Definition: vf_fspp.h:69
ctx
AVFormatContext * ctx
Definition: movenc.c:48
config_input
static int config_input(AVFilterLink *inlink)
Definition: vf_fspp.c:504
store_slice_c
static void store_slice_c(uint8_t *dst, int16_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
Definition: vf_fspp.c:84
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:66
FSPPContext::log2_count
int log2_count
Definition: vf_fspp.h:58
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: internal.h:191
AV_PIX_FMT_YUVJ444P
@ AV_PIX_FMT_YUVJ444P
planar YUV 4:4:4, 24bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV444P and setting col...
Definition: pixfmt.h:80
NULL
#define NULL
Definition: coverity.c:32
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:537
FSPPContext::qp
int qp
Definition: vf_fspp.h:63
AV_PIX_FMT_YUVJ420P
@ AV_PIX_FMT_YUVJ420P
planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV420P and setting col...
Definition: pixfmt.h:78
src
#define src
Definition: vp8dsp.c:255
FIX_1_306562965
#define FIX_1_306562965
Definition: jfdctfst.c:119
STORE2
#define STORE2(pos)
AV_PIX_FMT_GRAY8
@ AV_PIX_FMT_GRAY8
Y , 8bpp.
Definition: pixfmt.h:74
for
for(j=16;j >0;--j)
Definition: h264pred_template.c:469
pix_fmts
static enum AVPixelFormat pix_fmts[]
Definition: vf_fspp.c:494
MAX_LEVEL
#define MAX_LEVEL
Definition: rl.h:36
AVFrame::pict_type
enum AVPictureType pict_type
Picture type of the frame.
Definition: frame.h:414
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:117
FSPPContext::strength
int strength
Definition: vf_fspp.h:59
uninit
static av_cold void uninit(AVFilterContext *ctx)
Definition: vf_fspp.c:636
av_frame_is_writable
int av_frame_is_writable(AVFrame *frame)
Check if the frame data is writable.
Definition: frame.c:473
OFFSET
#define OFFSET(x)
Definition: vf_fspp.c:46
height
#define height
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
fspp_options
static const AVOption fspp_options[]
Definition: vf_fspp.c:48
row_idct_c
static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
Definition: vf_fspp.c:371
FSPPContext::threshold_mtx
uint64_t threshold_mtx[8 *2]
Definition: vf_fspp.h:56
FIX_1_847759065
#define FIX_1_847759065
Definition: 4xm.c:160
internal.h
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem.h:116
FSPPContext::temp_stride
int temp_stride
Definition: vf_fspp.h:62
FSPPContext::use_bframe_qp
int use_bframe_qp
Definition: vf_fspp.h:70
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:271
av_malloc_array
#define av_malloc_array(a, b)
Definition: tableprint_vlc.h:32
AV_PIX_FMT_YUVJ440P
@ AV_PIX_FMT_YUVJ440P
planar YUV 4:4:0 full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV440P and setting color_range
Definition: pixfmt.h:100
FIX_1_414213562
#define FIX_1_414213562
Definition: 4xm.c:159
AVFilterPad::name
const char * name
Pad name.
Definition: internal.h:56
DESCALE
#define DESCALE(x, n)
Definition: jfdctfst.c:135
AVFilter
Filter definition.
Definition: avfilter.h:165
ret
ret
Definition: filter_design.txt:187
fspp_outputs
static const AVFilterPad fspp_outputs[]
Definition: vf_fspp.c:653
AVFrame::height
int height
Definition: frame.h:389
FSPPContext
Definition: vf_fspp.h:53
ff_fspp_init_x86
void ff_fspp_init_x86(FSPPContext *fspp)
Definition: vf_fspp_init.c:37
FIX_1_414213562_A
static const int16_t FIX_1_414213562_A
Definition: vf_fspp.h:47
AV_PICTURE_TYPE_B
@ AV_PICTURE_TYPE_B
Bi-dir predicted.
Definition: avutil.h:276
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:65
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Definition: opt.h:224
filter
static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src, int dst_stride, int src_stride, int width, int height, uint8_t *qp_store, int qp_stride, int is_luma)
Definition: vf_fspp.c:150
FSPPContext::mul_thrmat
void(* mul_thrmat)(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
Definition: vf_fspp.h:80
temp
else temp
Definition: vf_mcdeint.c:248
AV_PIX_FMT_YUV444P
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:71
AVFilterContext
An instance of a filter.
Definition: avfilter.h:402
FSPPContext::store_slice2
void(* store_slice2)(uint8_t *dst, int16_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
Definition: vf_fspp.h:76
AV_PIX_FMT_GBRP
@ AV_PIX_FMT_GBRP
planar GBR 4:4:4 24bpp
Definition: pixfmt.h:158
desc
const char * desc
Definition: libsvtav1.c:79
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
AV_PIX_FMT_YUV422P
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:70
ff_qp_table_extract
int ff_qp_table_extract(AVFrame *frame, int8_t **table, int *table_w, int *table_h, int *qscale_type)
Extract a libpostproc-compatible QP table - an 8-bit QP value per 16x16 macroblock,...
Definition: qp_table.c:30
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
FSPPContext::temp
int16_t * temp
Definition: vf_fspp.h:67
filter_frame
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
Definition: vf_fspp.c:534
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
AV_OPT_TYPE_BOOL
@ AV_OPT_TYPE_BOOL
Definition: opt.h:241
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: internal.h:192
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
AV_PIX_FMT_YUV411P
@ AV_PIX_FMT_YUV411P
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
Definition: pixfmt.h:73
d
d
Definition: ffmpeg_filter.c:153
FSPPContext::prev_q
int prev_q
Definition: vf_fspp.h:65
int32_t
int32_t
Definition: audioconvert.c:56
AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
#define AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
Same as AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, except that the filter will have its filter_frame() c...
Definition: avfilter.h:154
imgutils.h
AVFrame::linesize
int linesize[AV_NUM_DATA_POINTERS]
For video, a positive or negative value, which is typically indicating the size in bytes of each pict...
Definition: frame.h:362
AV_PIX_FMT_YUV410P
@ AV_PIX_FMT_YUV410P
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
Definition: pixfmt.h:72
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
FSPPContext::qscale_type
int qscale_type
Definition: vf_fspp.h:64
h
h
Definition: vp9dsp_template.c:2038
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(fspp)
int_simd16_t
int32_t int_simd16_t
Definition: vf_fspp.h:42
THRESHOLD
#define THRESHOLD(r, x, t)
Definition: vf_fspp.h:37
dither
static const uint8_t dither[8][8]
Definition: vf_fspp.c:58