FFmpeg
vf_fsppdsp.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3  * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
4  * Copyright (c) 2014 Arwa Arif <arwaarif1994@gmail.com>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License along
19  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21  */
22 
23 #include <stdint.h>
24 
25 #include "vf_fsppdsp.h"
26 
27 #include "libavutil/common.h"
28 #include "libavutil/mathematics.h"
29 #include "libavutil/mem_internal.h"
30 
/* Number of samples along one edge of a transform block. */
#define DCTSIZE 8

/*
 * Convert the real constant x to fixed point with s fractional bits.
 * 0.5 is added before the truncating cast to int, which rounds
 * non-negative values to nearest.
 */
#define FIX(x,s)  ((int)((x) * (1 << (s)) + 0.5))

/* Multiply x by the fixed-point factor k and drop the low 16 bits of the product. */
#define MULTIPLY16H(x,k)   (((x) * (k)) >> 16)

/*
 * Store x in r unless it lies in the dead zone, in which case store 0.
 * The unsigned comparison folds both bounds into one test: for t >= 0,
 * x is zeroed exactly when -t <= x < t.
 * Wrapped in do/while so that it is a single statement and can be used
 * inside an unbraced if/else. x and t are evaluated more than once, so
 * they must not have side effects.
 */
#define THRESHOLD(r,x,t)                                        \
    do {                                                        \
        if (((unsigned)((x) + (t))) >= (t) * 2) (r) = (x);      \
        else                                    (r) = 0;        \
    } while (0)

/* Arithmetic right shift of x by n bits after adding half of 2^n. */
#define DESCALE(x,n)  (((x) + (1 << ((n) - 1))) >> (n))

/* Working integer type of the scalar DCT routines in this file. */
typedef int32_t int_simd16_t;

43 enum {
44  FIX_0_382683433 = FIX(0.382683433, 14),
45  FIX_0_541196100 = FIX(0.541196100, 14),
47  FIX_1_306562965 = FIX(1.306562965, 14),
49  FIX_1_847759065 = FIX(1.847759065, 13),
50  FIX_2_613125930 = FIX(-2.613125930, 13),
52  FIX_1_082392200 = FIX(1.082392200, 13),
53 };
54 
55 DECLARE_ALIGNED(8, const uint8_t, ff_fspp_dither)[8][8] = {
56  { 0, 48, 12, 60, 3, 51, 15, 63, },
57  { 32, 16, 44, 28, 35, 19, 47, 31, },
58  { 8, 56, 4, 52, 11, 59, 7, 55, },
59  { 40, 24, 36, 20, 43, 27, 39, 23, },
60  { 2, 50, 14, 62, 1, 49, 13, 61, },
61  { 34, 18, 46, 30, 33, 17, 45, 29, },
62  { 10, 58, 6, 54, 9, 57, 5, 53, },
63  { 42, 26, 38, 22, 41, 25, 37, 21, },
64 };
65 
66 //This func reads from 1 slice, 1 and clears 0 & 1
67 void ff_store_slice_c(uint8_t *restrict dst, int16_t *restrict src,
68  ptrdiff_t dst_stride, ptrdiff_t src_stride,
69  ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
70 {
71 #define STORE(pos) \
72  temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
73  src[x + pos] = src[x + pos - 8 * src_stride] = 0; \
74  temp = av_clip_uint8(temp); \
75  dst[x + pos] = temp;
76 
77  for (int y = 0; y < height; y++) {
78  const uint8_t *d = ff_fspp_dither[y];
79  for (int x = 0; x < width; x += 8) {
80  int temp;
81  STORE(0);
82  STORE(1);
83  STORE(2);
84  STORE(3);
85  STORE(4);
86  STORE(5);
87  STORE(6);
88  STORE(7);
89  }
90  src += src_stride;
91  dst += dst_stride;
92  }
93 }
94 
95 //This func reads from 2 slices, 0 & 2 and clears 2-nd
96 void ff_store_slice2_c(uint8_t *restrict dst, int16_t *restrict src,
97  ptrdiff_t dst_stride, ptrdiff_t src_stride,
98  ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
99 {
100 #define STORE2(pos) \
101  temp = (src[x + pos] + src[x + pos + 16 * src_stride] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
102  src[x + pos + 16 * src_stride] = 0; \
103  temp = av_clip_uint8(temp); \
104  dst[x + pos] = temp;
105 
106  for (int y = 0; y < height; y++) {
107  const uint8_t *d = ff_fspp_dither[y];
108  for (int x = 0; x < width; x += 8) {
109  int temp;
110  STORE2(0);
111  STORE2(1);
112  STORE2(2);
113  STORE2(3);
114  STORE2(4);
115  STORE2(5);
116  STORE2(6);
117  STORE2(7);
118  }
119  src += src_stride;
120  dst += dst_stride;
121  }
122 }
123 
/*
 * Scale a 64-entry threshold matrix.
 *
 * Writes q * thr_adr_noq[i] to thr_adr[i] for i in 0..63. The product is
 * computed as int and then converted to int16_t on the store.
 */
void ff_mul_thrmat_c(const int16_t *restrict thr_adr_noq, int16_t *restrict thr_adr, int q)
{
    const int16_t *const end = thr_adr_noq + 64;

    while (thr_adr_noq < end) {
        *thr_adr++ = q * *thr_adr_noq++;
    }
}
129 
130 void ff_column_fidct_c(const int16_t *restrict thr_adr, const int16_t *restrict data,
131  int16_t *restrict output, int cnt)
132 {
133  int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
134  int_simd16_t tmp10, tmp11, tmp12, tmp13;
135  int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
136  int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;
137 
138  int16_t *wsptr;
139 
140  wsptr = output;
141 
142  for (; cnt > 0; cnt -= 2) { //start positions
143  const int16_t *threshold = thr_adr;//threshold_mtx
144  for (int ctr = DCTSIZE; ctr > 0; ctr--) {
145  // Process columns from input, add to output.
146  tmp0 = data[DCTSIZE * 0] + data[DCTSIZE * 7];
147  tmp7 = data[DCTSIZE * 0] - data[DCTSIZE * 7];
148 
149  tmp1 = data[DCTSIZE * 1] + data[DCTSIZE * 6];
150  tmp6 = data[DCTSIZE * 1] - data[DCTSIZE * 6];
151 
152  tmp2 = data[DCTSIZE * 2] + data[DCTSIZE * 5];
153  tmp5 = data[DCTSIZE * 2] - data[DCTSIZE * 5];
154 
155  tmp3 = data[DCTSIZE * 3] + data[DCTSIZE * 4];
156  tmp4 = data[DCTSIZE * 3] - data[DCTSIZE * 4];
157 
158  // Even part of FDCT
159 
160  tmp10 = tmp0 + tmp3;
161  tmp13 = tmp0 - tmp3;
162  tmp11 = tmp1 + tmp2;
163  tmp12 = tmp1 - tmp2;
164 
165  d0 = tmp10 + tmp11;
166  d4 = tmp10 - tmp11;
167 
168  z1 = MULTIPLY16H(tmp12 + tmp13, FIX_0_707106781 << 2);
169  d2 = tmp13 + z1;
170  d6 = tmp13 - z1;
171 
172  // Even part of IDCT
173 
174  THRESHOLD(tmp0, d0, threshold[0 * 8]);
175  THRESHOLD(tmp1, d2, threshold[2 * 8]);
176  THRESHOLD(tmp2, d4, threshold[4 * 8]);
177  THRESHOLD(tmp3, d6, threshold[6 * 8]);
178  tmp0 += 2;
179  tmp10 = (tmp0 + tmp2) >> 2;
180  tmp11 = (tmp0 - tmp2) >> 2;
181 
182  tmp13 = (tmp1 + tmp3) >>2; //+2 ! (psnr decides)
183  tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2
184 
185  tmp0 = tmp10 + tmp13; //->temps
186  tmp3 = tmp10 - tmp13; //->temps
187  tmp1 = tmp11 + tmp12; //->temps
188  tmp2 = tmp11 - tmp12; //->temps
189 
190  // Odd part of FDCT
191 
192  tmp10 = tmp4 + tmp5;
193  tmp11 = tmp5 + tmp6;
194  tmp12 = tmp6 + tmp7;
195 
196  z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433 << 2);
197  z2 = MULTIPLY16H(tmp10, FIX_0_541196100 << 2) + z5;
198  z4 = MULTIPLY16H(tmp12, FIX_1_306562965 << 2) + z5;
199  z3 = MULTIPLY16H(tmp11, FIX_0_707106781 << 2);
200 
201  z11 = tmp7 + z3;
202  z13 = tmp7 - z3;
203 
204  d5 = z13 + z2;
205  d3 = z13 - z2;
206  d1 = z11 + z4;
207  d7 = z11 - z4;
208 
209  // Odd part of IDCT
210 
211  THRESHOLD(tmp4, d1, threshold[1 * 8]);
212  THRESHOLD(tmp5, d3, threshold[3 * 8]);
213  THRESHOLD(tmp6, d5, threshold[5 * 8]);
214  THRESHOLD(tmp7, d7, threshold[7 * 8]);
215 
216  //Simd version uses here a shortcut for the tmp5,tmp6,tmp7 == 0
217  z13 = tmp6 + tmp5;
218  z10 = (tmp6 - tmp5) * 2;
219  z11 = tmp4 + tmp7;
220  z12 = (tmp4 - tmp7) * 2;
221 
222  tmp7 = (z11 + z13) >> 2; //+2 !
223  tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562 << 1);
224  z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
225  tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
226  tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !!
227 
228  tmp6 = tmp12 - tmp7;
229  tmp5 = tmp11 - tmp6;
230  tmp4 = tmp10 + tmp5;
231 
232  wsptr[DCTSIZE * 0] += (tmp0 + tmp7);
233  wsptr[DCTSIZE * 1] += (tmp1 + tmp6);
234  wsptr[DCTSIZE * 2] += (tmp2 + tmp5);
235  wsptr[DCTSIZE * 3] += (tmp3 - tmp4);
236  wsptr[DCTSIZE * 4] += (tmp3 + tmp4);
237  wsptr[DCTSIZE * 5] += (tmp2 - tmp5);
238  wsptr[DCTSIZE * 6] = (tmp1 - tmp6);
239  wsptr[DCTSIZE * 7] = (tmp0 - tmp7);
240  //
241  data++; //next column
242  wsptr++;
243  threshold++;
244  }
245  data += 8; //skip each second start pos
246  wsptr += 8;
247  }
248 }
249 
250 void ff_row_idct_c(const int16_t *restrict wsptr, int16_t *restrict output_adr,
251  ptrdiff_t output_stride, int cnt)
252 {
253  int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
254  int_simd16_t tmp10, tmp11, tmp12, tmp13;
255  int_simd16_t z5, z10, z11, z12, z13;
256  int16_t *outptr;
257 
258  cnt *= 4;
259  outptr = output_adr;
260  for (; cnt > 0; cnt--) {
261  // Even part
262  //Simd version reads 4x4 block and transposes it
263  tmp10 = wsptr[2] + wsptr[3];
264  tmp11 = wsptr[2] - wsptr[3];
265 
266  tmp13 = wsptr[0] + wsptr[1];
267  tmp12 = (MULTIPLY16H(wsptr[0] - wsptr[1], FIX_1_414213562_A) * 4) - tmp13;//this shift order to avoid overflow
268 
269  tmp0 = tmp10 + tmp13; //->temps
270  tmp3 = tmp10 - tmp13; //->temps
271  tmp1 = tmp11 + tmp12;
272  tmp2 = tmp11 - tmp12;
273 
274  // Odd part
275  //Also transpose, with previous:
276  // ---- ---- ||||
277  // ---- ---- idct ||||
278  // ---- ---- ---> ||||
279  // ---- ---- ||||
280  z13 = wsptr[4] + wsptr[5];
281  z10 = wsptr[4] - wsptr[5];
282  z11 = wsptr[6] + wsptr[7];
283  z12 = wsptr[6] - wsptr[7];
284 
285  tmp7 = z11 + z13;
286  tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);
287 
288  z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
289  tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
290  tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_
291 
292  tmp6 = tmp12 * 8 - tmp7;
293  tmp5 = tmp11 * 8 - tmp6;
294  tmp4 = tmp10 * 8 + tmp5;
295 
296  // Final output stage: descale and write column
297  outptr[0 * output_stride] += DESCALE(tmp0 + tmp7, 3);
298  outptr[1 * output_stride] += DESCALE(tmp1 + tmp6, 3);
299  outptr[2 * output_stride] += DESCALE(tmp2 + tmp5, 3);
300  outptr[3 * output_stride] += DESCALE(tmp3 - tmp4, 3);
301  outptr[4 * output_stride] += DESCALE(tmp3 + tmp4, 3);
302  outptr[5 * output_stride] += DESCALE(tmp2 - tmp5, 3);
303  outptr[6 * output_stride] += DESCALE(tmp1 - tmp6, 3); //no += ?
304  outptr[7 * output_stride] += DESCALE(tmp0 - tmp7, 3); //no += ?
305  outptr++;
306 
307  wsptr += DCTSIZE; // advance pointer to next row
308  }
309 }
310 
311 void ff_row_fdct_c(int16_t *restrict data, const uint8_t *restrict pixels,
312  ptrdiff_t line_size, int cnt)
313 {
314  int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
315  int_simd16_t tmp10, tmp11, tmp12, tmp13;
316  int_simd16_t z1, z2, z3, z4, z5, z11, z13;
317  int16_t *dataptr;
318 
319  cnt *= 4;
320  // Pass 1: process rows.
321 
322  dataptr = data;
323  for (; cnt > 0; cnt--) {
324  tmp0 = pixels[line_size * 0] + pixels[line_size * 7];
325  tmp7 = pixels[line_size * 0] - pixels[line_size * 7];
326  tmp1 = pixels[line_size * 1] + pixels[line_size * 6];
327  tmp6 = pixels[line_size * 1] - pixels[line_size * 6];
328  tmp2 = pixels[line_size * 2] + pixels[line_size * 5];
329  tmp5 = pixels[line_size * 2] - pixels[line_size * 5];
330  tmp3 = pixels[line_size * 3] + pixels[line_size * 4];
331  tmp4 = pixels[line_size * 3] - pixels[line_size * 4];
332 
333  // Even part
334 
335  tmp10 = tmp0 + tmp3;
336  tmp13 = tmp0 - tmp3;
337  tmp11 = tmp1 + tmp2;
338  tmp12 = tmp1 - tmp2;
339  //Even columns are written first, this leads to different order of columns
340  //in column_fidct(), but they are processed independently, so all ok.
341  //Later in the row_idct() columns are read in the same order.
342  dataptr[2] = tmp10 + tmp11;
343  dataptr[3] = tmp10 - tmp11;
344 
345  z1 = MULTIPLY16H(tmp12 + tmp13, FIX_0_707106781 << 2);
346  dataptr[0] = tmp13 + z1;
347  dataptr[1] = tmp13 - z1;
348 
349  // Odd part
350 
351  tmp10 = tmp4 + tmp5;
352  tmp11 = tmp5 + tmp6;
353  tmp12 = tmp6 + tmp7;
354 
355  z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433 << 2);
356  z2 = MULTIPLY16H(tmp10, FIX_0_541196100 << 2) + z5;
357  z4 = MULTIPLY16H(tmp12, FIX_1_306562965 << 2) + z5;
358  z3 = MULTIPLY16H(tmp11, FIX_0_707106781 << 2);
359 
360  z11 = tmp7 + z3;
361  z13 = tmp7 - z3;
362 
363  dataptr[4] = z13 + z2;
364  dataptr[5] = z13 - z2;
365  dataptr[6] = z11 + z4;
366  dataptr[7] = z11 - z4;
367 
368  pixels++; // advance pointer to next column
369  dataptr += DCTSIZE;
370  }
371 }
mem_internal.h
ff_store_slice2_c
void ff_store_slice2_c(uint8_t *restrict dst, int16_t *restrict src, ptrdiff_t dst_stride, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
Definition: vf_fsppdsp.c:96
output
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
Definition: filter_design.txt:226
data
const char data[16]
Definition: mxf.c:149
DCTSIZE
#define DCTSIZE
Definition: vf_fsppdsp.c:31
mathematics.h
vf_fsppdsp.h
FIX_2_613125930
@ FIX_2_613125930
Definition: vf_fsppdsp.c:50
STORE2
#define STORE2(pos)
FIX_0_382683433
@ FIX_0_382683433
Definition: vf_fsppdsp.c:44
DESCALE
#define DESCALE(x, n)
Definition: vf_fsppdsp.c:39
FIX_0_541196100
@ FIX_0_541196100
Definition: vf_fsppdsp.c:45
FIX
#define FIX(x, s)
Definition: vf_fsppdsp.c:33
ff_row_idct_c
void ff_row_idct_c(const int16_t *restrict wsptr, int16_t *restrict output_adr, ptrdiff_t output_stride, int cnt)
Definition: vf_fsppdsp.c:250
THRESHOLD
#define THRESHOLD(r, x, t)
Definition: vf_fsppdsp.c:36
ff_mul_thrmat_c
void ff_mul_thrmat_c(const int16_t *restrict thr_adr_noq, int16_t *restrict thr_adr, int q)
Definition: vf_fsppdsp.c:124
height
#define height
Definition: dsp.h:89
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem_internal.h:104
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
FIX_1_414213562_A
@ FIX_1_414213562_A
Definition: vf_fsppdsp.c:48
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
FIX_0_707106781
@ FIX_0_707106781
Definition: vf_fsppdsp.c:46
FIX_1_082392200
@ FIX_1_082392200
Definition: vf_fsppdsp.c:52
common.h
STORE
#define STORE(pos)
FIX_1_306562965
@ FIX_1_306562965
Definition: vf_fsppdsp.c:47
ff_row_fdct_c
void ff_row_fdct_c(int16_t *restrict data, const uint8_t *restrict pixels, ptrdiff_t line_size, int cnt)
Definition: vf_fsppdsp.c:311
M_SQRT1_2
#define M_SQRT1_2
Definition: mathematics.h:103
MULTIPLY16H
#define MULTIPLY16H(x, k)
Definition: vf_fsppdsp.c:35
temp
else temp
Definition: vf_mcdeint.c:271
M_SQRT2
#define M_SQRT2
Definition: mathematics.h:109
ff_column_fidct_c
void ff_column_fidct_c(const int16_t *restrict thr_adr, const int16_t *restrict data, int16_t *restrict output, int cnt)
Definition: vf_fsppdsp.c:130
int_simd16_t
int32_t int_simd16_t
Definition: vf_fsppdsp.c:41
int32_t
int32_t
Definition: audioconvert.c:56
width
#define width
Definition: dsp.h:89
ff_fspp_dither
const uint8_t ff_fspp_dither[8][8]
Definition: vf_fsppdsp.c:55
ff_store_slice_c
void ff_store_slice_c(uint8_t *restrict dst, int16_t *restrict src, ptrdiff_t dst_stride, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
Definition: vf_fsppdsp.c:67
src
#define src
Definition: vp8dsp.c:248
FIX_1_847759065
@ FIX_1_847759065
Definition: vf_fsppdsp.c:49
FIX_1_414213562
@ FIX_1_414213562
Definition: vf_fsppdsp.c:51