FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
float_dsp.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License along
15  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17  */
18 
19 #include "config.h"
20 
21 #include <float.h>
22 #include <stdint.h>
23 
24 #include "libavutil/float_dsp.h"
25 #include "libavutil/internal.h"
26 #include "checkasm.h"
27 
/* Number of elements in every test buffer; must be even (see below). */
#define LEN 256

/*
 * Fill the first LEN elements of buf with pseudo-random values.
 *
 * Each av_bmg_get() call on the shared checkasm_lfg state yields two values
 * in bmg[]; both are scaled by stddev (10.0), offset by mean (0.0) and stored
 * in consecutive slots, so the loop advances two elements at a time and LEN
 * has to be even.
 *
 * The expansion deliberately does NOT end in a semicolon: the caller supplies
 * it, which keeps "if (c) randomize_buffer(x); else ..." well-formed.
 */
#define randomize_buffer(buf)                                   \
do {                                                            \
    int i;                                                      \
    double bmg[2], stddev = 10.0, mean = 0.0;                   \
                                                                \
    for (i = 0; i < LEN; i += 2) {                              \
        av_bmg_get(&checkasm_lfg, bmg);                         \
        (buf)[i]     = bmg[0] * stddev + mean;                  \
        (buf)[i + 1] = bmg[1] * stddev + mean;                  \
    }                                                           \
} while (0)
41 
42 static void test_vector_fmul(const float *src0, const float *src1)
43 {
44  LOCAL_ALIGNED_32(float, cdst, [LEN]);
45  LOCAL_ALIGNED_32(float, odst, [LEN]);
46  int i;
47 
48  declare_func(void, float *dst, const float *src0, const float *src1,
49  int len);
50 
51  call_ref(cdst, src0, src1, LEN);
52  call_new(odst, src0, src1, LEN);
53  for (i = 0; i < LEN; i++) {
54  if (!float_near_abs_eps(cdst[i], odst[i], FLT_EPSILON)) {
55  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
56  i, cdst[i], odst[i], cdst[i] - odst[i]);
57  fail();
58  break;
59  }
60  }
61  bench_new(odst, src0, src1, LEN);
62 }
63 
64 #define ARBITRARY_FMUL_ADD_CONST 0.005
65 static void test_vector_fmul_add(const float *src0, const float *src1, const float *src2)
66 {
67  LOCAL_ALIGNED_32(float, cdst, [LEN]);
68  LOCAL_ALIGNED_32(float, odst, [LEN]);
69  int i;
70 
71  declare_func(void, float *dst, const float *src0, const float *src1,
72  const float *src2, int len);
73 
74  call_ref(cdst, src0, src1, src2, LEN);
75  call_new(odst, src0, src1, src2, LEN);
76  for (i = 0; i < LEN; i++) {
77  if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMUL_ADD_CONST)) {
78  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
79  i, cdst[i], odst[i], cdst[i] - odst[i]);
80  fail();
81  break;
82  }
83  }
84  bench_new(odst, src0, src1, src2, LEN);
85 }
86 
87 static void test_vector_fmul_scalar(const float *src0, const float *src1)
88 {
89  LOCAL_ALIGNED_16(float, cdst, [LEN]);
90  LOCAL_ALIGNED_16(float, odst, [LEN]);
91  int i;
92 
93  declare_func(void, float *dst, const float *src, float mul, int len);
94 
95  call_ref(cdst, src0, src1[0], LEN);
96  call_new(odst, src0, src1[0], LEN);
97  for (i = 0; i < LEN; i++) {
98  if (!float_near_abs_eps(cdst[i], odst[i], FLT_EPSILON)) {
99  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
100  i, cdst[i], odst[i], cdst[i] - odst[i]);
101  fail();
102  break;
103  }
104  }
105  bench_new(odst, src0, src1[0], LEN);
106 }
107 
108 #define ARBITRARY_FMUL_WINDOW_CONST 0.008
109 static void test_vector_fmul_window(const float *src0, const float *src1, const float *win)
110 {
111  LOCAL_ALIGNED_16(float, cdst, [LEN]);
112  LOCAL_ALIGNED_16(float, odst, [LEN]);
113  int i;
114 
115  declare_func(void, float *dst, const float *src0, const float *src1,
116  const float *win, int len);
117 
118  call_ref(cdst, src0, src1, win, LEN / 2);
119  call_new(odst, src0, src1, win, LEN / 2);
120  for (i = 0; i < LEN; i++) {
121  if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMUL_WINDOW_CONST)) {
122  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
123  i, cdst[i], odst[i], cdst[i] - odst[i]);
124  fail();
125  break;
126  }
127  }
128  bench_new(odst, src0, src1, win, LEN / 2);
129 }
130 
131 #define ARBITRARY_FMAC_SCALAR_CONST 0.005
132 static void test_vector_fmac_scalar(const float *src0, const float *src1, const float *src2)
133 {
134  LOCAL_ALIGNED_32(float, cdst, [LEN]);
135  LOCAL_ALIGNED_32(float, odst, [LEN]);
136  int i;
137 
138  declare_func(void, float *dst, const float *src, float mul, int len);
139 
140  memcpy(cdst, src2, LEN * sizeof(*src2));
141  memcpy(odst, src2, LEN * sizeof(*src2));
142 
143  call_ref(cdst, src0, src1[0], LEN);
144  call_new(odst, src0, src1[0], LEN);
145  for (i = 0; i < LEN; i++) {
146  if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMAC_SCALAR_CONST)) {
147  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
148  i, cdst[i], odst[i], cdst[i] - odst[i]);
149  fail();
150  break;
151  }
152  }
153  memcpy(odst, src2, LEN * sizeof(*src2));
154  bench_new(odst, src0, src1[0], LEN);
155 }
156 
157 static void test_vector_dmul_scalar(const double *src0, const double *src1)
158 {
159  LOCAL_ALIGNED_32(double, cdst, [LEN]);
160  LOCAL_ALIGNED_32(double, odst, [LEN]);
161  int i;
162 
163  declare_func(void, double *dst, const double *src, double mul, int len);
164 
165  call_ref(cdst, src0, src1[0], LEN);
166  call_new(odst, src0, src1[0], LEN);
167  for (i = 0; i < LEN; i++) {
168  double t = fabs(src1[0]) + fabs(src0[i]) + fabs(src1[0] * src0[i]) + 1.0;
169  if (!double_near_abs_eps(cdst[i], odst[i], t * 2 * DBL_EPSILON)) {
170  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n", i,
171  cdst[i], odst[i], cdst[i] - odst[i]);
172  fail();
173  break;
174  }
175  }
176  bench_new(odst, src0, src1[0], LEN);
177 }
178 
179 #define ARBITRARY_DMAC_SCALAR_CONST 0.005
180 static void test_vector_dmac_scalar(const double *src0, const double *src1, const double *src2)
181 {
182  LOCAL_ALIGNED_32(double, cdst, [LEN]);
183  LOCAL_ALIGNED_32(double, odst, [LEN]);
184  int i;
185 
186  declare_func(void, double *dst, const double *src, double mul, int len);
187 
188  memcpy(cdst, src2, LEN * sizeof(*src2));
189  memcpy(odst, src2, LEN * sizeof(*src2));
190  call_ref(cdst, src0, src1[0], LEN);
191  call_new(odst, src0, src1[0], LEN);
192  for (i = 0; i < LEN; i++) {
193  if (!double_near_abs_eps(cdst[i], odst[i], ARBITRARY_DMAC_SCALAR_CONST)) {
194  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
195  i, cdst[i], odst[i], cdst[i] - odst[i]);
196  fail();
197  break;
198  }
199  }
200  memcpy(odst, src2, LEN * sizeof(*src2));
201  bench_new(odst, src0, src1[0], LEN);
202 }
203 
204 static void test_butterflies_float(const float *src0, const float *src1)
205 {
206  LOCAL_ALIGNED_16(float, cdst, [LEN]);
207  LOCAL_ALIGNED_16(float, odst, [LEN]);
208  LOCAL_ALIGNED_16(float, cdst1, [LEN]);
209  LOCAL_ALIGNED_16(float, odst1, [LEN]);
210  int i;
211 
212  declare_func(void, float *av_restrict src0, float *av_restrict src1,
213  int len);
214 
215  memcpy(cdst, src0, LEN * sizeof(*src0));
216  memcpy(cdst1, src1, LEN * sizeof(*src1));
217  memcpy(odst, src0, LEN * sizeof(*src0));
218  memcpy(odst1, src1, LEN * sizeof(*src1));
219 
220  call_ref(cdst, cdst1, LEN);
221  call_new(odst, odst1, LEN);
222  for (i = 0; i < LEN; i++) {
223  if (!float_near_abs_eps(cdst[i], odst[i], FLT_EPSILON) ||
224  !float_near_abs_eps(cdst1[i], odst1[i], FLT_EPSILON)) {
225  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
226  i, cdst[i], odst[i], cdst[i] - odst[i]);
227  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
228  i, cdst1[i], odst1[i], cdst1[i] - odst1[i]);
229  fail();
230  break;
231  }
232  }
233  memcpy(odst, src0, LEN * sizeof(*src0));
234  memcpy(odst1, src1, LEN * sizeof(*src1));
235  bench_new(odst, odst1, LEN);
236 }
237 
238 #define ARBITRARY_SCALARPRODUCT_CONST 0.2
239 static void test_scalarproduct_float(const float *src0, const float *src1)
240 {
241  float cprod, oprod;
242 
243  declare_func_float(float, const float *src0, const float *src1, int len);
244 
245  cprod = call_ref(src0, src1, LEN);
246  oprod = call_new(src0, src1, LEN);
248  fprintf(stderr, "%- .12f - %- .12f = % .12g\n",
249  cprod, oprod, cprod - oprod);
250  fail();
251  }
252  bench_new(src0, src1, LEN);
253 }
254 
256 {
257  LOCAL_ALIGNED_32(float, src0, [LEN]);
258  LOCAL_ALIGNED_32(float, src1, [LEN]);
259  LOCAL_ALIGNED_32(float, src2, [LEN]);
260  LOCAL_ALIGNED_16(float, src3, [LEN]);
261  LOCAL_ALIGNED_16(float, src4, [LEN]);
262  LOCAL_ALIGNED_16(float, src5, [LEN]);
263  LOCAL_ALIGNED_32(double, dbl_src0, [LEN]);
264  LOCAL_ALIGNED_32(double, dbl_src1, [LEN]);
265  LOCAL_ALIGNED_32(double, dbl_src2, [LEN]);
267 
268  if (!fdsp) {
269  fprintf(stderr, "floatdsp: Out of memory error\n");
270  return;
271  }
272 
275  randomize_buffer(src2);
276  randomize_buffer(src3);
277  randomize_buffer(src4);
278  randomize_buffer(src5);
279  randomize_buffer(dbl_src0);
280  randomize_buffer(dbl_src1);
281  randomize_buffer(dbl_src2);
282 
283  if (check_func(fdsp->vector_fmul, "vector_fmul"))
285  if (check_func(fdsp->vector_fmul_add, "vector_fmul_add"))
287  if (check_func(fdsp->vector_fmul_scalar, "vector_fmul_scalar"))
288  test_vector_fmul_scalar(src3, src4);
289  if (check_func(fdsp->vector_fmul_reverse, "vector_fmul_reverse"))
291  if (check_func(fdsp->vector_fmul_window, "vector_fmul_window"))
292  test_vector_fmul_window(src3, src4, src5);
293  report("vector_fmul");
294  if (check_func(fdsp->vector_fmac_scalar, "vector_fmac_scalar"))
296  report("vector_fmac");
297  if (check_func(fdsp->vector_dmul_scalar, "vector_dmul_scalar"))
298  test_vector_dmul_scalar(dbl_src0, dbl_src1);
299  report("vector_dmul");
300  if (check_func(fdsp->vector_dmac_scalar, "vector_dmac_scalar"))
301  test_vector_dmac_scalar(dbl_src0, dbl_src1, dbl_src2);
302  report("vector_dmac");
303  if (check_func(fdsp->butterflies_float, "butterflies_float"))
304  test_butterflies_float(src3, src4);
305  report("butterflies_float");
306  if (check_func(fdsp->scalarproduct_float, "scalarproduct_float"))
307  test_scalarproduct_float(src3, src4);
308  report("scalarproduct_float");
309 
310  av_freep(&fdsp);
311 }
static void test_vector_dmac_scalar(const double *src0, const double *src1, const double *src2)
Definition: float_dsp.c:180
static void test_vector_fmul_scalar(const float *src0, const float *src1)
Definition: float_dsp.c:87
static float win(SuperEqualizerContext *s, float n, int N)
#define ARBITRARY_FMUL_ADD_CONST
Definition: float_dsp.c:64
float(* scalarproduct_float)(const float *v1, const float *v2, int len)
Calculate the scalar product of two vectors of floats.
Definition: float_dsp.h:175
static void test_vector_fmul(const float *src0, const float *src1)
Definition: float_dsp.c:42
void(* vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len)
Calculate the entry wise product of two vectors of floats, and store the result in a vector of floats...
Definition: float_dsp.h:154
#define src
Definition: vp8dsp.c:254
#define report
Definition: checkasm.h:119
void(* vector_fmac_scalar)(float *dst, const float *src, float mul, int len)
Multiply a vector of floats by a scalar float and add to destination vector.
Definition: float_dsp.h:54
int float_near_abs_eps(float a, float b, float eps)
Definition: checkasm.c:294
void checkasm_check_float_dsp(void)
Definition: float_dsp.c:255
void(* vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len)
Overlap/add with window function.
Definition: float_dsp.h:119
#define ARBITRARY_DMAC_SCALAR_CONST
Definition: float_dsp.c:179
void(* vector_dmac_scalar)(double *dst, const double *src, double mul, int len)
Multiply a vector of doubles by a scalar double and add to destination vector.
Definition: float_dsp.h:70
av_cold AVFloatDSPContext * avpriv_float_dsp_alloc(int bit_exact)
Allocate a float DSP context.
Definition: float_dsp.c:127
void(* vector_fmul)(float *dst, const float *src0, const float *src1, int len)
Calculate the entry wise product of two vectors of floats and store the result in a vector of floats...
Definition: float_dsp.h:38
#define declare_func(ret,...)
Definition: checkasm.h:111
void(* butterflies_float)(float *av_restrict v1, float *av_restrict v2, int len)
Calculate the sum and difference of two vectors of floats.
Definition: float_dsp.h:164
void(* vector_dmul_scalar)(double *dst, const double *src, double mul, int len)
Multiply a vector of double by a scalar double.
Definition: float_dsp.h:100
#define fail()
Definition: checkasm.h:116
static void test_vector_fmul_window(const float *src0, const float *src1, const float *win)
Definition: float_dsp.c:109
common internal API header
void(* vector_fmul_scalar)(float *dst, const float *src, float mul, int len)
Multiply a vector of floats by a scalar float.
Definition: float_dsp.h:85
#define call_ref(...)
Definition: checkasm.h:122
int double_near_abs_eps(double a, double b, double eps)
Definition: checkasm.c:334
#define src1
Definition: h264pred.c:139
#define randomize_buffer(buf)
Definition: float_dsp.c:30
#define declare_func_float(ret,...)
Definition: checkasm.h:112
#define ARBITRARY_FMUL_WINDOW_CONST
Definition: float_dsp.c:108
static void test_vector_dmul_scalar(const double *src0, const double *src1)
Definition: float_dsp.c:157
static void test_butterflies_float(const float *src0, const float *src1)
Definition: float_dsp.c:204
static void test_vector_fmac_scalar(const float *src0, const float *src1, const float *src2)
Definition: float_dsp.c:132
void(* vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len)
Calculate the entry wise product of two vectors of floats, add a third vector of floats and store the...
Definition: float_dsp.h:137
#define check_func(func,...)
Definition: checkasm.h:107
#define src0
Definition: h264pred.c:138
#define LEN
Definition: float_dsp.c:28
static void test_scalarproduct_float(const float *src0, const float *src1)
Definition: float_dsp.c:239
#define ARBITRARY_FMAC_SCALAR_CONST
Definition: float_dsp.c:131
#define LOCAL_ALIGNED_32(t, v,...)
Definition: internal.h:137
#define ARBITRARY_SCALARPRODUCT_CONST
Definition: float_dsp.c:238
int len
#define bench_new(...)
Definition: checkasm.h:249
#define LOCAL_ALIGNED_16(t, v,...)
Definition: internal.h:131
#define av_freep(p)
#define call_new(...)
Definition: checkasm.h:189
static void test_vector_fmul_add(const float *src0, const float *src1, const float *src2)
Definition: float_dsp.c:65