FFmpeg
sw_ops.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20 
21 #include <string.h>
22 
23 #include "libavutil/avassert.h"
24 #include "libavutil/mem_internal.h"
25 #include "libavutil/refstruct.h"
26 
27 #include "libswscale/ops.h"
29 
30 #include "checkasm.h"
31 
/* Dimensions of the static scratch buffers that check_ops() operates on */
enum {
    NB_PLANES = 4,  /* number of planes per buffer */
    PIXELS    = 64, /* pixels per line */
    LINES     = 2,  /* lines per plane */
};
37 
38 enum {
43 };
44 
/**
 * Format a printf-style message into a temporary, zero-initialized 256-byte
 * buffer and evaluate to a pointer to the result.
 *
 * The buffer is a compound literal, so the returned string is only valid
 * inside the block that contains the FMT() expression. At least one variadic
 * argument must be supplied.
 */
#define FMT(fmt, ...) tprintf((char[256]) {0}, 256, fmt, __VA_ARGS__)

/**
 * Thin vsnprintf() wrapper that returns the destination buffer, which makes
 * it usable directly inside an expression.
 *
 * @param buf  destination buffer
 * @param size capacity of `buf` in bytes; longer output is truncated
 * @param fmt  printf-style format string
 * @return `buf`
 */
static const char *tprintf(char buf[], size_t size, const char *fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    (void) vsnprintf(buf, size, fmt, args);
    va_end(args);

    return buf;
}
54 
55 static int rw_pixel_bits(const SwsOp *op)
56 {
57  const int elems = op->rw.packed ? op->rw.elems : 1;
58  const int size = ff_sws_pixel_type_size(op->type);
59  const int bits = 8 >> op->rw.frac;
60  av_assert1(bits >= 1);
61  return elems * size * bits;
62 }
63 
/**
 * Produce a random float by reinterpreting random 32-bit patterns, retrying
 * until the pattern is a normal number (as classified by isnormal()).
 */
static float rndf(void)
{
    union { uint32_t u; float f; } pun;

    for (;;) {
        pun.u = rnd();
        if (isnormal(pun.f))
            return pun.f;
    }
}
72 
/**
 * Fill `num` floats with random data.
 *
 * A non-zero `range` scales a random 32-bit integer so that the result lies
 * in [0, range]; a `range` of zero draws arbitrary normal floats via rndf().
 */
static void fill32f(float *line, int num, unsigned range)
{
    const float scale = (float) range / UINT32_MAX;

    if (range) {
        for (int i = 0; i < num; i++)
            line[i] = scale * rnd();
    } else {
        for (int i = 0; i < num; i++)
            line[i] = rndf();
    }
}
79 
/**
 * Fill `num` 32-bit words with random data.
 *
 * Values are reduced modulo (range + 1) unless `range` is zero or UINT_MAX,
 * in which case the raw random word is stored.
 */
static void fill32(uint32_t *line, int num, unsigned range)
{
    /* range + 1 would wrap to zero for UINT_MAX, so treat it as unbounded */
    const int bounded = range && range < UINT_MAX;

    for (int i = 0; i < num; i++) {
        const uint32_t value = rnd();
        line[i] = bounded ? value % (range + 1) : value;
    }
}
85 
/**
 * Fill `num` 16-bit samples with random data.
 *
 * @param line  destination, `num` elements long
 * @param num   number of elements to write
 * @param range inclusive upper bound of the generated values; zero, or any
 *              value covering the whole 16-bit domain, means "unrestricted"
 *
 * The unrestricted case writes element by element instead of going through
 * fill32() with a rounded-up word count: that wrote one element past the end
 * for odd `num` and stored through a pointer of a different type. Folding
 * large ranges into the same path also avoids `range + 1` wrapping to zero
 * (a modulo by zero) when `range` is UINT_MAX.
 */
static void fill16(uint16_t *line, int num, unsigned range)
{
    if (!range || range >= UINT16_MAX) {
        for (int i = 0; i < num; i++)
            line[i] = (uint16_t) rnd();
        return;
    }

    for (int i = 0; i < num; i++)
        line[i] = rnd() % (range + 1);
}
95 
/**
 * Fill `num` bytes with random data.
 *
 * @param line  destination, `num` elements long
 * @param num   number of elements to write
 * @param range inclusive upper bound of the generated values; zero, or any
 *              value covering the whole 8-bit domain, means "unrestricted"
 *
 * The unrestricted case writes byte by byte instead of going through
 * fill32() with a rounded-up word count: that wrote up to three bytes past
 * the end when `num` was not a multiple of four and stored through a pointer
 * of a different type. Folding large ranges into the same path also avoids
 * `range + 1` wrapping to zero (a modulo by zero) when `range` is UINT_MAX.
 */
static void fill8(uint8_t *line, int num, unsigned range)
{
    if (!range || range >= UINT8_MAX) {
        for (int i = 0; i < num; i++)
            line[i] = (uint8_t) rnd();
        return;
    }

    for (int i = 0; i < num; i++)
        line[i] = rnd() % (range + 1);
}
105 
106 static void check_ops(const char *report, const unsigned ranges[NB_PLANES],
107  const SwsOp *ops)
108 {
110  SwsCompiledOp comp_ref = {0}, comp_new = {0};
111  const SwsOpBackend *backend_new = NULL;
112  SwsOpList oplist = { .ops = (SwsOp *) ops };
113  const SwsOp *read_op, *write_op;
114  static const unsigned def_ranges[4] = {0};
115  if (!ranges)
116  ranges = def_ranges;
117 
118  declare_func(void, const SwsOpExec *, const void *, int bx, int y, int bx_end, int y_end);
119 
120  static DECLARE_ALIGNED_64(char, src0)[NB_PLANES][LINES][PIXELS * sizeof(uint32_t[4])];
121  static DECLARE_ALIGNED_64(char, src1)[NB_PLANES][LINES][PIXELS * sizeof(uint32_t[4])];
122  static DECLARE_ALIGNED_64(char, dst0)[NB_PLANES][LINES][PIXELS * sizeof(uint32_t[4])];
123  static DECLARE_ALIGNED_64(char, dst1)[NB_PLANES][LINES][PIXELS * sizeof(uint32_t[4])];
124 
125  if (!ctx)
126  return;
128 
129  read_op = &ops[0];
130  for (oplist.num_ops = 0; ops[oplist.num_ops].op; oplist.num_ops++)
131  write_op = &ops[oplist.num_ops];
132 
133  const int read_size = PIXELS * rw_pixel_bits(read_op) >> 3;
134  const int write_size = PIXELS * rw_pixel_bits(write_op) >> 3;
135 
136  for (int p = 0; p < NB_PLANES; p++) {
137  void *plane = src0[p];
138  switch (read_op->type) {
139  case U8: fill8(plane, sizeof(src0[p]) / sizeof(uint8_t), ranges[p]); break;
140  case U16: fill16(plane, sizeof(src0[p]) / sizeof(uint16_t), ranges[p]); break;
141  case U32: fill32(plane, sizeof(src0[p]) / sizeof(uint32_t), ranges[p]); break;
142  case F32: fill32f(plane, sizeof(src0[p]) / sizeof(uint32_t), ranges[p]); break;
143  }
144  }
145 
146  memcpy(src1, src0, sizeof(src0));
147  memset(dst0, 0, sizeof(dst0));
148  memset(dst1, 0, sizeof(dst1));
149 
150  /* Compile `ops` using both the asm and c backends */
151  for (int n = 0; ff_sws_op_backends[n]; n++) {
152  const SwsOpBackend *backend = ff_sws_op_backends[n];
153  const bool is_ref = !strcmp(backend->name, "c");
154  if (is_ref || !comp_new.func) {
156  int ret = ff_sws_ops_compile_backend(ctx, backend, &oplist, &comp);
157  if (ret == AVERROR(ENOTSUP))
158  continue;
159  else if (ret < 0)
160  fail();
161  else if (PIXELS % comp.block_size != 0)
162  fail();
163 
164  if (is_ref)
165  comp_ref = comp;
166  if (!comp_new.func) {
167  comp_new = comp;
168  backend_new = backend;
169  }
170  }
171  }
172 
173  av_assert0(comp_ref.func && comp_new.func);
174 
175  SwsOpExec exec = {0};
176  exec.width = PIXELS;
177  exec.height = exec.slice_h = LINES;
178  for (int i = 0; i < NB_PLANES; i++) {
179  exec.in_stride[i] = sizeof(src0[i][0]);
180  exec.out_stride[i] = sizeof(dst0[i][0]);
181  exec.in_bump[i] = exec.in_stride[i] - read_size;
182  exec.out_bump[i] = exec.out_stride[i] - write_size;
183  }
184 
185  /**
186  * Don't use check_func() because the actual function pointer may be a
187  * wrapper shared by multiple implementations. Instead, take a hash of both
188  * the backend pointer and the active CPU flags.
189  */
190  uintptr_t id = (uintptr_t) backend_new;
191  id ^= (id << 6) + (id >> 2) + 0x9e3779b97f4a7c15 + comp_new.cpu_flags;
192 
193  if (check_key((void*) id, "%s", report)) {
194  exec.block_size_in = comp_ref.block_size * rw_pixel_bits(read_op) >> 3;
195  exec.block_size_out = comp_ref.block_size * rw_pixel_bits(write_op) >> 3;
196  for (int i = 0; i < NB_PLANES; i++) {
197  exec.in[i] = (void *) src0[i];
198  exec.out[i] = (void *) dst0[i];
199  }
200  checkasm_call(comp_ref.func, &exec, comp_ref.priv, 0, 0, PIXELS / comp_ref.block_size, LINES);
201 
202  exec.block_size_in = comp_new.block_size * rw_pixel_bits(read_op) >> 3;
203  exec.block_size_out = comp_new.block_size * rw_pixel_bits(write_op) >> 3;
204  for (int i = 0; i < NB_PLANES; i++) {
205  exec.in[i] = (void *) src1[i];
206  exec.out[i] = (void *) dst1[i];
207  }
208  checkasm_call_checked(comp_new.func, &exec, comp_new.priv, 0, 0, PIXELS / comp_new.block_size, LINES);
209 
210  for (int i = 0; i < NB_PLANES; i++) {
211  const char *name = FMT("%s[%d]", report, i);
212  const int stride = sizeof(dst0[i][0]);
213 
214  switch (write_op->type) {
215  case U8:
216  checkasm_check(uint8_t, (void *) dst0[i], stride,
217  (void *) dst1[i], stride,
218  write_size, LINES, name);
219  break;
220  case U16:
221  checkasm_check(uint16_t, (void *) dst0[i], stride,
222  (void *) dst1[i], stride,
223  write_size >> 1, LINES, name);
224  break;
225  case U32:
226  checkasm_check(uint32_t, (void *) dst0[i], stride,
227  (void *) dst1[i], stride,
228  write_size >> 2, LINES, name);
229  break;
230  case F32:
231  checkasm_check(float_ulp, (void *) dst0[i], stride,
232  (void *) dst1[i], stride,
233  write_size >> 2, LINES, name, 0);
234  break;
235  }
236 
237  if (write_op->rw.packed)
238  break;
239  }
240 
241  bench(comp_new.func, &exec, comp_new.priv, 0, 0, PIXELS / comp_new.block_size, LINES);
242  }
243 
244  if (comp_new.func != comp_ref.func)
245  ff_sws_compiled_op_unref(&comp_new);
246  ff_sws_compiled_op_unref(&comp_ref);
248 }
249 
/**
 * Build the op chain READ(IN, N_IN) -> <ops in __VA_ARGS__> ->
 * WRITE(OUT, N_OUT), terminated by a zeroed SwsOp, and pass it to check_ops()
 * under the test name NAME.
 *
 * RANGES is forwarded unchanged as the `ranges` argument of check_ops().
 * At least one intermediate op must be given in __VA_ARGS__.
 */
#define CHECK_RANGES(NAME, RANGES, N_IN, N_OUT, IN, OUT, ...) \
    do { \
        check_ops(NAME, RANGES, (SwsOp[]) { \
            { \
                .op       = SWS_OP_READ, \
                .type     = IN, \
                .rw.elems = N_IN, \
            }, \
            __VA_ARGS__, \
            { \
                .op       = SWS_OP_WRITE, \
                .type     = OUT, \
                .rw.elems = N_OUT, \
            }, {0} \
        }); \
    } while (0)

/** Four-entry range array with the same bound R for every plane */
#define MK_RANGES(R) ((const unsigned[]) { R, R, R, R })

/** CHECK_RANGES() with a single bound RANGE applied to all planes */
#define CHECK_RANGE(NAME, RANGE, N_IN, N_OUT, IN, OUT, ...) \
    CHECK_RANGES(NAME, MK_RANGES(RANGE), N_IN, N_OUT, IN, OUT, __VA_ARGS__)

/**
 * Run the given ops with four component layouts: 1 -> 1, 3 -> 3, 4 -> 4, and
 * 4 -> 2 with a trailing (0, 3, 1, 2) swizzle. The test names get the
 * suffixes "_p1000", "_p1110", "_p1111" and "_p1001" respectively
 * (presumably a bitmask of the components in use; confirm against the
 * backends).
 *
 * Wrapped in do { } while (0) so the four checks form a single statement and
 * stay together when the macro is the body of an unbraced if/for. The ops in
 * __VA_ARGS__ are expanded, and therefore evaluated, once per layout.
 */
#define CHECK_COMMON_RANGE(NAME, RANGE, IN, OUT, ...) \
    do { \
        CHECK_RANGE(FMT("%s_p1000", NAME), RANGE, 1, 1, IN, OUT, __VA_ARGS__); \
        CHECK_RANGE(FMT("%s_p1110", NAME), RANGE, 3, 3, IN, OUT, __VA_ARGS__); \
        CHECK_RANGE(FMT("%s_p1111", NAME), RANGE, 4, 4, IN, OUT, __VA_ARGS__); \
        CHECK_RANGE(FMT("%s_p1001", NAME), RANGE, 4, 2, IN, OUT, __VA_ARGS__, { \
            .op      = SWS_OP_SWIZZLE, \
            .type    = OUT, \
            .swizzle = SWS_SWIZZLE(0, 3, 1, 2), \
        }); \
    } while (0)

/** CHECK_RANGE() with a range of 0, i.e. unrestricted random input */
#define CHECK(NAME, N_IN, N_OUT, IN, OUT, ...) \
    CHECK_RANGE(NAME, 0, N_IN, N_OUT, IN, OUT, __VA_ARGS__)

/** CHECK_COMMON_RANGE() with a range of 0, i.e. unrestricted random input */
#define CHECK_COMMON(NAME, IN, OUT, ...) \
    CHECK_COMMON_RANGE(NAME, 0, IN, OUT, __VA_ARGS__)
286 
287 static void check_read_write(void)
288 {
289  for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) {
290  const char *type = ff_sws_pixel_type_name(t);
291  for (int i = 1; i <= 4; i++) {
292  /* Test N->N planar read/write */
293  for (int o = 1; o <= i; o++) {
294  check_ops(FMT("rw_%d_%d_%s", i, o, type), NULL, (SwsOp[]) {
295  {
296  .op = SWS_OP_READ,
297  .type = t,
298  .rw.elems = i,
299  }, {
300  .op = SWS_OP_WRITE,
301  .type = t,
302  .rw.elems = o,
303  }, {0}
304  });
305  }
306 
307  /* Test packed read/write */
308  if (i == 1)
309  continue;
310 
311  check_ops(FMT("read_packed%d_%s", i, type), NULL, (SwsOp[]) {
312  {
313  .op = SWS_OP_READ,
314  .type = t,
315  .rw.elems = i,
316  .rw.packed = true,
317  }, {
318  .op = SWS_OP_WRITE,
319  .type = t,
320  .rw.elems = i,
321  }, {0}
322  });
323 
324  check_ops(FMT("write_packed%d_%s", i, type), NULL, (SwsOp[]) {
325  {
326  .op = SWS_OP_READ,
327  .type = t,
328  .rw.elems = i,
329  }, {
330  .op = SWS_OP_WRITE,
331  .type = t,
332  .rw.elems = i,
333  .rw.packed = true,
334  }, {0}
335  });
336  }
337  }
338 
339  /* Test fractional reads/writes */
340  for (int frac = 1; frac <= 3; frac++) {
341  const int bits = 8 >> frac;
342  const int range = (1 << bits) - 1;
343  if (bits == 2)
344  continue; /* no 2 bit packed formats currently exist */
345 
346  check_ops(FMT("read_frac%d", frac), NULL, (SwsOp[]) {
347  {
348  .op = SWS_OP_READ,
349  .type = U8,
350  .rw.elems = 1,
351  .rw.frac = frac,
352  }, {
353  .op = SWS_OP_WRITE,
354  .type = U8,
355  .rw.elems = 1,
356  }, {0}
357  });
358 
359  check_ops(FMT("write_frac%d", frac), MK_RANGES(range), (SwsOp[]) {
360  {
361  .op = SWS_OP_READ,
362  .type = U8,
363  .rw.elems = 1,
364  }, {
365  .op = SWS_OP_WRITE,
366  .type = U8,
367  .rw.elems = 1,
368  .rw.frac = frac,
369  }, {0}
370  });
371  }
372 }
373 
374 static void check_swap_bytes(void)
375 {
376  CHECK_COMMON("swap_bytes_16", U16, U16, {
377  .op = SWS_OP_SWAP_BYTES,
378  .type = U16,
379  });
380 
381  CHECK_COMMON("swap_bytes_32", U32, U32, {
382  .op = SWS_OP_SWAP_BYTES,
383  .type = U32,
384  });
385 }
386 
387 static void check_pack_unpack(void)
388 {
389  const struct {
391  SwsPackOp op;
392  } patterns[] = {
393  { U8, {{ 3, 3, 2 }}},
394  { U8, {{ 2, 3, 3 }}},
395  { U8, {{ 1, 2, 1 }}},
396  {U16, {{ 5, 6, 5 }}},
397  {U16, {{ 5, 5, 5 }}},
398  {U16, {{ 4, 4, 4 }}},
399  {U32, {{ 2, 10, 10, 10 }}},
400  {U32, {{10, 10, 10, 2 }}},
401  };
402 
403  for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) {
404  const SwsPixelType type = patterns[i].type;
405  const SwsPackOp pack = patterns[i].op;
406  const int num = pack.pattern[3] ? 4 : 3;
407  const char *pat = FMT("%d%d%d%d", pack.pattern[0], pack.pattern[1],
408  pack.pattern[2], pack.pattern[3]);
409  const int total = pack.pattern[0] + pack.pattern[1] +
410  pack.pattern[2] + pack.pattern[3];
411  const unsigned ranges[4] = {
412  (1 << pack.pattern[0]) - 1,
413  (1 << pack.pattern[1]) - 1,
414  (1 << pack.pattern[2]) - 1,
415  (1 << pack.pattern[3]) - 1,
416  };
417 
418  CHECK_RANGES(FMT("pack_%s", pat), ranges, num, 1, type, type, {
419  .op = SWS_OP_PACK,
420  .type = type,
421  .pack = pack,
422  });
423 
424  CHECK_RANGE(FMT("unpack_%s", pat), UINT32_MAX >> (32 - total), 1, num, type, type, {
425  .op = SWS_OP_UNPACK,
426  .type = type,
427  .pack = pack,
428  });
429  }
430 }
431 
433 {
434  const unsigned num = rnd();
435  if (ff_sws_pixel_type_is_int(t)) {
436  const unsigned mask = UINT_MAX >> (32 - ff_sws_pixel_type_size(t) * 8);
437  return (AVRational) { num & mask, 1 };
438  } else {
439  const unsigned den = rnd();
440  return (AVRational) { num, den ? den : 1 };
441  }
442 }
443 
444 static void check_clear(void)
445 {
446  for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) {
447  const char *type = ff_sws_pixel_type_name(t);
448  const int bits = ff_sws_pixel_type_size(t) * 8;
449 
450  /* TODO: AVRational can't fit 32 bit constants */
451  if (bits < 32) {
452  const AVRational chroma = (AVRational) { 1 << (bits - 1), 1};
453  const AVRational alpha = (AVRational) { (1 << bits) - 1, 1};
454  const AVRational zero = (AVRational) { 0, 1};
455  const AVRational none = {0};
456 
457  const SwsConst patterns[] = {
458  /* Zero only */
459  {.q4 = { none, none, none, zero }},
460  {.q4 = { zero, none, none, none }},
461  /* Alpha only */
462  {.q4 = { none, none, none, alpha }},
463  {.q4 = { alpha, none, none, none }},
464  /* Chroma only */
465  {.q4 = { chroma, chroma, none, none }},
466  {.q4 = { none, chroma, chroma, none }},
467  {.q4 = { none, none, chroma, chroma }},
468  {.q4 = { chroma, none, chroma, none }},
469  {.q4 = { none, chroma, none, chroma }},
470  /* Alpha+chroma */
471  {.q4 = { chroma, chroma, none, alpha }},
472  {.q4 = { none, chroma, chroma, alpha }},
473  {.q4 = { alpha, none, chroma, chroma }},
474  {.q4 = { chroma, none, chroma, alpha }},
475  {.q4 = { alpha, chroma, none, chroma }},
476  /* Random values */
477  {.q4 = { none, rndq(t), rndq(t), rndq(t) }},
478  {.q4 = { none, rndq(t), rndq(t), rndq(t) }},
479  {.q4 = { none, rndq(t), rndq(t), rndq(t) }},
480  {.q4 = { none, rndq(t), rndq(t), rndq(t) }},
481  };
482 
483  for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) {
484  CHECK(FMT("clear_pattern_%s[%d]", type, i), 4, 4, t, t, {
485  .op = SWS_OP_CLEAR,
486  .type = t,
487  .c = patterns[i],
488  });
489  }
490  } else if (!ff_sws_pixel_type_is_int(t)) {
491  /* Floating point YUV doesn't exist, only alpha needs to be cleared */
492  CHECK(FMT("clear_alpha_%s", type), 4, 4, t, t, {
493  .op = SWS_OP_CLEAR,
494  .type = t,
495  .c.q4[3] = { 0, 1 },
496  });
497  }
498  }
499 }
500 
501 static void check_shift(void)
502 {
503  for (SwsPixelType t = U16; t < SWS_PIXEL_TYPE_NB; t++) {
504  const char *type = ff_sws_pixel_type_name(t);
505  if (!ff_sws_pixel_type_is_int(t))
506  continue;
507 
508  for (int shift = 1; shift <= 8; shift++) {
509  CHECK_COMMON(FMT("lshift%d_%s", shift, type), t, t, {
510  .op = SWS_OP_LSHIFT,
511  .type = t,
512  .c.u = shift,
513  });
514 
515  CHECK_COMMON(FMT("rshift%d_%s", shift, type), t, t, {
516  .op = SWS_OP_RSHIFT,
517  .type = t,
518  .c.u = shift,
519  });
520  }
521  }
522 }
523 
524 static void check_swizzle(void)
525 {
526  for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) {
527  const char *type = ff_sws_pixel_type_name(t);
528  static const int patterns[][4] = {
529  /* Pure swizzle */
530  {3, 0, 1, 2},
531  {3, 0, 2, 1},
532  {2, 1, 0, 3},
533  {3, 2, 1, 0},
534  {3, 1, 0, 2},
535  {3, 2, 0, 1},
536  {1, 2, 0, 3},
537  {1, 0, 2, 3},
538  {2, 0, 1, 3},
539  {2, 3, 1, 0},
540  {2, 1, 3, 0},
541  {1, 2, 3, 0},
542  {1, 3, 2, 0},
543  {0, 2, 1, 3},
544  {0, 2, 3, 1},
545  {0, 3, 1, 2},
546  {3, 1, 2, 0},
547  {0, 3, 2, 1},
548  /* Luma expansion */
549  {0, 0, 0, 3},
550  {3, 0, 0, 0},
551  {0, 0, 0, 1},
552  {1, 0, 0, 0},
553  };
554 
555  for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) {
556  const int x = patterns[i][0], y = patterns[i][1],
557  z = patterns[i][2], w = patterns[i][3];
558  CHECK(FMT("swizzle_%d%d%d%d_%s", x, y, z, w, type), 4, 4, t, t, {
559  .op = SWS_OP_SWIZZLE,
560  .type = t,
561  .swizzle = SWS_SWIZZLE(x, y, z, w),
562  });
563  }
564  }
565 }
566 
567 static void check_convert(void)
568 {
569  for (SwsPixelType i = U8; i < SWS_PIXEL_TYPE_NB; i++) {
570  const char *itype = ff_sws_pixel_type_name(i);
571  const int isize = ff_sws_pixel_type_size(i);
572  for (SwsPixelType o = U8; o < SWS_PIXEL_TYPE_NB; o++) {
573  const char *otype = ff_sws_pixel_type_name(o);
574  const int osize = ff_sws_pixel_type_size(o);
575  const char *name = FMT("convert_%s_%s", itype, otype);
576  if (i == o)
577  continue;
578 
579  if (isize < osize || !ff_sws_pixel_type_is_int(o)) {
580  CHECK_COMMON(name, i, o, {
581  .op = SWS_OP_CONVERT,
582  .type = i,
583  .convert.to = o,
584  });
585  } else if (isize > osize || !ff_sws_pixel_type_is_int(i)) {
586  uint32_t range = UINT32_MAX >> (32 - osize * 8);
588  .op = SWS_OP_CONVERT,
589  .type = i,
590  .convert.to = o,
591  });
592  }
593  }
594  }
595 
596  /* Check expanding conversions */
597  CHECK_COMMON("expand16", U8, U16, {
598  .op = SWS_OP_CONVERT,
599  .type = U8,
600  .convert.to = U16,
601  .convert.expand = true,
602  });
603 
604  CHECK_COMMON("expand32", U8, U32, {
605  .op = SWS_OP_CONVERT,
606  .type = U8,
607  .convert.to = U32,
608  .convert.expand = true,
609  });
610 }
611 
612 static void check_dither(void)
613 {
614  for (SwsPixelType t = F32; t < SWS_PIXEL_TYPE_NB; t++) {
615  const char *type = ff_sws_pixel_type_name(t);
617  continue;
618 
619  /* Test all sizes up to 256x256 */
620  for (int size_log2 = 0; size_log2 <= 8; size_log2++) {
621  const int size = 1 << size_log2;
622  const int mask = size - 1;
624  if (!matrix) {
625  fail();
626  return;
627  }
628 
629  if (size == 1) {
630  matrix[0] = (AVRational) { 1, 2 };
631  } else {
632  for (int i = 0; i < size * size; i++)
633  matrix[i] = rndq(t);
634  }
635 
636  CHECK_COMMON(FMT("dither_%dx%d_%s", size, size, type), t, t, {
637  .op = SWS_OP_DITHER,
638  .type = t,
639  .dither.size_log2 = size_log2,
640  .dither.matrix = matrix,
641  .dither.y_offset = {0, 3 & mask, 2 & mask, 5 & mask},
642  });
643 
645  }
646  }
647 }
648 
649 static void check_min_max(void)
650 {
651  for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) {
652  const char *type = ff_sws_pixel_type_name(t);
653  CHECK_COMMON(FMT("min_%s", type), t, t, {
654  .op = SWS_OP_MIN,
655  .type = t,
656  .c.q4 = { rndq(t), rndq(t), rndq(t), rndq(t) },
657  });
658 
659  CHECK_COMMON(FMT("max_%s", type), t, t, {
660  .op = SWS_OP_MAX,
661  .type = t,
662  .c.q4 = { rndq(t), rndq(t), rndq(t), rndq(t) },
663  });
664  }
665 }
666 
667 static void check_linear(void)
668 {
669  static const struct {
670  const char *name;
671  uint32_t mask;
672  } patterns[] = {
673  { "noop", 0 },
674  { "luma", SWS_MASK_LUMA },
675  { "alpha", SWS_MASK_ALPHA },
676  { "luma+alpha", SWS_MASK_LUMA | SWS_MASK_ALPHA },
677  { "dot3", 0x7 },
678  { "dot4", 0xF },
679  { "row0", SWS_MASK_ROW(0) },
680  { "row0+alpha", SWS_MASK_ROW(0) | SWS_MASK_ALPHA },
681  { "off3", SWS_MASK_OFF3 },
682  { "off3+alpha", SWS_MASK_OFF3 | SWS_MASK_ALPHA },
683  { "diag3", SWS_MASK_DIAG3 },
684  { "diag4", SWS_MASK_DIAG4 },
685  { "diag3+alpha", SWS_MASK_DIAG3 | SWS_MASK_ALPHA },
686  { "diag3+off3", SWS_MASK_DIAG3 | SWS_MASK_OFF3 },
687  { "diag3+off3+alpha", SWS_MASK_DIAG3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA },
688  { "diag4+off4", SWS_MASK_DIAG4 | SWS_MASK_OFF4 },
689  { "matrix3", SWS_MASK_MAT3 },
690  { "matrix3+off3", SWS_MASK_MAT3 | SWS_MASK_OFF3 },
691  { "matrix3+off3+alpha", SWS_MASK_MAT3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA },
692  { "matrix4", SWS_MASK_MAT4 },
693  { "matrix4+off4", SWS_MASK_MAT4 | SWS_MASK_OFF4 },
694  };
695 
696  for (SwsPixelType t = F32; t < SWS_PIXEL_TYPE_NB; t++) {
697  const char *type = ff_sws_pixel_type_name(t);
699  continue;
700 
701  for (int p = 0; p < FF_ARRAY_ELEMS(patterns); p++) {
702  const uint32_t mask = patterns[p].mask;
703  SwsLinearOp lin = { .mask = mask };
704 
705  for (int i = 0; i < 4; i++) {
706  for (int j = 0; j < 5; j++) {
707  if (mask & SWS_MASK(i, j)) {
708  lin.m[i][j] = rndq(t);
709  } else {
710  lin.m[i][j] = (AVRational) { i == j, 1 };
711  }
712  }
713  }
714 
715  CHECK(FMT("linear_%s_%s", patterns[p].name, type), 4, 4, t, t, {
716  .op = SWS_OP_LINEAR,
717  .type = t,
718  .lin = lin,
719  });
720  }
721  }
722 }
723 
724 static void check_scale(void)
725 {
726  for (SwsPixelType t = F32; t < SWS_PIXEL_TYPE_NB; t++) {
727  const char *type = ff_sws_pixel_type_name(t);
728  const int bits = ff_sws_pixel_type_size(t) * 8;
729  if (ff_sws_pixel_type_is_int(t)) {
730  /* Ensure the result won't exceed the value range */
731  const unsigned max = (1 << bits) - 1;
732  const unsigned scale = rnd() & max;
733  const unsigned range = max / (scale ? scale : 1);
734  CHECK_COMMON_RANGE(FMT("scale_%s", type), range, t, t, {
735  .op = SWS_OP_SCALE,
736  .type = t,
737  .c.q = { scale, 1 },
738  });
739  } else {
740  CHECK_COMMON(FMT("scale_%s", type), t, t, {
741  .op = SWS_OP_SCALE,
742  .type = t,
743  .c.q = rndq(t),
744  });
745  }
746  }
747 }
748 
750 {
752  report("read_write");
754  report("swap_bytes");
756  report("pack_unpack");
757  check_clear();
758  report("clear");
759  check_shift();
760  report("shift");
761  check_swizzle();
762  report("swizzle");
763  check_convert();
764  report("convert");
765  check_dither();
766  report("dither");
767  check_min_max();
768  report("min_max");
769  check_linear();
770  report("linear");
771  check_scale();
772  report("scale");
773 }
SWS_OP_READ
@ SWS_OP_READ
Definition: ops.h:47
SWS_PIXEL_U16
@ SWS_PIXEL_U16
Definition: ops.h:33
name
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf default minimum maximum flags name is the option name
Definition: writing_filters.txt:88
SWS_OP_SWIZZLE
@ SWS_OP_SWIZZLE
Definition: ops.h:50
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
SwsCompiledOp::func
SwsOpFunc func
Definition: ops_dispatch.h:78
SWS_OP_LSHIFT
@ SWS_OP_LSHIFT
Definition: ops.h:55
SWS_OP_UNPACK
@ SWS_OP_UNPACK
Definition: ops.h:53
mem_internal.h
comp
static void comp(unsigned char *dst, ptrdiff_t dst_stride, unsigned char *src, ptrdiff_t src_stride, int add)
Definition: eamad.c:79
SwsOpExec::in_bump
ptrdiff_t in_bump[4]
Definition: ops_dispatch.h:45
SwsConst
Definition: ops.h:81
SWS_OP_CLEAR
@ SWS_OP_CLEAR
Definition: ops.h:59
CHECK_COMMON
#define CHECK_COMMON(NAME, IN, OUT,...)
Definition: sw_ops.c:284
SwsOpExec::in
const uint8_t * in[4]
Definition: ops_dispatch.h:37
SwsOpExec::out_stride
ptrdiff_t out_stride[4]
Definition: ops_dispatch.h:42
SwsLinearOp::m
AVRational m[4][5]
Generalized 5x5 affine transformation: [ Out.x ] = [ A B C D E ] [ Out.y ] = [ F G H I J ] * [ x y z ...
Definition: ops.h:160
matrix
Definition: vc1dsp.c:43
src1
const pixel * src1
Definition: h264pred_template.c:420
NB_PLANES
@ NB_PLANES
Definition: sw_ops.c:33
mask
int mask
Definition: mediacodecdec_common.c:154
SwsOp::rw
SwsReadWriteOp rw
Definition: ops.h:193
check_min_max
static void check_min_max(void)
Definition: sw_ops.c:649
ops.h
u
#define u(width, name, range_min, range_max)
Definition: cbs_apv.c:68
SWS_OP_DITHER
@ SWS_OP_DITHER
Definition: ops.h:67
checkasm_check_sw_ops
void checkasm_check_sw_ops(void)
Definition: sw_ops.c:749
SWS_BITEXACT
@ SWS_BITEXACT
Definition: swscale.h:157
SwsOpExec::block_size_in
int32_t block_size_in
Definition: ops_dispatch.h:51
check_convert
static void check_convert(void)
Definition: sw_ops.c:567
chroma
static av_always_inline void chroma(WaveformContext *s, AVFrame *in, AVFrame *out, int component, int intensity, int offset_y, int offset_x, int column, int mirror, int jobnr, int nb_jobs)
Definition: vf_waveform.c:1639
check_swap_bytes
static void check_swap_bytes(void)
Definition: sw_ops.c:374
CHECK_COMMON_RANGE
#define CHECK_COMMON_RANGE(NAME, RANGE, IN, OUT,...)
Definition: sw_ops.c:271
check_read_write
static void check_read_write(void)
Definition: sw_ops.c:287
U32
@ U32
Definition: sw_ops.c:41
max
#define max(a, b)
Definition: cuda_runtime.h:33
SWS_PIXEL_U32
@ SWS_PIXEL_U32
Definition: ops.h:34
SwsOpExec::in_stride
ptrdiff_t in_stride[4]
Definition: ops_dispatch.h:41
check_linear
static void check_linear(void)
Definition: sw_ops.c:667
SwsOpBackend::name
const char * name
Definition: ops_internal.h:56
ff_sws_pixel_type_size
int ff_sws_pixel_type_size(SwsPixelType type)
Definition: ops.c:63
rndf
static float rndf(void)
Definition: sw_ops.c:64
F32
@ F32
Definition: sw_ops.c:42
SWS_MASK_ROW
#define SWS_MASK_ROW(I)
Definition: ops.h:166
DECLARE_ALIGNED_64
#define DECLARE_ALIGNED_64(t, v)
Definition: mem_internal.h:114
check_clear
static void check_clear(void)
Definition: sw_ops.c:444
SwsPixelType
SwsPixelType
Copyright (C) 2025 Niklas Haas.
Definition: ops.h:30
CHECK_RANGE
#define CHECK_RANGE(NAME, RANGE, N_IN, N_OUT, IN, OUT,...)
Definition: sw_ops.c:268
SWS_MASK_OFF3
@ SWS_MASK_OFF3
Definition: ops.h:175
SWS_PIXEL_F32
@ SWS_PIXEL_F32
Definition: ops.h:35
ff_sws_op_backends
const SwsOpBackend *const ff_sws_op_backends[]
Definition: ops.c:36
check_scale
static void check_scale(void)
Definition: sw_ops.c:724
fail
#define fail()
Definition: checkasm.h:221
SwsOpList::num_ops
int num_ops
Definition: ops.h:226
checkasm.h
SWS_PIXEL_U8
@ SWS_PIXEL_U8
Definition: ops.h:32
ff_sws_pixel_type_is_int
bool ff_sws_pixel_type_is_int(SwsPixelType type)
Definition: ops.c:78
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
refstruct.h
U8
@ U8
Definition: sw_ops.c:39
SwsLinearOp::mask
uint32_t mask
Definition: ops.h:161
av_refstruct_allocz
static void * av_refstruct_allocz(size_t size)
Equivalent to av_refstruct_alloc_ext(size, 0, NULL, NULL)
Definition: refstruct.h:105
SwsOp::op
SwsOpType op
Definition: ops.h:189
SWS_OP_SCALE
@ SWS_OP_SCALE
Definition: ops.h:63
avassert.h
rnd
#define rnd()
Definition: checkasm.h:204
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
SWS_MASK_MAT4
@ SWS_MASK_MAT4
Definition: ops.h:182
float
float
Definition: af_crystalizer.c:122
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:60
SWS_SWIZZLE
#define SWS_SWIZZLE(X, Y, Z, W)
Definition: ops.h:134
AVFormatContext::flags
int flags
Flags modifying the (de)muxer behaviour.
Definition: avformat.h:1414
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
bits
uint8_t bits
Definition: vp3data.h:128
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:42
SWS_MASK_OFF4
@ SWS_MASK_OFF4
Definition: ops.h:181
SWS_OP_MIN
@ SWS_OP_MIN
Definition: ops.h:61
fill32
static void fill32(uint32_t *line, int num, unsigned range)
Definition: sw_ops.c:80
MK_RANGES
#define MK_RANGES(R)
Definition: sw_ops.c:267
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
FMT
#define FMT(fmt,...)
Definition: sw_ops.c:45
SWS_OP_LINEAR
@ SWS_OP_LINEAR
Definition: ops.h:66
SwsOpBackend
Definition: ops_internal.h:55
PIXELS
@ PIXELS
Definition: sw_ops.c:34
SWS_OP_PACK
@ SWS_OP_PACK
Definition: ops.h:54
SwsOpExec::height
int32_t height
Definition: ops_dispatch.h:49
SwsOpExec
Copyright (C) 2026 Niklas Haas.
Definition: ops_dispatch.h:35
fill16
static void fill16(uint16_t *line, int num, unsigned range)
Definition: sw_ops.c:86
rw_pixel_bits
static int rw_pixel_bits(const SwsOp *op)
Definition: sw_ops.c:55
NULL
#define NULL
Definition: coverity.c:32
tprintf
static const char * tprintf(char buf[], size_t size, const char *fmt,...)
Definition: sw_ops.c:46
ff_sws_compiled_op_unref
void ff_sws_compiled_op_unref(SwsCompiledOp *comp)
Definition: ops_dispatch.c:97
LINES
@ LINES
Definition: sw_ops.c:35
AVRational
Rational number (pair of numerator and denominator).
Definition: rational.h:58
SwsOpExec::slice_h
int32_t slice_h
Definition: ops_dispatch.h:50
SWS_MASK
#define SWS_MASK(I, J)
Definition: ops.h:164
SwsPackOp::pattern
uint8_t pattern[4]
Packed bits are assumed to be LSB-aligned within the underlying integer type; i.e.
Definition: ops.h:119
f
f
Definition: af_crystalizer.c:122
SwsOpExec::block_size_out
int32_t block_size_out
Definition: ops_dispatch.h:52
sws_alloc_context
SwsContext * sws_alloc_context(void)
Allocate an empty SwsContext and set its fields to default values.
Definition: utils.c:1031
shift
static int shift(int a, int b)
Definition: bonk.c:261
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
SwsOp::type
SwsPixelType type
Definition: ops.h:190
check_ops
static void check_ops(const char *report, const unsigned ranges[NB_PLANES], const SwsOp *ops)
Definition: sw_ops.c:106
size
int size
Definition: twinvq_data.h:10344
SWS_OP_RSHIFT
@ SWS_OP_RSHIFT
Definition: ops.h:56
range
enum AVColorRange range
Definition: mediacodec_wrapper.c:2594
SWS_OP_WRITE
@ SWS_OP_WRITE
Definition: ops.h:48
line
Definition: graph2dot.c:48
SWS_MASK_ALPHA
@ SWS_MASK_ALPHA
Definition: ops.h:172
SwsLinearOp
Definition: ops.h:147
zero
static int zero(InterplayACMContext *s, unsigned ind, unsigned col)
Definition: interplayacm.c:121
av_refstruct_unref
void av_refstruct_unref(void *objp)
Decrement the reference count of the underlying object and automatically free the object if there are...
Definition: refstruct.c:120
fill32f
static void fill32f(float *line, int num, unsigned range)
Definition: sw_ops.c:73
SwsOpExec::out
uint8_t * out[4]
Definition: ops_dispatch.h:38
SWS_MASK_DIAG3
@ SWS_MASK_DIAG3
Definition: ops.h:174
report
#define report
Definition: checkasm.h:224
SwsOpList::ops
SwsOp * ops
Definition: ops.h:225
SwsPackOp
Definition: ops.h:114
vsnprintf
#define vsnprintf
Definition: snprintf.h:36
av_assert1
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:58
SwsConst::q4
AVRational q4[4]
Definition: ops.h:83
ops_internal.h
SWS_MASK_MAT3
@ SWS_MASK_MAT3
Definition: ops.h:176
checkasm_call_checked
#define checkasm_call_checked(func,...)
Definition: checkasm.h:336
SwsOp
Definition: ops.h:188
SwsOpExec::width
int32_t width
Definition: ops_dispatch.h:49
SwsCompiledOp::priv
void * priv
Definition: ops_dispatch.h:99
SwsCompiledOp::block_size
int block_size
Definition: ops_dispatch.h:94
ret
ret
Definition: filter_design.txt:187
check_shift
static void check_shift(void)
Definition: sw_ops.c:501
SWS_OP_MAX
@ SWS_OP_MAX
Definition: ops.h:62
SwsCompiledOp
Definition: ops_dispatch.h:75
bench
#define bench(func,...)
Definition: checkasm.h:421
id
enum AVCodecID id
Definition: dts2pts.c:549
check_swizzle
static void check_swizzle(void)
Definition: sw_ops.c:524
SWS_PIXEL_TYPE_NB
@ SWS_PIXEL_TYPE_NB
Definition: ops.h:36
SwsReadWriteOp::packed
bool packed
Definition: ops.h:103
ff_sws_pixel_type_name
const char * ff_sws_pixel_type_name(SwsPixelType type)
Definition: ops.c:48
SWS_OP_SWAP_BYTES
@ SWS_OP_SWAP_BYTES
Definition: ops.h:49
SWS_MASK_LUMA
@ SWS_MASK_LUMA
Definition: ops.h:171
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
ff_sws_ops_compile_backend
int ff_sws_ops_compile_backend(SwsContext *ctx, const SwsOpBackend *backend, const SwsOpList *ops, SwsCompiledOp *out)
Attempt to compile a list of operations using a specific backend.
Definition: ops_dispatch.c:48
src0
const pixel *const src0
Definition: h264pred_template.c:419
rndq
static AVRational rndq(SwsPixelType t)
Definition: sw_ops.c:432
check_key
#define check_key(key,...)
Definition: checkasm.h:212
w
uint8_t w
Definition: llvidencdsp.c:39
declare_func
#define declare_func(ret,...)
Definition: checkasm.h:216
scale
static void scale(int *out, const int *in, const int w, const int h, const int shift)
Definition: intra.c:278
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
SWS_OP_CONVERT
@ SWS_OP_CONVERT
Definition: ops.h:60
fill8
static void fill8(uint8_t *line, int num, unsigned range)
Definition: sw_ops.c:96
sws_free_context
void sws_free_context(SwsContext **ctx)
Free the context and everything associated with it, and write NULL to the provided pointer.
Definition: utils.c:2368
checkasm_call
#define checkasm_call(func,...)
Definition: checkasm.h:230
check_pack_unpack
static void check_pack_unpack(void)
Definition: sw_ops.c:387
stride
#define stride
Definition: h264pred_template.c:536
checkasm_check
#define checkasm_check(prefix,...)
Definition: checkasm.h:470
U16
@ U16
Definition: sw_ops.c:40
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:224
SwsContext
Main external API structure.
Definition: swscale.h:206
CHECK
#define CHECK(NAME, N_IN, N_OUT, IN, OUT,...)
Definition: sw_ops.c:281
CHECK_RANGES
#define CHECK_RANGES(NAME, RANGES, N_IN, N_OUT, IN, OUT,...)
Definition: sw_ops.c:250
SwsOpExec::out_bump
ptrdiff_t out_bump[4]
Definition: ops_dispatch.h:46
check_dither
static void check_dither(void)
Definition: sw_ops.c:612
SWS_MASK_DIAG4
@ SWS_MASK_DIAG4
Definition: ops.h:180