FFmpeg
ops_optimizer.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/avassert.h"
22 #include "libavutil/bswap.h"
23 #include "libavutil/rational.h"
24 
25 #include "ops.h"
26 #include "ops_internal.h"
27 
/* Store the result of x in the enclosing function's local `ret`, and return
 * it from that function immediately if it is negative. */
#define RET(x)          \
    do {                \
        ret = (x);      \
        if (ret < 0)    \
            return ret; \
    } while (0)
33 
34 /**
35  * Try to commute a clear op with the next operation. Makes any adjustments
36  * to the operations as needed, but does not perform the actual commutation.
37  *
38  * Returns whether successful.
39  */
40 static bool op_commute_clear(SwsOp *op, SwsOp *next)
41 {
42  SwsOp tmp;
43 
44  av_assert1(op->op == SWS_OP_CLEAR);
45  switch (next->op) {
46  case SWS_OP_CONVERT:
47  op->type = next->convert.to;
48  /* fall through */
49  case SWS_OP_LSHIFT:
50  case SWS_OP_RSHIFT:
51  case SWS_OP_DITHER:
52  case SWS_OP_MIN:
53  case SWS_OP_MAX:
54  case SWS_OP_SCALE:
55  case SWS_OP_READ:
56  case SWS_OP_SWIZZLE:
57  ff_sws_apply_op_q(next, op->c.q4);
58  return true;
59  case SWS_OP_SWAP_BYTES:
60  switch (next->type) {
61  case SWS_PIXEL_U16:
62  ff_sws_apply_op_q(next, op->c.q4); /* always works */
63  return true;
64  case SWS_PIXEL_U32:
65  for (int i = 0; i < 4; i++) {
66  uint32_t v = av_bswap32(op->c.q4[i].num);
67  if (v > INT_MAX)
68  return false; /* can't represent as AVRational anymore */
69  tmp.c.q4[i] = Q(v);
70  }
71  op->c = tmp.c;
72  return true;
73  default:
74  return false;
75  }
76  case SWS_OP_INVALID:
77  case SWS_OP_WRITE:
78  case SWS_OP_LINEAR:
79  case SWS_OP_PACK:
80  case SWS_OP_UNPACK:
81  case SWS_OP_CLEAR:
82  return false;
83  case SWS_OP_TYPE_NB:
84  break;
85  }
86 
87  av_unreachable("Invalid operation type!");
88  return false;
89 }
90 
91  /**
92  * Try to commute a swizzle op with the next operation. Makes any adjustments
93  * to the operations as needed, but does not perform the actual commutation.
94  *
95  * Returns whether successful.
96  */
97 static bool op_commute_swizzle(SwsOp *op, SwsOp *next)
98 {
99  bool seen[4] = {0};
100 
101  av_assert1(op->op == SWS_OP_SWIZZLE);
102  switch (next->op) {
103  case SWS_OP_CONVERT:
104  op->type = next->convert.to;
105  /* fall through */
106  case SWS_OP_SWAP_BYTES:
107  case SWS_OP_LSHIFT:
108  case SWS_OP_RSHIFT:
109  case SWS_OP_SCALE:
110  return true;
111 
112  /**
113  * We can commute per-channel ops only if the per-channel constants are the
114  * same for all duplicated channels; e.g.:
115  * SWIZZLE {0, 0, 0, 3}
116  * NEXT {x, x, x, w}
117  * ->
118  * NEXT {x, _, _, w}
119  * SWIZZLE {0, 0, 0, 3}
120  */
121  case SWS_OP_MIN:
122  case SWS_OP_MAX: {
123  const SwsConst c = next->c;
124  for (int i = 0; i < 4; i++) {
125  if (next->comps.unused[i])
126  continue;
127  const int j = op->swizzle.in[i];
128  if (seen[j] && av_cmp_q(next->c.q4[j], c.q4[i]))
129  return false;
130  next->c.q4[j] = c.q4[i];
131  seen[j] = true;
132  }
133  return true;
134  }
135 
136  case SWS_OP_DITHER: {
137  const SwsDitherOp d = next->dither;
138  for (int i = 0; i < 4; i++) {
139  if (next->comps.unused[i])
140  continue;
141  const int j = op->swizzle.in[i];
142  if (seen[j] && next->dither.y_offset[j] != d.y_offset[i])
143  return false;
144  next->dither.y_offset[j] = d.y_offset[i];
145  seen[j] = true;
146  }
147  return true;
148  }
149 
150  case SWS_OP_INVALID:
151  case SWS_OP_READ:
152  case SWS_OP_WRITE:
153  case SWS_OP_SWIZZLE:
154  case SWS_OP_CLEAR:
155  case SWS_OP_LINEAR:
156  case SWS_OP_PACK:
157  case SWS_OP_UNPACK:
158  return false;
159  case SWS_OP_TYPE_NB:
160  break;
161  }
162 
163  av_unreachable("Invalid operation type!");
164  return false;
165 }
166 
/* Yields the exponent p when x == 1 << p, and 0 for every other input
 * (including x <= 0). Note that x == 1 therefore also yields 0. */
static int exact_log2(const int x)
{
    if (x > 0) {
        const int bits = av_log2(x);
        if (x == (1 << bits))
            return bits;
    }
    return 0;
}
176 
177 static int exact_log2_q(const AVRational x)
178 {
179  if (x.den == 1)
180  return exact_log2(x.num);
181  else if (x.num == 1)
182  return -exact_log2(x.den);
183  else
184  return 0;
185 }
186 
187 /**
188  * If a linear operation can be reduced to a scalar multiplication, returns
189  * the corresponding scaling factor, or 0 otherwise.
190  */
191 static bool extract_scalar(const SwsLinearOp *c, SwsComps prev, SwsComps next,
192  SwsConst *out_scale)
193 {
194  SwsConst scale = {0};
195 
196  /* There are components not on the main diagonal */
197  if (c->mask & ~SWS_MASK_DIAG4)
198  return false;
199 
200  for (int i = 0; i < 4; i++) {
201  const AVRational s = c->m[i][i];
202  if ((prev.flags[i] & SWS_COMP_ZERO) || next.unused[i])
203  continue;
204  if (scale.q.den && av_cmp_q(s, scale.q))
205  return false;
206  scale.q = s;
207  }
208 
209  if (scale.q.den)
210  *out_scale = scale;
211  return scale.q.den;
212 }
213 
214 /* Extracts an integer clear operation (subset) from the given linear op. */
216  SwsConst *out_clear)
217 {
218  SwsConst clear = {0};
219  bool ret = false;
220 
221  for (int i = 0; i < 4; i++) {
222  bool const_row = c->m[i][4].den == 1; /* offset is integer */
223  for (int j = 0; j < 4; j++) {
224  const_row &= c->m[i][j].num == 0 || /* scalar is zero */
225  (prev.flags[j] & SWS_COMP_ZERO); /* input is zero */
226  }
227  if (const_row && (c->mask & SWS_MASK_ROW(i))) {
228  clear.q4[i] = c->m[i][4];
229  for (int j = 0; j < 5; j++)
230  c->m[i][j] = Q(i == j);
231  c->mask &= ~SWS_MASK_ROW(i);
232  ret = true;
233  }
234  }
235 
236  if (ret)
237  *out_clear = clear;
238  return ret;
239 }
240 
241 /* Unswizzle a linear operation by aligning single-input rows with
242  * their corresponding diagonal */
243 static bool extract_swizzle(SwsLinearOp *op, SwsComps prev, SwsSwizzleOp *out_swiz)
244 {
245  SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
246  SwsLinearOp c = *op;
247 
248  /* Find non-zero coefficients in the main 4x4 matrix */
249  uint32_t nonzero = 0;
250  for (int i = 0; i < 4; i++) {
251  for (int j = 0; j < 4; j++) {
252  if (!c.m[i][j].num || (prev.flags[j] & SWS_COMP_ZERO))
253  continue;
254  nonzero |= SWS_MASK(i, j);
255  }
256  }
257 
258  /* If a value is unique in its row and the target column is
259  * empty, move it there and update the input swizzle */
260  for (int i = 0; i < 4; i++) {
261  if (nonzero & SWS_MASK_COL(i))
262  continue; /* target column is not empty */
263  for (int j = 0; j < 4; j++) {
264  if ((nonzero & SWS_MASK_ROW(i)) == SWS_MASK(i, j)) {
265  /* Move coefficient to the diagonal */
266  c.m[i][i] = c.m[i][j];
267  c.m[i][j] = Q(0);
268  swiz.in[i] = j;
269  break;
270  }
271  }
272  }
273 
274  if (swiz.mask == SWS_SWIZZLE(0, 1, 2, 3).mask)
275  return false; /* no swizzle was identified */
276 
277  c.mask = ff_sws_linear_mask(c);
278  *out_swiz = swiz;
279  *op = c;
280  return true;
281 }
282 
284 {
285  int ret;
286 
287 retry:
289 
290  /* Apply all in-place optimizations (that do not re-order the list) */
291  for (int n = 0; n < ops->num_ops; n++) {
292  SwsOp dummy = {0};
293  SwsOp *op = &ops->ops[n];
294  SwsOp *prev = n ? &ops->ops[n - 1] : &dummy;
295  SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : &dummy;
296 
297  /* common helper variable */
298  bool noop = true;
299 
300  if (next->comps.unused[0] && next->comps.unused[1] &&
301  next->comps.unused[2] && next->comps.unused[3])
302  {
303  /* Remove completely unused operations */
304  ff_sws_op_list_remove_at(ops, n, 1);
305  goto retry;
306  }
307 
308  switch (op->op) {
309  case SWS_OP_READ:
310  /* "Compress" planar reads where not all components are needed */
311  if (!op->rw.packed) {
312  SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
313  int nb_planes = 0;
314  for (int i = 0; i < op->rw.elems; i++) {
315  if (next->comps.unused[i]) {
316  swiz.in[i] = 3 - (i - nb_planes); /* map to unused plane */
317  continue;
318  }
319 
320  const int idx = nb_planes++;
321  av_assert1(idx <= i);
322  ops->order_src.in[idx] = ops->order_src.in[i];
323  swiz.in[i] = idx;
324  }
325 
326  if (nb_planes < op->rw.elems) {
327  op->rw.elems = nb_planes;
328  RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
329  .op = SWS_OP_SWIZZLE,
330  .type = op->type,
331  .swizzle = swiz,
332  }));
333  goto retry;
334  }
335  }
336  break;
337 
338  case SWS_OP_SWAP_BYTES:
339  /* Redundant (double) swap */
340  if (next->op == SWS_OP_SWAP_BYTES) {
341  ff_sws_op_list_remove_at(ops, n, 2);
342  goto retry;
343  }
344  break;
345 
346  case SWS_OP_UNPACK:
347  /* Redundant unpack+pack */
348  if (next->op == SWS_OP_PACK && next->type == op->type &&
349  next->pack.pattern[0] == op->pack.pattern[0] &&
350  next->pack.pattern[1] == op->pack.pattern[1] &&
351  next->pack.pattern[2] == op->pack.pattern[2] &&
352  next->pack.pattern[3] == op->pack.pattern[3])
353  {
354  ff_sws_op_list_remove_at(ops, n, 2);
355  goto retry;
356  }
357  break;
358 
359  case SWS_OP_LSHIFT:
360  case SWS_OP_RSHIFT:
361  /* Two shifts in the same direction */
362  if (next->op == op->op) {
363  op->c.u += next->c.u;
364  ff_sws_op_list_remove_at(ops, n + 1, 1);
365  goto retry;
366  }
367 
368  /* No-op shift */
369  if (!op->c.u) {
370  ff_sws_op_list_remove_at(ops, n, 1);
371  goto retry;
372  }
373  break;
374 
375  case SWS_OP_CLEAR:
376  for (int i = 0; i < 4; i++) {
377  if (!op->c.q4[i].den)
378  continue;
379 
380  if ((prev->comps.flags[i] & SWS_COMP_ZERO) &&
381  !(prev->comps.flags[i] & SWS_COMP_GARBAGE) &&
382  op->c.q4[i].num == 0)
383  {
384  /* Redundant clear-to-zero of zero component */
385  op->c.q4[i].den = 0;
386  } else if (next->comps.unused[i]) {
387  /* Unnecessary clear of unused component */
388  op->c.q4[i] = (AVRational) {0, 0};
389  } else if (op->c.q4[i].den) {
390  noop = false;
391  }
392  }
393 
394  if (noop) {
395  ff_sws_op_list_remove_at(ops, n, 1);
396  goto retry;
397  }
398 
399  /* Transitive clear */
400  if (next->op == SWS_OP_CLEAR) {
401  for (int i = 0; i < 4; i++) {
402  if (next->c.q4[i].den)
403  op->c.q4[i] = next->c.q4[i];
404  }
405  ff_sws_op_list_remove_at(ops, n + 1, 1);
406  goto retry;
407  }
408  break;
409 
410  case SWS_OP_SWIZZLE:
411  for (int i = 0; i < 4; i++) {
412  if (next->comps.unused[i])
413  continue;
414  if (op->swizzle.in[i] != i)
415  noop = false;
416  }
417 
418  /* Identity swizzle */
419  if (noop) {
420  ff_sws_op_list_remove_at(ops, n, 1);
421  goto retry;
422  }
423 
424  /* Transitive swizzle */
425  if (next->op == SWS_OP_SWIZZLE) {
426  const SwsSwizzleOp orig = op->swizzle;
427  for (int i = 0; i < 4; i++)
428  op->swizzle.in[i] = orig.in[next->swizzle.in[i]];
429  ff_sws_op_list_remove_at(ops, n + 1, 1);
430  goto retry;
431  }
432 
433  /* Swizzle planes instead of components, if possible */
434  if (prev->op == SWS_OP_READ && !prev->rw.packed) {
435  for (int dst = 0; dst < prev->rw.elems; dst++) {
436  const int src = op->swizzle.in[dst];
437  if (src > dst && src < prev->rw.elems) {
438  FFSWAP(int, ops->order_src.in[dst], ops->order_src.in[src]);
439  for (int i = dst; i < 4; i++) {
440  if (op->swizzle.in[i] == dst)
441  op->swizzle.in[i] = src;
442  else if (op->swizzle.in[i] == src)
443  op->swizzle.in[i] = dst;
444  }
445  goto retry;
446  }
447  }
448  }
449 
450  if (next->op == SWS_OP_WRITE && !next->rw.packed) {
451  for (int dst = 0; dst < next->rw.elems; dst++) {
452  const int src = op->swizzle.in[dst];
453  if (src > dst && src < next->rw.elems) {
454  FFSWAP(int, ops->order_dst.in[dst], ops->order_dst.in[src]);
455  FFSWAP(int, op->swizzle.in[dst], op->swizzle.in[src]);
456  goto retry;
457  }
458  }
459  }
460  break;
461 
462  case SWS_OP_CONVERT:
463  /* No-op conversion */
464  if (op->type == op->convert.to) {
465  ff_sws_op_list_remove_at(ops, n, 1);
466  goto retry;
467  }
468 
469  /* Transitive conversion */
470  if (next->op == SWS_OP_CONVERT &&
471  op->convert.expand == next->convert.expand)
472  {
473  av_assert1(op->convert.to == next->type);
474  op->convert.to = next->convert.to;
475  ff_sws_op_list_remove_at(ops, n + 1, 1);
476  goto retry;
477  }
478 
479  /* Conversion followed by integer expansion */
480  if (next->op == SWS_OP_SCALE && !op->convert.expand &&
481  ff_sws_pixel_type_is_int(op->type) &&
482  ff_sws_pixel_type_is_int(op->convert.to) &&
483  !av_cmp_q(next->c.q, ff_sws_pixel_expand(op->type, op->convert.to)))
484  {
485  op->convert.expand = true;
486  ff_sws_op_list_remove_at(ops, n + 1, 1);
487  goto retry;
488  }
489  break;
490 
491  case SWS_OP_MIN:
492  for (int i = 0; i < 4; i++) {
493  if (next->comps.unused[i] || !op->c.q4[i].den)
494  continue;
495  if (av_cmp_q(op->c.q4[i], prev->comps.max[i]) < 0)
496  noop = false;
497  }
498 
499  if (noop) {
500  ff_sws_op_list_remove_at(ops, n, 1);
501  goto retry;
502  }
503  break;
504 
505  case SWS_OP_MAX:
506  for (int i = 0; i < 4; i++) {
507  if (next->comps.unused[i] || !op->c.q4[i].den)
508  continue;
509  if (av_cmp_q(prev->comps.min[i], op->c.q4[i]) < 0)
510  noop = false;
511  }
512 
513  if (noop) {
514  ff_sws_op_list_remove_at(ops, n, 1);
515  goto retry;
516  }
517  break;
518 
519  case SWS_OP_DITHER:
520  for (int i = 0; i < 4; i++) {
521  noop &= (prev->comps.flags[i] & SWS_COMP_EXACT) ||
522  next->comps.unused[i];
523  }
524 
525  if (noop) {
526  ff_sws_op_list_remove_at(ops, n, 1);
527  goto retry;
528  }
529  break;
530 
531  case SWS_OP_LINEAR: {
532  SwsSwizzleOp swizzle;
533  SwsConst c;
534 
535  /* No-op (identity) linear operation */
536  if (!op->lin.mask) {
537  ff_sws_op_list_remove_at(ops, n, 1);
538  goto retry;
539  }
540 
541  if (next->op == SWS_OP_LINEAR) {
542  /* 5x5 matrix multiplication after appending [ 0 0 0 0 1 ] */
543  const SwsLinearOp m1 = op->lin;
544  const SwsLinearOp m2 = next->lin;
545  for (int i = 0; i < 4; i++) {
546  for (int j = 0; j < 5; j++) {
547  AVRational sum = Q(0);
548  for (int k = 0; k < 4; k++)
549  sum = av_add_q(sum, av_mul_q(m2.m[i][k], m1.m[k][j]));
550  if (j == 4) /* m1.m[4][j] == 1 */
551  sum = av_add_q(sum, m2.m[i][4]);
552  op->lin.m[i][j] = sum;
553  }
554  }
555  op->lin.mask = ff_sws_linear_mask(op->lin);
556  ff_sws_op_list_remove_at(ops, n + 1, 1);
557  goto retry;
558  }
559 
560  /* Optimize away zero columns */
561  for (int j = 0; j < 4; j++) {
562  const uint32_t col = SWS_MASK_COL(j);
563  if (!(prev->comps.flags[j] & SWS_COMP_ZERO) || !(op->lin.mask & col))
564  continue;
565  for (int i = 0; i < 4; i++)
566  op->lin.m[i][j] = Q(i == j);
567  op->lin.mask &= ~col;
568  goto retry;
569  }
570 
571  /* Optimize away unused rows */
572  for (int i = 0; i < 4; i++) {
573  const uint32_t row = SWS_MASK_ROW(i);
574  if (!next->comps.unused[i] || !(op->lin.mask & row))
575  continue;
576  for (int j = 0; j < 5; j++)
577  op->lin.m[i][j] = Q(i == j);
578  op->lin.mask &= ~row;
579  goto retry;
580  }
581 
582  /* Convert constant rows to explicit clear instruction */
583  if (extract_constant_rows(&op->lin, prev->comps, &c)) {
584  RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
585  .op = SWS_OP_CLEAR,
586  .type = op->type,
587  .comps = op->comps,
588  .c = c,
589  }));
590  goto retry;
591  }
592 
593  /* Multiplication by scalar constant */
594  if (extract_scalar(&op->lin, prev->comps, next->comps, &c)) {
595  op->op = SWS_OP_SCALE;
596  op->c = c;
597  goto retry;
598  }
599 
600  /* Swizzle by fixed pattern */
601  if (extract_swizzle(&op->lin, prev->comps, &swizzle)) {
602  RET(ff_sws_op_list_insert_at(ops, n, &(SwsOp) {
603  .op = SWS_OP_SWIZZLE,
604  .type = op->type,
605  .swizzle = swizzle,
606  }));
607  goto retry;
608  }
609  break;
610  }
611 
612  case SWS_OP_SCALE: {
613  const int factor2 = exact_log2_q(op->c.q);
614 
615  /* No-op scaling */
616  if (op->c.q.num == 1 && op->c.q.den == 1) {
617  ff_sws_op_list_remove_at(ops, n, 1);
618  goto retry;
619  }
620 
621  /* Scaling by exact power of two */
622  if (factor2 && ff_sws_pixel_type_is_int(op->type)) {
623  op->op = factor2 > 0 ? SWS_OP_LSHIFT : SWS_OP_RSHIFT;
624  op->c.u = FFABS(factor2);
625  goto retry;
626  }
627  break;
628  }
629  }
630  }
631 
632  /* Push clears to the back to void any unused components */
633  for (int n = 0; n < ops->num_ops - 1; n++) {
634  SwsOp *op = &ops->ops[n];
635  SwsOp *next = &ops->ops[n + 1];
636 
637  switch (op->op) {
638  case SWS_OP_CLEAR:
639  if (op_commute_clear(op, next)) {
640  FFSWAP(SwsOp, *op, *next);
641  goto retry;
642  }
643  break;
644  }
645  }
646 
647  /* Apply any remaining preferential re-ordering optimizations; do these
648  * last because they are more likely to block other optimizations if done
649  * too aggressively */
650  for (int n = 0; n < ops->num_ops - 1; n++) {
651  SwsOp *op = &ops->ops[n];
652  SwsOp *next = &ops->ops[n + 1];
653 
654  switch (op->op) {
655  case SWS_OP_SWIZZLE: {
656  /* Try to push swizzles towards the output */
657  if (op_commute_swizzle(op, next)) {
658  FFSWAP(SwsOp, *op, *next);
659  goto retry;
660  }
661  break;
662  }
663 
664  case SWS_OP_SCALE:
665  /* Scaling by integer before conversion to int */
666  if (op->c.q.den == 1 && next->op == SWS_OP_CONVERT &&
668  {
669  op->type = next->convert.to;
670  FFSWAP(SwsOp, *op, *next);
671  goto retry;
672  }
673  break;
674  }
675  }
676 
677  return 0;
678 }
679 
680 int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[],
681  int size, uint8_t clear_val,
682  int *read_bytes, int *write_bytes)
683 {
684  if (!ops->num_ops)
685  return AVERROR(EINVAL);
686 
687  const SwsOp read = ops->ops[0];
688  const int read_size = ff_sws_pixel_type_size(read.type);
689  uint32_t mask[4] = {0};
690 
691  if (read.op != SWS_OP_READ || read.rw.frac ||
692  (!read.rw.packed && read.rw.elems > 1))
693  return AVERROR(ENOTSUP);
694 
695  for (int i = 0; i < read.rw.elems; i++)
696  mask[i] = 0x01010101 * i * read_size + 0x03020100;
697 
698  for (int opidx = 1; opidx < ops->num_ops; opidx++) {
699  const SwsOp *op = &ops->ops[opidx];
700  switch (op->op) {
701  case SWS_OP_SWIZZLE: {
702  uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] };
703  for (int i = 0; i < 4; i++)
704  mask[i] = orig[op->swizzle.in[i]];
705  break;
706  }
707 
708  case SWS_OP_SWAP_BYTES:
709  for (int i = 0; i < 4; i++) {
710  switch (ff_sws_pixel_type_size(op->type)) {
711  case 2: mask[i] = av_bswap16(mask[i]); break;
712  case 4: mask[i] = av_bswap32(mask[i]); break;
713  }
714  }
715  break;
716 
717  case SWS_OP_CLEAR:
718  for (int i = 0; i < 4; i++) {
719  if (!op->c.q4[i].den)
720  continue;
721  if (op->c.q4[i].num != 0 || !clear_val)
722  return AVERROR(ENOTSUP);
723  mask[i] = 0x1010101ul * clear_val;
724  }
725  break;
726 
727  case SWS_OP_CONVERT: {
728  if (!op->convert.expand)
729  return AVERROR(ENOTSUP);
730  for (int i = 0; i < 4; i++) {
731  switch (ff_sws_pixel_type_size(op->type)) {
732  case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF); break;
733  case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break;
734  }
735  }
736  break;
737  }
738 
739  case SWS_OP_WRITE: {
740  if (op->rw.frac || (!op->rw.packed && op->rw.elems > 1))
741  return AVERROR(ENOTSUP);
742 
743  /* Initialize to no-op */
744  memset(shuffle, clear_val, size);
745 
746  const int write_size = ff_sws_pixel_type_size(op->type);
747  const int read_chunk = read.rw.elems * read_size;
748  const int write_chunk = op->rw.elems * write_size;
749  const int num_groups = size / FFMAX(read_chunk, write_chunk);
750  for (int n = 0; n < num_groups; n++) {
751  const int base_in = n * read_chunk;
752  const int base_out = n * write_chunk;
753  for (int i = 0; i < op->rw.elems; i++) {
754  const int offset = base_out + i * write_size;
755  for (int b = 0; b < write_size; b++) {
756  const uint8_t idx = mask[i] >> (b * 8);
757  if (idx != clear_val)
758  shuffle[offset + b] = base_in + idx;
759  }
760  }
761  }
762 
763  *read_bytes = num_groups * read_chunk;
764  *write_bytes = num_groups * write_chunk;
765  return num_groups;
766  }
767 
768  default:
769  return AVERROR(ENOTSUP);
770  }
771  }
772 
773  return AVERROR(EINVAL);
774 }
SWS_OP_READ
@ SWS_OP_READ
Definition: ops.h:47
SWS_PIXEL_U16
@ SWS_PIXEL_U16
Definition: ops.h:33
SwsComps::flags
unsigned flags[4]
Definition: ops.h:90
SWS_OP_SWIZZLE
@ SWS_OP_SWIZZLE
Definition: ops.h:50
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
SWS_OP_LSHIFT
@ SWS_OP_LSHIFT
Definition: ops.h:55
SWS_OP_UNPACK
@ SWS_OP_UNPACK
Definition: ops.h:53
SwsSwizzleOp::mask
uint32_t mask
Definition: ops.h:126
SwsConst
Definition: ops.h:79
SWS_COMP_ZERO
@ SWS_COMP_ZERO
Definition: ops.h:75
SWS_OP_CLEAR
@ SWS_OP_CLEAR
Definition: ops.h:59
ff_sws_linear_mask
uint32_t ff_sws_linear_mask(const SwsLinearOp c)
Definition: ops.c:595
SwsOp::swizzle
SwsSwizzleOp swizzle
Definition: ops.h:193
SwsLinearOp::m
AVRational m[4][5]
Generalized 5x5 affine transformation: [ Out.x ] = [ A B C D E ] [ Out.y ] = [ F G H I J ] * [ x y z ...
Definition: ops.h:158
SwsComps::unused
bool unused[4]
Definition: ops.h:91
SwsOp::convert
SwsConvertOp convert
Definition: ops.h:194
rational.h
mask
int mask
Definition: mediacodecdec_common.c:154
SwsOp::rw
SwsReadWriteOp rw
Definition: ops.h:191
ops.h
SWS_OP_DITHER
@ SWS_OP_DITHER
Definition: ops.h:67
read_bytes
static void read_bytes(const uint8_t *src, float *dst, int src_stride, int dst_stride, int width, int height, float scale)
Definition: vf_nnedi.c:442
b
#define b
Definition: input.c:42
ff_sws_op_list_optimize
int ff_sws_op_list_optimize(SwsOpList *ops)
Fuse compatible and eliminate redundant operations, as well as replacing some operations with more ef...
Definition: ops_optimizer.c:283
SWS_PIXEL_U32
@ SWS_PIXEL_U32
Definition: ops.h:34
SWS_OP_TYPE_NB
@ SWS_OP_TYPE_NB
Definition: ops.h:69
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
ff_sws_pixel_type_size
int ff_sws_pixel_type_size(SwsPixelType type)
Definition: ops.c:65
SWS_MASK_ROW
#define SWS_MASK_ROW(I)
Definition: ops.h:164
SwsComps::max
AVRational max[4]
Definition: ops.h:95
SwsOpList::num_ops
int num_ops
Definition: ops.h:224
SWS_MASK_COL
#define SWS_MASK_COL(J)
Definition: ops.h:165
SwsDitherOp
Definition: ops.h:139
dummy
int dummy
Definition: motion.c:64
SwsOp::c
SwsConst c
Definition: ops.h:196
SwsSwizzleOp
Definition: ops.h:120
ff_sws_pixel_type_is_int
bool ff_sws_pixel_type_is_int(SwsPixelType type)
Definition: ops.c:80
AVRational::num
int num
Numerator.
Definition: rational.h:59
SwsOp::op
SwsOpType op
Definition: ops.h:187
Q
#define Q(q)
SWS_OP_SCALE
@ SWS_OP_SCALE
Definition: ops.h:63
avassert.h
SwsDitherOp::y_offset
uint8_t y_offset[4]
Definition: ops.h:142
s
#define s(width, name)
Definition: cbs_vp9.c:198
SWS_SWIZZLE
#define SWS_SWIZZLE(X, Y, Z, W)
Definition: ops.h:132
SwsComps::min
AVRational min[4]
Definition: ops.h:95
read_chunk
static int read_chunk(AVFormatContext *s)
Definition: dhav.c:173
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
SWS_OP_MIN
@ SWS_OP_MIN
Definition: ops.h:61
exact_log2_q
static int exact_log2_q(const AVRational x)
Definition: ops_optimizer.c:177
ff_sws_pixel_expand
static AVRational ff_sws_pixel_expand(SwsPixelType from, SwsPixelType to)
Definition: ops_internal.h:30
SWS_OP_LINEAR
@ SWS_OP_LINEAR
Definition: ops.h:66
tmp
static uint8_t tmp[40]
Definition: aes_ctr.c:52
FFABS
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:74
SWS_OP_PACK
@ SWS_OP_PACK
Definition: ops.h:54
SwsOp::dither
SwsDitherOp dither
Definition: ops.h:195
AVRational
Rational number (pair of numerator and denominator).
Definition: rational.h:58
av_unreachable
#define av_unreachable(msg)
Asserts that are used as compiler optimization hints depending upon ASSERT_LEVEL and NBDEBUG.
Definition: avassert.h:116
SWS_COMP_GARBAGE
@ SWS_COMP_GARBAGE
Definition: ops.h:73
SwsConvertOp::to
SwsPixelType to
Definition: ops.h:135
ff_sws_op_list_remove_at
void ff_sws_op_list_remove_at(SwsOpList *ops, int index, int count)
Definition: ops.c:524
RET
#define RET(x)
Copyright (C) 2025 Niklas Haas.
Definition: ops_optimizer.c:28
SWS_MASK
#define SWS_MASK(I, J)
Definition: ops.h:162
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
ff_sws_apply_op_q
void ff_sws_apply_op_q(const SwsOp *op, AVRational x[4])
Apply an operation to an AVRational.
Definition: ops.c:108
SwsConvertOp::expand
bool expand
Definition: ops.h:136
SwsOpList::order_dst
SwsSwizzleOp order_dst
Definition: ops.h:227
SwsPackOp::pattern
uint8_t pattern[4]
Packed bits are assumed to be LSB-aligned within the underlying integer type; i.e.
Definition: ops.h:117
SwsConst::q
AVRational q
Definition: ops.h:82
extract_constant_rows
static bool extract_constant_rows(SwsLinearOp *c, SwsComps prev, SwsConst *out_clear)
Definition: ops_optimizer.c:215
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
av_bswap32
#define av_bswap32
Definition: bswap.h:47
SwsOp::type
SwsPixelType type
Definition: ops.h:188
ff_sws_op_list_insert_at
int ff_sws_op_list_insert_at(SwsOpList *ops, int index, SwsOp *op)
Definition: ops.c:534
size
int size
Definition: twinvq_data.h:10344
SWS_OP_RSHIFT
@ SWS_OP_RSHIFT
Definition: ops.h:56
SwsOp::lin
SwsLinearOp lin
Definition: ops.h:190
SWS_OP_INVALID
@ SWS_OP_INVALID
Definition: ops.h:44
extract_scalar
static bool extract_scalar(const SwsLinearOp *c, SwsComps prev, SwsComps next, SwsConst *out_scale)
If a linear operation can be reduced to a scalar multiplication, returns the corresponding scaling fa...
Definition: ops_optimizer.c:191
ff_sws_op_list_update_comps
void ff_sws_op_list_update_comps(SwsOpList *ops)
Infer + propagate known information about components.
Definition: ops.c:225
SWS_OP_WRITE
@ SWS_OP_WRITE
Definition: ops.h:48
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
SwsOp::comps
SwsComps comps
Metadata about the operation's input/output components.
Definition: ops.h:206
SwsLinearOp
Definition: ops.h:145
noop
#define noop(a)
Definition: h264chroma_template.c:71
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
extract_swizzle
static bool extract_swizzle(SwsLinearOp *op, SwsComps prev, SwsSwizzleOp *out_swiz)
Definition: ops_optimizer.c:243
SwsOpList::ops
SwsOp * ops
Definition: ops.h:223
SwsOpList::order_src
SwsSwizzleOp order_src
Definition: ops.h:227
av_assert1
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:58
SwsConst::q4
AVRational q4[4]
Definition: ops.h:81
ops_internal.h
SwsOp
Definition: ops.h:186
write_bytes
static void write_bytes(const float *src, uint8_t *dst, int src_stride, int dst_stride, int width, int height, int depth, float scale)
Definition: vf_nnedi.c:484
av_cmp_q
static int av_cmp_q(AVRational a, AVRational b)
Compare two rationals.
Definition: rational.h:89
ret
ret
Definition: filter_design.txt:187
bswap.h
FFSWAP
#define FFSWAP(type, a, b)
Definition: macros.h:52
SWS_OP_MAX
@ SWS_OP_MAX
Definition: ops.h:62
op_commute_swizzle
static bool op_commute_swizzle(SwsOp *op, SwsOp *next)
Try to commute a swizzle op with the next operation.
Definition: ops_optimizer.c:97
SwsComps
Definition: ops.h:89
SwsConst::u
unsigned u
Definition: ops.h:83
AVRational::den
int den
Denominator.
Definition: rational.h:60
SwsReadWriteOp::packed
bool packed
Definition: ops.h:101
SWS_OP_SWAP_BYTES
@ SWS_OP_SWAP_BYTES
Definition: ops.h:49
ff_sws_solve_shuffle
int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[], int size, uint8_t clear_val, int *read_bytes, int *write_bytes)
"Solve" an op list into a fixed shuffle mask, with an optional ability to also directly clear the out...
Definition: ops_optimizer.c:680
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
av_mul_q
AVRational av_mul_q(AVRational b, AVRational c)
Multiply two rationals.
Definition: rational.c:80
SWS_COMP_EXACT
@ SWS_COMP_EXACT
Definition: ops.h:74
SwsReadWriteOp::elems
uint8_t elems
Definition: ops.h:99
scale
static void scale(int *out, const int *in, const int w, const int h, const int shift)
Definition: intra.c:278
av_add_q
AVRational av_add_q(AVRational b, AVRational c)
Add two rationals.
Definition: rational.c:93
SWS_MASK_DIAG4
@ SWS_MASK_DIAG4
Definition: ops.h:178
SwsSwizzleOp::in
uint8_t in[4]
Definition: ops.h:127
SWS_OP_CONVERT
@ SWS_OP_CONVERT
Definition: ops.h:60
op_commute_clear
static bool op_commute_clear(SwsOp *op, SwsOp *next)
Try to commute a clear op with the next operation.
Definition: ops_optimizer.c:40
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:222
av_bswap16
#define av_bswap16
Definition: bswap.h:28
SwsOp::pack
SwsPackOp pack
Definition: ops.h:192
shuffle
static uint64_t shuffle(uint64_t in, const uint8_t *shuffle, int shuffle_len)
Definition: des.c:179
av_log2
int av_log2(unsigned v)
Definition: intmath.c:26
src
#define src
Definition: vp8dsp.c:248
read
static uint32_t BS_FUNC() read(BSCTX *bc, unsigned int n)
Return n bits from the buffer, n has to be in the 0-32 range.
Definition: bitstream_template.h:239
exact_log2
static int exact_log2(const int x)
Definition: ops_optimizer.c:168