FFmpeg
ops_dispatch.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/avassert.h"
22 #include "libavutil/cpu.h"
23 #include "libavutil/mathematics.h"
24 #include "libavutil/mem.h"
25 #include "libavutil/mem_internal.h"
26 #include "libavutil/refstruct.h"
27 
28 #include "ops.h"
29 #include "ops_internal.h"
30 #include "ops_dispatch.h"
31 
/**
 * Private state attached to a graph pass that runs a compiled operation list.
 *
 * NOTE(review): this listing is missing several members that the functions
 * in this file access (e.g. comp, exec_base, exec_tail, planes_out,
 * pixel_bits_in/out, tail_off_in/out, tail_size_in/out, filter_size,
 * memcpy_first, memcpy_last); only the members visible here are documented.
 */
32 typedef struct SwsOpPass {
36  size_t num_blocks; /* blocks per line; set by op_pass_setup() to ceil(width / block size) */
41  int planes_in; /* number of input planes, from rw_planes() of the read op */
45  int idx_in[4]; /* frame plane index for each input plane, -1 if unused */
46  int idx_out[4]; /* frame plane index for each output plane, -1 if unused */
47  int *offsets_y; /* source line for each output line (vertical filter only), else NULL */
51  bool memcpy_out; /* output tail must be written via tail_buf and copied back */
52  size_t tail_blocks; /* trailing blocks per line that are dispatched separately */
53  uint8_t *tail_buf; /* extra memory for fixing unpadded tails */
54  unsigned int tail_buf_size; /* allocated size of tail_buf, maintained by av_fast_mallocz() */
55 } SwsOpPass;
56 
58  const SwsOpList *ops, SwsCompiledOp *out)
59 {
 /*
  * Compile an operation list with one specific backend.
  *
  * The backend's compile() callback is run on `copy`; on success the
  * compiled result is stored in *out, on failure *out is left untouched
  * and the (negative) error code is returned.
  *
  * NOTE(review): the first line of the signature, the statement that
  * assigns `copy`, the statement following the "Ensure these are always
  * set" comment and the statement just before the final return are missing
  * from this extract.
  */
60  SwsOpList *copy;
61  SwsCompiledOp compiled = {0};
62  int ret = 0;
63 
65  if (!copy)
66  return AVERROR(ENOMEM);
67 
68  /* Ensure these are always set during compilation */
70 
71  ret = backend->compile(ctx, copy, &compiled);
72  if (ret < 0) {
 /* An unsupported op list is an expected outcome, so it is only traced */
73  int msg_lev = ret == AVERROR(ENOTSUP) ? AV_LOG_TRACE : AV_LOG_ERROR;
74  av_log(ctx, msg_lev, "Backend '%s' failed to compile operations: %s\n",
75  backend->name, av_err2str(ret));
76  } else {
77  *out = compiled;
78  }
79 
81  return ret;
82 }
83 
85 {
 /*
  * Try every entry of the NULL-terminated ff_sws_op_backends[] table in
  * order and return 0 as soon as one of them compiles `ops` into *out.
  * Returns AVERROR(ENOTSUP) if no backend succeeded.
  *
  * NOTE(review): the signature line and one statement before `return 0`
  * are missing from this extract.
  */
86  for (int n = 0; ff_sws_op_backends[n]; n++) {
87  const SwsOpBackend *backend = ff_sws_op_backends[n];
 /* Both ends of the op list must use the backend's hardware format */
88  if (ops->src.hw_format != backend->hw_format ||
89  ops->dst.hw_format != backend->hw_format)
90  continue;
 /* Any compile error just moves on to the next backend */
91  if (ff_sws_ops_compile_backend(ctx, backend, ops, out) < 0)
92  continue;
93 
94  av_log(ctx, AV_LOG_VERBOSE, "Compiled using backend '%s': "
95  "block size = %d, over-read = %d, over-write = %d, cpu flags = 0x%x\n",
96  backend->name, out->block_size, out->over_read, out->over_write,
97  out->cpu_flags);
98 
100  return 0;
101  }
102 
103  return AVERROR(ENOTSUP);
104 }
105 
107 {
 /*
  * Release a compiled op: run its free() callback on the private data (if
  * a callback is set) and reset the whole structure to zero, so calling
  * this again on the same object does nothing.
  *
  * NOTE(review): the signature line is missing from this extract.
  */
108  if (comp->free)
109  comp->free(comp->priv);
110 
111  *comp = (SwsCompiledOp) {0};
112 }
113 
114 static void op_pass_free(void *ptr)
115 {
116  SwsOpPass *p = ptr;
117  if (!p)
118  return;
119 
120  ff_sws_compiled_op_unref(&p->comp);
121  av_refstruct_unref(&p->offsets_y);
122  av_free(p->exec_base.in_bump_y);
123  av_free(p->exec_base.in_offset_x);
124  av_free(p->tail_buf);
125  av_free(p);
126 }
127 
128 static inline void get_row_data(const SwsOpPass *p, const int y_dst,
129  const uint8_t *in[4], uint8_t *out[4])
130 {
131  const SwsOpExec *base = &p->exec_base;
132  const int y_src = p->offsets_y ? p->offsets_y[y_dst] : y_dst;
133  for (int i = 0; i < p->planes_in; i++)
134  in[i] = base->in[i] + (y_src >> base->in_sub_y[i]) * base->in_stride[i];
135  for (int i = 0; i < p->planes_out; i++)
136  out[i] = base->out[i] + (y_dst >> base->out_sub_y[i]) * base->out_stride[i];
137 }
138 
139 static inline size_t pixel_bytes(size_t pixels, int pixel_bits,
140  enum AVRounding rounding)
141 {
142  const uint64_t bits = (uint64_t) pixels * pixel_bits;
143  switch (rounding) {
144  case AV_ROUND_ZERO:
145  case AV_ROUND_DOWN:
146  return bits >> 3;
147  case AV_ROUND_INF:
148  case AV_ROUND_UP:
149  return (bits + 7) >> 3;
150  default:
151  av_unreachable("Invalid rounding mode");
152  return (size_t) -1;
153  }
154 }
155 
/**
 * Number of bytes at the start of a line that can be accessed without the
 * access plus `plane_pad` extra bytes running past the line size.
 *
 * @param linesize  line size in bytes; the sign is ignored, must be nonzero
 * @param plane_pad number of bytes accessed beyond the nominal position
 * @return |linesize| - plane_pad, clamped to zero
 */
static size_t safe_bytes_pad(int linesize, int plane_pad)
{
    av_assert1(linesize);

    int64_t magnitude = linesize;
    if (magnitude < 0)
        magnitude = -magnitude;

    const int64_t usable = magnitude - plane_pad;
    return usable > 0 ? usable : 0;
}
162 
/**
 * Count how many leading blocks of a line stay within a byte-offset limit.
 *
 * Starting from all `num_blocks` blocks, trailing blocks are dropped for as
 * long as the offset of the last pixel of the last remaining block exceeds
 * `safe_offset`.
 *
 * @param num_blocks   number of blocks in the line
 * @param block_size   pixels per block
 * @param safe_offset  largest acceptable byte offset
 * @param offset_bytes per-pixel byte offsets, at least
 *                     num_blocks * block_size entries
 * @return number of leading blocks that satisfy the limit (may be 0)
 */
static size_t safe_blocks_offset(size_t num_blocks, unsigned block_size,
                                 ptrdiff_t safe_offset,
                                 const int32_t *offset_bytes)
{
    size_t count;

    for (count = num_blocks; count > 0; count--) {
        const size_t last_pixel = count * block_size - 1;
        if (offset_bytes[last_pixel] <= safe_offset)
            break;
    }

    return count;
}
172 
/**
 * Per-frame setup of an operation pass.
 *
 * Fills the plane pointers, strides, per-line pointer bumps and subsampling
 * shifts of p->exec_base from the given frames, and works out how many
 * leading blocks of each line can be read and written in place given the
 * backend's over-read / over-write amounts. If some trailing blocks cannot,
 * p->exec_tail and the scratch buffer p->tail_buf are prepared so that those
 * blocks can be processed through padded temporary lines.
 *
 * @param out  destination frame
 * @param in   source frame
 * @param pass graph pass whose priv is the SwsOpPass to set up
 * @return 0 on success, AVERROR(EINVAL) if the block-aligned width
 *         overflows, AVERROR(ENOMEM) if the scratch buffer cannot be
 *         allocated
 */
173 static int op_pass_setup(const SwsFrame *out, const SwsFrame *in,
174  const SwsPass *pass)
175 {
176  const AVPixFmtDescriptor *indesc = av_pix_fmt_desc_get(in->format);
177  const AVPixFmtDescriptor *outdesc = av_pix_fmt_desc_get(out->format);
178 
179  SwsOpPass *p = pass->priv;
180  SwsOpExec *exec = &p->exec_base;
181  const SwsCompiledOp *comp = &p->comp;
182 
183  /* Set up main loop parameters */
184  const unsigned block_size = comp->block_size;
185  const size_t num_blocks = (pass->width + block_size - 1) / block_size;
186  const size_t aligned_w = num_blocks * block_size;
187  if (aligned_w < pass->width) /* overflow */
188  return AVERROR(EINVAL);
189  p->num_blocks = num_blocks;
190  p->memcpy_first = false;
191  p->memcpy_last = false;
192  p->memcpy_out = false;
193 
 /* safe_blocks ends up as the minimum over all planes that need a tail */
194  size_t safe_blocks = num_blocks;
195  for (int i = 0; i < p->planes_in; i++) {
196  int idx = p->idx_in[i];
 /* Planes 1 and 2 use the chroma subsampling of the pixel format */
197  int chroma = idx == 1 || idx == 2;
198  int sub_x = chroma ? indesc->log2_chroma_w : 0;
199  int sub_y = chroma ? indesc->log2_chroma_h : 0;
200  size_t safe_bytes = safe_bytes_pad(in->linesize[idx], comp->over_read);
201  size_t safe_blocks_in;
202  if (exec->in_offset_x) {
 /* With an offset map, a block is safe while the offset of its last
  * pixel plus the filter footprint stays within safe_bytes */
203  size_t filter_size = pixel_bytes(p->filter_size, p->pixel_bits_in,
204  AV_ROUND_UP);
205  safe_blocks_in = safe_blocks_offset(num_blocks, block_size,
206  safe_bytes - filter_size,
207  exec->in_offset_x);
208  } else {
209  safe_blocks_in = safe_bytes / exec->block_size_in;
210  }
211 
212  if (safe_blocks_in < num_blocks) {
 /* Negative linesize flags the first line for copying, positive
  * linesize the last line */
213  p->memcpy_first |= in->linesize[idx] < 0;
214  p->memcpy_last |= in->linesize[idx] > 0;
215  safe_blocks = FFMIN(safe_blocks, safe_blocks_in);
216  }
217 
 /* in_bump is what remains of the stride after a full line of blocks */
218  size_t loop_size = num_blocks * exec->block_size_in;
219  exec->in[i] = in->data[idx];
220  exec->in_stride[i] = in->linesize[idx];
221  exec->in_bump[i] = in->linesize[idx] - loop_size;
222  exec->in_sub_y[i] = sub_y;
223  exec->in_sub_x[i] = sub_x;
224  }
225 
226  for (int i = 0; i < p->planes_out; i++) {
227  int idx = p->idx_out[i];
228  int chroma = idx == 1 || idx == 2;
229  int sub_x = chroma ? outdesc->log2_chroma_w : 0;
230  int sub_y = chroma ? outdesc->log2_chroma_h : 0;
231  size_t safe_bytes = safe_bytes_pad(out->linesize[idx], comp->over_write);
232  size_t safe_blocks_out = safe_bytes / exec->block_size_out;
233  if (safe_blocks_out < num_blocks) {
234  p->memcpy_out = true;
235  safe_blocks = FFMIN(safe_blocks, safe_blocks_out);
236  }
237 
238  size_t loop_size = num_blocks * exec->block_size_out;
239  exec->out[i] = out->data[idx];
240  exec->out_stride[i] = out->linesize[idx];
241  exec->out_bump[i] = out->linesize[idx] - loop_size;
242  exec->out_sub_y[i] = sub_y;
243  exec->out_sub_x[i] = sub_x;
244  }
245 
 /* No plane needs a tail: everything is processed in place */
246  const bool memcpy_in = p->memcpy_first || p->memcpy_last;
247  if (!memcpy_in && !p->memcpy_out) {
248  av_assert0(safe_blocks == num_blocks);
249  return 0;
250  }
251 
252  /* Set-up tail section parameters and buffers */
253  SwsOpExec *tail = &p->exec_tail;
254  const int align = av_cpu_max_align();
255  size_t alloc_size = 0;
256  *tail = *exec;
257 
 /* Byte offset and byte size of the tail columns within an output line */
258  const size_t safe_width = safe_blocks * block_size;
259  const size_t tail_size = pass->width - safe_width;
260  p->tail_off_out = pixel_bytes(safe_width, p->pixel_bits_out, AV_ROUND_DOWN);
261  p->tail_size_out = pixel_bytes(tail_size, p->pixel_bits_out, AV_ROUND_UP);
262  p->tail_blocks = num_blocks - safe_blocks;
263 
264  if (exec->in_offset_x) {
 /* Input span runs from the offset of the first tail pixel up to the
  * offset of the last pixel plus the filter footprint */
265  p->tail_off_in = exec->in_offset_x[safe_width];
266  p->tail_size_in = exec->in_offset_x[pass->width - 1] - p->tail_off_in;
267  p->tail_size_in += pixel_bytes(p->filter_size, p->pixel_bits_in, AV_ROUND_UP);
268  } else {
269  p->tail_off_in = pixel_bytes(safe_width, p->pixel_bits_in, AV_ROUND_DOWN);
270  p->tail_size_in = pixel_bytes(tail_size, p->pixel_bits_in, AV_ROUND_UP);
271  }
272 
 /* First pass over the planes: compute scratch strides and total size */
273  const size_t alloc_width = aligned_w - safe_width;
274  for (int i = 0; memcpy_in && i < p->planes_in; i++) {
275  size_t needed_size;
276  if (exec->in_offset_x) {
277  /* The input offset map is already padded to multiples of the block
278  * size, and clamps the input offsets to the image boundaries; so
279  * we just need to compensate for the comp->over_read */
280  needed_size = p->tail_size_in;
281  } else {
282  needed_size = pixel_bytes(alloc_width, p->pixel_bits_in, AV_ROUND_UP);
283  }
284  size_t loop_size = p->tail_blocks * exec->block_size_in;
285  tail->in_stride[i] = FFALIGN(needed_size + comp->over_read, align);
286  tail->in_bump[i] = tail->in_stride[i] - loop_size;
287  alloc_size += tail->in_stride[i] * in->height;
288  }
289 
290  for (int i = 0; p->memcpy_out && i < p->planes_out; i++) {
291  size_t needed_size = pixel_bytes(alloc_width, p->pixel_bits_out, AV_ROUND_UP);
292  size_t loop_size = p->tail_blocks * exec->block_size_out;
293  tail->out_stride[i] = FFALIGN(needed_size + comp->over_write, align);
294  tail->out_bump[i] = tail->out_stride[i] - loop_size;
295  alloc_size += tail->out_stride[i] * out->height;
296  }
297 
298  if (memcpy_in && exec->in_offset_x) {
299  /* `in_offset_x` is indexed relative to the line start, not the start
300  * of the section being processed; so we need to over-allocate this
301  * array to the full width of the image, even though we will only
302  * partially fill in the offsets relevant to the tail region */
303  alloc_size += aligned_w * sizeof(*exec->in_offset_x);
304  }
305 
306  av_fast_mallocz(&p->tail_buf, &p->tail_buf_size, alloc_size);
307  if (!p->tail_buf)
308  return AVERROR(ENOMEM);
309 
 /* Second pass: carve tail_buf up in the same order the sizes were added
  * (input planes, output planes, then the offset table) */
310  uint8_t *tail_buf = p->tail_buf;
311  for (int i = 0; memcpy_in && i < p->planes_in; i++) {
312  tail->in[i] = tail_buf;
313  tail_buf += tail->in_stride[i] * in->height;
314  }
315 
316  for (int i = 0; p->memcpy_out && i < p->planes_out; i++) {
317  tail->out[i] = tail_buf;
318  tail_buf += tail->out_stride[i] * out->height;
319  }
320 
321  if (memcpy_in && exec->in_offset_x) {
 /* Tail offsets are rebased so that they are relative to tail_off_in;
  * only indices [safe_width, aligned_w) are written */
322  tail->in_offset_x = (int32_t *) tail_buf;
323  for (int i = safe_width; i < aligned_w; i++)
324  tail->in_offset_x[i] = exec->in_offset_x[i] - p->tail_off_in;
325  }
326 
327  return 0;
328 }
329 
/**
 * Copy a rectangle of bytes line by line.
 *
 * Strides are signed so that bottom-up (flipped) buffers, whose stride is
 * negative, can be passed directly. Each row address is derived from the
 * base pointer, so no pointer outside the rows actually copied is ever
 * formed.
 *
 * @param dst        first destination row
 * @param dst_stride byte distance between destination rows (may be negative)
 * @param src        first source row
 * @param src_stride byte distance between source rows (may be negative)
 * @param h          number of rows; nothing is copied if h <= 0
 * @param bytes      number of bytes copied per row
 */
static void copy_lines(uint8_t *dst, const ptrdiff_t dst_stride,
                       const uint8_t *src, const ptrdiff_t src_stride,
                       const int h, const size_t bytes)
{
    for (int y = 0; y < h; y++)
        memcpy(dst + y * dst_stride, src + y * src_stride, bytes);
}
340 
341 static void op_pass_run(const SwsFrame *out, const SwsFrame *in, const int y,
342  const int h, const SwsPass *pass)
343 {
344  const SwsOpPass *p = pass->priv;
345  const SwsCompiledOp *comp = &p->comp;
346 
347  /* Fill exec metadata for this slice */
348  DECLARE_ALIGNED_32(SwsOpExec, exec) = p->exec_base;
349  exec.slice_y = y;
350  exec.slice_h = h;
351 
352  /**
353  * To ensure safety, we need to consider the following:
354  *
355  * 1. We can overread the input, unless this is the last line of an
356  * unpadded buffer. All defined operations can handle arbitrary pixel
357  * input, so overread of arbitrary data is fine. For flipped images,
358  * this condition is actually *inverted* to where the first line is
359  * the one at the end of the buffer.
360  *
361  * 2. We can overwrite the output, as long as we don't write more than the
362  * amount of pixels that fit into one linesize. So we always need to
363  * memcpy the last column on the output side if unpadded.
364  */
365 
366  const bool memcpy_in = p->memcpy_last && y + h == pass->height ||
367  p->memcpy_first && y == 0;
368  const bool memcpy_out = p->memcpy_out;
369  const size_t num_blocks = p->num_blocks;
370  const size_t tail_blocks = p->tail_blocks;
371 
372  get_row_data(p, y, exec.in, exec.out);
373  if (!memcpy_in && !memcpy_out) {
374  /* Fast path (fully aligned/padded inputs and outputs) */
375  comp->func(&exec, comp->priv, 0, y, num_blocks, y + h);
376  return;
377  }
378 
379  /* Non-aligned case (slow path); process main blocks as normal, and
380  * a separate tail (via memcpy into an appropriately padded buffer) */
381  if (num_blocks > tail_blocks) {
382  for (int i = 0; i < 4; i++) {
383  /* We process fewer blocks, so the in_bump needs to be increased
384  * to reflect that the plane pointers are left on the last block,
385  * not the end of the processed line, after each loop iteration */
386  exec.in_bump[i] += exec.block_size_in * tail_blocks;
387  exec.out_bump[i] += exec.block_size_out * tail_blocks;
388  }
389 
390  comp->func(&exec, comp->priv, 0, y, num_blocks - tail_blocks, y + h);
391  }
392 
393  DECLARE_ALIGNED_32(SwsOpExec, tail) = p->exec_tail;
394  tail.slice_y = y;
395  tail.slice_h = h;
396 
397  for (int i = 0; i < p->planes_in; i++) {
398  /* Input offsets are relative to the base pointer */
399  if (!exec.in_offset_x || memcpy_in)
400  exec.in[i] += p->tail_off_in;
401  tail.in[i] += y * tail.in_stride[i];
402  }
403  for (int i = 0; i < p->planes_out; i++) {
404  exec.out[i] += p->tail_off_out;
405  tail.out[i] += y * tail.out_stride[i];
406  }
407 
408  for (int i = 0; i < p->planes_in; i++) {
409  if (memcpy_in) {
410  copy_lines((uint8_t *) tail.in[i], tail.in_stride[i],
411  exec.in[i], exec.in_stride[i], h, p->tail_size_in);
412  } else {
413  /* Reuse input pointers directly */
414  const size_t loop_size = tail_blocks * exec.block_size_in;
415  tail.in[i] = exec.in[i];
416  tail.in_stride[i] = exec.in_stride[i];
417  tail.in_bump[i] = exec.in_stride[i] - loop_size;
418  }
419  }
420 
421  for (int i = 0; !memcpy_out && i < p->planes_out; i++) {
422  /* Reuse output pointers directly */
423  const size_t loop_size = tail_blocks * exec.block_size_out;
424  tail.out[i] = exec.out[i];
425  tail.out_stride[i] = exec.out_stride[i];
426  tail.out_bump[i] = exec.out_stride[i] - loop_size;
427  }
428 
429  /* Dispatch kernel over tail */
430  av_assert1(tail_blocks > 0);
431  comp->func(&tail, comp->priv, num_blocks - tail_blocks, y, num_blocks, y + h);
432 
433  for (int i = 0; memcpy_out && i < p->planes_out; i++) {
434  copy_lines(exec.out[i], exec.out_stride[i],
435  tail.out[i], tail.out_stride[i], h, p->tail_size_out);
436  }
437 }
438 
439 static int rw_planes(const SwsOp *op)
440 {
441  return op->rw.packed ? 1 : op->rw.elems;
442 }
443 
444 static int rw_pixel_bits(const SwsOp *op)
445 {
446  const int elems = op->rw.packed ? op->rw.elems : 1;
447  const int size = ff_sws_pixel_type_size(op->type);
448  const int bits = 8 >> op->rw.frac;
449  av_assert1(bits >= 1);
450  return elems * size * bits;
451 }
452 
453 static void align_pass(SwsPass *pass, int block_size, int over_rw, int pixel_bits)
454 {
455  if (!pass)
456  return;
457 
458  /* Add at least as many pixels as needed to cover the padding requirement */
459  const int pad = (over_rw * 8 + pixel_bits - 1) / pixel_bits;
460 
461  SwsPassBuffer *buf = pass->output;
462  buf->width_align = FFMAX(buf->width_align, block_size);
463  buf->width_pad = FFMAX(buf->width_pad, pad);
464 }
465 
/**
 * Compile an operation list and append the resulting pass to the graph.
 *
 * If the compiled op is flagged opaque, its func_opaque/priv/free are handed
 * to ff_sws_graph_add_pass() as-is. Otherwise a SwsOpPass is filled in with
 * the plane counts, pixel sizes, per-block byte sizes, plane index maps and
 * (for filtered reads) the vertical bump table or horizontal offset map,
 * and the pass is added with op_pass_run as its run function.
 *
 * @param graph  graph receiving the new pass
 * @param ops    operation list to compile
 * @param input  pass feeding this one (may be NULL, see align_pass())
 * @param output receives the newly added pass
 * @return 0 on success, a negative AVERROR code otherwise
 *
 * NOTE(review): one continuation line of the final ff_sws_graph_add_pass()
 * call is missing from this extract.
 */
466 static int compile(SwsGraph *graph, const SwsOpList *ops, SwsPass *input,
467  SwsPass **output)
468 {
469  SwsContext *ctx = graph->ctx;
470  SwsOpPass *p = av_mallocz(sizeof(*p));
471  if (!p)
472  return AVERROR(ENOMEM);
473 
474  int ret = ff_sws_ops_compile(ctx, ops, &p->comp);
475  if (ret < 0)
476  goto fail;
477 
478  const SwsCompiledOp *comp = &p->comp;
479  const SwsFormat *dst = &ops->dst;
480  if (p->comp.opaque) {
 /* The wrapper is not needed: keep a copy of the compiled op, free the
  * wrapper only, and let the graph own the compiled op's priv/free */
481  SwsCompiledOp c = *comp;
482  av_free(p);
483  return ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height,
484  input, c.slice_align, c.func_opaque,
485  NULL, c.priv, c.free, output);
486  }
487 
488  const SwsOp *read = ff_sws_op_list_input(ops);
489  const SwsOp *write = ff_sws_op_list_output(ops);
490  p->planes_in = rw_planes(read);
491  p->planes_out = rw_planes(write);
492  p->pixel_bits_in = rw_pixel_bits(read);
493  p->pixel_bits_out = rw_pixel_bits(write);
494  p->exec_base = (SwsOpExec) {
495  .width = dst->width,
496  .height = dst->height,
497  };
498 
 /* A block must cover a whole number of bytes on both sides */
499  const int64_t block_bits_in = (int64_t) comp->block_size * p->pixel_bits_in;
500  const int64_t block_bits_out = (int64_t) comp->block_size * p->pixel_bits_out;
501  if (block_bits_in & 0x7 || block_bits_out & 0x7) {
502  av_log(ctx, AV_LOG_ERROR, "Block size must be a multiple of the pixel size.\n");
503  ret = AVERROR(EINVAL);
504  goto fail;
505  }
506 
507  p->exec_base.block_size_in = block_bits_in >> 3;
508  p->exec_base.block_size_out = block_bits_out >> 3;
509 
 /* Map exec plane slots to frame planes; unused slots are marked -1 */
510  for (int i = 0; i < 4; i++) {
511  p->idx_in[i] = i < p->planes_in ? ops->plane_src[i] : -1;
512  p->idx_out[i] = i < p->planes_out ? ops->plane_dst[i] : -1;
513  }
514 
515  const SwsFilterWeights *filter = read->rw.kernel;
516  if (read->rw.filter == SWS_OP_FILTER_V) {
517  p->offsets_y = av_refstruct_ref(filter->offsets);
518 
519  /* Compute relative pointer bumps for each output line */
520  int32_t *bump = av_malloc_array(filter->dst_size, sizeof(*bump));
521  if (!bump) {
522  ret = AVERROR(ENOMEM);
523  goto fail;
524  }
525 
 /* bump[y] is the source-line distance to the next output line's
  * offset, minus one; the last entry is zero */
526  int line = filter->offsets[0];
527  for (int y = 0; y < filter->dst_size - 1; y++) {
528  int next = filter->offsets[y + 1];
529  bump[y] = next - line - 1;
530  line = next;
531  }
532  bump[filter->dst_size - 1] = 0;
533  p->exec_base.in_bump_y = bump;
534  } else if (read->rw.filter == SWS_OP_FILTER_H) {
535  /* Compute pixel offset map for each output line */
536  const int pixels = FFALIGN(filter->dst_size, p->comp.block_size);
537  int32_t *offset = av_malloc_array(pixels, sizeof(*offset));
538  if (!offset) {
539  ret = AVERROR(ENOMEM);
540  goto fail;
541  }
 /* Stored before filling so that the fail path frees it via op_pass_free */
542  p->exec_base.in_offset_x = offset;
543 
544  for (int x = 0; x < filter->dst_size; x++) {
545  /* Sanity check; if the tap would land on a half-pixel, we cannot
546  * reasonably expect the implementation to know about this. Just
547  * error out in such (theoretical) cases. */
548  int64_t bits = (int64_t) filter->offsets[x] * p->pixel_bits_in;
549  if ((bits & 0x7) || (bits >> 3) > INT32_MAX) {
550  ret = AVERROR(EINVAL);
551  goto fail;
552  }
553  offset[x] = bits >> 3;
554  }
 /* Entries padding the map up to a block multiple repeat the last offset */
555  for (int x = filter->dst_size; x < pixels; x++)
556  offset[x] = offset[filter->dst_size - 1];
557  p->exec_base.block_size_in = 0; /* ptr does not advance */
558  p->filter_size = filter->filter_size;
559  }
560 
561  ret = ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height,
562  input, comp->slice_align, op_pass_run,
564  if (ret < 0)
565  return ret;
566 
 /* Make the buffers on both sides satisfy the kernel's block alignment
  * and over-read / over-write padding */
567  align_pass(input, comp->block_size, comp->over_read, p->pixel_bits_in);
568  align_pass(*output, comp->block_size, comp->over_write, p->pixel_bits_out);
569  return 0;
570 
571 fail:
572  op_pass_free(p);
573  return ret;
574 }
575 
/*
 * Resolve an operation list into one or more graph passes.
 *
 * An end-to-end no-op list yields the input pass unchanged. Otherwise the
 * list is compiled as a single pass; if that reports AVERROR(ENOTSUP), it is
 * split with ff_sws_op_list_subpass() and each part is compiled as its own
 * pass, chained one after another.
 *
 * The operation list is consumed: on every exit path it is freed and *pops
 * is set to NULL. On failure, any passes added to the graph by this call
 * are rolled back.
 *
 * Returns 0 on success, a negative AVERROR code otherwise.
 *
 * NOTE(review): the second line of the signature and three statements (after
 * the SWS_OP_FLAG_OPTIMIZE check, after the "after optimizing" log, and
 * after the "No backend found" log) are missing from this extract.
 */
576 int ff_sws_compile_pass(SwsGraph *graph, SwsOpList **pops, int flags,
578 {
579  const int passes_orig = graph->num_passes;
580  SwsContext *ctx = graph->ctx;
581  SwsOpList *ops = *pops;
582  int ret = 0;
583 
584  /* Check if the whole operation graph is an end-to-end no-op */
585  if (ff_sws_op_list_is_noop(ops)) {
586  *output = input;
587  goto out;
588  }
589 
590  const SwsOp *read = ff_sws_op_list_input(ops);
591  const SwsOp *write = ff_sws_op_list_output(ops);
592  if (!read || !write) {
593  av_log(ctx, AV_LOG_ERROR, "First and last operations must be a read "
594  "and write, respectively.\n");
595  ret = AVERROR(EINVAL);
596  goto out;
597  }
598 
599  if (flags & SWS_OP_FLAG_OPTIMIZE) {
601  if (ret < 0)
602  goto out;
603  av_log(ctx, AV_LOG_DEBUG, "Operation list after optimizing:\n");
605  }
606 
 /* First attempt: the whole list as one pass. Anything other than
  * "not supported" (success included) is final. */
607  ret = compile(graph, ops, input, output);
608  if (ret != AVERROR(ENOTSUP))
609  goto out;
610 
611  av_log(ctx, AV_LOG_DEBUG, "Retrying with separated filter passes.\n");
612  SwsPass *prev = input;
613  while (ops) {
 /* Split off the remainder; `ops` keeps the part to compile now */
614  SwsOpList *rest;
615  ret = ff_sws_op_list_subpass(ops, &rest);
616  if (ret < 0)
617  goto out;
618 
619  if (prev == input && !rest) {
620  /* No point in compiling an unsplit pass again */
621  ret = AVERROR(ENOTSUP);
622  goto out;
623  }
624 
625  ret = compile(graph, ops, prev, &prev);
626  if (ret < 0) {
 /* `ops` itself is released at the `out` label */
627  ff_sws_op_list_free(&rest);
628  goto out;
629  }
630 
631  ff_sws_op_list_free(&ops);
632  ops = rest;
633  }
634 
635  /* Return last subpass successfully compiled */
636  av_log(ctx, AV_LOG_VERBOSE, "Using %d separate passes.\n",
637  graph->num_passes - passes_orig);
638  *output = prev;
639 
640 out:
641  if (ret == AVERROR(ENOTSUP)) {
642  av_log(ctx, AV_LOG_WARNING, "No backend found for operations:\n");
644  }
645  if (ret < 0)
646  ff_sws_graph_rollback(graph, passes_orig);
647  ff_sws_op_list_free(&ops);
648  *pops = NULL;
649  return ret;
650 }
flags
const SwsFlags flags[]
Definition: swscale.c:72
SwsOpPass::tail_buf
uint8_t * tail_buf
Definition: ops_dispatch.c:53
copy_lines
static void copy_lines(uint8_t *dst, const size_t dst_stride, const uint8_t *src, const size_t src_stride, const int h, const size_t bytes)
Definition: ops_dispatch.c:330
AV_ROUND_UP
@ AV_ROUND_UP
Round toward +infinity.
Definition: mathematics.h:134
SwsOpPass::filter_size
int filter_size
Definition: ops_dispatch.c:48
SwsOpPass::tail_buf_size
unsigned int tail_buf_size
Definition: ops_dispatch.c:54
rw_planes
static int rw_planes(const SwsOp *op)
Definition: ops_dispatch.c:439
ff_sws_op_list_free
void ff_sws_op_list_free(SwsOpList **p_ops)
Definition: ops.c:620
AV_LOG_WARNING
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:216
SwsGraph::ctx
SwsContext * ctx
Definition: graph.h:122
SwsPass
Represents a single filter pass in the scaling graph.
Definition: graph.h:75
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
SwsOpPass::idx_in
int idx_in[4]
Definition: ops_dispatch.c:45
SwsOpPass::tail_size_out
int tail_size_out
Definition: ops_dispatch.c:40
ff_sws_op_list_duplicate
SwsOpList * ff_sws_op_list_duplicate(const SwsOpList *ops)
Returns a duplicate of ops, or NULL on OOM.
Definition: ops.c:634
mem_internal.h
out
static FILE * out
Definition: movenc.c:55
SwsOpPass::exec_tail
SwsOpExec exec_tail
Definition: ops_dispatch.c:35
comp
static void comp(unsigned char *dst, ptrdiff_t dst_stride, unsigned char *src, ptrdiff_t src_stride, int add)
Definition: eamad.c:79
SwsOpExec::in_bump
ptrdiff_t in_bump[4]
Pointer bump, difference between stride and processed line size.
Definition: ops_dispatch.h:51
ff_sws_op_list_input
const SwsOp * ff_sws_op_list_input(const SwsOpList *ops)
Returns the input operation for a given op list, or NULL if there is none (e.g.
Definition: ops.c:671
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3456
SwsOpExec::out_stride
ptrdiff_t out_stride[4]
Definition: ops_dispatch.h:42
SwsOpExec::in
const uint8_t * in[4]
Definition: ops_dispatch.h:37
ff_sws_ops_compile
int ff_sws_ops_compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out)
Compile a list of operations using the best available backend.
Definition: ops_dispatch.c:84
int64_t
long long int64_t
Definition: coverity.c:34
output
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
Definition: filter_design.txt:226
ops.h
SwsFilterWeights
Represents a computed filter kernel.
Definition: filters.h:64
SwsOpExec::block_size_in
int32_t block_size_in
Definition: ops_dispatch.h:57
chroma
static av_always_inline void chroma(WaveformContext *s, AVFrame *in, AVFrame *out, int component, int intensity, int offset_y, int offset_x, int column, int mirror, int jobnr, int nb_jobs)
Definition: vf_waveform.c:1639
AV_ROUND_ZERO
@ AV_ROUND_ZERO
Round toward zero.
Definition: mathematics.h:131
AVRounding
AVRounding
Rounding methods.
Definition: mathematics.h:130
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:226
base
uint8_t base
Definition: vp3data.h:128
filter
void(* filter)(uint8_t *src, int stride, int qscale)
Definition: h263dsp.c:29
mathematics.h
ops_dispatch.h
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
compile
static int compile(SwsGraph *graph, const SwsOpList *ops, SwsPass *input, SwsPass **output)
Definition: ops_dispatch.c:466
SwsOpExec::in_stride
ptrdiff_t in_stride[4]
Definition: ops_dispatch.h:41
SwsOpPass::tail_blocks
size_t tail_blocks
Definition: ops_dispatch.c:52
SwsOpBackend::name
const char * name
Definition: ops_internal.h:56
SwsOpPass::idx_out
int idx_out[4]
Definition: ops_dispatch.c:46
ff_sws_pixel_type_size
int ff_sws_pixel_type_size(SwsPixelType type)
Definition: ops.c:77
SwsPass::width
int width
Definition: graph.h:85
ff_sws_op_list_subpass
int ff_sws_op_list_subpass(SwsOpList *ops, SwsOpList **out_rest)
Eliminate SWS_OP_FILTER_* operations by merging them with prior SWS_OP_READ operations.
Definition: ops_optimizer.c:937
SwsOpList::plane_dst
uint8_t plane_dst[4]
Definition: ops.h:296
ff_sws_op_list_print
void ff_sws_op_list_print(void *log, int lev, int lev_extra, const SwsOpList *ops)
Print out the contents of an operation list.
Definition: ops.c:961
ff_sws_op_backends
const SwsOpBackend *const ff_sws_op_backends[]
Definition: ops.c:45
SwsFrame::data
uint8_t * data[4]
Definition: format.h:195
fail
#define fail()
Definition: checkasm.h:224
SwsOpBackend::compile
int(* compile)(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out)
Compile an operation list to an implementation chain.
Definition: ops_internal.h:64
SwsOpBackend::hw_format
enum AVPixelFormat hw_format
If NONE, backend only supports software frames.
Definition: ops_internal.h:71
SwsOpPass::memcpy_last
bool memcpy_last
Definition: ops_dispatch.c:50
refstruct.h
get_row_data
static void get_row_data(const SwsOpPass *p, const int y_dst, const uint8_t *in[4], uint8_t *out[4])
Definition: ops_dispatch.c:128
ff_sws_compile_pass
int ff_sws_compile_pass(SwsGraph *graph, SwsOpList **pops, int flags, SwsPass *input, SwsPass **output)
Resolves an operation list to a graph pass.
Definition: ops_dispatch.c:576
safe_blocks_offset
static size_t safe_blocks_offset(size_t num_blocks, unsigned block_size, ptrdiff_t safe_offset, const int32_t *offset_bytes)
Definition: ops_dispatch.c:163
SwsFrame
Represents a view into a single field of frame data.
Definition: format.h:193
avassert.h
AV_LOG_TRACE
#define AV_LOG_TRACE
Extremely verbose debugging, useful for libav* development.
Definition: log.h:236
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:210
SwsFrame::format
enum AVPixelFormat format
Definition: format.h:202
SwsPass::priv
void * priv
Definition: graph.h:110
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
bits
uint8_t bits
Definition: vp3data.h:128
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:42
AV_LOG_DEBUG
#define AV_LOG_DEBUG
Stuff which is only useful for libav* developers.
Definition: log.h:231
SwsGraph::num_passes
int num_passes
Definition: graph.h:132
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
AVPixFmtDescriptor::log2_chroma_w
uint8_t log2_chroma_w
Amount to shift the luma width right to find the chroma width.
Definition: pixdesc.h:80
ff_sws_op_list_output
const SwsOp * ff_sws_op_list_output(const SwsOpList *ops)
Returns the output operation for a given op list, or NULL if there is none.
Definition: ops.c:680
SWS_OP_FILTER_H
@ SWS_OP_FILTER_H
Definition: ops.h:73
av_mallocz
#define av_mallocz(s)
Definition: tableprint_vlc.h:31
SwsOpPass::comp
SwsCompiledOp comp
Definition: ops_dispatch.c:33
SwsOpBackend
Definition: ops_internal.h:55
FFABS
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:74
if
if(ret)
Definition: filter_design.txt:179
SwsOpExec
Copyright (C) 2026 Niklas Haas.
Definition: ops_dispatch.h:35
ff_sws_op_list_is_noop
bool ff_sws_op_list_is_noop(const SwsOpList *ops)
Returns whether an op list represents a true no-op operation, i.e.
Definition: ops.c:719
op_pass_free
static void op_pass_free(void *ptr)
Definition: ops_dispatch.c:114
NULL
#define NULL
Definition: coverity.c:32
ff_sws_compiled_op_unref
void ff_sws_compiled_op_unref(SwsCompiledOp *comp)
Definition: ops_dispatch.c:106
av_unreachable
#define av_unreachable(msg)
Asserts that are used as compiler optimization hints depending upon ASSERT_LEVEL and NBDEBUG.
Definition: avassert.h:116
av_fast_mallocz
void av_fast_mallocz(void *ptr, unsigned int *size, size_t min_size)
Allocate and clear a buffer, reusing the given one if large enough.
Definition: mem.c:562
SWS_OP_FILTER_V
@ SWS_OP_FILTER_V
Definition: ops.h:74
av_cpu_max_align
size_t av_cpu_max_align(void)
Get the maximum data alignment that may be required by FFmpeg.
Definition: cpu.c:287
rw_pixel_bits
static int rw_pixel_bits(const SwsOp *op)
Definition: ops_dispatch.c:444
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
AV_ROUND_DOWN
@ AV_ROUND_DOWN
Round toward -infinity.
Definition: mathematics.h:133
SwsPass::height
int height
Definition: graph.h:85
SwsOpExec::block_size_out
int32_t block_size_out
Definition: ops_dispatch.h:58
copy
static void copy(const float *p1, float *p2, const int length)
Definition: vf_vaguedenoiser.c:186
SwsFrame::height
int height
Definition: format.h:201
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
SwsOpExec::in_sub_x
uint8_t in_sub_x[4]
Definition: ops_dispatch.h:62
cpu.h
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
av_err2str
#define av_err2str(errnum)
Convenience macro, the return value should be used only directly in function arguments but never stand-alone.
Definition: error.h:122
size
int size
Definition: twinvq_data.h:10344
op_pass_setup
static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, const SwsPass *pass)
Definition: ops_dispatch.c:173
SwsOpPass::offsets_y
int * offsets_y
Definition: ops_dispatch.c:47
SwsOpList::src
SwsFormat src
Definition: ops.h:293
ff_sws_op_list_update_comps
void ff_sws_op_list_update_comps(SwsOpList *ops)
Infer + propagate known information about components.
Definition: ops.c:341
SwsFormat
Definition: format.h:77
align
static const uint8_t *BS_FUNC() align(BSCTX *bc)
Skip bits to a byte boundary.
Definition: bitstream_template.h:419
av_refstruct_ref
void * av_refstruct_ref(void *obj)
Create a new reference to an object managed via this API, i.e.
Definition: refstruct.c:140
SwsPass::output
SwsPassBuffer * output
Filter output buffer.
Definition: graph.h:98
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
line
Definition: graph2dot.c:48
SWS_OP_FLAG_OPTIMIZE
@ SWS_OP_FLAG_OPTIMIZE
Definition: ops.h:372
input
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
Definition: filter_design.txt:172
SwsOpPass::planes_in
int planes_in
Definition: ops_dispatch.c:41
av_refstruct_unref
void av_refstruct_unref(void *objp)
Decrement the reference count of the underlying object and automatically free the object if there are no more references to it.
Definition: refstruct.c:120
SwsOpExec::out
uint8_t * out[4]
Definition: ops_dispatch.h:38
ff_sws_op_list_optimize
int ff_sws_op_list_optimize(SwsOpList *ops)
Fuse compatible and eliminate redundant operations, as well as replacing some operations with more efficient alternatives.
Definition: ops_optimizer.c:340
SwsPassBuffer::width_align
int width_align
Definition: graph.h:66
SwsOpPass::pixel_bits_out
int pixel_bits_out
Definition: ops_dispatch.c:44
SwsOpExec::in_offset_x
int32_t * in_offset_x
Pixel offset map; for horizontal scaling, in bytes.
Definition: ops_dispatch.h:80
SwsOpPass::planes_out
int planes_out
Definition: ops_dispatch.c:42
AV_ROUND_INF
@ AV_ROUND_INF
Round away from zero.
Definition: mathematics.h:132
av_malloc_array
#define av_malloc_array(a, b)
Definition: tableprint_vlc.h:32
SwsOpPass::tail_size_in
int tail_size_in
Definition: ops_dispatch.c:39
av_assert1
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:58
DECLARE_ALIGNED_32
#define DECLARE_ALIGNED_32(t, v)
Definition: mem_internal.h:113
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
ops_internal.h
SwsOpPass
Copyright (C) 2025 Niklas Haas.
Definition: ops_dispatch.c:32
pixel_bytes
static size_t pixel_bytes(size_t pixels, int pixel_bits, enum AVRounding rounding)
Definition: ops_dispatch.c:139
SwsOp
Definition: ops.h:238
SwsOpExec::out_sub_y
uint8_t out_sub_y[4]
Definition: ops_dispatch.h:61
SwsOpExec::out_sub_x
uint8_t out_sub_x[4]
Definition: ops_dispatch.h:62
SwsOpPass::memcpy_first
bool memcpy_first
Definition: ops_dispatch.c:49
ff_sws_graph_add_pass
int ff_sws_graph_add_pass(SwsGraph *graph, enum AVPixelFormat fmt, int width, int height, SwsPass *input, int align, SwsPassFunc run, SwsPassSetup setup, void *priv, void(*free_cb)(void *priv), SwsPass **out_pass)
Allocate and add a new pass to the filter graph.
Definition: graph.c:126
ret
ret
Definition: filter_design.txt:187
SwsOpList::dst
SwsFormat dst
Definition: ops.h:293
SwsCompiledOp
Definition: ops_dispatch.h:100
SwsPassBuffer::width_pad
int width_pad
Definition: graph.h:67
SwsFormat::hw_format
enum AVPixelFormat hw_format
Definition: format.h:81
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
SwsOpPass::num_blocks
size_t num_blocks
Definition: ops_dispatch.c:36
safe_bytes_pad
static size_t safe_bytes_pad(int linesize, int plane_pad)
Definition: ops_dispatch.c:156
ff_sws_ops_compile_backend
int ff_sws_ops_compile_backend(SwsContext *ctx, const SwsOpBackend *backend, const SwsOpList *ops, SwsCompiledOp *out)
Attempt to compile a list of operations using a specific backend.
Definition: ops_dispatch.c:57
SwsOpPass::exec_base
SwsOpExec exec_base
Definition: ops_dispatch.c:34
SwsOpExec::in_sub_y
uint8_t in_sub_y[4]
Definition: ops_dispatch.h:61
SwsOpPass::pixel_bits_in
int pixel_bits_in
Definition: ops_dispatch.c:43
SwsOpPass::tail_off_in
int tail_off_in
Definition: ops_dispatch.c:37
SwsOpPass::memcpy_out
bool memcpy_out
Definition: ops_dispatch.c:51
mem.h
SwsGraph
Filter graph, which represents a 'baked' pixel format conversion.
Definition: graph.h:121
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes of an image.
Definition: pixdesc.h:69
align_pass
static void align_pass(SwsPass *pass, int block_size, int over_rw, int pixel_bits)
Definition: ops_dispatch.c:453
av_free
#define av_free(p)
Definition: tableprint_vlc.h:34
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
op_pass_run
static void op_pass_run(const SwsFrame *out, const SwsFrame *in, const int y, const int h, const SwsPass *pass)
Definition: ops_dispatch.c:341
int32_t
int32_t
Definition: audioconvert.c:56
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
SwsPassBuffer
Represents an output buffer for a filter pass.
Definition: graph.h:59
h
h
Definition: vp9dsp_template.c:2070
width
#define width
Definition: dsp.h:89
SwsOpList::plane_src
uint8_t plane_src[4]
Definition: ops.h:296
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:288
SwsContext
Main external API structure.
Definition: swscale.h:206
SwsOpPass::tail_off_out
int tail_off_out
Definition: ops_dispatch.c:38
SwsFrame::linesize
int linesize[4]
Definition: format.h:196
AVPixFmtDescriptor::log2_chroma_h
uint8_t log2_chroma_h
Amount to shift the luma height right to find the chroma height.
Definition: pixdesc.h:89
src
#define src
Definition: vp8dsp.c:248
SwsOpExec::out_bump
ptrdiff_t out_bump[4]
Definition: ops_dispatch.h:52
read
static uint32_t BS_FUNC() read(BSCTX *bc, unsigned int n)
Return n bits from the buffer, n has to be in the 0-32 range.
Definition: bitstream_template.h:239
ff_sws_graph_rollback
void ff_sws_graph_rollback(SwsGraph *graph, int since_idx)
Remove all passes added since the given index.
Definition: graph.c:896