FFmpeg
ops_chain.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/avassert.h"
22 #include "libavutil/mem.h"
23 #include "libavutil/rational.h"
24 
25 #include "ops_chain.h"
26 
/* Shorthand for the integer N expressed as an AVRational with denominator 1 */
#define Q(N) ((AVRational) { N, 1 })
28 
30 {
31  return av_mallocz(sizeof(SwsOpChain));
32 }
33 
34 void ff_sws_op_chain_free_cb(void *ptr)
35 {
36  if (!ptr)
37  return;
38 
39  SwsOpChain *chain = ptr;
40  for (int i = 0; i < chain->num_impl + 1; i++) {
41  if (chain->free[i])
42  chain->free[i](&chain->impl[i].priv);
43  }
44 
45  av_free(chain);
46 }
47 
49  void (*free)(SwsOpPriv *), const SwsOpPriv *priv)
50 {
51  const int idx = chain->num_impl;
52  if (idx == SWS_MAX_OPS)
53  return AVERROR(EINVAL);
54 
56  chain->impl[idx].cont = func;
57  chain->impl[idx + 1].priv = *priv;
58  chain->free[idx + 1] = free;
59  chain->num_impl++;
60  return 0;
61 }
62 
63 /**
64  * Match an operation against a reference operation. Returns a score for how
65  * well the reference matches the operation, or 0 if there is no match.
66  *
67  * If `ref->comps` has any flags set, they must be set in `op` as well.
68  * Likewise, if `ref->comps` has any components marked as unused, they must be
69  * marked as unused in `ops` as well.
70  *
71  * For SWS_OP_LINEAR, `ref->linear.mask` must be a strict superset of
72  * `op->linear.mask`, but may not contain any columns explicitly ignored by
73  * `op->comps.unused`.
74  *
75  * For unfiltered SWS_OP_READ/SWS_OP_WRITE, SWS_OP_SWAP_BYTES and
76  * SWS_OP_SWIZZLE, the exact type is not checked, just the size.
77  *
78  * Components marked SWS_COMP_GARBAGE are ignored when matching. If `flexible`
79  * is true, the op body is ignored - only the operation, pixel type, and
80  * component masks are checked.
81  */
82 static int op_match(const SwsOp *op, const SwsOpEntry *entry)
83 {
84  int score = 10;
85  if (op->op != entry->op)
86  return 0;
87 
88  switch (op->op) {
89  case SWS_OP_READ:
90  case SWS_OP_WRITE:
91  if (op->rw.filter && op->type != entry->type)
92  return 0;
93  /* fall through */;
94  case SWS_OP_SWAP_BYTES:
95  case SWS_OP_SWIZZLE:
96  /* Only the size matters for these operations */
98  return 0;
99  break;
100  default:
101  if (op->type != entry->type)
102  return 0;
103  break;
104  }
105 
106  for (int i = 0; i < 4; i++) {
107  if (entry->unused[i]) {
108  if (op->comps.unused[i])
109  score += 1; /* Operating on fewer components is better .. */
110  else
111  return 0; /* .. but not too few! */
112  }
113  }
114 
115  if (op->op == SWS_OP_CLEAR) {
116  /* Clear pattern must match exactly, regardless of `entry->flexible` */
117  for (int i = 0; i < 4; i++) {
118  if (SWS_OP_NEEDED(op, i) && entry->unused[i] != !!op->c.q4[i].den)
119  return 0;
120  }
121  }
122 
123  /* Flexible variants always match, but lower the score to prioritize more
124  * specific implementations if they exist */
125  if (entry->flexible)
126  return score - 5;
127 
128  switch (op->op) {
129  case SWS_OP_INVALID:
130  return 0;
131  case SWS_OP_READ:
132  case SWS_OP_WRITE:
133  if (op->rw.elems != entry->rw.elems ||
134  op->rw.frac != entry->rw.frac ||
135  op->rw.filter != entry->rw.filter ||
136  (op->rw.elems > 1 && op->rw.packed != entry->rw.packed))
137  return 0;
138  return score;
139  case SWS_OP_SWAP_BYTES:
140  return score;
141  case SWS_OP_PACK:
142  case SWS_OP_UNPACK:
143  for (int i = 0; i < 4 && op->pack.pattern[i]; i++) {
144  if (op->pack.pattern[i] != entry->pack.pattern[i])
145  return 0;
146  }
147  return score;
148  case SWS_OP_CLEAR:
149  for (int i = 0; i < 4; i++) {
150  if (!op->c.q4[i].den || !SWS_OP_NEEDED(op, i))
151  continue;
152  if (av_cmp_q(op->c.q4[i], Q(entry->clear_value)))
153  return 0;
154  }
155  return score;
156  case SWS_OP_LSHIFT:
157  case SWS_OP_RSHIFT:
158  av_assert1(entry->flexible);
159  break;
160  case SWS_OP_SWIZZLE:
161  for (int i = 0; i < 4; i++) {
162  if (SWS_OP_NEEDED(op, i) && op->swizzle.in[i] != entry->swizzle.in[i])
163  return 0;
164  }
165  return score;
166  case SWS_OP_CONVERT:
167  if (op->convert.to != entry->convert.to ||
168  op->convert.expand != entry->convert.expand)
169  return 0;
170  return score;
171  case SWS_OP_DITHER:
172  return op->dither.size_log2 == entry->dither_size ? score : 0;
173  case SWS_OP_MIN:
174  case SWS_OP_MAX:
175  av_assert1(entry->flexible);
176  break;
177  case SWS_OP_LINEAR:
178  /* All required elements must be present */
179  if (op->lin.mask & ~entry->linear_mask)
180  return 0;
181  /* To avoid operating on possibly undefined memory, filter out
182  * implementations that operate on more input components */
183  for (int i = 0; i < 4; i++) {
184  if ((entry->linear_mask & SWS_MASK_COL(i)) && op->comps.unused[i])
185  return 0;
186  }
187  /* Prioritize smaller implementations */
188  score += av_popcount(SWS_MASK_ALL ^ entry->linear_mask);
189  return score;
190  case SWS_OP_SCALE:
191  return av_cmp_q(op->c.q, entry->scale) ? 0 : score;
192  case SWS_OP_FILTER_H:
193  case SWS_OP_FILTER_V:
194  return score;
195  case SWS_OP_TYPE_NB:
196  break;
197  }
198 
199  av_unreachable("Invalid operation type!");
200  return 0;
201 }
202 
204  int num_tables, SwsOpList *ops, int ops_index,
205  const int block_size, SwsOpChain *chain)
206 {
207  const SwsOp *op = &ops->ops[ops_index];
208  const unsigned cpu_flags = av_get_cpu_flags();
209  const SwsOpEntry *best = NULL;
210  const SwsOpTable *best_table = NULL;
211  int ret, best_score = 0;
212 
213  SwsImplParams params = {
214  .ctx = ctx,
215  .op = op
216  };
217 
218  for (int n = 0; n < num_tables; n++) {
219  const SwsOpTable *table = tables[n];
220  if (table->block_size && table->block_size != block_size ||
221  table->cpu_flags & ~cpu_flags)
222  continue;
223 
224  params.table = table;
225  for (int i = 0; table->entries[i]; i++) {
226  const SwsOpEntry *entry = table->entries[i];
227  int score = op_match(op, entry);
228  if (score <= best_score)
229  continue;
230  if (entry->check && !entry->check(&params))
231  continue;
232  best_score = score;
233  best_table = table;
234  best = entry;
235  }
236  }
237 
238  if (!best)
239  return AVERROR(ENOTSUP);
240 
241  params.table = best_table;
242 
243  SwsImplResult res = {0};
244  if (best->setup) {
245  ret = best->setup(&params, &res);
246  if (ret < 0)
247  return ret;
248  }
249 
250  ret = ff_sws_op_chain_append(chain, res.func ? res.func : best->func,
251  res.free, &res.priv);
252  if (ret < 0) {
253  if (res.free)
254  res.free(&res.priv);
255  return ret;
256  }
257 
258  chain->cpu_flags |= best_table->cpu_flags;
259  chain->over_read = FFMAX(chain->over_read, res.over_read);
260  chain->over_write = FFMAX(chain->over_write, res.over_write);
261  return 0;
262 }
263 
/* Convert the rational `q` to a pixel value of the given type; a rational
 * with a zero denominator yields 0.
 * NOTE(review): the cast binds to `(q).num` only, so the numerator is
 * converted to `type` before the division - confirm this is intended for
 * rationals whose numerator exceeds the range of `type`. */
#define q2pixel(type, q) ((q).den ? (type) (q).num / (q).den : 0)
265 
267 {
268  out->priv.u8[0] = params->op->c.u;
269  return 0;
270 }
271 
273 {
274  const SwsOp *op = params->op;
275  switch (op->type) {
276  case SWS_PIXEL_U8: out->priv.u8[0] = op->c.u; return 0;
277  case SWS_PIXEL_U16: out->priv.u16[0] = op->c.u; return 0;
278  case SWS_PIXEL_U32: out->priv.u32[0] = op->c.u; return 0;
279  case SWS_PIXEL_F32: out->priv.f32[0] = op->c.u; return 0;
280  default: return AVERROR(EINVAL);
281  }
282 }
283 
285 {
286  const SwsOp *op = params->op;
287  switch (op->type) {
288  case SWS_PIXEL_U8: out->priv.u8[0] = q2pixel(uint8_t, op->c.q); return 0;
289  case SWS_PIXEL_U16: out->priv.u16[0] = q2pixel(uint16_t, op->c.q); return 0;
290  case SWS_PIXEL_U32: out->priv.u32[0] = q2pixel(uint32_t, op->c.q); return 0;
291  case SWS_PIXEL_F32: out->priv.f32[0] = q2pixel(float, op->c.q); return 0;
292  default: return AVERROR(EINVAL);
293  }
294 }
295 
297 {
298  const SwsOp *op = params->op;
299  for (int i = 0; i < 4; i++) {
300  switch (op->type) {
301  case SWS_PIXEL_U8: out->priv.u8[i] = q2pixel(uint8_t, op->c.q4[i]); break;
302  case SWS_PIXEL_U16: out->priv.u16[i] = q2pixel(uint16_t, op->c.q4[i]); break;
303  case SWS_PIXEL_U32: out->priv.u32[i] = q2pixel(uint32_t, op->c.q4[i]); break;
304  case SWS_PIXEL_F32: out->priv.f32[i] = q2pixel(float, op->c.q4[i]); break;
305  default: return AVERROR(EINVAL);
306  }
307  }
308 
309  return 0;
310 }
SWS_OP_READ
@ SWS_OP_READ
Definition: ops.h:50
SwsOpTable
Copyright (C) 2025 Niklas Haas.
Definition: ops_chain.h:159
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:66
SWS_PIXEL_U16
@ SWS_PIXEL_U16
Definition: ops.h:36
entry
#define entry
Definition: aom_film_grain_template.c:66
SWS_OP_SWIZZLE
@ SWS_OP_SWIZZLE
Definition: ops.h:53
AVERROR
Filter: the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format: for each input and each output, the list of supported formats. For video that means pixel format. For audio that means channel layout and sample format; they are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining ones, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references, ownership and permissions.
SwsImplResult::func
SwsFuncPtr func
Definition: ops_chain.h:112
SWS_OP_LSHIFT
@ SWS_OP_LSHIFT
Definition: ops.h:58
SWS_OP_UNPACK
@ SWS_OP_UNPACK
Definition: ops.h:56
SWS_MAX_OPS
#define SWS_MAX_OPS
Definition: ops_chain.h:85
out
static FILE * out
Definition: movenc.c:55
SWS_OP_CLEAR
@ SWS_OP_CLEAR
Definition: ops.h:62
ff_sws_setup_q
int ff_sws_setup_q(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:284
rational.h
SwsOpImpl::cont
SwsFuncPtr cont
Definition: ops_chain.h:72
SWS_OP_DITHER
@ SWS_OP_DITHER
Definition: ops.h:70
table
static const uint16_t table[]
Definition: prosumer.c:203
SWS_PIXEL_U32
@ SWS_PIXEL_U32
Definition: ops.h:37
SWS_OP_TYPE_NB
@ SWS_OP_TYPE_NB
Definition: ops.h:76
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
SwsOpEntry::setup
int(* setup)(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.h:139
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:109
av_popcount
#define av_popcount
Definition: common.h:154
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
ff_sws_pixel_type_size
int ff_sws_pixel_type_size(SwsPixelType type)
Definition: ops.c:68
SwsOpChain::cpu_flags
int cpu_flags
Definition: ops_chain.h:89
SWS_PIXEL_F32
@ SWS_PIXEL_F32
Definition: ops.h:38
SwsFuncPtr
void(* SwsFuncPtr)(void)
Per-kernel execution context.
Definition: ops_chain.h:70
SWS_MASK_COL
#define SWS_MASK_COL(J)
Definition: ops.h:187
ff_sws_setup_u8
int ff_sws_setup_u8(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:266
tables
Writing a table generator. This documentation is preliminary. Parts of the API are not good and should be changed. Basic concepts: a table generator consists of two files, *_tablegen.c and *_tablegen.h. The .h file will provide the variable declarations and initialization code for the tables.
Definition: tablegen.txt:10
SwsOp::c
SwsConst c
Definition: ops.h:223
SWS_PIXEL_U8
@ SWS_PIXEL_U8
Definition: ops.h:35
SwsOpChain::over_read
int over_read
Definition: ops_chain.h:90
SWS_OP_SCALE
@ SWS_OP_SCALE
Definition: ops.h:66
ops_chain.h
SwsOpChain::free
void(* free[SWS_MAX_OPS+1])(SwsOpPriv *)
Definition: ops_chain.h:87
avassert.h
SWS_OP_NEEDED
#define SWS_OP_NEEDED(op, idx)
Definition: ops.h:88
ff_sws_op_chain_alloc
SwsOpChain * ff_sws_op_chain_alloc(void)
Definition: ops_chain.c:29
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
SWS_OP_MIN
@ SWS_OP_MIN
Definition: ops.h:64
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
SWS_OP_LINEAR
@ SWS_OP_LINEAR
Definition: ops.h:69
SWS_OP_FILTER_H
@ SWS_OP_FILTER_H
Definition: ops.h:73
SwsOpChain::impl
SwsOpImpl impl[SWS_MAX_OPS+1]
Definition: ops_chain.h:86
SWS_MASK_ALL
@ SWS_MASK_ALL
Definition: ops.h:190
av_mallocz
#define av_mallocz(s)
Definition: tableprint_vlc.h:31
SWS_OP_PACK
@ SWS_OP_PACK
Definition: ops.h:57
SwsOpChain
Compiled "chain" of operations, which can be dispatched efficiently.
Definition: ops_chain.h:84
NULL
#define NULL
Definition: coverity.c:32
av_unreachable
#define av_unreachable(msg)
Asserts that are used as compiler optimization hints depending upon ASSERT_LEVEL and NBDEBUG.
Definition: avassert.h:116
SwsImplParams::op
const SwsOp * op
Definition: ops_chain.h:107
SWS_OP_FILTER_V
@ SWS_OP_FILTER_V
Definition: ops.h:74
SwsImplResult::over_read
int over_read
Definition: ops_chain.h:115
SwsImplResult::over_write
int over_write
Definition: ops_chain.h:116
SwsImplParams
Definition: ops_chain.h:105
SwsOpEntry::func
SwsFuncPtr func
Definition: ops_chain.h:138
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
SWS_OP_RSHIFT
@ SWS_OP_RSHIFT
Definition: ops.h:59
SWS_OP_INVALID
@ SWS_OP_INVALID
Definition: ops.h:47
ff_sws_op_compile_tables
int ff_sws_op_compile_tables(SwsContext *ctx, const SwsOpTable *const tables[], int num_tables, SwsOpList *ops, int ops_index, const int block_size, SwsOpChain *chain)
"Compile" a single op by looking it up in a list of fixed size op tables.
Definition: ops_chain.c:203
SWS_OP_WRITE
@ SWS_OP_WRITE
Definition: ops.h:51
ff_sws_setup_q4
int ff_sws_setup_q4(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:296
SwsOpChain::num_impl
int num_impl
Definition: ops_chain.h:88
SwsOpEntry
Definition: ops_chain.h:119
ff_sws_setup_u
int ff_sws_setup_u(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:272
ff_sws_op_chain_free_cb
void ff_sws_op_chain_free_cb(void *ptr)
Definition: ops_chain.c:34
SwsImplParams::ctx
SwsContext * ctx
Definition: ops_chain.h:108
SwsOpTable::cpu_flags
unsigned cpu_flags
Definition: ops_chain.h:160
SwsOpList::ops
SwsOp * ops
Definition: ops.h:255
av_assert1
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:58
SwsImplResult::free
void(* free)(SwsOpPriv *priv)
Definition: ops_chain.h:114
SwsOp
Definition: ops.h:212
av_cmp_q
static int av_cmp_q(AVRational a, AVRational b)
Compare two rationals.
Definition: rational.h:89
Q
#define Q(N)
Copyright (C) 2025 Niklas Haas.
Definition: ops_chain.c:27
ret
ret
Definition: filter_design.txt:187
op_match
static int op_match(const SwsOp *op, const SwsOpEntry *entry)
Match an operation against a reference operation.
Definition: ops_chain.c:82
SWS_OP_MAX
@ SWS_OP_MAX
Definition: ops.h:65
SwsOpImpl::priv
SwsOpPriv priv
Definition: ops_chain.h:73
SwsConst::u
unsigned u
Definition: ops.h:94
SwsImplResult::priv
SwsOpPriv priv
Definition: ops_chain.h:113
SWS_OP_SWAP_BYTES
@ SWS_OP_SWAP_BYTES
Definition: ops.h:52
mem.h
av_free
#define av_free(p)
Definition: tableprint_vlc.h:34
SWS_OP_CONVERT
@ SWS_OP_CONVERT
Definition: ops.h:63
ff_sws_op_chain_append
int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func, void(*free)(SwsOpPriv *), const SwsOpPriv *priv)
Definition: ops_chain.c:48
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:254
q2pixel
#define q2pixel(type, q)
Definition: ops_chain.c:264
SwsContext
Main external API structure.
Definition: swscale.h:206
SwsOpPriv
Private data for each kernel.
Definition: ops_chain.h:45
SwsImplResult
Definition: ops_chain.h:111
SwsImplParams::table
const SwsOpTable * table
Definition: ops_chain.h:106
SwsOpChain::over_write
int over_write
Definition: ops_chain.h:91