FFmpeg
ops_tmpl_common.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "ops_backend.h"
22 
23 #ifndef BIT_DEPTH
24 # error Should only be included from ops_tmpl_*.c!
25 #endif
26 
/**
 * Define the kernel that converts the current block to unsigned N-bit
 * integers, followed by its WRAP_COMMON_PATTERNS() instantiation.
 *
 * Only the components selected by the pattern flags X/Y/Z/W are copied
 * (with an implicit C conversion) into the u<N>block_t temporaries; the
 * temporaries are then passed on through CONTINUE().
 *
 * @param N bit width of the destination type (pasted into u<N>block_t and
 *          SWS_PIXEL_U<N>)
 */
#define WRAP_CONVERT_UINT(N) \
DECL_PATTERN(convert_uint##N) \
{ \
    u##N##block_t cx, cy, cz, cw; \
 \
    SWS_LOOP \
    for (int k = 0; k < SWS_BLOCK_SIZE; k++) { \
        if (X) { \
            cx[k] = x[k]; \
        } \
        if (Y) { \
            cy[k] = y[k]; \
        } \
        if (Z) { \
            cz[k] = z[k]; \
        } \
        if (W) { \
            cw[k] = w[k]; \
        } \
    } \
 \
    CONTINUE(u##N##block_t, cx, cy, cz, cw); \
} \
 \
WRAP_COMMON_PATTERNS(convert_uint##N, \
    .op         = SWS_OP_CONVERT, \
    .convert.to = SWS_PIXEL_U##N, \
);
51 
52 #if BIT_DEPTH != 8
54 #endif
55 
56 #if BIT_DEPTH != 16
58 #endif
59 
60 #if BIT_DEPTH != 32 || defined(IS_FLOAT)
62 #endif
63 
65 {
66  SWS_LOOP
67  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
68  if (!X)
69  x[i] = impl->priv.px[0];
70  if (!Y)
71  y[i] = impl->priv.px[1];
72  if (!Z)
73  z[i] = impl->priv.px[2];
74  if (!W)
75  w[i] = impl->priv.px[3];
76  }
77 
78  CONTINUE(block_t, x, y, z, w);
79 }
80 
81 #define WRAP_CLEAR(X, Y, Z, W) \
82 DECL_IMPL(clear##_##X##Y##Z##W) \
83 { \
84  CALL(clear, X, Y, Z, W); \
85 } \
86  \
87 DECL_ENTRY(clear##_##X##Y##Z##W, \
88  .setup = ff_sws_setup_q4, \
89  .op = SWS_OP_CLEAR, \
90  .flexible = true, \
91  .unused = { !X, !Y, !Z, !W }, \
92 );
93 
94 WRAP_CLEAR(1, 1, 1, 0) /* rgba alpha */
95 WRAP_CLEAR(0, 1, 1, 1) /* argb alpha */
96 WRAP_CLEAR(1, 0, 1, 1) /* ya alpha */
97 
98 WRAP_CLEAR(0, 0, 1, 1) /* vuya chroma */
99 WRAP_CLEAR(1, 0, 0, 1) /* yuva chroma */
100 WRAP_CLEAR(1, 1, 0, 0) /* ayuv chroma */
101 WRAP_CLEAR(0, 1, 0, 1) /* uyva chroma */
102 WRAP_CLEAR(1, 0, 1, 0) /* xvyu chroma */
103 
104 WRAP_CLEAR(1, 0, 0, 0) /* gray -> yuva */
105 WRAP_CLEAR(0, 1, 0, 0) /* gray -> ayuv */
106 WRAP_CLEAR(0, 0, 1, 0) /* gray -> vuya */
107 
109 {
110  SWS_LOOP
111  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
112  if (X)
113  x[i] = FFMIN(x[i], impl->priv.px[0]);
114  if (Y)
115  y[i] = FFMIN(y[i], impl->priv.px[1]);
116  if (Z)
117  z[i] = FFMIN(z[i], impl->priv.px[2]);
118  if (W)
119  w[i] = FFMIN(w[i], impl->priv.px[3]);
120  }
121 
122  CONTINUE(block_t, x, y, z, w);
123 }
124 
126 {
127  SWS_LOOP
128  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
129  if (X)
130  x[i] = FFMAX(x[i], impl->priv.px[0]);
131  if (Y)
132  y[i] = FFMAX(y[i], impl->priv.px[1]);
133  if (Z)
134  z[i] = FFMAX(z[i], impl->priv.px[2]);
135  if (W)
136  w[i] = FFMAX(w[i], impl->priv.px[3]);
137  }
138 
139  CONTINUE(block_t, x, y, z, w);
140 }
141 
143  .op = SWS_OP_MIN,
144  .setup = ff_sws_setup_q4,
145  .flexible = true,
146 );
147 
149  .op = SWS_OP_MAX,
150  .setup = ff_sws_setup_q4,
151  .flexible = true,
152 );
153 
155 {
156  const pixel_t scale = impl->priv.px[0];
157 
158  SWS_LOOP
159  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
160  if (X)
161  x[i] *= scale;
162  if (Y)
163  y[i] *= scale;
164  if (Z)
165  z[i] *= scale;
166  if (W)
167  w[i] *= scale;
168  }
169 
170  CONTINUE(block_t, x, y, z, w);
171 }
172 
174  .op = SWS_OP_SCALE,
175  .setup = ff_sws_setup_q,
176  .flexible = true,
177 );
178 
180 {
181  const SwsFilterWeights *filter = params->op->rw.kernel;
182  static_assert(sizeof(out->priv.ptr) <= sizeof(int32_t[2]),
183  ">8 byte pointers not supported");
184 
185  /* Pre-convert weights to float */
186  float *weights = av_calloc(filter->num_weights, sizeof(float));
187  if (!weights)
188  return AVERROR(ENOMEM);
189 
190  for (int i = 0; i < filter->num_weights; i++)
191  weights[i] = (float) filter->weights[i] / SWS_FILTER_SCALE;
192 
193  out->priv.ptr = weights;
194  out->priv.i32[2] = filter->filter_size;
195  out->free = ff_op_priv_free;
196  return 0;
197 }
198 
199 /* Fully general vertical planar filter case */
200 DECL_READ(filter_v, const int elems)
201 {
202  const SwsOpExec *exec = iter->exec;
203  const float *restrict weights = impl->priv.ptr;
204  const int filter_size = impl->priv.i32[2];
205  weights += filter_size * iter->y;
206 
207  f32block_t xs, ys, zs, ws;
208  memset(xs, 0, sizeof(xs));
209  if (elems > 1)
210  memset(ys, 0, sizeof(ys));
211  if (elems > 2)
212  memset(zs, 0, sizeof(zs));
213  if (elems > 3)
214  memset(ws, 0, sizeof(ws));
215 
216  for (int j = 0; j < filter_size; j++) {
217  const float weight = weights[j];
218 
219  SWS_LOOP
220  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
221  xs[i] += weight * in0[i];
222  if (elems > 1)
223  ys[i] += weight * in1[i];
224  if (elems > 2)
225  zs[i] += weight * in2[i];
226  if (elems > 3)
227  ws[i] += weight * in3[i];
228  }
229 
230  in0 = bump_ptr(in0, exec->in_stride[0]);
231  if (elems > 1)
232  in1 = bump_ptr(in1, exec->in_stride[1]);
233  if (elems > 2)
234  in2 = bump_ptr(in2, exec->in_stride[2]);
235  if (elems > 3)
236  in3 = bump_ptr(in3, exec->in_stride[3]);
237  }
238 
239  for (int i = 0; i < elems; i++)
240  iter->in[i] += sizeof(block_t);
241 
242  CONTINUE(f32block_t, xs, ys, zs, ws);
243 }
244 
246 {
247  SwsFilterWeights *filter = params->op->rw.kernel;
248  out->priv.ptr = av_refstruct_ref(filter->weights);
249  out->priv.i32[2] = filter->filter_size;
250  out->free = ff_op_priv_unref;
251  return 0;
252 }
253 
254 /* Fully general horizontal planar filter case */
255 DECL_READ(filter_h, const int elems)
256 {
257  const SwsOpExec *exec = iter->exec;
258  const int *restrict weights = impl->priv.ptr;
259  const int filter_size = impl->priv.i32[2];
260  const float scale = 1.0f / SWS_FILTER_SCALE;
261  const int xpos = iter->x;
262  weights += filter_size * iter->x;
263 
264  f32block_t xs, ys, zs, ws;
265  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
266  const int offset = exec->in_offset_x[xpos + i];
267  pixel_t *start0 = bump_ptr(in0, offset);
268  pixel_t *start1 = bump_ptr(in1, offset);
269  pixel_t *start2 = bump_ptr(in2, offset);
270  pixel_t *start3 = bump_ptr(in3, offset);
271 
272  inter_t sx = 0, sy = 0, sz = 0, sw = 0;
273  for (int j = 0; j < filter_size; j++) {
274  const int weight = weights[j];
275  sx += weight * start0[j];
276  if (elems > 1)
277  sy += weight * start1[j];
278  if (elems > 2)
279  sz += weight * start2[j];
280  if (elems > 3)
281  sw += weight * start3[j];
282  }
283 
284  xs[i] = (float) sx * scale;
285  if (elems > 1)
286  ys[i] = (float) sy * scale;
287  if (elems > 2)
288  zs[i] = (float) sz * scale;
289  if (elems > 3)
290  ws[i] = (float) sw * scale;
291 
292  weights += filter_size;
293  }
294 
295  CONTINUE(f32block_t, xs, ys, zs, ws);
296 }
297 
298 #define WRAP_FILTER(FUNC, DIR, ELEMS, SUFFIX) \
299 DECL_IMPL(FUNC##ELEMS##SUFFIX) \
300 { \
301  CALL_READ(FUNC##SUFFIX, ELEMS); \
302 } \
303  \
304 DECL_ENTRY(FUNC##ELEMS##SUFFIX, \
305  .op = SWS_OP_READ, \
306  .setup = fn(setup_filter##SUFFIX), \
307  .rw.elems = ELEMS, \
308  .rw.filter = SWS_OP_FILTER_##DIR, \
309 );
310 
311 WRAP_FILTER(filter, V, 1, _v)
312 WRAP_FILTER(filter, V, 2, _v)
313 WRAP_FILTER(filter, V, 3, _v)
314 WRAP_FILTER(filter, V, 4, _v)
315 
316 WRAP_FILTER(filter, H, 1, _h)
317 WRAP_FILTER(filter, H, 2, _h)
318 WRAP_FILTER(filter, H, 3, _h)
319 WRAP_FILTER(filter, H, 4, _h)
320 
321 static void fn(process)(const SwsOpExec *exec, const void *priv,
322  const int bx_start, const int y_start,
323  int bx_end, int y_end)
324 {
325  const SwsOpChain *chain = priv;
326  const SwsOpImpl *impl = chain->impl;
327  u32block_t x, y, z, w; /* allocate enough space for any intermediate */
328 
329  SwsOpIter iterdata;
330  SwsOpIter *iter = &iterdata; /* for CONTINUE() macro to work */
331  iter->exec = exec;
332  for (int i = 0; i < 4; i++) {
333  iter->in[i] = (uintptr_t) exec->in[i];
334  iter->out[i] = (uintptr_t) exec->out[i];
335  }
336 
337  for (iter->y = y_start; iter->y < y_end; iter->y++) {
338  for (int block = bx_start; block < bx_end; block++) {
339  iter->x = block * SWS_BLOCK_SIZE;
340  CONTINUE(block_t, (void *) x, (void *) y, (void *) z, (void *) w);
341  }
342 
343  const int y_bump = exec->in_bump_y ? exec->in_bump_y[iter->y] : 0;
344  for (int i = 0; i < 4; i++) {
345  iter->in[i] += exec->in_bump[i] + y_bump * exec->in_stride[i];
346  iter->out[i] += exec->out_bump[i];
347  }
348  }
349 }
WRAP_CLEAR
#define WRAP_CLEAR(X, Y, Z, W)
Definition: ops_tmpl_common.c:81
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
ops_backend.h
f32block_t
float f32block_t[SWS_BLOCK_SIZE]
Definition: ops_backend.c:36
out
static FILE * out
Definition: movenc.c:55
SwsOpIter::exec
const SwsOpExec * exec
Definition: ops_backend.h:52
ff_sws_setup_q
int ff_sws_setup_q(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:284
block_t
#define block_t
Definition: ops_tmpl_float.c:34
SwsFilterWeights
Represents a computed filter kernel.
Definition: filters.h:64
filter
void(* filter)(uint8_t *src, int stride, int qscale)
Definition: h263dsp.c:29
SwsOpIter
Copyright (C) 2025 Niklas Haas.
Definition: ops_backend.h:46
max
#define max(a, b)
Definition: cuda_runtime.h:33
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
SwsOpExec::in_stride
ptrdiff_t in_stride[4]
Definition: ops_dispatch.h:41
ff_op_priv_unref
static void ff_op_priv_unref(SwsOpPriv *priv)
Definition: ops_chain.h:154
SwsOpIter::x
int x
Definition: ops_backend.h:49
WRAP_CONVERT_UINT
#define WRAP_CONVERT_UINT(N)
Copyright (C) 2025 Niklas Haas.
Definition: ops_tmpl_common.c:27
DECL_PATTERN
DECL_PATTERN(clear)
Definition: ops_tmpl_common.c:64
weight
const h264_weight_func weight
Definition: h264dsp_init.c:33
WRAP_COMMON_PATTERNS
WRAP_COMMON_PATTERNS(min,.op=SWS_OP_MIN,.setup=ff_sws_setup_q4,.flexible=true,)
fn
Definition: ops_tmpl_float.c:126
SWS_OP_SCALE
@ SWS_OP_SCALE
Definition: ops.h:66
float
float
Definition: af_crystalizer.c:122
W
#define W(a, i, v)
Definition: jpegls.h:119
CONTINUE
#define CONTINUE(TYPE,...)
Definition: ops_backend.h:115
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
SWS_LOOP
#define SWS_LOOP
Definition: ops_backend.h:60
SwsOpImpl
Definition: ops_chain.h:71
SWS_OP_MIN
@ SWS_OP_MIN
Definition: ops.h:64
DECL_READ
DECL_READ(filter_v, const int elems)
Definition: ops_tmpl_common.c:200
u32block_t
uint32_t u32block_t[SWS_BLOCK_SIZE]
Definition: ops_backend.c:35
SwsOpChain::impl
SwsOpImpl impl[SWS_MAX_OPS+1]
Definition: ops_chain.h:86
SWS_BLOCK_SIZE
#define SWS_BLOCK_SIZE
Copyright (C) 2025 Niklas Haas.
Definition: ops_backend.c:30
SwsOpIter::out
uintptr_t out[4]
Definition: ops_backend.h:48
SWS_FILTER_SCALE
@ SWS_FILTER_SCALE
14-bit coefficients are picked to fit comfortably within int16_t for efficient SIMD processing (e....
Definition: filters.h:40
SwsOpExec
Copyright (C) 2026 Niklas Haas.
Definition: ops_dispatch.h:35
bump_ptr
#define bump_ptr(ptr, bump)
Definition: ops_backend.h:74
xs
#define xs(width, name, var, subs,...)
Definition: cbs_vp9.c:305
SwsOpChain
Compiled "chain" of operations, which can be dispatched efficiently.
Definition: ops_chain.h:84
V
#define V
Definition: avdct.c:32
SwsOpIter::in
uintptr_t in[4]
Definition: ops_backend.h:47
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
WRAP_FILTER
#define WRAP_FILTER(FUNC, DIR, ELEMS, SUFFIX)
Definition: ops_tmpl_common.c:298
process
static void fn() process(const SwsOpExec *exec, const void *priv, const int bx_start, const int y_start, int bx_end, int y_end)
Definition: ops_tmpl_common.c:321
pixel_t
#define pixel_t
Definition: ops_tmpl_float.c:32
ff_sws_setup_q4
int ff_sws_setup_q4(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:296
av_refstruct_ref
void * av_refstruct_ref(void *obj)
Create a new reference to an object managed via this API, i.e.
Definition: refstruct.c:140
H
#define H
Definition: pixlet.c:39
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
Y
#define Y
Definition: boxblur.h:37
DECL_SETUP
DECL_SETUP(setup_filter_v, params, out)
Definition: ops_tmpl_common.c:179
SwsOpExec::in_offset_x
int32_t * in_offset_x
Pixel offset map; for horizontal scaling, in bytes.
Definition: ops_dispatch.h:80
weights
static const int weights[]
Definition: hevc_pel.c:32
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
av_calloc
void * av_calloc(size_t nmemb, size_t size)
Definition: mem.c:264
ff_op_priv_free
static void ff_op_priv_free(SwsOpPriv *priv)
Definition: ops_chain.h:149
SWS_OP_MAX
@ SWS_OP_MAX
Definition: ops.h:65
X
@ X
Definition: vf_addroi.c:27
inter_t
#define inter_t
Definition: ops_tmpl_float.c:33
w
uint8_t w
Definition: llvidencdsp.c:39
scale
static void scale(int *out, const int *in, const int w, const int h, const int shift)
Definition: intra.c:278
setup_filter_v
static int setup_filter_v(const SwsImplParams *params, SwsImplResult *out)
Definition: ops.c:312
int32_t
int32_t
Definition: audioconvert.c:56
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
SwsOpIter::y
int y
Definition: ops_backend.h:49
min
float min
Definition: vorbis_enc_data.h:429
setup_filter_h
static int setup_filter_h(const SwsImplParams *params, SwsImplResult *out)
Definition: ops.c:342