FFmpeg
ops.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2026 Lynne
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/mem.h"
22 #include "libavutil/refstruct.h"
23 
24 #include "../graph.h"
25 #include "../ops_internal.h"
26 #include "../swscale_internal.h"
27 
28 #include "ops.h"
29 
30 #if HAVE_SPIRV_HEADERS_SPIRV_H || HAVE_SPIRV_UNIFIED1_SPIRV_H
31 #include "spvasm.h"
32 #endif
33 
34 static void ff_sws_vk_uninit(AVRefStructOpaque opaque, void *obj)
35 {
36  FFVulkanOpsCtx *s = obj;
37 
38 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
39  if (s->spvc)
40  s->spvc->uninit(&s->spvc);
41 #endif
42  ff_vk_uninit(&s->vkctx);
43 }
44 
46 {
/*
 * Prepares the Vulkan ops context kept in the hw_priv slot of the SwsInternal
 * behind `sws` so that it targets the device referenced by `dev_ref`.
 * Returns 0 on success (including when the context already targets that
 * device) or a negative AVERROR code.
 *
 * NOTE(review): the declaration line of this function and the final argument
 * line of the av_refstruct_alloc_ext() call are absent from this listing.
 */
47  int err;
48  SwsInternal *c = sws_internal(sws);
49
/* Allocate the reference-counted context on first use */
50  if (!c->hw_priv) {
51  c->hw_priv = av_refstruct_alloc_ext(sizeof(FFVulkanOpsCtx), 0, NULL,
53  if (!c->hw_priv)
54  return AVERROR(ENOMEM);
55  }
56
57  FFVulkanOpsCtx *s = c->hw_priv;
/* Compare the underlying device data, not the AVBufferRef pointers */
58  if (s->vkctx.device_ref && s->vkctx.device_ref->data != dev_ref->data) {
59  /* Reinitialize with new context */
60  ff_vk_uninit(&s->vkctx);
61  } else if (s->vkctx.device_ref && s->vkctx.device_ref->data == dev_ref->data) {
/* Already set up for this device: nothing to do */
62  return 0;
63  }
64
65  err = ff_vk_init(&s->vkctx, sws, dev_ref, NULL);
66  if (err < 0)
67  return err;
68
/*
 * NOTE(review): the error returns below leave s->vkctx initialized; if
 * ff_vk_init() sets vkctx.device_ref, a later call with the same device
 * would take the early `return 0` above even though qf/spvc setup failed.
 * Confirm whether callers retry.
 */
69  s->qf = ff_vk_qf_find(&s->vkctx, VK_QUEUE_COMPUTE_BIT, 0);
70  if (!s->qf) {
71  av_log(sws, AV_LOG_ERROR, "Device has no compute queues\n");
72  return AVERROR(ENOTSUP);
73  }
74
75 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
/* The SPIR-V compiler is created once and kept across device changes */
76  if (!s->spvc) {
77  s->spvc = ff_vk_spirv_init();
78  if (!s->spvc)
79  return AVERROR(ENOMEM);
80  }
81 #endif
82
83  return 0;
84 }
85 
87 {
/*
 * Returns the device reference held by the Vulkan ops context stored in the
 * hw_priv slot of `sws`, or NULL when no such context has been allocated.
 * NOTE(review): the declaration line of this function is absent from this
 * listing.
 */
88  SwsInternal *c = sws_internal(sws);
89  FFVulkanOpsCtx *s = c->hw_priv;
90  return s ? s->vkctx.device_ref : NULL;
91 }
92 
/* Capacity of the SPIRVIDs.dither array (dither buffers per shader) */
93 #define MAX_DITHER_BUFS 4
/* Capacity of the SPIRVIDs.filt array (filter buffers per shader) */
94 #define MAX_FILT_BUFS 4
/*
 * Number of data-buffer slots reserved in SPIRVIDs.in_vars.
 * NOTE(review): the reason for the factor 4 on the filter count is not
 * visible in this file section - confirm.
 */
95 #define MAX_DATA_BUFS (MAX_DITHER_BUFS + MAX_FILT_BUFS*4)
96 
/*
 * Private state attached to a pass (SwsPass.priv) by this backend.
 * NOTE(review): the member declarations are absent from this listing. Code in
 * this file accesses the members s, e, shd, data_bufs, nb_data_bufs, src_rep,
 * dst_rep and interlaced through this type.
 */
97 typedef struct VulkanPriv {
106 } VulkanPriv;
107 
108 static void process(const SwsFrame *dst, const SwsFrame *src, int y, int h,
109  const SwsPass *pass)
110 {
111  VulkanPriv *p = (VulkanPriv *) pass->priv;
112  FFVkExecContext *ec = ff_vk_exec_get(&p->s->vkctx, &p->e);
113  FFVulkanFunctions *vk = &p->s->vkctx.vkfn;
114  ff_vk_exec_start(&p->s->vkctx, ec);
115 
116  AVFrame *src_f = (AVFrame *) src->avframe;
117  AVFrame *dst_f = (AVFrame *) dst->avframe;
118  ff_vk_exec_add_dep_frame(&p->s->vkctx, ec, src_f,
119  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
120  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT);
121  ff_vk_exec_add_dep_frame(&p->s->vkctx, ec, dst_f,
122  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
123  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT);
124 
125  VkImageView src_views[AV_NUM_DATA_POINTERS];
126  VkImageView dst_views[AV_NUM_DATA_POINTERS];
127  ff_vk_create_imageviews(&p->s->vkctx, ec, src_views, src_f, p->src_rep);
128  ff_vk_create_imageviews(&p->s->vkctx, ec, dst_views, dst_f, p->dst_rep);
129 
130  ff_vk_shader_update_img_array(&p->s->vkctx, ec, &p->shd, src_f, src_views,
131  0, 0, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
132  ff_vk_shader_update_img_array(&p->s->vkctx, ec, &p->shd, dst_f, dst_views,
133  0, 1, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
134 
135  int nb_img_bar = 0;
136  VkImageMemoryBarrier2 img_bar[8];
137  ff_vk_frame_barrier(&p->s->vkctx, ec, src_f, img_bar, &nb_img_bar,
138  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
139  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
140  VK_ACCESS_SHADER_READ_BIT,
141  VK_IMAGE_LAYOUT_GENERAL,
142  VK_QUEUE_FAMILY_IGNORED);
143  ff_vk_frame_barrier(&p->s->vkctx, ec, dst_f, img_bar, &nb_img_bar,
144  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
145  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
146  VK_ACCESS_SHADER_WRITE_BIT,
147  VK_IMAGE_LAYOUT_GENERAL,
148  VK_QUEUE_FAMILY_IGNORED);
149  vk->CmdPipelineBarrier2(ec->buf, &(VkDependencyInfo) {
150  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
151  .pImageMemoryBarriers = img_bar,
152  .imageMemoryBarrierCount = nb_img_bar,
153  });
154 
155  if (p->interlaced) {
156  uint32_t field = pass->graph ? pass->graph->field : 0;
157  ff_vk_shader_update_push_const(&p->s->vkctx, ec, &p->shd,
158  VK_SHADER_STAGE_COMPUTE_BIT,
159  0, sizeof(field), &field);
160  }
161 
162  ff_vk_exec_bind_shader(&p->s->vkctx, ec, &p->shd);
163 
164  vk->CmdDispatch(ec->buf,
165  FFALIGN(dst->width, p->shd.lg_size[0])/p->shd.lg_size[0],
166  FFALIGN(dst->height, p->shd.lg_size[1])/p->shd.lg_size[1],
167  1);
168 
169  ff_vk_exec_submit(&p->s->vkctx, ec);
170  ff_vk_exec_wait(&p->s->vkctx, ec);
171 }
172 
173 static void free_fn(void *priv)
174 {
175  VulkanPriv *p = priv;
176  ff_vk_exec_pool_free(&p->s->vkctx, &p->e);
177  ff_vk_shader_free(&p->s->vkctx, &p->shd);
178  for (int i = 0; i < p->nb_data_bufs; i++)
179  ff_vk_free_buf(&p->s->vkctx, &p->data_bufs[i]);
180  av_refstruct_unref(&p->s);
181  av_free(priv);
182 }
183 
185  const SwsFilterWeights *wd, FFVkBuffer *buf)
186 {
/*
 * Creates a host-visible uniform buffer in `buf` holding the filter kernel
 * `wd`: first wd->num_weights floats (the integer weights divided by
 * SWS_FILTER_SCALE), immediately followed by wd->dst_size int32 offsets.
 *
 * Returns 0 on success. On failure the buffer is freed and the negative
 * error code from ff_vk_create_buf()/ff_vk_map_buffer() is returned, so
 * that create_bufs() does not count a buffer that was never set up.
 */
187  int err;
188
189  /* Weights */
190  err = ff_vk_create_buf(&s->vkctx, buf,
191  wd->num_weights*sizeof(float) +
192  wd->dst_size*sizeof(int32_t), NULL, NULL,
193  VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
194  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
195  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
196  if (err < 0)
197  goto fail;
198
199  float *weights_data;
200  err = ff_vk_map_buffer(&s->vkctx, buf,
201  (uint8_t **)&weights_data, 0);
202  if (err < 0)
203  goto fail;
204  for (int i = 0; i < wd->num_weights; i++)
205  weights_data[i] = (float) wd->weights[i] / SWS_FILTER_SCALE;
206
/* Offsets are stored right after the weights, in the same mapping */
207  memcpy(weights_data + wd->num_weights,
208  wd->offsets, wd->dst_size*sizeof(int32_t));
209
210  ff_vk_unmap_buffer(&s->vkctx, buf, 1);
211
212  return 0;
213
214 fail:
215  ff_vk_free_buf(&p->s->vkctx, buf);
/* Propagate the failure instead of reporting success */
216  return err;
217 }
218 
220  const SwsDitherOp *dd, FFVkBuffer *buf)
221 {
/*
 * Creates a host-visible uniform buffer in `buf` holding the dither matrix
 * of `dd` as size*size floats, where size = 1 << dd->size_log2, stored in
 * the same i*size + j order as dd->matrix.
 * Returns 0 on success or a negative error code; a buffer that was created
 * but could not be mapped is freed before returning.
 * NOTE(review): the first line of the declaration is absent from this
 * listing.
 */
222  int err;
223
224  int size = (1 << dd->size_log2);
225  err = ff_vk_create_buf(&s->vkctx, buf,
226  size*size*sizeof(float), NULL, NULL,
227  VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
228  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
229  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
230  if (err < 0)
231  return err;
232
233  float *dither_data;
234  err = ff_vk_map_buffer(&s->vkctx, buf, (uint8_t **)&dither_data, 0);
235  if (err < 0)
236  goto fail;
237
/* Convert each rational matrix entry to float */
238  for (int i = 0; i < size; i++) {
239  for (int j = 0; j < size; j++) {
240  const AVRational r = dd->matrix[i*size + j];
241  dither_data[i*size + j] = r.num/(float)r.den;
242  }
243  }
244
245  ff_vk_unmap_buffer(&s->vkctx, buf, 1);
246
247  return 0;
248
249 fail:
250  ff_vk_free_buf(&p->s->vkctx, buf);
251  return err;
252 }
253 
254 static int create_bufs(FFVulkanOpsCtx *s, VulkanPriv *p, const SwsOpList *ops)
255 {
256  int err;
257  p->nb_data_bufs = 0;
258  for (int n = 0; n < ops->num_ops; n++) {
259  const SwsOp *op = &ops->ops[n];
260  if (op->op == SWS_OP_DITHER) {
261  av_assert0(p->nb_data_bufs + 1 <= FF_ARRAY_ELEMS(p->data_bufs));
262  err = create_dither_buf(s, p, &op->dither,
263  &p->data_bufs[p->nb_data_bufs]);
264  if (err < 0)
265  goto fail;
266  p->nb_data_bufs++;
267  } else if (op->op == SWS_OP_FILTER_H || op->op == SWS_OP_FILTER_V) {
268  av_assert0(p->nb_data_bufs + 1 <= FF_ARRAY_ELEMS(p->data_bufs));
269  err = create_filter_buf(s, p, op->filter.kernel,
270  &p->data_bufs[p->nb_data_bufs]);
271  if (err < 0)
272  goto fail;
273  p->nb_data_bufs++;
274  } else if ((op->op == SWS_OP_READ ||
275  op->op == SWS_OP_WRITE) && op->rw.filter) {
276  av_assert0(p->nb_data_bufs + 1 <= FF_ARRAY_ELEMS(p->data_bufs));
277  err = create_filter_buf(s, p, op->rw.kernel,
278  &p->data_bufs[p->nb_data_bufs]);
279  if (err < 0)
280  goto fail;
281  p->nb_data_bufs++;
282  }
283  }
284 
285  return 0;
286 
287 fail:
288  for (int i = 0; i < p->nb_data_bufs; i++)
289  ff_vk_free_buf(&p->s->vkctx, &p->data_bufs[i]);
290  return err;
291 }
292 
293 #if HAVE_SPIRV_HEADERS_SPIRV_H || HAVE_SPIRV_UNIFIED1_SPIRV_H
/* SPIR-V ids and layout information for one dither matrix buffer */
294 struct DitherData {
/* Matrix dimension, 1 << size_log2 of the dither op */
295  int size;
/* Type id of one matrix row: array of `size` floats */
296  int arr_1d_id;
/* Type id of the whole matrix: array of `size` rows */
297  int arr_2d_id;
/* Type id of the Block-decorated struct wrapping the matrix */
298  int struct_id;
/* Type id of the Uniform pointer to that struct */
299  int struct_ptr_id;
/* Id of the buffer variable */
300  int id;
/* Id of the u32 constant `size - 1` */
301  int mask_id;
/* Binding index within descriptor set 1 */
302  int binding;
303 };
304 
/*
 * SPIR-V ids and layout information for one filter kernel buffer.
 * NOTE(review): one member line is absent from this listing; code in this
 * file also accesses a `filter` member (compared against SWS_OP_FILTER_H).
 */
305 struct FilterData {
/* Copied from the kernel's SwsFilterWeights (filter_size, dst_size, num_weights) */
307  int filter_size;
308  int dst_size;
309  int num_weights;
310
/* Type ids: float[filter_size], that array [dst_size] times, int[dst_size] */
311  int arr_w_in_id;
312  int arr_w_out_id;
313  int arr_o_id;
/* Type ids of the Block struct { weights; offsets } and its Uniform pointer */
314  int struct_id;
315  int struct_ptr_id;
316
317  int id; /* buffer ID */
318  int binding; /* descriptor idx in desc set 1 */
319
/* Id of the signed constant 0; the constant for tap t is read as base + t */
320  int tap_const_base;
321 };
322 
/*
 * Collection of every SPIR-V result id shared between the shader generation
 * stages (header, constants, bindings, operation bodies).
 */
323 typedef struct SPIRVIDs {
/*
 * Entry point interface variables: [0] gl_GlobalInvocationID, [1] input
 * images, [2] output images, then one per data buffer; the extra slot
 * leaves room for the push constant variable of interlaced shaders.
 */
324  int in_vars[3 + MAX_DATA_BUFS + 1];
325
/* GLSL.std.450 extended instruction set import and the entry point */
326  int glfn;
327  int ep;
328
329  /* Types */
330  int void_type;
331  int b_type;
332  int u32_type;
333  int i32_type;
334  int f32_type;
335  int void_fn_type;
336
337  /* Vector types */
338  int bvec2_type;
339  int u32vec2_type;
340  int i32vec2_type;
341
342  int u32vec3_type;
343
344  int u32vec4_type;
345  int f32vec4_type;
346  int f32mat4_type;
347
348  /* Constants */
/* u32_p / f32_p are OpUndef values; u32_cid[i] is the u32 constant i */
349  int u32_p;
350  int f32_p;
351  int f32_0;
352  int u32_cid[5];
353
/* Per-operation constants, appended in operation order */
354  int const_ids[128];
355  int nb_const_ids;
356
/*
 * For each SWS_OP_LINEAR in bitexact mode: offset of the space reserved
 * for NoContraction decorations and the number of decorations to write.
 */
357  int linear_deco_off[16];
358  int linear_deco_ops[16];
359  int nb_linear_ops;
360
361  struct DitherData dither[MAX_DITHER_BUFS];
/* Uniform pointer-to-float type; also used to address filter weights */
362  int dither_ptr_elem_id;
363  int nb_dither_bufs;
364
365  struct FilterData filt[MAX_FILT_BUFS];
/* Uniform pointer-to-int type used to address filter offsets */
366  int filt_o_ptr_id;
367  int nb_filter_bufs;
368
369  int out_img_type;
370  int out_img_array_id;
371
372  int in_img_type;
373  int in_img_array_id;
374
375  /* Pointer types for images */
376  int u32vec3_tptr;
377  int out_img_tptr;
378  int out_img_sptr;
379
380  int in_img_tptr;
381  int in_img_sptr;
382
383  /* Interlaced handling */
384  int interlaced;
385  int push_const_struct_id;
386  int push_const_ptr_id;
387  int push_const_elem_ptr_id;
388  int push_const_var_id;
389  int field_i32;
390 } SPIRVIDs;
391 
392 /* Section 1: Function to define all shader header data, and decorations */
/*
 * Emits, in this order: capabilities, the GLSL.std.450 import, the memory
 * model, the "main" compute entry point with its interface variables and
 * local size, and the decorations for the push constant block, the built-in
 * invocation id, the image arrays and every dither/filter buffer.
 * The ids being decorated must already have been allocated by the caller;
 * their definitions are emitted by later sections.
 *
 * With SWS_BITEXACT set, additionally reserves space for the NoContraction
 * decorations of every SWS_OP_LINEAR and records it in id->linear_deco_off /
 * id->linear_deco_ops for insert_bitexact_linear() to fill in.
 */
393 static void define_shader_header(SwsContext *sws, FFVulkanShader *shd,
394  const SwsOpList *ops, SPICtx *spi, SPIRVIDs *id)
395 {
396  spi_OpCapability(spi, SpvCapabilityShader); /* Shader type */
397
398  /* Declare required capabilities */
399  spi_OpCapability(spi, SpvCapabilityInt16);
400  spi_OpCapability(spi, SpvCapabilityInt8);
401  spi_OpCapability(spi, SpvCapabilityImageQuery);
402  spi_OpCapability(spi, SpvCapabilityStorageImageReadWithoutFormat);
403  spi_OpCapability(spi, SpvCapabilityStorageImageWriteWithoutFormat);
404  spi_OpCapability(spi, SpvCapabilityStorageBuffer8BitAccess);
405  /* Import the GLSL set of functions (used for min/max) */
406  id->glfn = spi_OpExtInstImport(spi, "GLSL.std.450");
407
408  /* Next section starts here */
409  spi_OpMemoryModel(spi, SpvAddressingModelLogical, SpvMemoryModelGLSL450);
410
411  /* Entrypoint */
/* Interface: invocation id + 2 image arrays + data buffers (+ push constant) */
412  id->ep = spi_OpEntryPoint(spi, SpvExecutionModelGLCompute, "main",
413  id->in_vars,
414  3 + id->nb_dither_bufs + id->nb_filter_bufs +
415  (id->interlaced ? 1 : 0));
416  spi_OpExecutionMode(spi, id->ep, SpvExecutionModeLocalSize,
417  shd->lg_size, 3);
418
/* Push constant block holding a single member at offset 0 */
419  if (id->interlaced) {
420  spi_OpDecorate(spi, id->push_const_struct_id, SpvDecorationBlock);
421  spi_OpMemberDecorate(spi, id->push_const_struct_id, 0,
422  SpvDecorationOffset, 0);
423  }
424
425  /* gl_GlobalInvocationID descriptor decorations */
426  spi_OpDecorate(spi, id->in_vars[0], SpvDecorationBuiltIn,
427  SpvBuiltInGlobalInvocationId);
428
429  /* Input image descriptor decorations */
430  spi_OpDecorate(spi, id->in_vars[1], SpvDecorationNonWritable);
431  spi_OpDecorate(spi, id->in_vars[1], SpvDecorationDescriptorSet, 0);
432  spi_OpDecorate(spi, id->in_vars[1], SpvDecorationBinding, 0);
433
434  /* Output image descriptor decorations */
435  spi_OpDecorate(spi, id->in_vars[2], SpvDecorationNonReadable);
436  spi_OpDecorate(spi, id->in_vars[2], SpvDecorationDescriptorSet, 0);
437  spi_OpDecorate(spi, id->in_vars[2], SpvDecorationBinding, 1);
438
/* Dither buffers: tightly packed float[size][size] in descriptor set 1 */
439  for (int i = 0; i < id->nb_dither_bufs; i++) {
440  spi_OpDecorate(spi, id->dither[i].arr_1d_id, SpvDecorationArrayStride,
441  sizeof(float));
442  spi_OpDecorate(spi, id->dither[i].arr_2d_id, SpvDecorationArrayStride,
443  id->dither[i].size*sizeof(float));
444  spi_OpDecorate(spi, id->dither[i].struct_id, SpvDecorationBlock);
445  spi_OpMemberDecorate(spi, id->dither[i].struct_id, 0, SpvDecorationOffset, 0);
446  spi_OpDecorate(spi, id->dither[i].id, SpvDecorationDescriptorSet, 1);
447  spi_OpDecorate(spi, id->dither[i].id, SpvDecorationBinding,
448  id->dither[i].binding);
449  }
450
/*
 * Filter buffers: weights at offset 0, offsets array placed right after
 * num_weights floats, matching the layout written by create_filter_buf().
 */
451  for (int i = 0; i < id->nb_filter_bufs; i++) {
452  struct FilterData *f = &id->filt[i];
453  spi_OpDecorate(spi, f->arr_w_in_id, SpvDecorationArrayStride,
454  sizeof(float));
455  spi_OpDecorate(spi, f->arr_w_out_id, SpvDecorationArrayStride,
456  f->filter_size*sizeof(float));
457  spi_OpDecorate(spi, f->arr_o_id, SpvDecorationArrayStride,
458  sizeof(int32_t));
459  spi_OpDecorate(spi, f->struct_id, SpvDecorationBlock);
460  spi_OpMemberDecorate(spi, f->struct_id, 0, SpvDecorationOffset, 0);
461  spi_OpMemberDecorate(spi, f->struct_id, 1, SpvDecorationOffset,
462  f->num_weights*sizeof(float));
463  spi_OpDecorate(spi, f->id, SpvDecorationDescriptorSet, 1);
464  spi_OpDecorate(spi, f->id, SpvDecorationBinding, f->binding);
465  }
466
/* Nothing further is needed unless bit-exact output was requested */
467  if (!(sws->flags & SWS_BITEXACT))
468  return;
469
470  /* All linear arithmetic ops must be decorated with NoContraction */
471  for (int n = 0; n < ops->num_ops; n++) {
472  const SwsOp *op = &ops->ops[n];
473  if (op->op != SWS_OP_LINEAR)
474  continue;
475  av_assert0((id->nb_linear_ops + 1) <= FF_ARRAY_ELEMS(id->linear_deco_off));
476
/*
 * Count the multiplies and adds insert_bitexact_linear() will emit for
 * this matrix; the conditions here mirror the ones used there.
 */
477  int nb_ops = 0;
478  for (int j = 0; j < 4; j++) {
479  nb_ops += !!op->lin.m[j][0].num;
480  nb_ops += op->lin.m[j][0].num && op->lin.m[j][4].num;
481  for (int i = 1; i < 4; i++) {
482  nb_ops += !!op->lin.m[j][i].num;
483  nb_ops += op->lin.m[j][i].num &&
484  (op->lin.m[j][0].num || op->lin.m[j][4].num);
485  }
486  }
487
/* NOTE(review): presumably 3 words of 4 bytes per OpDecorate - confirm */
488  id->linear_deco_off[id->nb_linear_ops] = spi_reserve(spi, nb_ops*4*3);
489  id->linear_deco_ops[id->nb_linear_ops] = nb_ops;
490  id->nb_linear_ops++;
491  }
492 }
493 
494 /* Section 2: Define all types and constants */
/*
 * Declares the scalar, vector and matrix types plus the shared constants,
 * then walks the operation list and appends the constants each operation
 * needs to id->const_ids, in operation order. Code emitting the operation
 * bodies must consume id->const_ids in exactly the same order.
 */
495 static void define_shader_consts(SwsContext *sws, const SwsOpList *ops,
496  SPICtx *spi, SPIRVIDs *id)
497 {
498  /* Define scalar types */
499  id->void_type = spi_OpTypeVoid(spi);
500  id->b_type = spi_OpTypeBool(spi);
501  int u32_type =
502  id->u32_type = spi_OpTypeInt(spi, 32, 0);
503  id->i32_type = spi_OpTypeInt(spi, 32, 1);
504  int f32_type =
505  id->f32_type = spi_OpTypeFloat(spi, 32);
506  id->void_fn_type = spi_OpTypeFunction(spi, id->void_type, NULL, 0);
507
508  /* Define vector types */
509  id->bvec2_type = spi_OpTypeVector(spi, id->b_type, 2);
510  id->u32vec2_type = spi_OpTypeVector(spi, u32_type, 2);
511  id->i32vec2_type = spi_OpTypeVector(spi, id->i32_type, 2);
512
513  id->u32vec3_type = spi_OpTypeVector(spi, u32_type, 3);
514
515  id->u32vec4_type = spi_OpTypeVector(spi, u32_type, 4);
516  id->f32vec4_type = spi_OpTypeVector(spi, f32_type, 4);
517  id->f32mat4_type = spi_OpTypeMatrix(spi, id->f32vec4_type, 4);
518
519  /* Constants */
520  id->u32_p = spi_OpUndef(spi, u32_type);
521  id->f32_p = spi_OpUndef(spi, f32_type);
522  id->f32_0 = spi_OpConstantFloat(spi, f32_type, 0);
523  for (int i = 0; i < 5; i++)
524  id->u32_cid[i] = spi_OpConstantUInt(spi, u32_type, i);
525
526  /* Operation constants */
527  id->nb_const_ids = 0;
528  for (int n = 0; n < ops->num_ops; n++) {
529  /* Make sure there's always enough space for the maximum number of
530  * constants a single operation needs (currently linear, 31 consts). */
/* NOTE(review): the SWS_OP_LINEAR case below appends 26 ids; 31 is a conservative bound */
531  av_assert0((id->nb_const_ids + 31) <= FF_ARRAY_ELEMS(id->const_ids));
532  const SwsOp *op = &ops->ops[n];
533  switch (op->op) {
534  case SWS_OP_CONVERT:
/* Expanding integer conversion: one uvec4 splat of the expansion factor */
535  if (ff_sws_pixel_type_is_int(op->convert.to) && op->convert.expand) {
536  AVRational m = ff_sws_pixel_expand(op->type, op->convert.to);
537  int tmp = spi_OpConstantUInt(spi, id->u32_type, m.num);
538  tmp = spi_OpConstantComposite(spi, id->u32vec4_type,
539  tmp, tmp, tmp, tmp);
540  id->const_ids[id->nb_const_ids++] = tmp;
541  }
542  break;
543  case SWS_OP_CLEAR:
/* One scalar per component selected by the clear mask */
544  for (int i = 0; i < 4; i++) {
545  if (!SWS_COMP_TEST(op->clear.mask, i))
546  continue;
547  AVRational cv = op->clear.value[i];
548  if (op->type == SWS_PIXEL_F32) {
549  float q = (float)cv.num/cv.den;
550  id->const_ids[id->nb_const_ids++] =
551  spi_OpConstantFloat(spi, f32_type, q);
552  } else {
553  av_assert0(cv.den == 1);
554  id->const_ids[id->nb_const_ids++] =
555  spi_OpConstantUInt(spi, u32_type, cv.num);
556  }
557  }
558  break;
559  case SWS_OP_LSHIFT:
560  case SWS_OP_RSHIFT: {
/* One uvec4 splat of the shift amount */
561  int tmp = spi_OpConstantUInt(spi, u32_type, op->shift.amount);
562  tmp = spi_OpConstantComposite(spi, id->u32vec4_type,
563  tmp, tmp, tmp, tmp);
564  id->const_ids[id->nb_const_ids++] = tmp;
565  break;
566  }
567  case SWS_OP_SCALE: {
/* One vec4 splat of the factor; integer types require a whole factor */
568  int tmp;
569  if (op->type == SWS_PIXEL_F32) {
570  float q = op->scale.factor.num/(float)op->scale.factor.den;
571  tmp = spi_OpConstantFloat(spi, f32_type, q);
572  tmp = spi_OpConstantComposite(spi, id->f32vec4_type,
573  tmp, tmp, tmp, tmp);
574  } else {
575  av_assert0(op->scale.factor.den == 1);
576  tmp = spi_OpConstantUInt(spi, u32_type, op->scale.factor.num);
577  tmp = spi_OpConstantComposite(spi, id->u32vec4_type,
578  tmp, tmp, tmp, tmp);
579  }
580  id->const_ids[id->nb_const_ids++] = tmp;
581  break;
582  }
583  case SWS_OP_MIN:
584  case SWS_OP_MAX:
/* One scalar per component; a zero denominator means no limit is stored */
585  for (int i = 0; i < 4; i++) {
586  int tmp;
587  AVRational cl = op->clamp.limit[i];
588  if (!op->clamp.limit[i].den) {
589  continue;
590  } else if (op->type == SWS_PIXEL_F32) {
591  float q = (float)cl.num/((float)cl.den);
592  tmp = spi_OpConstantFloat(spi, f32_type, q);
593  } else {
594  av_assert0(cl.den == 1);
595  tmp = spi_OpConstantUInt(spi, u32_type, cl.num);
596  }
597  id->const_ids[id->nb_const_ids++] = tmp;
598  }
599  break;
600  case SWS_OP_DITHER:
/* One u32 per component with a non-negative y offset */
601  for (int i = 0; i < 4; i++) {
602  if (op->dither.y_offset[i] < 0)
603  continue;
604  int tmp = spi_OpConstantUInt(spi, u32_type, op->dither.y_offset[i]);
605  id->const_ids[id->nb_const_ids++] = tmp;
606  }
607  break;
608  case SWS_OP_LINEAR: {
/*
 * Layout relative to the first id appended here:
 *   [5*i + 0..3]  four scalar coefficients of group i
 *   [5*i + 4]     vec4 built from those four scalars
 *   [20]          mat4 built from the four vec4s
 *   [21..24]      scalar offsets m[0..3][4]
 *   [25]          vec4 built from the four offsets
 * With SWS_BITEXACT group i holds row i (m[i][j]); otherwise it holds
 * column i (m[j][i]).
 */
609  int tmp;
610  float val;
611  for (int i = 0; i < 4; i++) {
612  for (int j = 0; j < 4; j++) {
613  int k = sws->flags & SWS_BITEXACT ? i : j;
614  int l = sws->flags & SWS_BITEXACT ? j : i;
615  val = op->lin.m[k][l].num/(float)op->lin.m[k][l].den;
616  id->const_ids[id->nb_const_ids++] =
617  spi_OpConstantFloat(spi, f32_type, val);
618  }
619  tmp = spi_OpConstantComposite(spi, id->f32vec4_type,
620  id->const_ids[id->nb_const_ids - 4],
621  id->const_ids[id->nb_const_ids - 3],
622  id->const_ids[id->nb_const_ids - 2],
623  id->const_ids[id->nb_const_ids - 1]);
624  id->const_ids[id->nb_const_ids++] = tmp;
625  }
626
/* The four indices below select the vec4 of groups 0..3 */
627  tmp = spi_OpConstantComposite(spi, id->f32mat4_type,
628  id->const_ids[id->nb_const_ids - 5*4 + 4],
629  id->const_ids[id->nb_const_ids - 5*3 + 4],
630  id->const_ids[id->nb_const_ids - 5*2 + 4],
631  id->const_ids[id->nb_const_ids - 5*1 + 4]);
632  id->const_ids[id->nb_const_ids++] = tmp;
633
634  for (int i = 0; i < 4; i++) {
635  val = op->lin.m[i][4].num/(float)op->lin.m[i][4].den;
636  id->const_ids[id->nb_const_ids++] =
637  spi_OpConstantFloat(spi, f32_type, val);
638  }
639
640  tmp = spi_OpConstantComposite(spi, id->f32vec4_type,
641  id->const_ids[id->nb_const_ids - 4],
642  id->const_ids[id->nb_const_ids - 3],
643  id->const_ids[id->nb_const_ids - 2],
644  id->const_ids[id->nb_const_ids - 1]);
645  id->const_ids[id->nb_const_ids++] = tmp;
646  break;
647  }
648  default:
649  break;
650  }
651  }
652 }
653 
654 /* Section 3: Define bindings */
/*
 * Emits the type and variable definitions behind the ids that
 * define_shader_header() decorated: dither buffers, filter buffers, the
 * input/output image arrays, the invocation id input and, for interlaced
 * shaders, the push constant block.
 *
 * in_img_count / out_img_count select the array length constant from
 * id->u32_cid, so both must be within 0..4.
 */
655 static void define_shader_bindings(const SwsOpList *ops, SPICtx *spi, SPIRVIDs *id,
656  int in_img_count, int out_img_count)
657 {
658  id->dither_ptr_elem_id = spi_OpTypePointer(spi, SpvStorageClassUniform,
659  id->f32_type);
660
/* Dither buffers: struct { float m[size][size]; } as a Uniform variable */
661  struct DitherData *dither = id->dither;
662  for (int i = 0; i < id->nb_dither_bufs; i++) {
663  int size_id = spi_OpConstantUInt(spi, id->u32_type, dither[i].size);
664  dither[i].mask_id = spi_OpConstantUInt(spi, id->u32_type, dither[i].size - 1);
665  spi_OpTypeArray(spi, id->f32_type, dither[i].arr_1d_id, size_id);
666  spi_OpTypeArray(spi, dither[i].arr_1d_id, dither[i].arr_2d_id, size_id);
667  spi_OpTypeStruct(spi, dither[i].struct_id, dither[i].arr_2d_id);
668  dither[i].struct_ptr_id = spi_OpTypePointer(spi, SpvStorageClassUniform,
669  dither[i].struct_id);
670  dither[i].id = spi_OpVariable(spi, dither[i].id, dither[i].struct_ptr_id,
671  SpvStorageClassUniform, 0);
672  }
673
674  /* Filter buffers: struct { float w[dst_size][filter_size]; int o[dst_size]; } */
675  id->filt_o_ptr_id = 0;
676  if (id->nb_filter_bufs)
677  id->filt_o_ptr_id = spi_OpTypePointer(spi, SpvStorageClassUniform,
678  id->i32_type);
679
680  for (int i = 0; i < id->nb_filter_bufs; i++) {
681  struct FilterData *f = &id->filt[i];
682  int fs_id = spi_OpConstantUInt(spi, id->u32_type, f->filter_size);
683  int ds_id = spi_OpConstantUInt(spi, id->u32_type, f->dst_size);
684
685  spi_OpTypeArray(spi, id->f32_type, f->arr_w_in_id, fs_id);
686  spi_OpTypeArray(spi, f->arr_w_in_id, f->arr_w_out_id, ds_id);
687  spi_OpTypeArray(spi, id->i32_type, f->arr_o_id, ds_id);
688  spi_OpTypeStruct(spi, f->struct_id, f->arr_w_out_id, f->arr_o_id);
689  f->struct_ptr_id = spi_OpTypePointer(spi, SpvStorageClassUniform,
690  f->struct_id);
691  f->id = spi_OpVariable(spi, f->id, f->struct_ptr_id,
692  SpvStorageClassUniform, 0);
693
694  /* Signed tap-index constants 0..filter_size-1 (consecutive <id>s) */
/* Only the first id is stored; read_filtered() addresses tap t as base + t */
695  f->tap_const_base = spi_OpConstantInt(spi, id->i32_type, 0);
696  for (int t = 1; t < f->filter_size; t++)
697  spi_OpConstantInt(spi, id->i32_type, t);
698  }
699
700  const SwsOp *op_w = ff_sws_op_list_output(ops);
701  const SwsOp *op_r = ff_sws_op_list_input(ops);
702
703  /* Define image types for descriptors */
704  id->out_img_type = spi_OpTypeImage(spi,
705  op_w->type == SWS_PIXEL_F32 ?
706  id->f32_type : id->u32_type,
707  2, 0, 0, 0, 2, SpvImageFormatUnknown);
708  id->out_img_array_id = spi_OpTypeArray(spi, id->out_img_type, spi_get_id(spi),
709  id->u32_cid[out_img_count]);
710
/* The input side is only defined when the list has an input operation */
711  id->in_img_type = 0;
712  id->in_img_array_id = 0;
713  if (op_r) {
714  /* If the formats match, we have to reuse the types due to SPIR-V not
715  * allowing redundant type defines */
716  int match = ((op_w->type == SWS_PIXEL_F32) ==
717  (op_r->type == SWS_PIXEL_F32));
718  id->in_img_type = match ? id->out_img_type :
719  spi_OpTypeImage(spi,
720  op_r->type == SWS_PIXEL_F32 ?
721  id->f32_type : id->u32_type,
722  2, 0, 0, 0, 2, SpvImageFormatUnknown);
723  id->in_img_array_id = spi_OpTypeArray(spi, id->in_img_type, spi_get_id(spi),
724  id->u32_cid[in_img_count]);
725  }
726
727  /* Pointer types for images */
728  id->u32vec3_tptr = spi_OpTypePointer(spi, SpvStorageClassInput,
729  id->u32vec3_type);
730  id->out_img_tptr = spi_OpTypePointer(spi, SpvStorageClassUniformConstant,
731  id->out_img_array_id);
732  id->out_img_sptr = spi_OpTypePointer(spi, SpvStorageClassUniformConstant,
733  id->out_img_type);
734
735  id->in_img_tptr = 0;
736  id->in_img_sptr = 0;
737  if (op_r) {
738  id->in_img_tptr= spi_OpTypePointer(spi, SpvStorageClassUniformConstant,
739  id->in_img_array_id);
740  id->in_img_sptr= spi_OpTypePointer(spi, SpvStorageClassUniformConstant,
741  id->in_img_type);
742  }
743
744  /* Define inputs */
745  spi_OpVariable(spi, id->in_vars[0], id->u32vec3_tptr,
746  SpvStorageClassInput, 0);
/*
 * NOTE(review): in_vars[1] is only defined when an input operation exists,
 * while define_shader_header() decorates it and lists it in the entry point
 * unconditionally - confirm a list without an input op is handled.
 */
747  if (op_r) {
748  spi_OpVariable(spi, id->in_vars[1], id->in_img_tptr,
749  SpvStorageClassUniformConstant, 0);
750  }
751  spi_OpVariable(spi, id->in_vars[2], id->out_img_tptr,
752  SpvStorageClassUniformConstant, 0);
753
/* Push constant block: struct { uint; } */
754  if (id->interlaced) {
755  spi_OpTypeStruct(spi, id->push_const_struct_id, id->u32_type);
756  id->push_const_ptr_id = spi_OpTypePointer(spi, SpvStorageClassPushConstant,
757  id->push_const_struct_id);
758  id->push_const_elem_ptr_id = spi_OpTypePointer(spi, SpvStorageClassPushConstant,
759  id->u32_type);
760  spi_OpVariable(spi, id->push_const_var_id, id->push_const_ptr_id,
761  SpvStorageClassPushConstant, 0);
762  }
763 }
764 
765 static int insert_vmat_linear(const SwsOp *op, SPICtx *spi, SPIRVIDs *id,
766  int data, int const_off)
767 {
768  data = spi_OpMatrixTimesVector(spi, id->f32vec4_type,
769  id->const_ids[const_off + 4*5],
770  data);
771  return spi_OpFAdd(spi, id->f32vec4_type,
772  id->const_ids[const_off + 4*5 + 1 + 4], data);
773 }
774 
/*
 * Emits a linear operation as individual scalar multiplies and adds, each
 * decorated with NoContraction, and returns the id of the rebuilt vector.
 *
 * linear_ops_idx selects the decoration space reserved for this operation by
 * define_shader_header(); const_off is the index of the operation's first
 * entry in id->const_ids (scalar coefficient [j][i] at const_off + j*5 + i,
 * scalar offset j at const_off + 21 + j).
 *
 * The multiplies/adds below must be emitted in exactly this order and count:
 * the decorations are written for consecutive ids starting at spi->id.
 */
775 static int insert_bitexact_linear(const SwsOp *op, SPICtx *spi, SPIRVIDs *id,
776  int data, int linear_ops_idx, int const_off)
777 {
778  int type_s = op->type == SWS_PIXEL_F32 ? id->f32_type : id->u32_type;
779  int type_v = op->type == SWS_PIXEL_F32 ? id->f32vec4_type : id->u32vec4_type;
780
/* Split the input vector into its four components */
781  int tmp[4];
782  tmp[0] = spi_OpCompositeExtract(spi, type_s, data, 0);
783  tmp[1] = spi_OpCompositeExtract(spi, type_s, data, 1);
784  tmp[2] = spi_OpCompositeExtract(spi, type_s, data, 2);
785  tmp[3] = spi_OpCompositeExtract(spi, type_s, data, 3);
786
/*
 * Temporarily rewind the write offset into the reserved header space and
 * emit the decorations there, then restore the current offset.
 * NOTE(review): presumably spi->id is the next id to be handed out, so
 * spi->id + i names the i-th arithmetic result emitted below - confirm.
 */
787  int off = spi_reserve(spi, 0); /* Current offset */
788  spi->off = id->linear_deco_off[linear_ops_idx];
789  for (int i = 0; i < id->linear_deco_ops[linear_ops_idx]; i++)
790  spi_OpDecorate(spi, spi->id + i, SpvDecorationNoContraction);
791  spi->off = off;
792
793  int res[4];
794  for (int j = 0; j < 4; j++) {
/* A row with no non-zero term yields the zero constant */
795  res[j] = op->type == SWS_PIXEL_F32 ? id->f32_0 : id->u32_cid[0];
796  if (op->lin.m[j][0].num)
797  res[j] = spi_OpFMul(spi, type_s, tmp[0],
798  id->const_ids[const_off + j*5 + 0]);
799
/* Offset term: added to the first product, or used on its own */
800  if (op->lin.m[j][0].num && op->lin.m[j][4].num)
801  res[j] = spi_OpFAdd(spi, type_s,
802  id->const_ids[const_off + 4*5 + 1 + j], res[j]);
803  else if (op->lin.m[j][4].num)
804  res[j] = id->const_ids[const_off + 4*5 + 1 + j];
805
/* Remaining columns; zero coefficients emit nothing */
806  for (int i = 1; i < 4; i++) {
807  if (!op->lin.m[j][i].num)
808  continue;
809
810  int v = spi_OpFMul(spi, type_s, tmp[i],
811  id->const_ids[const_off + j*5 + i]);
812  if (op->lin.m[j][0].num || op->lin.m[j][4].num)
813  res[j] = spi_OpFAdd(spi, type_s, res[j], v);
814  else
815  res[j] = v;
816  }
817  }
818
819  return spi_OpCompositeConstruct(spi, type_v,
820  res[0], res[1], res[2], res[3]);
821 }
822 
/*
 * Emits a filtered image read: for the current invocation, loads the
 * kernel's start offset and its filter_size weights from filter buffer `f`
 * and accumulates weight * pixel over all taps. The tap loop is unrolled at
 * generation time.
 *
 * in_img holds the ids of the loaded input images, gid the id of the
 * unsigned invocation position and gi2 the id of its signed 2-component
 * form. Returns the id of a float vec4 with the accumulated result; for
 * planar reads, components beyond op->rw.elems stay at the zero constant.
 */
823 static int read_filtered(SPICtx *spi, SPIRVIDs *id, const SwsOpList *ops,
824  const SwsOp *op, const struct FilterData *f,
825  const int *in_img, int gid, int gi2)
826 {
827  const int is_h = f->filter == SWS_OP_FILTER_H;
828  const int src_interlaced = ops->src.interlaced;
829
830  const int src_float = op->type == SWS_PIXEL_F32;
831  const int read_vtype = src_float ? id->f32vec4_type : id->u32vec4_type;
832
833  /* Buffer array index along the filtered axis: pos.x (H) or pos.y (V) */
834  int axis = spi_OpCompositeExtract(spi, id->u32_type, gid, is_h ? 0 : 1);
835
836  /* int o = filter_o[axis]; */
837  int o_ptr = spi_OpAccessChain(spi, id->filt_o_ptr_id, f->id,
838  id->u32_cid[1], axis);
839  int o = spi_OpLoad(spi, id->i32_type, o_ptr, SpvMemoryAccessMaskNone, 0);
840
841  /* Signed pixel position, for the non-filtered coordinate axis */
842  int pos_x = spi_OpCompositeExtract(spi, id->i32_type, gi2, 0);
843  int pos_y = spi_OpCompositeExtract(spi, id->i32_type, gi2, 1);
844
845  /* For interlaced horizontal filtering, the y coordinate of every tap is
846  * the (constant) destination y mapped into the source image. */
/* Row mapping used here and below: row * 2 + field */
847  if (src_interlaced && is_h) {
848  pos_y = spi_OpShiftLeftLogical(spi, id->i32_type, pos_y, id->u32_cid[1]);
849  pos_y = spi_OpIAdd(spi, id->i32_type, pos_y, id->field_i32);
850  }
851
852  /* Accumulators, initialized to zero */
853  int acc_s[4] = { id->f32_0, id->f32_0, id->f32_0, id->f32_0 };
854  int acc_v = id->f32_0;
855  if (op->rw.packed)
856  acc_v = spi_OpCompositeConstruct(spi, id->f32vec4_type,
857  id->f32_0, id->f32_0,
858  id->f32_0, id->f32_0);
859
860  for (int t = 0; t < f->filter_size; t++) {
861  /* float w = filter_w[axis][t]; */
/* dither_ptr_elem_id is the generic Uniform pointer-to-float type */
862  int w_ptr = spi_OpAccessChain(spi, id->dither_ptr_elem_id, f->id,
863  id->u32_cid[0], axis,
864  f->tap_const_base + t);
865  int w = spi_OpLoad(spi, id->f32_type, w_ptr,
866  SpvMemoryAccessMaskNone, 0);
867
868  /* Source coordinate, filtered axis offset by the tap index */
869  int c = t ? spi_OpIAdd(spi, id->i32_type, o, f->tap_const_base + t) : o;
870  /* For interlaced vertical filtering, the per-tap source row is
871  * field-local; map it to the actual image row. */
872  if (src_interlaced && !is_h) {
873  c = spi_OpShiftLeftLogical(spi, id->i32_type, c, id->u32_cid[1]);
874  c = spi_OpIAdd(spi, id->i32_type, c, id->field_i32);
875  }
876  int coord = is_h ?
877  spi_OpCompositeConstruct(spi, id->i32vec2_type, c, pos_y) :
878  spi_OpCompositeConstruct(spi, id->i32vec2_type, pos_x, c);
879
/* Packed: one read of all components; planar: one read per plane */
880  if (op->rw.packed) {
881  int px = spi_OpImageRead(spi, read_vtype,
882  in_img[ops->plane_src[0]], coord,
883  SpvImageOperandsMaskNone);
884  if (!src_float)
885  px = spi_OpConvertUToF(spi, id->f32vec4_type, px);
886  px = spi_OpVectorTimesScalar(spi, id->f32vec4_type, px, w);
887  acc_v = spi_OpFAdd(spi, id->f32vec4_type, acc_v, px);
888  } else {
889  for (int e = 0; e < op->rw.elems; e++) {
890  int px = spi_OpImageRead(spi, read_vtype,
891  in_img[ops->plane_src[e]], coord,
892  SpvImageOperandsMaskNone);
/* Only the first component of each plane read is used */
893  if (src_float) {
894  px = spi_OpCompositeExtract(spi, id->f32_type, px, 0);
895  } else {
896  px = spi_OpCompositeExtract(spi, id->u32_type, px, 0);
897  px = spi_OpConvertUToF(spi, id->f32_type, px);
898  }
899  px = spi_OpFMul(spi, id->f32_type, w, px);
900  acc_s[e] = spi_OpFAdd(spi, id->f32_type, acc_s[e], px);
901  }
902  }
903  }
904
905  if (op->rw.packed)
906  return acc_v;
907  return spi_OpCompositeConstruct(spi, id->f32vec4_type,
908  acc_s[0], acc_s[1], acc_s[2], acc_s[3]);
909 }
910 
/**
 * Assemble a SPIR-V compute shader implementing the given operation list
 * and link it into shd.
 *
 * The module is written with the spi_* helpers into a 16 KiB stack buffer
 * and handed to ff_vk_shader_link() with the entry point name "main".
 * Descriptor set 0 holds two storage-image array bindings of 4 elements
 * each; descriptor set 1 (only added when needed) holds one uniform buffer
 * per dither op and per filtered read, in op order. When either the source
 * or the destination is interlaced, a single uint32_t push constant is
 * registered and read by the shader as the field index.
 *
 * Side effects on p: sets p->interlaced, p->dst_rep and (when the list has
 * an input op) p->src_rep; create_bufs() is called with p as well.
 *
 * @return the result of ff_vk_shader_link() on success; the error from
 *         create_bufs(); AVERROR(ENOTSUP) for fractional reads/writes,
 *         filtered writes, more dither/filter buffers than
 *         MAX_DITHER_BUFS/MAX_FILT_BUFS, or an op not handled by the
 *         switch below; AVERROR_INVALIDDATA if spi_end() reports a
 *         negative length.
 */
911 static int add_ops_spirv(SwsContext *sws, VulkanPriv *p, FFVulkanOpsCtx *s,
912  const SwsOpList *ops, FFVulkanShader *shd)
913 {
914  uint8_t spvbuf[1024*16];
915  SPICtx spi_context = { 0 }, *spi = &spi_context;
916  SPIRVIDs spid_data = { 0 }, *id = &spid_data;
917  spi_init(spi, spvbuf, sizeof(spvbuf));
918 
919  id->interlaced = ops->src.interlaced || ops->dst.interlaced;
920  p->interlaced = id->interlaced;
921 
/* 32x32x1 is passed as the local workgroup size */
922  ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, NULL,
923  (uint32_t []) { 32, 32, 1 }, 0);
924  shd->precompiled = 0;
925 
/* One uint32_t push constant; it is loaded further down as the field index */
926  if (id->interlaced)
927  ff_vk_shader_add_push_const(shd, 0, sizeof(uint32_t),
928  VK_SHADER_STAGE_COMPUTE_BIT);
929 
930  /* Image ops, to determine types */
931  const SwsOp *op_w = ff_sws_op_list_output(ops);
932  int out_img_count = op_w->rw.packed ? 1 : op_w->rw.elems;
933  p->dst_rep = op_w->type == SWS_PIXEL_F32 ? FF_VK_REP_FLOAT : FF_VK_REP_UINT;
934 
/* Unlike the output op, the input op may be absent, hence the NULL checks */
935  const SwsOp *op_r = ff_sws_op_list_input(ops);
936  int in_img_count = op_r ? op_r->rw.packed ? 1 : op_r->rw.elems : 0;
937  if (op_r)
938  p->src_rep = op_r->type == SWS_PIXEL_F32 ? FF_VK_REP_FLOAT : FF_VK_REP_UINT;
939 
/* NOTE(review): the line that declares desc_set (listing line 940) is not
 * present in this listing. The two initializers below are the storage-image
 * array bindings of descriptor set 0 (presumably source and destination
 * images) - confirm the declaration and array size against the real file. */
941  {
942  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
943  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
944  .elems = 4,
945  },
946  {
947  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
948  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
949  .elems = 4,
950  },
951  };
952  ff_vk_shader_add_descriptor_set(&s->vkctx, shd, desc_set, 2, 0, 0);
953 
954  /* Create dither buffers */
955  int err = create_bufs(s, p, ops);
956  if (err < 0)
957  return err;
958 
959  /* Entrypoint inputs; gl_GlobalInvocationID, input and output images, dither */
960  id->in_vars[0] = spi_get_id(spi);
961  id->in_vars[1] = spi_get_id(spi);
962  id->in_vars[2] = spi_get_id(spi);
963 
964  /* Create dither and filter buffer descriptor set. Both are collected in
965  * op order, so the bindings match the buffer order from create_bufs().*/
966  id->nb_dither_bufs = 0;
967  id->nb_filter_bufs = 0;
968  int nb_data_bufs = 0;
969  for (int n = 0; n < ops->num_ops; n++) {
970  const SwsOp *op = &ops->ops[n];
971  int var_id = 0;
972 
973  if (op->op == SWS_OP_DITHER) {
974  if (id->nb_dither_bufs >= MAX_DITHER_BUFS)
975  return AVERROR(ENOTSUP);
976  struct DitherData *d = &id->dither[id->nb_dither_bufs++];
977  d->size = 1 << op->dither.size_log2;
978  d->arr_1d_id = spi_get_id(spi);
979  d->arr_2d_id = spi_get_id(spi);
980  d->struct_id = spi_get_id(spi);
981  d->id = spi_get_id(spi);
982  d->binding = nb_data_bufs;
983  var_id = d->id;
984  } else if (op->op == SWS_OP_READ && op->rw.filter) {
985  if (id->nb_filter_bufs >= MAX_FILT_BUFS)
986  return AVERROR(ENOTSUP);
987  const SwsFilterWeights *wd = op->rw.kernel;
988  struct FilterData *f = &id->filt[id->nb_filter_bufs++];
989  f->filter = op->rw.filter;
990  f->filter_size = wd->filter_size;
991  f->dst_size = wd->dst_size;
992  f->num_weights = wd->num_weights;
993  f->arr_w_in_id = spi_get_id(spi);
994  f->arr_w_out_id = spi_get_id(spi);
995  f->arr_o_id = spi_get_id(spi);
996  f->struct_id = spi_get_id(spi);
997  f->id = spi_get_id(spi);
998  f->binding = nb_data_bufs;
999  var_id = f->id;
1000  } else {
1001  continue;
1002  }
1003 
/* desc_set is reused here: its first nb_data_bufs entries are overwritten
 * with the uniform-buffer bindings of descriptor set 1 */
1004  id->in_vars[3 + nb_data_bufs] = var_id;
1005  desc_set[nb_data_bufs++] = (FFVulkanDescriptorSetBinding) {
1006  .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1007  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1008  };
1009  }
1010  if (nb_data_bufs)
1011  ff_vk_shader_add_descriptor_set(&s->vkctx, shd, desc_set,
1012  nb_data_bufs, 1, 0);
1013 
/* The push constant variable goes after all data buffer variables in in_vars */
1014  if (id->interlaced) {
1015  id->push_const_struct_id = spi_get_id(spi);
1016  id->push_const_var_id = spi_get_id(spi);
1017  id->in_vars[3 + id->nb_dither_bufs + id->nb_filter_bufs] =
1018  id->push_const_var_id;
1019  }
1020 
1021  /* Define shader header sections */
1022  define_shader_header(sws, shd, ops, spi, id);
1023  define_shader_consts(sws, ops, spi, id);
1024  define_shader_bindings(ops, spi, id, in_img_count, out_img_count);
1025 
1026  /* Main function starts here */
1027  spi_OpFunction(spi, id->ep, id->void_type, 0, id->void_fn_type);
1028  spi_OpLabel(spi, spi_get_id(spi));
1029 
1030  /* Load input image handles */
1031  int in_img[4] = { 0 };
1032  for (int i = 0; i < in_img_count; i++) {
1033  /* Deref array and then the pointer */
1034  int img = spi_OpAccessChain(spi, id->in_img_sptr,
1035  id->in_vars[1], id->u32_cid[i]);
1036  in_img[i] = spi_OpLoad(spi, id->in_img_type, img,
1037  SpvMemoryAccessMaskNone, 0);
1038  }
1039 
1040  /* Load output image handles */
/* Only the first out_img_count entries are assigned; the rest stay unset */
1041  int out_img[4];
1042  for (int i = 0; i < out_img_count; i++) {
1043  int img = spi_OpAccessChain(spi, id->out_img_sptr,
1044  id->in_vars[2], id->u32_cid[i]);
1045  out_img[i] = spi_OpLoad(spi, id->out_img_type, img,
1046  SpvMemoryAccessMaskNone, 0);
1047  }
1048 
1049  /* Load gl_GlobalInvocationID */
1050  int gid = spi_OpLoad(spi, id->u32vec3_type, id->in_vars[0],
1051  SpvMemoryAccessMaskNone, 0);
1052 
1053  /* ivec2(gl_GlobalInvocationID.xy) */
1054  gid = spi_OpVectorShuffle(spi, id->u32vec2_type, gid, gid, 0, 1);
1055  int gi2 = spi_OpBitcast(spi, id->i32vec2_type, gid);
1056 
1057  /* For interlaced sources/destinations the shader operates on field-local
1058  * coordinates, while images contain the full frame. Map the y axis to the
1059  * actual image row: image_y = field_y * 2 + field. */
1060  int dst_gid = gid, dst_gi2 = gi2;
1061  int src_gid = gid;
1062  if (id->interlaced) {
1063  int field_u32_ptr = spi_OpAccessChain(spi, id->push_const_elem_ptr_id,
1064  id->push_const_var_id,
1065  id->u32_cid[0]);
1066  int field_u32 = spi_OpLoad(spi, id->u32_type, field_u32_ptr,
1067  SpvMemoryAccessMaskNone, 0);
1068  id->field_i32 = spi_OpBitcast(spi, id->i32_type, field_u32);
1069 
/* The left shift by u32_cid[1] implements the "field_y * 2" above,
 * presuming u32_cid[n] is the constant n - TODO confirm */
1070  int img_y_i32 = spi_OpShiftLeftLogical(spi, id->i32_type,
1071  spi_OpCompositeExtract(spi, id->i32_type, gi2, 1),
1072  id->u32_cid[1]);
1073  img_y_i32 = spi_OpIAdd(spi, id->i32_type, img_y_i32, id->field_i32);
1074 
1075  int gi2_x = spi_OpCompositeExtract(spi, id->i32_type, gi2, 0);
1076  int mapped_gi2 = spi_OpCompositeConstruct(spi, id->i32vec2_type,
1077  gi2_x, img_y_i32);
1078  int mapped_gid = spi_OpBitcast(spi, id->u32vec2_type, mapped_gi2);
1079 
/* The mapping is applied independently per side: only the interlaced
 * side(s) use the frame-row coordinates */
1080  if (ops->src.interlaced)
1081  src_gid = mapped_gid;
1082  if (ops->dst.interlaced) {
1083  dst_gid = mapped_gid;
1084  dst_gi2 = mapped_gi2;
1085  }
1086  }
1087 
1088  /* imageSize(out_img[0]); */
1089  int img1_s = spi_OpImageQuerySize(spi, id->i32vec2_type, out_img[0]);
1090  int scmp = spi_OpSGreaterThanEqual(spi, id->bvec2_type, dst_gi2, img1_s);
1091  scmp = spi_OpAny(spi, id->b_type, scmp);
1092 
1093  /* if (out of bounds) return */
1094  int quit_label = spi_get_id(spi), merge_label = spi_get_id(spi);
1095  spi_OpSelectionMerge(spi, merge_label, SpvSelectionControlMaskNone);
1096  spi_OpBranchConditional(spi, scmp, quit_label, merge_label, 0);
1097 
1098  spi_OpLabel(spi, quit_label);
1099  spi_OpReturn(spi); /* Quit if out of bounds here */
1100  spi_OpLabel(spi, merge_label);
1101 
1102  /* Initialize main data state */
1103  int data;
1104  if (ops->ops[0].type == SWS_PIXEL_F32)
1105  data = spi_OpCompositeConstruct(spi, id->f32vec4_type,
1106  id->f32_p, id->f32_p,
1107  id->f32_p, id->f32_p);
1108  else
1109  data = spi_OpCompositeConstruct(spi, id->u32vec4_type,
1110  id->u32_p, id->u32_p,
1111  id->u32_p, id->u32_p);
1112 
1113  /* Keep track of which constant/buffer to use */
1114  int nb_const_ids = 0;
1115  int nb_dither_bufs = 0;
1116  int nb_linear_ops = 0;
1117  int nb_filter_used = 0;
1118 
1119  /* Operations */
1120  for (int n = 0; n < ops->num_ops; n++) {
1121  const SwsOp *op = &ops->ops[n];
/* For a convert op the working type is the conversion target */
1122  SwsPixelType cur_type = op->op == SWS_OP_CONVERT ?
1123  op->convert.to : op->type;
1124  int type_v = cur_type == SWS_PIXEL_F32 ?
1125  id->f32vec4_type : id->u32vec4_type;
1126  int type_s = cur_type == SWS_PIXEL_F32 ?
1127  id->f32_type : id->u32_type;
1128  int uid = cur_type == SWS_PIXEL_F32 ?
1129  id->f32_p : id->u32_p;
1130 
1131  switch (op->op) {
1132  case SWS_OP_READ:
1133  if (op->rw.frac) {
1134  return AVERROR(ENOTSUP);
1135  } else if (op->rw.filter) {
1136  data = read_filtered(spi, id, ops, op,
1137  &id->filt[nb_filter_used++],
1138  in_img, gid, gi2);
1139  } else if (op->rw.packed) {
1140  data = spi_OpImageRead(spi, type_v, in_img[ops->plane_src[0]],
1141  src_gid, SpvImageOperandsMaskNone);
1142  } else {
/* Planar read: take component 0 of each plane; unread lanes keep uid */
1143  int tmp[4] = { uid, uid, uid, uid };
1144  for (int i = 0; i < op->rw.elems; i++) {
1145  tmp[i] = spi_OpImageRead(spi, type_v,
1146  in_img[ops->plane_src[i]], src_gid,
1147  SpvImageOperandsMaskNone);
1148  tmp[i] = spi_OpCompositeExtract(spi, type_s, tmp[i], 0);
1149  }
1150  data = spi_OpCompositeConstruct(spi, type_v,
1151  tmp[0], tmp[1], tmp[2], tmp[3]);
1152  }
1153  break;
1154  case SWS_OP_WRITE:
1155  if (op->rw.frac || op->rw.filter) {
1156  return AVERROR(ENOTSUP);
1157  } else if (op->rw.packed) {
1158  spi_OpImageWrite(spi, out_img[ops->plane_dst[0]], dst_gid, data,
1159  SpvImageOperandsMaskNone);
1160  } else {
/* Planar write: component i is splatted to a vec4 and stored to its plane */
1161  for (int i = 0; i < op->rw.elems; i++) {
1162  int tmp = spi_OpCompositeExtract(spi, type_s, data, i);
1163  tmp = spi_OpCompositeConstruct(spi, type_v, tmp, tmp, tmp, tmp);
1164  spi_OpImageWrite(spi, out_img[ops->plane_dst[i]], dst_gid, tmp,
1165  SpvImageOperandsMaskNone);
1166  }
1167  }
1168  break;
1169  case SWS_OP_CLEAR:
/* Components whose clear value has a zero denominator are left untouched */
1170  for (int i = 0; i < 4; i++) {
1171  if (!op->clear.value[i].den)
1172  continue;
1173  data = spi_OpCompositeInsert(spi, type_v,
1174  id->const_ids[nb_const_ids++],
1175  data, i);
1176  }
1177  break;
1178  case SWS_OP_SWIZZLE:
1179  data = spi_OpVectorShuffle(spi, type_v, data, data,
1180  op->swizzle.in[0],
1181  op->swizzle.in[1],
1182  op->swizzle.in[2],
1183  op->swizzle.in[3]);
1184  break;
1185  case SWS_OP_CONVERT:
/* Integer expansion multiplies by a constant; otherwise only float<->uint
 * transitions emit an instruction, anything else leaves data unchanged */
1186  if (ff_sws_pixel_type_is_int(cur_type) && op->convert.expand)
1187  data = spi_OpIMul(spi, type_v, data, id->const_ids[nb_const_ids++]);
1188  else if (op->type == SWS_PIXEL_F32 && type_s == id->u32_type)
1189  data = spi_OpConvertFToU(spi, type_v, data);
1190  else if (op->type != SWS_PIXEL_F32 && type_s == id->f32_type)
1191  data = spi_OpConvertUToF(spi, type_v, data);
1192  break;
1193  case SWS_OP_LSHIFT:
1194  data = spi_OpShiftLeftLogical(spi, type_v, data,
1195  id->const_ids[nb_const_ids++]);
1196  break;
1197  case SWS_OP_RSHIFT:
1198  data = spi_OpShiftRightLogical(spi, type_v, data,
1199  id->const_ids[nb_const_ids++]);
1200  break;
1201  case SWS_OP_SCALE:
1202  if (op->type == SWS_PIXEL_F32)
1203  data = spi_OpFMul(spi, type_v, data,
1204  id->const_ids[nb_const_ids++]);
1205  else
1206  data = spi_OpIMul(spi, type_v, data,
1207  id->const_ids[nb_const_ids++]);
1208  break;
1209  case SWS_OP_MIN:
1210  case SWS_OP_MAX: {
/* Select the GLSL.std.450 instruction by element type and min/max */
1211  int t = op->type == SWS_PIXEL_F32 ?
1212  op->op == SWS_OP_MIN ? GLSLstd450FMin : GLSLstd450FMax :
1213  op->op == SWS_OP_MIN ? GLSLstd450UMin : GLSLstd450UMax;
1214  for (int i = 0; i < 4; i++) {
1215  if (!op->clamp.limit[i].den)
1216  continue;
1217  int tmp = spi_OpCompositeExtract(spi, type_s, data, i);
1218  tmp = spi_OpExtInst(spi, type_s, id->glfn, t,
1219  tmp, id->const_ids[nb_const_ids++]);
1220  data = spi_OpCompositeInsert(spi, type_v, tmp, data, i);
1221  }
1222  break;
1223  }
1224  case SWS_OP_DITHER: {
/* Dither matrix lookup uses the unmapped invocation coordinates (gid),
 * masked by mask_id; components with a negative y_offset are skipped */
1225  int did = nb_dither_bufs++;
1226  int x_id = spi_OpCompositeExtract(spi, id->u32_type, gid, 0);
1227  int y_pos = spi_OpCompositeExtract(spi, id->u32_type, gid, 1);
1228  x_id = spi_OpBitwiseAnd(spi, id->u32_type, x_id,
1229  id->dither[did].mask_id);
1230  for (int i = 0; i < 4; i++) {
1231  if (op->dither.y_offset[i] < 0)
1232  continue;
1233 
1234  int y_id = spi_OpIAdd(spi, id->u32_type, y_pos,
1235  id->const_ids[nb_const_ids++]);
1236  y_id = spi_OpBitwiseAnd(spi, id->u32_type, y_id,
1237  id->dither[did].mask_id);
1238 
1239  int ptr = spi_OpAccessChain(spi, id->dither_ptr_elem_id,
1240  id->dither[did].id, id->u32_cid[0],
1241  y_id, x_id);
1242  int val = spi_OpLoad(spi, id->f32_type, ptr,
1243  SpvMemoryAccessMaskNone, 0);
1244 
1245  int tmp = spi_OpCompositeExtract(spi, type_s, data, i);
1246  tmp = spi_OpFAdd(spi, type_s, tmp, val);
1247  data = spi_OpCompositeInsert(spi, type_v, tmp, data, i);
1248  }
1249  break;
1250  }
1251  case SWS_OP_LINEAR: {
1252  if (sws->flags & SWS_BITEXACT)
1253  data = insert_bitexact_linear(op, spi, id, data, nb_linear_ops, nb_const_ids);
1254  else
1255  data = insert_vmat_linear(op, spi, id, data, nb_const_ids);
1256  nb_linear_ops++;
/* Every linear op advances the constant cursor by 5*5 + 1 slots,
 * whichever of the two paths above was taken */
1257  nb_const_ids += 5*5 + 1;
1258  break;
1259  }
1260  case SWS_OP_UNPACK:
1261  if (ops->src.format == AV_PIX_FMT_X2BGR10)
1262  data = spi_OpVectorShuffle(spi, type_v, data, data, 3, 2, 1, 0);
1263  else
1264  data = spi_OpVectorShuffle(spi, type_v, data, data, 3, 0, 1, 2);
1265  break;
1266  case SWS_OP_PACK:
1267  if (ops->dst.format == AV_PIX_FMT_X2BGR10)
1268  data = spi_OpVectorShuffle(spi, type_v, data, data, 3, 2, 1, 0);
1269  else
1270  data = spi_OpVectorShuffle(spi, type_v, data, data, 1, 2, 3, 0);
1271  break;
1272  default:
1273  return AVERROR(ENOTSUP);
1274  }
1275  }
1276 
1277  /* Return and finalize */
1278  spi_OpReturn(spi);
1279  spi_OpFunctionEnd(spi);
1280 
/* A negative length from spi_end() is reported as invalid data */
1281  int len = spi_end(spi);
1282  if (len < 0)
1283  return AVERROR_INVALIDDATA;
1284 
1285  return ff_vk_shader_link(&s->vkctx, shd, spvbuf, len, "main");
1286 }
1287 #endif
1288 
1289 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
1290 static void add_desc_read_write(FFVulkanDescriptorSetBinding *out_desc,
1291  enum FFVkShaderRepFormat *out_rep,
1292  const SwsOp *op)
1293 {
1294  const char *img_type = op->type == SWS_PIXEL_F32 ? "rgba32f" :
1295  op->type == SWS_PIXEL_U32 ? "rgba32ui" :
1296  op->type == SWS_PIXEL_U16 ? "rgba16ui" :
1297  "rgba8ui";
1298 
1299  *out_desc = (FFVulkanDescriptorSetBinding) {
1300  .name = op->op == SWS_OP_WRITE ? "dst_img" : "src_img",
1301  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1302  .mem_layout = img_type,
1303  .mem_quali = op->op == SWS_OP_WRITE ? "writeonly" : "readonly",
1304  .dimensions = 2,
1305  .elems = 4,
1306  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1307  };
1308 
1309  *out_rep = op->type == SWS_PIXEL_F32 ? FF_VK_REP_FLOAT : FF_VK_REP_UINT;
1310 }
1311 
/* printf-style helpers for emitting a rational as a GLSL expression of the
 * form "(num/den<suffix>)". QTYPE expands to the three matching arguments;
 * the suffix is ".0f" when the variable cur_type at the expansion site is
 * SWS_PIXEL_F32 and empty otherwise, so a cur_type must be in scope
 * wherever QTYPE is used. */
1312 #define QSTR "(%i/%i%s)"
1313 #define QTYPE(Q) (Q).num, (Q).den, cur_type == SWS_PIXEL_F32 ? ".0f" : ""
1314 
1315 static void read_glsl(const SwsOpList *ops, const SwsOp *op, FFVulkanShader *shd,
1316  int idx, const char *type_name,
1317  const char *type_v, const char *type_s)
1318 {
1319  const SwsFilterWeights *wd = op->rw.kernel;
1320  const int interlaced = ops->src.interlaced;
1321  if (op->rw.filter) {
1322  const char *axis = op->rw.filter == SWS_OP_FILTER_H ? "pos.x" : "pos.y";
1323  const char *coord_x = op->rw.filter == SWS_OP_FILTER_H ? "o + i" : "pos.x";
1324  const char *coord_y;
1325  if (op->rw.filter == SWS_OP_FILTER_H)
1326  coord_y = interlaced ? "spos.y" : "pos.y";
1327  else
1328  coord_y = interlaced ? "((o + i) * 2 + int(params.field))" : "o + i";
1329  GLSLC(1, tmp = vec4(0); );
1330  av_bprintf(&shd->src, " int o = filter_o%i[%s];\n", idx, axis);
1331  av_bprintf(&shd->src, " for (int i = 0; i < %i; i++) {\n",
1332  wd->filter_size);
1333  av_bprintf(&shd->src, " float w = filter_w%i[%s][i];\n",
1334  idx, axis);
1335  if (op->rw.packed) {
1336  GLSLF(2, tmp += w * %s(imageLoad(src_img[%i], ivec2(%s, %s))); ,
1337  type_v, ops->plane_src[0], coord_x, coord_y);
1338  } else {
1339  for (int i = 0; i < op->rw.elems; i++)
1340  GLSLF(2,
1341  tmp.%c += w * %s(imageLoad(src_img[%i], ivec2(%s, %s))[0]); ,
1342  "xyzw"[i], type_s, ops->plane_src[i], coord_x, coord_y);
1343  }
1344  GLSLC(1, } );
1345  GLSLC(1, f32 = tmp; );
1346  } else {
1347  const char *src_pos = interlaced ? "spos" : "pos";
1348  if (op->rw.packed) {
1349  GLSLF(1, %s = %s(imageLoad(src_img[%i], %s)); ,
1350  type_name, type_v, ops->plane_src[0], src_pos);
1351  } else {
1352  for (int i = 0; i < op->rw.elems; i++)
1353  GLSLF(1, %s.%c = %s(imageLoad(src_img[%i], %s)[0]); ,
1354  type_name, "xyzw"[i], type_s, ops->plane_src[i], src_pos);
1355  }
1356  }
1357 }
1358 
1359 static int add_ops_glsl(SwsContext *sws, VulkanPriv *p, FFVulkanOpsCtx *s,
1360  const SwsOpList *ops, FFVulkanShader *shd)
1361 {
1362  int err;
1363  uint8_t *spv_data;
1364  size_t spv_len;
1365  void *spv_opaque = NULL;
1366  const int interlaced = ops->src.interlaced || ops->dst.interlaced;
1367 
1368  err = ff_vk_shader_init(&s->vkctx, shd, "sws_pass",
1369  VK_SHADER_STAGE_COMPUTE_BIT,
1370  NULL, 0, 32, 32, 1, 0);
1371  if (err < 0)
1372  return err;
1373 
1374  p->interlaced = interlaced;
1375  if (interlaced)
1376  ff_vk_shader_add_push_const(shd, 0, sizeof(uint32_t),
1377  VK_SHADER_STAGE_COMPUTE_BIT);
1378 
1379  int nb_desc = 0;
1381 
1382  const SwsOp *read = ff_sws_op_list_input(ops);
1383  const SwsOp *write = ff_sws_op_list_output(ops);
1384  if (read)
1385  add_desc_read_write(&buf_desc[nb_desc++], &p->src_rep, read);
1386  add_desc_read_write(&buf_desc[nb_desc++], &p->dst_rep, write);
1387  ff_vk_shader_add_descriptor_set(&s->vkctx, shd, buf_desc, nb_desc, 0, 0);
1388 
1389  err = create_bufs(s, p, ops);
1390  if (err < 0)
1391  return err;
1392 
1393  nb_desc = 0;
1394  char data_buf_name[MAX_DATA_BUFS][256];
1395  char data_str_name[MAX_DATA_BUFS][256];
1396  for (int n = 0; n < ops->num_ops; n++) {
1397  const SwsOp *op = &ops->ops[n];
1398  if (op->op == SWS_OP_DITHER) {
1399  int size = (1 << op->dither.size_log2);
1400  av_assert0(size < 8192);
1401  snprintf(data_buf_name[nb_desc], 256, "dither_buf%i", n);
1402  snprintf(data_str_name[nb_desc], 256, "float dither_mat%i[%i][%i];",
1403  n, size, size);
1404  buf_desc[nb_desc] = (FFVulkanDescriptorSetBinding) {
1405  .name = data_buf_name[nb_desc],
1406  .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1407  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1408  .mem_layout = "scalar",
1409  .buf_content = data_str_name[nb_desc],
1410  };
1411  nb_desc++;
1412  } else if (op->op == SWS_OP_FILTER_H || op->op == SWS_OP_FILTER_V ||
1413  ((op->op == SWS_OP_READ || op->op == SWS_OP_WRITE) &&
1414  op->rw.filter)) {
1415  const SwsFilterWeights *wd = (op->op == SWS_OP_READ ||
1416  op->op == SWS_OP_WRITE) ?
1417  op->rw.kernel : op->filter.kernel;
1418  snprintf(data_buf_name[nb_desc], 256, "filter_buf%i", n);
1419  snprintf(data_str_name[nb_desc], 256,
1420  "float filter_w%i[%i][%i];\n"
1421  " int filter_o%i[%i];",
1422  n, wd->dst_size, wd->filter_size,
1423  n, wd->dst_size);
1424  buf_desc[nb_desc] = (FFVulkanDescriptorSetBinding) {
1425  .name = data_buf_name[nb_desc],
1426  .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1427  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1428  .mem_layout = "scalar",
1429  .buf_content = data_str_name[nb_desc],
1430  };
1431  nb_desc++;
1432  }
1433  }
1434  if (nb_desc)
1436  nb_desc, 1, 0);
1437 
1438  if (interlaced) {
1439  GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
1440  GLSLC(1, uint field; );
1441  GLSLC(0, } params; );
1442  GLSLC(0, );
1443  }
1444 
1445  GLSLC(0, void main() );
1446  GLSLC(0, { );
1447  GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
1448  GLSLC(1, ivec2 size = imageSize(dst_img[0]); );
1449  if (ops->src.interlaced)
1450  GLSLC(1, ivec2 spos = ivec2(pos.x, pos.y * 2 + int(params.field)); );
1451  if (ops->dst.interlaced)
1452  GLSLC(1, ivec2 dpos = ivec2(pos.x, pos.y * 2 + int(params.field)); );
1453  if (ops->dst.interlaced) {
1454  GLSLC(1, if (any(greaterThanEqual(dpos, size))) );
1455  } else {
1456  GLSLC(1, if (any(greaterThanEqual(pos, size))) );
1457  }
1458  GLSLC(2, return; );
1459  GLSLC(0, );
1460  GLSLC(1, u8vec4 u8; );
1461  GLSLC(1, u16vec4 u16; );
1462  GLSLC(1, u32vec4 u32; );
1463  GLSLC(1, precise f32vec4 f32; );
1464  GLSLC(1, precise f32vec4 tmp; );
1465  GLSLC(0, );
1466 
1467  for (int n = 0; n < ops->num_ops; n++) {
1468  const SwsOp *op = &ops->ops[n];
1469  SwsPixelType cur_type = op->op == SWS_OP_CONVERT ? op->convert.to :
1470  op->type;
1471  const char *type_name = ff_sws_pixel_type_name(cur_type);
1472  const char *type_v = cur_type == SWS_PIXEL_F32 ? "f32vec4" :
1473  cur_type == SWS_PIXEL_U32 ? "u32vec4" :
1474  cur_type == SWS_PIXEL_U16 ? "u16vec4" : "u8vec4";
1475  const char *type_s = cur_type == SWS_PIXEL_F32 ? "float" :
1476  cur_type == SWS_PIXEL_U32 ? "uint32_t" :
1477  cur_type == SWS_PIXEL_U16 ? "uint16_t" : "uint8_t";
1478  av_bprintf(&shd->src, " // %s\n", ff_sws_op_type_name(op->op));
1479 
1480  switch (op->op) {
1481  case SWS_OP_READ: {
1482  if (op->rw.frac)
1483  return AVERROR(ENOTSUP);
1484  read_glsl(ops, op, shd, n, type_name, type_v, type_s);
1485  break;
1486  }
1487  case SWS_OP_WRITE: {
1488  const char *dst_pos = ops->dst.interlaced ? "dpos" : "pos";
1489  if (op->rw.frac || op->rw.filter) {
1490  return AVERROR(ENOTSUP);
1491  } else if (op->rw.packed) {
1492  GLSLF(1, imageStore(dst_img[%i], %s, %s(%s)); ,
1493  ops->plane_dst[0], dst_pos, type_v, type_name);
1494  } else {
1495  for (int i = 0; i < op->rw.elems; i++)
1496  GLSLF(1, imageStore(dst_img[%i], %s, %s(%s[%i])); ,
1497  ops->plane_dst[i], dst_pos, type_v, type_name, i);
1498  }
1499  break;
1500  }
1501  case SWS_OP_SWIZZLE: {
1502  av_bprintf(&shd->src, " %s = %s.", type_name, type_name);
1503  for (int i = 0; i < 4; i++)
1504  av_bprintf(&shd->src, "%c", "xyzw"[op->swizzle.in[i]]);
1505  av_bprintf(&shd->src, ";\n");
1506  break;
1507  }
1508  case SWS_OP_CLEAR: {
1509  for (int i = 0; i < 4; i++) {
1510  if (!SWS_COMP_TEST(op->clear.mask, i))
1511  continue;
1512  av_bprintf(&shd->src, " %s.%c = %s"QSTR";\n", type_name,
1513  "xyzw"[i], type_s, QTYPE(op->clear.value[i]));
1514  }
1515  break;
1516  }
1517  case SWS_OP_SCALE:
1518  av_bprintf(&shd->src, " %s = %s * "QSTR";\n",
1519  type_name, type_name, QTYPE(op->scale.factor));
1520  break;
1521  case SWS_OP_MIN:
1522  case SWS_OP_MAX:
1523  for (int i = 0; i < 4; i++) {
1524  if (!op->clamp.limit[i].den)
1525  continue;
1526  av_bprintf(&shd->src, " %s.%c = %s(%s.%c, "QSTR");\n",
1527  type_name, "xyzw"[i],
1528  op->op == SWS_OP_MIN ? "min" : "max",
1529  type_name, "xyzw"[i], QTYPE(op->clamp.limit[i]));
1530  }
1531  break;
1532  case SWS_OP_LSHIFT:
1533  case SWS_OP_RSHIFT:
1534  av_bprintf(&shd->src, " %s %s= %i;\n", type_name,
1535  op->op == SWS_OP_LSHIFT ? "<<" : ">>", op->shift.amount);
1536  break;
1537  case SWS_OP_CONVERT:
1538  if (ff_sws_pixel_type_is_int(cur_type) && op->convert.expand) {
1539  const AVRational sc = ff_sws_pixel_expand(op->type, op->convert.to);
1540  av_bprintf(&shd->src, " %s = %s((%s*%i)/%i);\n",
1541  type_name, type_v, ff_sws_pixel_type_name(op->type),
1542  sc.num, sc.den);
1543  } else {
1544  av_bprintf(&shd->src, " %s = %s(%s);\n",
1545  type_name, type_v, ff_sws_pixel_type_name(op->type));
1546  }
1547  break;
1548  case SWS_OP_DITHER: {
1549  int size = (1 << op->dither.size_log2);
1550  for (int i = 0; i < 4; i++) {
1551  if (op->dither.y_offset[i] < 0)
1552  continue;
1553  av_bprintf(&shd->src, " %s.%c += dither_mat%i[(pos.y + %i) & %i]"
1554  "[pos.x & %i];\n",
1555  type_name, "xyzw"[i], n,
1556  op->dither.y_offset[i], size - 1,
1557  size - 1);
1558  }
1559  break;
1560  }
1561  case SWS_OP_LINEAR:
1562  for (int i = 0; i < 4; i++) {
1563  if (op->lin.m[i][4].num)
1564  av_bprintf(&shd->src, " tmp.%c = "QSTR";\n", "xyzw"[i],
1565  QTYPE(op->lin.m[i][4]));
1566  else
1567  av_bprintf(&shd->src, " tmp.%c = 0;\n", "xyzw"[i]);
1568  for (int j = 0; j < 4; j++) {
1569  if (!op->lin.m[i][j].num)
1570  continue;
1571  av_bprintf(&shd->src, " tmp.%c += f32.%c*"QSTR";\n",
1572  "xyzw"[i], "xyzw"[j], QTYPE(op->lin.m[i][j]));
1573  }
1574  }
1575  av_bprintf(&shd->src, " f32 = tmp;\n");
1576  break;
1577  case SWS_OP_UNPACK:
1578  /* MSB->LSB indexing */
1579  av_bprintf(&shd->src, " %s = %s.%s;\n", type_name, type_name,
1580  ops->src.format == AV_PIX_FMT_X2BGR10 ? "wzyx" : "wxyz");
1581  break;
1582  case SWS_OP_PACK:
1583  /* LSB->MSB indexing */
1584  av_bprintf(&shd->src, " %s = %s.%s;\n", type_name, type_name,
1585  ops->dst.format == AV_PIX_FMT_X2BGR10 ? "wzyx" : "yzwx");
1586  break;
1587  default:
1588  return AVERROR(ENOTSUP);
1589  }
1590  }
1591 
1592  GLSLC(0, } );
1593 
1594  err = s->spvc->compile_shader(&s->vkctx, s->spvc, shd,
1595  &spv_data, &spv_len, "main",
1596  &spv_opaque);
1597  if (err < 0)
1598  return err;
1599 
1600  err = ff_vk_shader_link(&s->vkctx, shd, spv_data, spv_len, "main");
1601 
1602  if (spv_opaque)
1603  s->spvc->free_shader(s->spvc, &spv_opaque);
1604 
1605  if (err < 0)
1606  return err;
1607 
1608  return 0;
1609 }
1610 #endif
1611 
1612 static int compile(SwsContext *sws, const SwsOpList *ops, SwsCompiledOp *out,
1613  int glsl)
1614 {
1615  int err;
1616  SwsInternal *c = sws_internal(sws);
1617  FFVulkanOpsCtx *s = c->hw_priv;
1618  if (!s)
1619  return AVERROR(ENOTSUP);
1620 
1621  VulkanPriv *p = av_mallocz(sizeof(*p));
1622  if (!p)
1623  return AVERROR(ENOMEM);
1624  p->s = av_refstruct_ref(c->hw_priv);
1625 
1626  err = ff_vk_exec_pool_init(&s->vkctx, s->qf, &p->e, 1,
1627  0, 0, 0, NULL);
1628  if (err < 0)
1629  goto fail;
1630 
1631  if (ops->src.format == AV_PIX_FMT_BGR0 ||
1632  ops->src.format == AV_PIX_FMT_BGRA ||
1633  ops->dst.format == AV_PIX_FMT_BGR0 ||
1634  ops->dst.format == AV_PIX_FMT_BGRA) {
1635  VkFormatProperties2 prop = {
1636  .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
1637  };
1638  FFVulkanFunctions *vk = &s->vkctx.vkfn;
1639  vk->GetPhysicalDeviceFormatProperties2(s->vkctx.hwctx->phys_dev,
1640  VK_FORMAT_B8G8R8A8_UNORM,
1641  &prop);
1642  if (!(prop.formatProperties.optimalTilingFeatures &
1643  VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT)) {
1644  err = AVERROR(ENOTSUP);
1645  goto fail;
1646  }
1647  }
1648 
1649  if (glsl) {
1650  err = AVERROR(ENOTSUP);
1651 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
1652  err = add_ops_glsl(sws, p, s, ops, &p->shd);
1653 #endif
1654  } else {
1655  err = AVERROR(ENOTSUP);
1656 #if HAVE_SPIRV_HEADERS_SPIRV_H || HAVE_SPIRV_UNIFIED1_SPIRV_H
1657  err = add_ops_spirv(sws, p, s, ops, &p->shd);
1658 #endif
1659  }
1660  if (err < 0)
1661  goto fail;
1662 
1663  err = ff_vk_shader_register_exec(&s->vkctx, &p->e, &p->shd);
1664  if (err < 0)
1665  goto fail;
1666 
1667  for (int i = 0; i < p->nb_data_bufs; i++)
1668  ff_vk_shader_update_desc_buffer(&s->vkctx, &p->e.contexts[0], &p->shd,
1669  1, i, 0, &p->data_bufs[i],
1670  0, VK_WHOLE_SIZE, VK_FORMAT_UNDEFINED);
1671 
1672  *out = (SwsCompiledOp) {
1673  .opaque = true,
1674  .func_opaque = process,
1675  .priv = p,
1676  .free = free_fn,
1677  };
1678 
1679  return 0;
1680 
1681 fail:
1682  free_fn(p);
1683  return err;
1684 }
1685 
1686 #if HAVE_SPIRV_HEADERS_SPIRV_H || HAVE_SPIRV_UNIFIED1_SPIRV_H
1687 static int compile_spirv(SwsContext *sws, const SwsOpList *ops,
1688  SwsCompiledOp *out)
1689 {
1690  return compile(sws, ops, out, 0);
1691 }
1692 
1693 const SwsOpBackend backend_spirv = {
1694  .name = "spirv",
1695  .flags = SWS_BACKEND_SPIRV,
1696  .compile = compile_spirv,
1697  .hw_format = AV_PIX_FMT_VULKAN,
1698 };
1699 #endif
1700 
1701 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
1702 static int compile_glsl(SwsContext *sws, const SwsOpList *ops,
1703  SwsCompiledOp *out)
1704 {
1705  return compile(sws, ops, out, 1);
1706 }
1707 
1708 const SwsOpBackend backend_glsl = {
1709  .name = "glsl",
1710  .flags = SWS_BACKEND_GLSL,
1711  .compile = compile_glsl,
1712  .hw_format = AV_PIX_FMT_VULKAN,
1713 };
1714 #endif
SWS_OP_READ
@ SWS_OP_READ
Definition: ops.h:38
ff_vk_create_buf
int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext, void *alloc_pNext, VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
Definition: vulkan.c:1050
spi_OpExecutionMode
static void spi_OpExecutionMode(SPICtx *spi, int entry_point_id, SpvExecutionMode mode, int *s, int nb_s)
Definition: spvasm.h:405
VulkanPriv::e
FFVkExecPool e
Definition: ops.c:99
VulkanPriv::data_bufs
FFVkBuffer data_bufs[MAX_DATA_BUFS]
Definition: ops.c:101
FFVulkanOpsCtx
Copyright (C) 2026 Lynne.
Definition: ops.h:31
SWS_OP_SWIZZLE
@ SWS_OP_SWIZZLE
Definition: ops.h:41
SwsPass
Represents a single filter pass in the scaling graph.
Definition: graph.h:75
spi_OpConstantInt
static int spi_OpConstantInt(SPICtx *spi, int type_id, int val)
Definition: spvasm.h:584
VulkanPriv::src_rep
enum FFVkShaderRepFormat src_rep
Definition: ops.c:103
r
const char * r
Definition: vf_curves.c:127
AVERROR
Filter design: the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format negotiation: query_formats sets, for each input and each output link, the list of supported formats. For video links that means pixel format; for audio links that means channel layout, sample format and sample rate. The lists are not just lists, they are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining list, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions.
SwsFilterWeights::filter_size
int filter_size
The number of source texels to convolve over for each row.
Definition: filters.h:68
uid
UID uid
Definition: mxfenc.c:2488
spi_end
static int spi_end(SPICtx *spi)
Definition: spvasm.h:100
SWS_OP_LSHIFT
@ SWS_OP_LSHIFT
Definition: ops.h:46
SWS_OP_UNPACK
@ SWS_OP_UNPACK
Definition: ops.h:44
spi_OpVariable
static int spi_OpVariable(SPICtx *spi, int var_id, int ptr_type_id, SpvStorageClass storage_class, int initializer_id)
Definition: spvasm.h:537
ff_vk_shader_free
void ff_vk_shader_free(FFVulkanContext *s, FFVulkanShader *shd)
Free a shader.
Definition: vulkan.c:2845
ff_vk_shader_init
int ff_vk_shader_init(FFVulkanContext *s, FFVulkanShader *shd, const char *name, VkPipelineStageFlags stage, const char *extensions[], int nb_extensions, int lg_x, int lg_y, int lg_z, uint32_t required_subgroup_size)
Initialize a shader object, with a specific set of extensions, type+bind, local group size,...
Definition: vulkan.c:2157
out
static FILE * out
Definition: movenc.c:55
create_filter_buf
static int create_filter_buf(FFVulkanOpsCtx *s, VulkanPriv *p, const SwsFilterWeights *wd, FFVkBuffer *buf)
Definition: ops.c:184
MAX_DITHER_BUFS
#define MAX_DITHER_BUFS
Definition: ops.c:93
SwsFormat::interlaced
int interlaced
Definition: format.h:79
spi_OpTypeFunction
static int spi_OpTypeFunction(SPICtx *spi, int return_type_id, const int *args, int nb_args)
Definition: spvasm.h:498
ff_sws_op_list_input
const SwsOp * ff_sws_op_list_input(const SwsOpList *ops)
Returns the input operation for a given op list, or NULL if there is none (e.g.
Definition: ops.c:671
AVBufferRef::data
uint8_t * data
The data buffer.
Definition: buffer.h:90
SWS_OP_CLEAR
@ SWS_OP_CLEAR
Definition: ops.h:50
ff_vk_exec_pool_init
int ff_vk_exec_pool_init(FFVulkanContext *s, AVVulkanDeviceQueueFamily *qf, FFVkExecPool *pool, int nb_contexts, int nb_queries, VkQueryType query_type, int query_64bit, const void *query_create_pnext)
Allocates/frees an execution pool.
Definition: vulkan.c:357
AVRefStructOpaque
RefStruct is an API for creating reference-counted objects with minimal overhead.
Definition: refstruct.h:58
SWS_BACKEND_SPIRV
@ SWS_BACKEND_SPIRV
Vulkan SPIR-V backend.
Definition: swscale.h:120
SwsOp::rw
SwsReadWriteOp rw
Definition: ops.h:213
spi_OpConstantUInt
static int spi_OpConstantUInt(SPICtx *spi, int type_id, uint32_t val)
Definition: spvasm.h:565
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:466
spi_OpFunctionEnd
static void spi_OpFunctionEnd(SPICtx *spi)
Definition: spvasm.h:532
ff_vk_map_buffer
static int ff_vk_map_buffer(FFVulkanContext *s, FFVkBuffer *buf, uint8_t **mem, int invalidate)
Definition: vulkan.h:603
SWS_OP_DITHER
@ SWS_OP_DITHER
Definition: ops.h:58
SwsFilterWeights
Represents a computed filter kernel.
Definition: filters.h:64
SWS_BITEXACT
@ SWS_BITEXACT
Definition: swscale.h:180
FFVulkanShader::src
AVBPrint src
Definition: vulkan.h:234
data
const char data[16]
Definition: mxf.c:149
spi_OpDecorate
#define spi_OpDecorate(spi, target, deco,...)
Definition: spvasm.h:356
SwsFilterWeights::offsets
int * offsets
The computed source pixel positions for each row of the filter.
Definition: filters.h:84
SwsContext::flags
unsigned flags
Bitmask of SWS_*.
Definition: swscale.h:242
filter
void(* filter)(uint8_t *src, int stride, int qscale)
Definition: h263dsp.c:29
VulkanPriv::s
FFVulkanOpsCtx * s
Definition: ops.c:98
AV_PIX_FMT_BGRA
@ AV_PIX_FMT_BGRA
packed BGRA 8:8:8:8, 32bpp, BGRABGRA...
Definition: pixfmt.h:102
ff_vk_init
int ff_vk_init(FFVulkanContext *s, void *log_parent, AVBufferRef *device_ref, AVBufferRef *frames_ref)
Initializes the AVClass, in case this context is not used as the main user's context.
Definition: vulkan.c:2883
ff_vk_exec_get
FFVkExecContext * ff_vk_exec_get(FFVulkanContext *s, FFVkExecPool *pool)
Retrieve an execution pool.
Definition: vulkan.c:568
ff_vk_uninit
void ff_vk_uninit(FFVulkanContext *s)
Frees main context.
Definition: vulkan.c:2871
spi_OpAccessChain
#define spi_OpAccessChain(spi, res_type, ptr_id,...)
Definition: spvasm.h:311
spi_OpCompositeExtract
#define spi_OpCompositeExtract(spi, res_type, src,...)
Definition: spvasm.h:319
ff_vk_exec_bind_shader
void ff_vk_exec_bind_shader(FFVulkanContext *s, FFVkExecContext *e, const FFVulkanShader *shd)
Bind a shader.
Definition: vulkan.c:2822
SwsOpBackend::name
const char * name
Definition: ops_dispatch.h:134
AV_PIX_FMT_VULKAN
@ AV_PIX_FMT_VULKAN
Vulkan hardware images.
Definition: pixfmt.h:379
ff_vk_exec_add_dep_frame
int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, VkPipelineStageFlagBits2 wait_stage, VkPipelineStageFlagBits2 signal_stage)
Definition: vulkan.c:800
FFVkShaderRepFormat
FFVkShaderRepFormat
Returns the format to use for images in shaders.
Definition: vulkan.h:447
SwsOpList::plane_dst
uint8_t plane_dst[4]
Definition: ops.h:266
create_bufs
static int create_bufs(FFVulkanOpsCtx *s, VulkanPriv *p, const SwsOpList *ops)
Definition: ops.c:254
SWS_COMP_TEST
#define SWS_COMP_TEST(mask, X)
Definition: uops.h:71
SwsOpList::num_ops
int num_ops
Definition: ops.h:260
SwsDitherOp
Definition: ops.h:156
ff_vk_shader_update_img_array
void ff_vk_shader_update_img_array(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, AVFrame *f, VkImageView *views, int set, int binding, VkImageLayout layout, VkSampler sampler)
Update a descriptor in a buffer with an image array.
Definition: vulkan.c:2773
ff_vk_frame_barrier
void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, VkPipelineStageFlags2 src_stage, VkPipelineStageFlags2 dst_stage, VkAccessFlagBits2 new_access, VkImageLayout new_layout, uint32_t new_qf)
Definition: vulkan.c:2085
SPICtx
Definition: spvasm.h:52
ff_vk_shader_register_exec
int ff_vk_shader_register_exec(FFVulkanContext *s, FFVkExecPool *pool, FFVulkanShader *shd)
Register a shader with an exec pool.
Definition: vulkan.c:2638
ff_sws_pixel_type_is_int
bool ff_sws_pixel_type_is_int(SwsPixelType type)
Definition: ops.c:92
create_dither_buf
static int create_dither_buf(FFVulkanOpsCtx *s, VulkanPriv *p, const SwsDitherOp *dd, FFVkBuffer *buf)
Definition: ops.c:219
val
static double val(void *priv, double ch)
Definition: aeval.c:77
spi_OpTypeBool
static int spi_OpTypeBool(SPICtx *spi)
Definition: spvasm.h:430
AVRational::num
int num
Numerator.
Definition: rational.h:59
spi_OpConstantComposite
#define spi_OpConstantComposite(spi, res_type, src,...)
Definition: spvasm.h:307
refstruct.h
spvasm.h
FFVulkanDescriptorSetBinding::type
VkDescriptorType type
Definition: vulkan.h:114
SwsFrame
Represents a view into a single field of frame data.
Definition: format.h:221
SWS_OP_SCALE
@ SWS_OP_SCALE
Definition: ops.h:54
GLSLC
#define GLSLC(N, S)
Definition: vulkan.h:45
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:210
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
main
int main
Definition: dovi_rpuenc.c:38
spi_reserve
static int spi_reserve(SPICtx *spi, int len)
Definition: spvasm.h:108
SwsPass::priv
void * priv
Definition: graph.h:111
float
float
Definition: af_crystalizer.c:122
MAX_DATA_BUFS
#define MAX_DATA_BUFS
Definition: ops.c:95
ff_sws_vk_init
int ff_sws_vk_init(SwsContext *sws, AVBufferRef *dev_ref)
Definition: ops.c:45
dither
static const uint16_t dither[8][8]
Definition: vf_gradfun.c:46
s
#define s(width, name)
Definition: cbs_vp9.c:198
spi_init
static void spi_init(SPICtx *spi, uint8_t *spv_buf, int buf_len)
Definition: spvasm.h:86
spi_OpFunction
static void spi_OpFunction(SPICtx *spi, int fn_id, int result_type_id, SpvFunctionControlMask function_control, int function_type_id)
Definition: spvasm.h:509
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
ops.h
spi_OpMemoryModel
static void spi_OpMemoryModel(SPICtx *spi, SpvAddressingModel addressing_model, SpvMemoryModel memory_model)
Definition: spvasm.h:125
ff_vk_exec_wait
void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e)
Definition: vulkan.c:573
MAX_FILT_BUFS
#define MAX_FILT_BUFS
Definition: ops.c:94
spi_OpLabel
static int spi_OpLabel(SPICtx *spi, int label_id)
Definition: spvasm.h:520
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:42
FF_VK_REP_FLOAT
@ FF_VK_REP_FLOAT
Definition: vulkan.h:451
av_refstruct_alloc_ext
static void * av_refstruct_alloc_ext(size_t size, unsigned flags, void *opaque, void(*free_cb)(AVRefStructOpaque opaque, void *obj))
A wrapper around av_refstruct_alloc_ext_c() for the common case of a non-const qualified opaque.
Definition: refstruct.h:94
SWS_OP_MIN
@ SWS_OP_MIN
Definition: ops.h:52
ff_sws_pixel_expand
static AVRational ff_sws_pixel_expand(SwsPixelType from, SwsPixelType to)
Definition: ops_internal.h:31
SPICtx::id
int id
Definition: spvasm.h:59
SWS_OP_LINEAR
@ SWS_OP_LINEAR
Definition: ops.h:57
field
It's the only field you need to keep, assuming you have a context. There is some magic you don't need to care about around this field.
Definition: writing_filters.txt:78
ff_sws_op_list_output
const SwsOp * ff_sws_op_list_output(const SwsOpList *ops)
Returns the output operation for a given op list, or NULL if there is none.
Definition: ops.c:680
SWS_OP_FILTER_H
@ SWS_OP_FILTER_H
Definition: ops.h:61
SPICtx::off
int off
Definition: spvasm.h:55
ff_vk_exec_pool_free
void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool)
Definition: vulkan.c:299
av_mallocz
#define av_mallocz(s)
Definition: tableprint_vlc.h:31
SwsOpBackend
Definition: ops_dispatch.h:133
spi_OpUndef
static int spi_OpUndef(SPICtx *spi, int type_id)
Definition: spvasm.h:415
tmp
static uint8_t tmp[40]
Definition: aes_ctr.c:52
buf_desc
Definition: v4l2.c:128
SWS_OP_PACK
@ SWS_OP_PACK
Definition: ops.h:45
SwsGraph::field
int field
Definition: graph.h:146
spi_OpBranchConditional
static void spi_OpBranchConditional(SPICtx *spi, int cond_id, int true_label, int false_label, uint32_t branch_weights)
Definition: spvasm.h:642
fail
#define fail
Definition: test.h:478
NULL
#define NULL
Definition: coverity.c:32
spi_OpExtInst
#define spi_OpExtInst(spi, res_type, instr_id, set_id,...)
Definition: spvasm.h:348
SwsFilterWeights::dst_size
int dst_size
Definition: filters.h:90
AVRational
Rational number (pair of numerator and denominator).
Definition: rational.h:58
spi_OpTypeImage
static int spi_OpTypeImage(SPICtx *spi, int sampled_type_id, SpvDim dim, int depth, int arrayed, int ms, int sampled, SpvImageFormat image_format)
Definition: spvasm.h:453
ff_vk_shader_link
int ff_vk_shader_link(FFVulkanContext *s, FFVulkanShader *shd, const char *spirv, size_t spirv_len, const char *entrypoint)
Link a shader into an executable.
Definition: vulkan.c:2411
SWS_OP_FILTER_V
@ SWS_OP_FILTER_V
Definition: ops.h:62
AV_PIX_FMT_BGR0
@ AV_PIX_FMT_BGR0
packed BGR 8:8:8, 32bpp, BGRXBGRX... X=unused/undefined
Definition: pixfmt.h:265
SwsOpType
SwsOpType
Copyright (C) 2025 Niklas Haas.
Definition: ops.h:34
SwsPass::graph
const SwsGraph * graph
Definition: graph.h:76
spi_OpEntryPoint
static int spi_OpEntryPoint(SPICtx *spi, SpvExecutionModel execution_model, const char *name, const int *args, int nb_args)
Definition: spvasm.h:372
spi_OpTypeStruct
#define spi_OpTypeStruct(spi, id,...)
Definition: spvasm.h:352
SwsPixelType
SwsPixelType
Definition: uops.h:38
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
spi_OpSelectionMerge
static void spi_OpSelectionMerge(SPICtx *spi, int merge_block, SpvSelectionControlMask selection_control)
Definition: spvasm.h:634
AV_PIX_FMT_X2BGR10
#define AV_PIX_FMT_X2BGR10
Definition: pixfmt.h:614
f
f
Definition: af_crystalizer.c:122
ff_vk_shader_update_push_const
void ff_vk_shader_update_push_const(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, VkShaderStageFlagBits stage, int offset, size_t size, void *src)
Update push constant in a shader.
Definition: vulkan.c:2812
FFVulkanDescriptorSetBinding
Definition: vulkan.h:112
SwsDitherOp::size_log2
int size_log2
Definition: ops.h:159
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
SwsOp::type
SwsPixelType type
Definition: ops.h:210
SwsDitherOp::matrix
AVRational * matrix
Definition: ops.h:157
size
int size
Definition: twinvq_data.h:10344
AV_NUM_DATA_POINTERS
#define AV_NUM_DATA_POINTERS
Definition: frame.h:467
SWS_OP_RSHIFT
@ SWS_OP_RSHIFT
Definition: ops.h:47
VulkanPriv::dst_rep
enum FFVkShaderRepFormat dst_rep
Definition: ops.c:104
SwsOpList::src
SwsFormat src
Definition: ops.h:263
FFVulkanShader
Definition: vulkan.h:225
SWS_OP_WRITE
@ SWS_OP_WRITE
Definition: ops.h:39
SWS_PIXEL_U32
@ SWS_PIXEL_U32
Definition: uops.h:42
av_refstruct_ref
void * av_refstruct_ref(void *obj)
Create a new reference to an object managed via this API, i.e.
Definition: refstruct.c:140
img
#define img
Definition: vf_colormatrix.c:114
spi_OpTypeArray
static int spi_OpTypeArray(SPICtx *spi, int element_type_id, int id, int length_id)
Definition: spvasm.h:470
FFVkExecContext
Definition: vulkan.h:145
spi_OpExtInstImport
static int spi_OpExtInstImport(SPICtx *spi, const char *name)
Definition: spvasm.h:397
ff_vk_shader_update_desc_buffer
int ff_vk_shader_update_desc_buffer(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, int set, int bind, int elem, FFVkBuffer *buf, VkDeviceSize offset, VkDeviceSize len, VkFormat fmt)
Update a descriptor in a buffer with a buffer.
Definition: vulkan.c:2786
FFVulkanDescriptorSetBinding::name
const char * name
Definition: vulkan.h:113
av_refstruct_unref
void av_refstruct_unref(void *objp)
Decrement the reference count of the underlying object and automatically free the object if there are...
Definition: refstruct.c:120
layout
Filter design: the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format negotiation: for each input and each output, the list of supported formats. For video, that means pixel format. For audio, that means channel layout.
Definition: filter_design.txt:18
interlaced
uint8_t interlaced
Definition: mxfenc.c:2336
SwsFormat::format
enum AVPixelFormat format
Definition: format.h:80
ff_vk_exec_start
int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e)
Start/submit/wait an execution.
Definition: vulkan.c:580
spi_get_id
static int spi_get_id(SPICtx *spi)
Definition: spvasm.h:133
FF_VK_REP_UINT
@ FF_VK_REP_UINT
Definition: vulkan.h:455
process
static void process(const SwsFrame *dst, const SwsFrame *src, int y, int h, const SwsPass *pass)
Definition: ops.c:108
SwsOpList::ops
SwsOp * ops
Definition: ops.h:259
VulkanPriv
Definition: ops.c:97
ff_vk_unmap_buffer
static int ff_vk_unmap_buffer(FFVulkanContext *s, FFVkBuffer *buf, int flush)
Definition: vulkan.h:610
spi_OpCompositeConstruct
#define spi_OpCompositeConstruct(spi, res_type, src,...)
Definition: spvasm.h:315
spi_OpImageWrite
static void spi_OpImageWrite(SPICtx *spi, int img_id, int pos_id, int src_id, SpvImageOperandsMask image_operands)
Definition: spvasm.h:669
len
int len
Definition: vorbis_enc_data.h:426
filt
static const int8_t filt[NUMTAPS *2]
Definition: af_earwax.c:40
SwsOp
Definition: ops.h:208
spi_OpVectorShuffle
#define spi_OpVectorShuffle(spi, res_type, src1, src2,...)
Definition: spvasm.h:364
ff_vk_free_buf
void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
Definition: vulkan.c:1264
SwsInternal
Definition: swscale_internal.h:337
ff_vk_create_imageviews
int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, VkImageView views[AV_NUM_DATA_POINTERS], AVFrame *f, enum FFVkShaderRepFormat rep_fmt)
Create an imageview and add it as a dependency to an execution.
Definition: vulkan.c:2002
spi_OpTypeVoid
static int spi_OpTypeVoid(SPICtx *spi)
Definition: spvasm.h:423
SwsOpList::dst
SwsFormat dst
Definition: ops.h:263
SWS_OP_MAX
@ SWS_OP_MAX
Definition: ops.h:53
SwsCompiledOp
Definition: ops_dispatch.h:100
FFVkExecPool
Definition: vulkan.h:290
pos
unsigned int pos
Definition: spdifenc.c:414
VulkanPriv::interlaced
int interlaced
Definition: ops.c:105
av_bprintf
void av_bprintf(AVBPrint *buf, const char *fmt,...)
Definition: bprint.c:122
ff_vk_shader_add_push_const
int ff_vk_shader_add_push_const(FFVulkanShader *shd, int offset, int size, VkShaderStageFlagBits stage)
Add/update push constants for execution.
Definition: vulkan.c:1509
ff_vk_qf_find
AVVulkanDeviceQueueFamily * ff_vk_qf_find(FFVulkanContext *s, VkQueueFlagBits dev_family, VkVideoCodecOperationFlagBitsKHR vid_ops)
Chooses an appropriate QF.
Definition: vulkan.c:286
id
enum AVCodecID id
Definition: dts2pts.c:578
compile
static int compile(SwsContext *sws, const SwsOpList *ops, SwsCompiledOp *out, int glsl)
Definition: ops.c:1612
spi_OpReturn
static void spi_OpReturn(SPICtx *spi)
Definition: spvasm.h:527
ff_vk_shader_add_descriptor_set
int ff_vk_shader_add_descriptor_set(FFVulkanContext *s, FFVulkanShader *shd, const FFVulkanDescriptorSetBinding *desc, int nb, int singular, int print_to_shader_only)
Add descriptor to a shader.
Definition: vulkan.c:2538
spi_OpImageRead
static int spi_OpImageRead(SPICtx *spi, int result_type_id, int img_id, int pos_id, SpvImageOperandsMask image_operands)
Definition: spvasm.h:656
FFVulkanShader::precompiled
int precompiled
Definition: vulkan.h:230
GLSLF
#define GLSLF(N, S,...)
Definition: vulkan.h:55
AVRational::den
int den
Denominator.
Definition: rational.h:60
SwsReadWriteOp::packed
bool packed
Definition: ops.h:98
spi_OpMemberDecorate
#define spi_OpMemberDecorate(spi, type, target, deco,...)
Definition: spvasm.h:360
ff_sws_pixel_type_name
const char * ff_sws_pixel_type_name(SwsPixelType type)
Definition: ops.c:62
spi_OpCapability
static void spi_OpCapability(SPICtx *spi, SpvCapability capability)
Definition: spvasm.h:119
FFVulkanShader::lg_size
uint32_t lg_size[3]
Definition: vulkan.h:237
px
#define px
Definition: uops_tmpl.c:54
spi_OpConstantFloat
static int spi_OpConstantFloat(SPICtx *spi, int type_id, float val)
Definition: spvasm.h:596
SwsFilterWeights::num_weights
size_t num_weights
Definition: filters.h:77
VulkanPriv::shd
FFVulkanShader shd
Definition: ops.c:100
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
free_fn
static void free_fn(void *priv)
Definition: ops.c:173
SWS_FILTER_SCALE
@ SWS_FILTER_SCALE
14-bit coefficients are picked to fit comfortably within int16_t for efficient SIMD processing (e....
Definition: filters.h:40
SwsReadWriteOp::elems
uint8_t elems
Examples: rgba = 4x u8 packed yuv444p = 3x u8 rgb565 = 1x u16 <- use SWS_OP_UNPACK to unpack monow = ...
Definition: ops.h:96
mem.h
spi_OpLoad
static int spi_OpLoad(SPICtx *spi, int result_type_id, int ptr_id, SpvMemoryAccessMask memory_access, int align)
Definition: spvasm.h:608
AVBufferRef
A reference to a data buffer.
Definition: buffer.h:82
VulkanPriv::nb_data_bufs
int nb_data_bufs
Definition: ops.c:102
spi_OpTypePointer
static int spi_OpTypePointer(SPICtx *spi, SpvStorageClass storage_class, int type_id)
Definition: spvasm.h:488
spi_OpCompositeInsert
#define spi_OpCompositeInsert(spi, res_type, src1, src2,...)
Definition: spvasm.h:368
SWS_PIXEL_F32
@ SWS_PIXEL_F32
Definition: uops.h:43
w
uint8_t w
Definition: llvidencdsp.c:39
av_free
#define av_free(p)
Definition: tableprint_vlc.h:34
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
SWS_OP_CONVERT
@ SWS_OP_CONVERT
Definition: ops.h:51
FFVkBuffer
Definition: vulkan.h:125
ff_sws_vk_uninit
static void ff_sws_vk_uninit(AVRefStructOpaque opaque, void *obj)
Copyright (C) 2026 Lynne.
Definition: ops.c:34
int32_t
int32_t
Definition: audioconvert.c:56
ff_vk_exec_submit
int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e)
Definition: vulkan.c:925
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
sws_internal
static SwsInternal * sws_internal(const SwsContext *sws)
Definition: swscale_internal.h:79
AVERROR_INVALIDDATA
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:61
h
h
Definition: vp9dsp_template.c:2070
SwsOpList::plane_src
uint8_t plane_src[4]
Definition: ops.h:266
ff_sws_vk_device_ref
AVBufferRef * ff_sws_vk_device_ref(SwsContext *sws)
Returns the Vulkan device reference associated with sws, or NULL if Vulkan has not been initialized f...
Definition: ops.c:86
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:258
ff_sws_op_type_name
const char * ff_sws_op_type_name(SwsOpType op)
Definition: ops.c:109
SwsContext
Main external API structure.
Definition: swscale.h:229
SWS_PIXEL_U16
@ SWS_PIXEL_U16
Definition: uops.h:41
snprintf
#define snprintf
Definition: snprintf.h:34
SwsFilterWeights::weights
int * weights
The computed look-up table (LUT).
Definition: filters.h:76
FFVulkanFunctions
Definition: vulkan_functions.h:275
ff_vk_shader_load
int ff_vk_shader_load(FFVulkanShader *shd, VkPipelineStageFlags stage, VkSpecializationInfo *spec, uint32_t wg_size[3], uint32_t required_subgroup_size)
Initialize a shader object.
Definition: vulkan.c:2128
src
#define src
Definition: vp8dsp.c:248
read
static uint32_t BS_FUNC() read(BSCTX *bc, unsigned int n)
Return n bits from the buffer, n has to be in the 0-32 range.
Definition: bitstream_template.h:239
SWS_BACKEND_GLSL
@ SWS_BACKEND_GLSL
Vulkan GLSL backend.
Definition: swscale.h:121