25 #include "../ops_internal.h"
26 #include "../swscale_internal.h"
30 #if HAVE_SPIRV_HEADERS_SPIRV_H || HAVE_SPIRV_UNIFIED1_SPIRV_H
38 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
40 s->spvc->uninit(&
s->spvc);
58 if (
s->vkctx.device_ref &&
s->vkctx.device_ref->data != dev_ref->
data) {
61 }
else if (
s->vkctx.device_ref &&
s->vkctx.device_ref->data == dev_ref->
data) {
75 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
77 s->spvc = ff_vk_spirv_init();
90 return s ?
s->vkctx.device_ref :
NULL;
/*
 * Compile-time limits on the number of auxiliary buffers.
 *
 * MAX_DATA_BUFS is derived from the other two: it is MAX_DITHER_BUFS plus
 * four times MAX_FILT_BUFS.
 *
 * NOTE(review): the reason each filter accounts for 4 data buffers is not
 * visible from here; presumably this bounds the data_bufs array that is
 * indexed by nb_data_bufs elsewhere in this file -- confirm before changing
 * either limit.
 */
93 #define MAX_DITHER_BUFS 4
94 #define MAX_FILT_BUFS 4
95 #define MAX_DATA_BUFS (MAX_DITHER_BUFS + MAX_FILT_BUFS*4)
119 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
120 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT);
122 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
123 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT);
131 0, 0, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
133 0, 1, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
136 VkImageMemoryBarrier2 img_bar[8];
138 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
139 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
140 VK_ACCESS_SHADER_READ_BIT,
141 VK_IMAGE_LAYOUT_GENERAL,
142 VK_QUEUE_FAMILY_IGNORED);
144 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
145 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
146 VK_ACCESS_SHADER_WRITE_BIT,
147 VK_IMAGE_LAYOUT_GENERAL,
148 VK_QUEUE_FAMILY_IGNORED);
149 vk->CmdPipelineBarrier2(ec->buf, &(VkDependencyInfo) {
150 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
151 .pImageMemoryBarriers = img_bar,
152 .imageMemoryBarrierCount = nb_img_bar,
158 VK_SHADER_STAGE_COMPUTE_BIT,
164 vk->CmdDispatch(ec->buf,
165 FFALIGN(
dst->width,
p->shd.lg_size[0])/
p->shd.lg_size[0],
166 FFALIGN(
dst->height,
p->shd.lg_size[1])/
p->shd.lg_size[1],
178 for (
int i = 0;
i <
p->nb_data_bufs;
i++)
193 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
194 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
195 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
201 (uint8_t **)&weights_data, 0);
227 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
228 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
229 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
238 for (
int i = 0;
i <
size;
i++) {
239 for (
int j = 0; j <
size; j++) {
258 for (
int n = 0; n < ops->
num_ops; n++) {
263 &
p->data_bufs[
p->nb_data_bufs]);
270 &
p->data_bufs[
p->nb_data_bufs]);
278 &
p->data_bufs[
p->nb_data_bufs]);
288 for (
int i = 0;
i <
p->nb_data_bufs;
i++)
293 #if HAVE_SPIRV_HEADERS_SPIRV_H || HAVE_SPIRV_UNIFIED1_SPIRV_H
323 typedef struct SPIRVIDs {
357 int linear_deco_off[16];
358 int linear_deco_ops[16];
362 int dither_ptr_elem_id;
370 int out_img_array_id;
385 int push_const_struct_id;
386 int push_const_ptr_id;
387 int push_const_elem_ptr_id;
388 int push_const_var_id;
414 3 +
id->nb_dither_bufs +
id->nb_filter_bufs +
415 (
id->interlaced ? 1 : 0));
419 if (
id->interlaced) {
422 SpvDecorationOffset, 0);
427 SpvBuiltInGlobalInvocationId);
439 for (
int i = 0;
i <
id->nb_dither_bufs;
i++) {
443 id->dither[
i].size*
sizeof(
float));
448 id->dither[
i].binding);
451 for (
int i = 0;
i <
id->nb_filter_bufs;
i++) {
452 struct FilterData *
f = &
id->filt[
i];
456 f->filter_size*
sizeof(
float));
462 f->num_weights*
sizeof(
float));
471 for (
int n = 0; n < ops->
num_ops; n++) {
478 for (
int j = 0; j < 4; j++) {
479 nb_ops += !!
op->lin.m[j][0].num;
480 nb_ops +=
op->lin.m[j][0].num &&
op->lin.m[j][4].num;
481 for (
int i = 1;
i < 4;
i++) {
482 nb_ops += !!
op->lin.m[j][
i].num;
483 nb_ops +=
op->lin.m[j][
i].num &&
484 (
op->lin.m[j][0].num ||
op->lin.m[j][4].num);
488 id->linear_deco_off[
id->nb_linear_ops] =
spi_reserve(spi, nb_ops*4*3);
489 id->linear_deco_ops[
id->nb_linear_ops] = nb_ops;
496 SPICtx *spi, SPIRVIDs *
id)
502 id->u32_type = spi_OpTypeInt(spi, 32, 0);
503 id->i32_type = spi_OpTypeInt(spi, 32, 1);
505 id->f32_type = spi_OpTypeFloat(spi, 32);
509 id->bvec2_type = spi_OpTypeVector(spi,
id->b_type, 2);
510 id->u32vec2_type = spi_OpTypeVector(spi, u32_type, 2);
511 id->i32vec2_type = spi_OpTypeVector(spi,
id->i32_type, 2);
513 id->u32vec3_type = spi_OpTypeVector(spi, u32_type, 3);
515 id->u32vec4_type = spi_OpTypeVector(spi, u32_type, 4);
516 id->f32vec4_type = spi_OpTypeVector(spi, f32_type, 4);
517 id->f32mat4_type = spi_OpTypeMatrix(spi,
id->f32vec4_type, 4);
523 for (
int i = 0;
i < 5;
i++)
527 id->nb_const_ids = 0;
528 for (
int n = 0; n < ops->
num_ops; n++) {
540 id->const_ids[
id->nb_const_ids++] =
tmp;
544 for (
int i = 0;
i < 4;
i++) {
550 id->const_ids[
id->nb_const_ids++] =
554 id->const_ids[
id->nb_const_ids++] =
564 id->const_ids[
id->nb_const_ids++] =
tmp;
570 float q =
op->scale.factor.num/(
float)
op->scale.factor.den;
580 id->const_ids[
id->nb_const_ids++] =
tmp;
585 for (
int i = 0;
i < 4;
i++) {
588 if (!
op->clamp.limit[
i].den) {
597 id->const_ids[
id->nb_const_ids++] =
tmp;
601 for (
int i = 0;
i < 4;
i++) {
602 if (
op->dither.y_offset[
i] < 0)
605 id->const_ids[
id->nb_const_ids++] =
tmp;
611 for (
int i = 0;
i < 4;
i++) {
612 for (
int j = 0; j < 4; j++) {
616 id->const_ids[
id->nb_const_ids++] =
620 id->const_ids[
id->nb_const_ids - 4],
621 id->const_ids[
id->nb_const_ids - 3],
622 id->const_ids[
id->nb_const_ids - 2],
623 id->const_ids[
id->nb_const_ids - 1]);
624 id->const_ids[
id->nb_const_ids++] =
tmp;
628 id->const_ids[
id->nb_const_ids - 5*4 + 4],
629 id->const_ids[
id->nb_const_ids - 5*3 + 4],
630 id->const_ids[
id->nb_const_ids - 5*2 + 4],
631 id->const_ids[
id->nb_const_ids - 5*1 + 4]);
632 id->const_ids[
id->nb_const_ids++] =
tmp;
634 for (
int i = 0;
i < 4;
i++) {
636 id->const_ids[
id->nb_const_ids++] =
641 id->const_ids[
id->nb_const_ids - 4],
642 id->const_ids[
id->nb_const_ids - 3],
643 id->const_ids[
id->nb_const_ids - 2],
644 id->const_ids[
id->nb_const_ids - 1]);
645 id->const_ids[
id->nb_const_ids++] =
tmp;
655 static void define_shader_bindings(
const SwsOpList *ops,
SPICtx *spi, SPIRVIDs *
id,
656 int in_img_count,
int out_img_count)
661 struct DitherData *
dither =
id->dither;
662 for (
int i = 0;
i <
id->nb_dither_bufs;
i++) {
671 SpvStorageClassUniform, 0);
675 id->filt_o_ptr_id = 0;
676 if (
id->nb_filter_bufs)
680 for (
int i = 0;
i <
id->nb_filter_bufs;
i++) {
681 struct FilterData *
f = &
id->filt[
i];
692 SpvStorageClassUniform, 0);
696 for (
int t = 1; t <
f->filter_size; t++)
706 id->f32_type :
id->u32_type,
707 2, 0, 0, 0, 2, SpvImageFormatUnknown);
709 id->u32_cid[out_img_count]);
712 id->in_img_array_id = 0;
718 id->in_img_type = match ?
id->out_img_type :
721 id->f32_type :
id->u32_type,
722 2, 0, 0, 0, 2, SpvImageFormatUnknown);
724 id->u32_cid[in_img_count]);
731 id->out_img_array_id);
739 id->in_img_array_id);
746 SpvStorageClassInput, 0);
749 SpvStorageClassUniformConstant, 0);
752 SpvStorageClassUniformConstant, 0);
754 if (
id->interlaced) {
757 id->push_const_struct_id);
761 SpvStorageClassPushConstant, 0);
765 static int insert_vmat_linear(
const SwsOp *
op,
SPICtx *spi, SPIRVIDs *
id,
766 int data,
int const_off)
768 data = spi_OpMatrixTimesVector(spi,
id->f32vec4_type,
769 id->const_ids[const_off + 4*5],
771 return spi_OpFAdd(spi,
id->f32vec4_type,
772 id->const_ids[const_off + 4*5 + 1 + 4],
data);
775 static int insert_bitexact_linear(
const SwsOp *
op,
SPICtx *spi, SPIRVIDs *
id,
776 int data,
int linear_ops_idx,
int const_off)
779 int type_v =
op->type ==
SWS_PIXEL_F32 ?
id->f32vec4_type :
id->u32vec4_type;
788 spi->
off =
id->linear_deco_off[linear_ops_idx];
789 for (
int i = 0;
i <
id->linear_deco_ops[linear_ops_idx];
i++)
794 for (
int j = 0; j < 4; j++) {
796 if (
op->lin.m[j][0].num)
797 res[j] = spi_OpFMul(spi, type_s,
tmp[0],
798 id->const_ids[const_off + j*5 + 0]);
800 if (
op->lin.m[j][0].num &&
op->lin.m[j][4].num)
801 res[j] = spi_OpFAdd(spi, type_s,
802 id->const_ids[const_off + 4*5 + 1 + j], res[j]);
803 else if (
op->lin.m[j][4].num)
804 res[j] =
id->const_ids[const_off + 4*5 + 1 + j];
806 for (
int i = 1;
i < 4;
i++) {
807 if (!
op->lin.m[j][
i].num)
810 int v = spi_OpFMul(spi, type_s,
tmp[
i],
811 id->const_ids[const_off + j*5 +
i]);
812 if (
op->lin.m[j][0].num ||
op->lin.m[j][4].num)
813 res[j] = spi_OpFAdd(spi, type_s, res[j], v);
820 res[0], res[1], res[2], res[3]);
823 static int read_filtered(
SPICtx *spi, SPIRVIDs *
id,
const SwsOpList *ops,
824 const SwsOp *
op,
const struct FilterData *
f,
825 const int *in_img,
int gid,
int gi2)
831 const int read_vtype = src_float ?
id->f32vec4_type :
id->u32vec4_type;
838 id->u32_cid[1], axis);
839 int o =
spi_OpLoad(spi,
id->i32_type, o_ptr, SpvMemoryAccessMaskNone, 0);
847 if (src_interlaced && is_h) {
848 pos_y = spi_OpShiftLeftLogical(spi,
id->i32_type, pos_y,
id->u32_cid[1]);
849 pos_y = spi_OpIAdd(spi,
id->i32_type, pos_y,
id->field_i32);
853 int acc_s[4] = {
id->f32_0,
id->f32_0,
id->f32_0,
id->f32_0 };
854 int acc_v =
id->f32_0;
857 id->f32_0,
id->f32_0,
858 id->f32_0,
id->f32_0);
860 for (
int t = 0; t <
f->filter_size; t++) {
863 id->u32_cid[0], axis,
864 f->tap_const_base + t);
866 SpvMemoryAccessMaskNone, 0);
869 int c = t ? spi_OpIAdd(spi,
id->i32_type, o,
f->tap_const_base + t) : o;
872 if (src_interlaced && !is_h) {
873 c = spi_OpShiftLeftLogical(spi,
id->i32_type,
c,
id->u32_cid[1]);
874 c = spi_OpIAdd(spi,
id->i32_type,
c,
id->field_i32);
883 SpvImageOperandsMaskNone);
885 px = spi_OpConvertUToF(spi,
id->f32vec4_type,
px);
886 px = spi_OpVectorTimesScalar(spi,
id->f32vec4_type,
px,
w);
887 acc_v = spi_OpFAdd(spi,
id->f32vec4_type, acc_v,
px);
889 for (
int e = 0; e <
op->rw.elems; e++) {
892 SpvImageOperandsMaskNone);
897 px = spi_OpConvertUToF(spi,
id->f32_type,
px);
899 px = spi_OpFMul(spi,
id->f32_type,
w,
px);
900 acc_s[e] = spi_OpFAdd(spi,
id->f32_type, acc_s[e],
px);
908 acc_s[0], acc_s[1], acc_s[2], acc_s[3]);
914 uint8_t spvbuf[1024*16];
915 SPICtx spi_context = { 0 }, *spi = &spi_context;
916 SPIRVIDs spid_data = { 0 }, *
id = &spid_data;
917 spi_init(spi, spvbuf,
sizeof(spvbuf));
920 p->interlaced =
id->interlaced;
923 (uint32_t []) { 32, 32, 1 }, 0);
928 VK_SHADER_STAGE_COMPUTE_BIT);
942 .
type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
943 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
947 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
948 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
966 id->nb_dither_bufs = 0;
967 id->nb_filter_bufs = 0;
968 int nb_data_bufs = 0;
969 for (
int n = 0; n < ops->
num_ops; n++) {
976 struct DitherData *d = &
id->dither[
id->nb_dither_bufs++];
977 d->size = 1 <<
op->dither.size_log2;
982 d->binding = nb_data_bufs;
988 struct FilterData *
f = &
id->filt[
id->nb_filter_bufs++];
989 f->filter =
op->rw.filter;
998 f->binding = nb_data_bufs;
1004 id->in_vars[3 + nb_data_bufs] = var_id;
1006 .
type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1007 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1012 nb_data_bufs, 1, 0);
1014 if (
id->interlaced) {
1017 id->in_vars[3 +
id->nb_dither_bufs +
id->nb_filter_bufs] =
1018 id->push_const_var_id;
1022 define_shader_header(sws, shd, ops, spi,
id);
1023 define_shader_consts(sws, ops, spi,
id);
1024 define_shader_bindings(ops, spi,
id, in_img_count, out_img_count);
1031 int in_img[4] = { 0 };
1032 for (
int i = 0;
i < in_img_count;
i++) {
1035 id->in_vars[1],
id->u32_cid[
i]);
1037 SpvMemoryAccessMaskNone, 0);
1042 for (
int i = 0;
i < out_img_count;
i++) {
1044 id->in_vars[2],
id->u32_cid[
i]);
1046 SpvMemoryAccessMaskNone, 0);
1051 SpvMemoryAccessMaskNone, 0);
1055 int gi2 = spi_OpBitcast(spi,
id->i32vec2_type, gid);
1060 int dst_gid = gid, dst_gi2 = gi2;
1062 if (
id->interlaced) {
1064 id->push_const_var_id,
1066 int field_u32 =
spi_OpLoad(spi,
id->u32_type, field_u32_ptr,
1067 SpvMemoryAccessMaskNone, 0);
1068 id->field_i32 = spi_OpBitcast(spi,
id->i32_type, field_u32);
1070 int img_y_i32 = spi_OpShiftLeftLogical(spi,
id->i32_type,
1073 img_y_i32 = spi_OpIAdd(spi,
id->i32_type, img_y_i32,
id->field_i32);
1078 int mapped_gid = spi_OpBitcast(spi,
id->u32vec2_type, mapped_gi2);
1081 src_gid = mapped_gid;
1083 dst_gid = mapped_gid;
1084 dst_gi2 = mapped_gi2;
1089 int img1_s = spi_OpImageQuerySize(spi,
id->i32vec2_type, out_img[0]);
1090 int scmp = spi_OpSGreaterThanEqual(spi,
id->bvec2_type, dst_gi2, img1_s);
1091 scmp = spi_OpAny(spi,
id->b_type, scmp);
1106 id->f32_p,
id->f32_p,
1107 id->f32_p,
id->f32_p);
1110 id->u32_p,
id->u32_p,
1111 id->u32_p,
id->u32_p);
1114 int nb_const_ids = 0;
1115 int nb_dither_bufs = 0;
1116 int nb_linear_ops = 0;
1117 int nb_filter_used = 0;
1120 for (
int n = 0; n < ops->
num_ops; n++) {
1123 op->convert.to :
op->type;
1125 id->f32vec4_type :
id->u32vec4_type;
1127 id->f32_type :
id->u32_type;
1129 id->f32_p :
id->u32_p;
1135 }
else if (
op->rw.filter) {
1136 data = read_filtered(spi,
id, ops,
op,
1137 &
id->filt[nb_filter_used++],
1139 }
else if (
op->rw.packed) {
1141 src_gid, SpvImageOperandsMaskNone);
1144 for (
int i = 0;
i <
op->rw.elems;
i++) {
1147 SpvImageOperandsMaskNone);
1155 if (
op->rw.frac ||
op->rw.filter) {
1157 }
else if (
op->rw.packed) {
1159 SpvImageOperandsMaskNone);
1161 for (
int i = 0;
i <
op->rw.elems;
i++) {
1165 SpvImageOperandsMaskNone);
1170 for (
int i = 0;
i < 4;
i++) {
1171 if (!
op->clear.value[
i].den)
1174 id->const_ids[nb_const_ids++],
1187 data = spi_OpIMul(spi, type_v,
data,
id->const_ids[nb_const_ids++]);
1189 data = spi_OpConvertFToU(spi, type_v,
data);
1191 data = spi_OpConvertUToF(spi, type_v,
data);
1194 data = spi_OpShiftLeftLogical(spi, type_v,
data,
1195 id->const_ids[nb_const_ids++]);
1198 data = spi_OpShiftRightLogical(spi, type_v,
data,
1199 id->const_ids[nb_const_ids++]);
1203 data = spi_OpFMul(spi, type_v,
data,
1204 id->const_ids[nb_const_ids++]);
1206 data = spi_OpIMul(spi, type_v,
data,
1207 id->const_ids[nb_const_ids++]);
1212 op->op ==
SWS_OP_MIN ? GLSLstd450FMin : GLSLstd450FMax :
1213 op->op ==
SWS_OP_MIN ? GLSLstd450UMin : GLSLstd450UMax;
1214 for (
int i = 0;
i < 4;
i++) {
1215 if (!
op->clamp.limit[
i].den)
1219 tmp,
id->const_ids[nb_const_ids++]);
1225 int did = nb_dither_bufs++;
1228 x_id = spi_OpBitwiseAnd(spi,
id->u32_type, x_id,
1229 id->dither[did].mask_id);
1230 for (
int i = 0;
i < 4;
i++) {
1231 if (
op->dither.y_offset[
i] < 0)
1234 int y_id = spi_OpIAdd(spi,
id->u32_type, y_pos,
1235 id->const_ids[nb_const_ids++]);
1236 y_id = spi_OpBitwiseAnd(spi,
id->u32_type, y_id,
1237 id->dither[did].mask_id);
1240 id->dither[did].id,
id->u32_cid[0],
1243 SpvMemoryAccessMaskNone, 0);
1246 tmp = spi_OpFAdd(spi, type_s,
tmp,
val);
1253 data = insert_bitexact_linear(
op, spi,
id,
data, nb_linear_ops, nb_const_ids);
1255 data = insert_vmat_linear(
op, spi,
id,
data, nb_const_ids);
1257 nb_const_ids += 5*5 + 1;
1289 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
1301 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1302 .mem_layout = img_type,
1306 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
/*
 * printf-style helpers for emitting a rational constant as "(num/den)" text.
 *
 * QSTR is the format fragment: two %i conversions separated by '/', followed
 * by a %s suffix placed directly after the denominator, all in parentheses.
 *
 * QTYPE(Q) expands to the three matching arguments, in order: (Q).num,
 * (Q).den, and a suffix string that is ".0f" when cur_type equals
 * SWS_PIXEL_F32 and "" otherwise.
 *
 * Caveats:
 *  - QTYPE reads `cur_type` from the expansion site, so a variable with that
 *    name must be in scope wherever the macro is used.
 *  - QTYPE expands to a comma-separated argument list, not a single
 *    expression, and it evaluates Q twice.
 */
1312 #define QSTR "(%i/%i%s)"
1313 #define QTYPE(Q) (Q).num, (Q).den, cur_type == SWS_PIXEL_F32 ? ".0f" : ""
1316 int idx,
const char *type_name,
1317 const char *type_v,
const char *type_s)
1321 if (
op->rw.filter) {
1324 const char *coord_y;
1328 coord_y =
interlaced ?
"((o + i) * 2 + int(params.field))" :
"o + i";
1330 av_bprintf(&shd->
src,
" int o = filter_o%i[%s];\n", idx, axis);
1335 if (
op->rw.packed) {
1336 GLSLF(2,
tmp +=
w * %
s(imageLoad(src_img[%
i], ivec2(%
s, %
s))); ,
1337 type_v, ops->
plane_src[0], coord_x, coord_y);
1339 for (
int i = 0;
i <
op->rw.elems;
i++)
1341 tmp.%
c +=
w * %
s(imageLoad(src_img[%
i], ivec2(%
s, %
s))[0]); ,
1342 "xyzw"[
i], type_s, ops->
plane_src[
i], coord_x, coord_y);
1347 const char *src_pos =
interlaced ?
"spos" :
"pos";
1348 if (
op->rw.packed) {
1349 GLSLF(1, %
s = %
s(imageLoad(src_img[%
i], %
s)); ,
1350 type_name, type_v, ops->
plane_src[0], src_pos);
1352 for (
int i = 0;
i <
op->rw.elems;
i++)
1353 GLSLF(1, %
s.%
c = %
s(imageLoad(src_img[%
i], %
s)[0]); ,
1354 type_name,
"xyzw"[
i], type_s, ops->
plane_src[
i], src_pos);
1365 void *spv_opaque =
NULL;
1369 VK_SHADER_STAGE_COMPUTE_BIT,
1370 NULL, 0, 32, 32, 1, 0);
1377 VK_SHADER_STAGE_COMPUTE_BIT);
1385 add_desc_read_write(&
buf_desc[nb_desc++], &
p->src_rep,
read);
1386 add_desc_read_write(&
buf_desc[nb_desc++], &
p->dst_rep, write);
1396 for (
int n = 0; n < ops->
num_ops; n++) {
1399 int size = (1 <<
op->dither.size_log2);
1401 snprintf(data_buf_name[nb_desc], 256,
"dither_buf%i", n);
1402 snprintf(data_str_name[nb_desc], 256,
"float dither_mat%i[%i][%i];",
1405 .name = data_buf_name[nb_desc],
1406 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1407 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1408 .mem_layout =
"scalar",
1409 .buf_content = data_str_name[nb_desc],
1417 op->rw.kernel :
op->filter.kernel;
1418 snprintf(data_buf_name[nb_desc], 256,
"filter_buf%i", n);
1419 snprintf(data_str_name[nb_desc], 256,
1420 "float filter_w%i[%i][%i];\n"
1421 " int filter_o%i[%i];",
1425 .name = data_buf_name[nb_desc],
1426 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1427 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1428 .mem_layout =
"scalar",
1429 .buf_content = data_str_name[nb_desc],
1439 GLSLC(0,
layout(push_constant, std430) uniform pushConstants { );
1441 GLSLC(0, } params; );
1447 GLSLC(1, ivec2
pos = ivec2(gl_GlobalInvocationID.xy); );
1448 GLSLC(1, ivec2
size = imageSize(dst_img[0]); );
1450 GLSLC(1, ivec2 spos = ivec2(
pos.x,
pos.y * 2 +
int(params.field)); );
1452 GLSLC(1, ivec2 dpos = ivec2(
pos.x,
pos.y * 2 +
int(params.field)); );
1454 GLSLC(1,
if (any(greaterThanEqual(dpos,
size))) );
1460 GLSLC(1, u8vec4 u8; );
1461 GLSLC(1, u16vec4 u16; );
1462 GLSLC(1, u32vec4 u32; );
1463 GLSLC(1, precise f32vec4 f32; );
1467 for (
int n = 0; n < ops->
num_ops; n++) {
1472 const char *type_v = cur_type ==
SWS_PIXEL_F32 ?
"f32vec4" :
1484 read_glsl(ops,
op, shd, n, type_name, type_v, type_s);
1489 if (
op->rw.frac ||
op->rw.filter) {
1491 }
else if (
op->rw.packed) {
1492 GLSLF(1, imageStore(dst_img[%
i], %
s, %
s(%
s)); ,
1493 ops->
plane_dst[0], dst_pos, type_v, type_name);
1495 for (
int i = 0;
i <
op->rw.elems;
i++)
1496 GLSLF(1, imageStore(dst_img[%
i], %
s, %
s(%
s[%
i])); ,
1497 ops->
plane_dst[
i], dst_pos, type_v, type_name,
i);
1503 for (
int i = 0;
i < 4;
i++)
1509 for (
int i = 0;
i < 4;
i++) {
1513 "xyzw"[
i], type_s, QTYPE(
op->clear.value[
i]));
1519 type_name, type_name, QTYPE(
op->scale.factor));
1523 for (
int i = 0;
i < 4;
i++) {
1524 if (!
op->clamp.limit[
i].den)
1527 type_name,
"xyzw"[
i],
1529 type_name,
"xyzw"[
i], QTYPE(
op->clamp.limit[
i]));
1549 int size = (1 <<
op->dither.size_log2);
1550 for (
int i = 0;
i < 4;
i++) {
1551 if (
op->dither.y_offset[
i] < 0)
1553 av_bprintf(&shd->
src,
" %s.%c += dither_mat%i[(pos.y + %i) & %i]"
1555 type_name,
"xyzw"[
i], n,
1556 op->dither.y_offset[
i],
size - 1,
1562 for (
int i = 0;
i < 4;
i++) {
1563 if (
op->lin.m[
i][4].num)
1565 QTYPE(
op->lin.m[
i][4]));
1568 for (
int j = 0; j < 4; j++) {
1569 if (!
op->lin.m[
i][j].num)
1572 "xyzw"[
i],
"xyzw"[j], QTYPE(
op->lin.m[
i][j]));
1579 av_bprintf(&shd->
src,
" %s = %s.%s;\n", type_name, type_name,
1584 av_bprintf(&shd->
src,
" %s = %s.%s;\n", type_name, type_name,
1594 err =
s->spvc->compile_shader(&
s->vkctx,
s->spvc, shd,
1595 &spv_data, &spv_len,
"main",
1603 s->spvc->free_shader(
s->spvc, &spv_opaque);
1635 VkFormatProperties2 prop = {
1636 .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
1639 vk->GetPhysicalDeviceFormatProperties2(
s->vkctx.hwctx->phys_dev,
1640 VK_FORMAT_B8G8R8A8_UNORM,
1642 if (!(prop.formatProperties.optimalTilingFeatures &
1643 VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT)) {
1651 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
1652 err = add_ops_glsl(sws,
p,
s, ops, &
p->shd);
1656 #if HAVE_SPIRV_HEADERS_SPIRV_H || HAVE_SPIRV_UNIFIED1_SPIRV_H
1657 err = add_ops_spirv(sws,
p,
s, ops, &
p->shd);
1667 for (
int i = 0;
i <
p->nb_data_bufs;
i++)
1669 1,
i, 0, &
p->data_bufs[
i],
1670 0, VK_WHOLE_SIZE, VK_FORMAT_UNDEFINED);
1686 #if HAVE_SPIRV_HEADERS_SPIRV_H || HAVE_SPIRV_UNIFIED1_SPIRV_H
1696 .compile = compile_spirv,
1701 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
1711 .compile = compile_glsl,