FFmpeg
vf_transpose_cuda.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2026 NyanMisaka
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/avassert.h"
22 #include "libavutil/common.h"
23 #include "libavutil/hwcontext.h"
25 #include "libavutil/cuda_check.h"
26 #include "libavutil/internal.h"
27 #include "libavutil/opt.h"
28 #include "libavutil/pixdesc.h"
29 
30 #include "avfilter.h"
31 #include "filters.h"
32 #include "transpose.h"
33 #include "video.h"
34 
35 #include "cuda/load_helper.h"
36 
/* Integer division of a by b rounded up (for positive operands); used below
 * to compute how many thread blocks are needed to cover a plane. */
37 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
/* Thread-block width and height passed to cuLaunchKernel in call_kernel(). */
38 #define BLOCK_X 32
39 #define BLOCK_Y 16
40 
/* Wrap a CUDA driver call with FF_CUDA_CHECK_DL. The expansion refers to the
 * identifiers `ctx` and `s`, so both must be in scope wherever this macro is
 * used. Callers in this file treat a negative result as failure. */
41 #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
42 
43 static const enum AVPixelFormat supported_formats[] = {
61 };
62 
63 typedef struct TransposeCUDAContext {
64  const AVClass *class;
65 
70 
72 
73  CUcontext cu_ctx;
74  CUmodule cu_module;
75  CUfunction cu_func_uchar;
76  CUfunction cu_func_ushort;
77  CUfunction cu_func_uchar2;
78  CUfunction cu_func_ushort2;
79  CUfunction cu_func_uchar4;
80  CUstream cu_stream;
81 
82  int flip_wh;
83  int passthrough; ///< PassthroughType, landscape passthrough mode enabled
84  int dir; ///< TransposeDir
86 
88 {
89  TransposeCUDAContext *s = ctx->priv;
90 
91  s->frame = av_frame_alloc();
92  if (!s->frame)
93  return AVERROR(ENOMEM);
94 
95  s->tmp_frame = av_frame_alloc();
96  if (!s->tmp_frame)
97  return AVERROR(ENOMEM);
98 
99  return 0;
100 }
101 
103 {
104  TransposeCUDAContext *s = ctx->priv;
105 
106  if (s->hwctx && s->cu_module) {
107  CUcontext dummy;
108  CudaFunctions *cu = s->hwctx->internal->cuda_dl;
109  CHECK_CU(cu->cuCtxPushCurrent(s->cu_ctx));
110  CHECK_CU(cu->cuModuleUnload(s->cu_module));
111  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
112  }
113 
114  av_frame_free(&s->frame);
115  av_buffer_unref(&s->frames_ctx);
116  av_frame_free(&s->tmp_frame);
117 }
118 
120  AVBufferRef *device_ctx,
121  int width, int height,
122  enum AVPixelFormat sw_format)
123 {
124  AVBufferRef *out_ref = NULL;
125  AVHWFramesContext *out_ctx;
126  int ret;
127 
128  out_ref = av_hwframe_ctx_alloc(device_ctx);
129  if (!out_ref)
130  return AVERROR(ENOMEM);
131  out_ctx = (AVHWFramesContext*)out_ref->data;
132 
133  out_ctx->format = AV_PIX_FMT_CUDA;
134  out_ctx->sw_format = sw_format;
135  out_ctx->width = FFALIGN(width, 32);
136  out_ctx->height = FFALIGN(height, 32);
137 
138  ret = av_hwframe_ctx_init(out_ref);
139  if (ret < 0)
140  goto fail;
141 
142  av_frame_unref(s->frame);
143  ret = av_hwframe_get_buffer(out_ref, s->frame, 0);
144  if (ret < 0)
145  goto fail;
146 
147  s->frame->width = width;
148  s->frame->height = height;
149 
150  av_buffer_unref(&s->frames_ctx);
151  s->frames_ctx = out_ref;
152 
153  return 0;
154 fail:
155  av_buffer_unref(&out_ref);
156  return ret;
157 }
158 
159 static int format_is_supported(enum AVPixelFormat fmt)
160 {
161  for (int i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
162  if (supported_formats[i] == fmt)
163  return 1;
164  return 0;
165 }
166 
168  int out_width, int out_height)
169 {
170  FilterLink *inl = ff_filter_link(ctx->inputs[0]);
171  FilterLink *outl = ff_filter_link(ctx->outputs[0]);
172  TransposeCUDAContext *s = ctx->priv;
173  AVHWFramesContext *in_frames_ctx;
174  enum AVPixelFormat format;
175  int ret;
176 
177  /* check that we have a hw context */
178  if (!inl->hw_frames_ctx) {
179  av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n");
180  return AVERROR(EINVAL);
181  }
182 
183  in_frames_ctx = (AVHWFramesContext*)inl->hw_frames_ctx->data;
184  format = in_frames_ctx->sw_format;
185  s->pix_desc = av_pix_fmt_desc_get(format);
186 
187  if (!format_is_supported(format)) {
188  av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n",
190  return AVERROR(ENOSYS);
191  }
192 
193  ret = init_hwframe_ctx(s, in_frames_ctx->device_ref,
194  out_width, out_height, format);
195  if (ret < 0)
196  return ret;
197 
198  s->hwctx = in_frames_ctx->device_ctx->hwctx;
199  s->cu_stream = s->hwctx->stream;
200 
201  outl->hw_frames_ctx = av_buffer_ref(s->frames_ctx);
202  if (!outl->hw_frames_ctx)
203  return AVERROR(ENOMEM);
204 
205  return 0;
206 }
207 
209 {
210  extern const unsigned char ff_vf_transpose_cuda_ptx_data[];
211  extern const unsigned int ff_vf_transpose_cuda_ptx_len;
212  FilterLink *outl = ff_filter_link(outlink);
213  AVFilterContext *ctx = outlink->src;
214  AVFilterLink *inlink = ctx->inputs[0];
216  TransposeCUDAContext *s = ctx->priv;
217  CUcontext dummy, cuda_ctx;
218  CudaFunctions *cu;
219  int ret = 0;
220 
221  if ((inlink->w >= inlink->h && s->passthrough == TRANSPOSE_PT_TYPE_LANDSCAPE) ||
222  (inlink->w <= inlink->h && s->passthrough == TRANSPOSE_PT_TYPE_PORTRAIT)) {
223  if (inl->hw_frames_ctx) {
225  if (!outl->hw_frames_ctx)
226  return AVERROR(ENOMEM);
227  }
228 
230  "w:%d h:%d -> w:%d h:%d (passthrough mode)\n",
231  inlink->w, inlink->h, inlink->w, inlink->h);
232  return 0;
233  } else {
234  s->passthrough = TRANSPOSE_PT_TYPE_NONE;
235  }
236 
237  switch (s->dir) {
239  case TRANSPOSE_CCLOCK:
240  case TRANSPOSE_CLOCK:
242  outlink->w = inlink->h;
243  outlink->h = inlink->w;
244  s->flip_wh = 1;
245  break;
246  default:
247  outlink->w = inlink->w;
248  outlink->h = inlink->h;
249  s->flip_wh = 0;
250  break;
251  }
252 
253  if (s->flip_wh && inlink->sample_aspect_ratio.num)
254  outlink->sample_aspect_ratio = av_inv_q(inlink->sample_aspect_ratio);
255  else
256  outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
257 
258  ret = init_processing_chain(ctx, outlink->w, outlink->h);
259  if (ret < 0)
260  return ret;
261 
262  cuda_ctx = s->cu_ctx = s->hwctx->cuda_ctx;
263  cu = s->hwctx->internal->cuda_dl;
264 
265  ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
266  if (ret < 0)
267  return ret;
268 
269  ret = ff_cuda_load_module(ctx, s->hwctx, &s->cu_module,
270  ff_vf_transpose_cuda_ptx_data, ff_vf_transpose_cuda_ptx_len);
271  if (ret < 0)
272  goto exit;
273 
274  ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Transpose_Cuda_uchar"));
275  if (ret < 0)
276  goto exit;
277 
278  ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Transpose_Cuda_ushort"));
279  if (ret < 0)
280  goto exit;
281 
282  ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Transpose_Cuda_uchar2"));
283  if (ret < 0)
284  goto exit;
285 
286  ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Transpose_Cuda_ushort2"));
287  if (ret < 0)
288  goto exit;
289 
290  ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, "Transpose_Cuda_uchar4"));
291  if (ret < 0)
292  goto exit;
293 
295  "w:%d h:%d dir:%d -> w:%d h:%d\n",
296  inlink->w, inlink->h, s->dir, outlink->w, outlink->h);
297 exit:
298  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
299 
300  return ret;
301 }
302 
303 static CUresult call_kernel(AVFilterContext *ctx,
304  CUfunction cu_func,
305  CUarray_format cu_format,
306  int channels,
307  int is_422_uv, // Dst* & Src* are 4:2:2 UV planes
308  CUdeviceptr dst0,
309  CUdeviceptr dst1, // Dst1 is for fully planar V, optional
310  int dst_width, // Width is pixels per channel
311  int dst_height, // Height is pixels per channel
312  int dst_pitch, // Pitch is elements per channel
313  CUdeviceptr src0,
314  CUdeviceptr src1, // Src1 is for fully planar V, optional
315  int src_width, // Width is pixels per channel
316  int src_height, // Height is pixels per channel
317  int src_pitch)
318 {
319  TransposeCUDAContext *s = ctx->priv;
320  CudaFunctions *cu = s->hwctx->internal->cuda_dl;
321  CUtexObject src0_tex = 0, src1_tex = 0;
322  int ret;
323 
324  void *kernel_args[] = {
325  &dst0, &dst1, &dst_width, &dst_height, &dst_pitch,
326  &src0_tex, &src1_tex, &s->dir,
327  };
328 
329  CUDA_TEXTURE_DESC tex_desc = {
330  .addressMode = { CU_TR_ADDRESS_MODE_CLAMP,
331  CU_TR_ADDRESS_MODE_CLAMP },
332  .filterMode = is_422_uv ? CU_TR_FILTER_MODE_LINEAR
333  : CU_TR_FILTER_MODE_POINT,
334  .flags = 2 /* CU_TRSF_NORMALIZED_COORDINATES */
335  };
336  CUDA_RESOURCE_DESC res_desc = {
337  .resType = CU_RESOURCE_TYPE_PITCH2D,
338  .res.pitch2D.format = cu_format,
339  .res.pitch2D.numChannels = channels,
340  .res.pitch2D.pitchInBytes = src_pitch,
341  .res.pitch2D.width = src_width,
342  .res.pitch2D.height = src_height
343  };
344 
345  res_desc.res.pitch2D.devPtr = (CUdeviceptr)src0;
346  ret = CHECK_CU(cu->cuTexObjectCreate(&src0_tex, &res_desc, &tex_desc, NULL));
347  if (ret < 0)
348  goto exit;
349 
350  if (src1) {
351  res_desc.res.pitch2D.devPtr = (CUdeviceptr)src1;
352  ret = CHECK_CU(cu->cuTexObjectCreate(&src1_tex, &res_desc, &tex_desc, NULL));
353  if (ret < 0)
354  goto exit;
355  }
356 
357  ret = CHECK_CU(cu->cuLaunchKernel(cu_func,
358  DIV_UP(dst_width, BLOCK_X), DIV_UP(dst_height, BLOCK_Y), 1,
359  BLOCK_X, BLOCK_Y, 1, 0, s->cu_stream, kernel_args, NULL));
360 exit:
361  if (src0_tex)
362  CHECK_CU(cu->cuTexObjectDestroy(src0_tex));
363  if (src1_tex)
364  CHECK_CU(cu->cuTexObjectDestroy(src1_tex));
365 
366  return ret;
367 }
368 
370  AVFrame *out, AVFrame *in)
371 {
372  TransposeCUDAContext *s = ctx->priv;
373  int ret;
374 
375  for (int c = 0; c < s->pix_desc->nb_components; c++) {
376  const AVComponentDescriptor *comp = &s->pix_desc->comp[c];
377  const int p = comp->plane;
378  int pix_size, channels;
379  int is_planar_u, is_planar_v, is_422_uv;
380  CUfunction func;
381  CUarray_format format;
382 
383  pix_size = (comp->depth + 7) / 8;
384  channels = comp->step / pix_size;
385  if (pix_size > 2 || channels > 4)
386  av_unreachable("Unsupported pixel format!");
387 
388  is_planar_u = p == 1 && channels == 1;
389  is_planar_v = p == 2 && channels == 1;
390  is_422_uv = p && s->pix_desc->log2_chroma_w == 1 && !s->pix_desc->log2_chroma_h;
391 
392  if (comp->plane < c || is_planar_v) {
393  // We process planes as a whole, so don't reprocess
394  // them for additional components
395  continue;
396  }
397 
398  switch (pix_size) {
399  case 1:
400  func = channels == 4 ? s->cu_func_uchar4 :
401  channels == 2 ? s->cu_func_uchar2 : s->cu_func_uchar;
402  format = CU_AD_FORMAT_UNSIGNED_INT8;
403  break;
404  case 2:
405  func = channels == 2 ? s->cu_func_ushort2 : s->cu_func_ushort;
406  format = CU_AD_FORMAT_UNSIGNED_INT16;
407  break;
408  default:
409  av_unreachable("Unsupported pixel format!");
410  }
411 
412  ret = call_kernel(ctx, func, format, channels, is_422_uv,
413  (CUdeviceptr)out->data[p],
414  (CUdeviceptr)(is_planar_u ? out->data[p+1] : NULL),
415  AV_CEIL_RSHIFT(out->width, p ? s->pix_desc->log2_chroma_w : 0),
416  AV_CEIL_RSHIFT(out->height, p ? s->pix_desc->log2_chroma_h : 0),
417  out->linesize[p] / comp->step,
418  (CUdeviceptr)in->data[p],
419  (CUdeviceptr)(is_planar_u ? in->data[p+1] : NULL),
420  AV_CEIL_RSHIFT(in->width, p ? s->pix_desc->log2_chroma_w : 0),
421  AV_CEIL_RSHIFT(in->height, p ? s->pix_desc->log2_chroma_h : 0),
422  in->linesize[p]);
423  if (ret < 0)
424  return ret;
425  }
426 
427  return 0;
428 }
429 
431  AVFrame *out, AVFrame *in)
432 {
433  TransposeCUDAContext *s = ctx->priv;
434  AVFilterLink *outlink = ctx->outputs[0];
435  int ret;
436 
437  ret = cudatranspose_rotate(ctx, s->frame, in);
438  if (ret < 0)
439  return ret;
440 
441  ret = av_hwframe_get_buffer(s->frame->hw_frames_ctx, s->tmp_frame, 0);
442  if (ret < 0)
443  return ret;
444 
445  av_frame_move_ref(out, s->frame);
446  av_frame_move_ref(s->frame, s->tmp_frame);
447 
448  s->frame->width = outlink->w;
449  s->frame->height = outlink->h;
450 
451  ret = av_frame_copy_props(out, in);
452  if (ret < 0)
453  return ret;
454 
455  if (s->flip_wh && in->sample_aspect_ratio.num)
456  out->sample_aspect_ratio = av_inv_q(in->sample_aspect_ratio);
457  else
458  out->sample_aspect_ratio = in->sample_aspect_ratio;
459 
460  return 0;
461 }
462 
464 {
465  AVFilterContext *ctx = link->dst;
466  TransposeCUDAContext *s = ctx->priv;
467  AVFilterLink *outlink = ctx->outputs[0];
468  CudaFunctions *cu;
469  AVFrame *out = NULL;
470  CUcontext dummy;
471  int ret = 0;
472 
473  if (s->passthrough)
474  return ff_filter_frame(outlink, in);
475 
476  out = av_frame_alloc();
477  if (!out) {
478  ret = AVERROR(ENOMEM);
479  goto fail;
480  }
481 
482  cu = s->hwctx->internal->cuda_dl;
483 
484  ret = CHECK_CU(cu->cuCtxPushCurrent(s->cu_ctx));
485  if (ret < 0)
486  goto fail;
487 
489 
490  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
491  if (ret < 0)
492  goto fail;
493 
494  av_frame_free(&in);
495 
496  return ff_filter_frame(outlink, out);
497 
498 fail:
499  av_frame_free(&in);
500  av_frame_free(&out);
501  return ret;
502 }
503 
505 {
506  TransposeCUDAContext *s = inlink->dst->priv;
507 
508  return s->passthrough ?
511 }
512 
/* Byte offset of member x inside TransposeCUDAContext, for the option table. */
513 #define OFFSET(x) offsetof(TransposeCUDAContext, x)
/* Flags shared by every option below: a filtering parameter for video. */
514 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
515 
516 static const AVOption cudatranspose_options[] = {
517  { "dir", "set transpose direction", OFFSET(dir), AV_OPT_TYPE_INT, { .i64 = TRANSPOSE_CCLOCK_FLIP }, 0, 6, FLAGS, .unit = "dir" },
518  { "cclock_flip", "rotate counter-clockwise with vertical flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK_FLIP }, 0, 0, FLAGS, .unit = "dir" },
519  { "clock", "rotate clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK }, 0, 0, FLAGS, .unit = "dir" },
520  { "cclock", "rotate counter-clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK }, 0, 0, FLAGS, .unit = "dir" },
521  { "clock_flip", "rotate clockwise with vertical flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK_FLIP }, 0, 0, FLAGS, .unit = "dir" },
522  { "reversal", "rotate by half-turn", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_REVERSAL }, 0, 0, FLAGS, .unit = "dir" },
523  { "hflip", "flip horizontally", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_HFLIP }, 0, 0, FLAGS, .unit = "dir" },
524  { "vflip", "flip vertically", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_VFLIP }, 0, 0, FLAGS, .unit = "dir" },
525 
526  { "passthrough", "do not apply transposition if the input matches the specified geometry", OFFSET(passthrough), AV_OPT_TYPE_INT, { .i64 = TRANSPOSE_PT_TYPE_NONE }, 0, 2, FLAGS, .unit = "passthrough" },
527  { "none", "always apply transposition", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_PT_TYPE_NONE }, 0, 0, FLAGS, .unit = "passthrough" },
528  { "landscape", "preserve landscape geometry", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_PT_TYPE_LANDSCAPE }, 0, 0, FLAGS, .unit = "passthrough" },
529  { "portrait", "preserve portrait geometry", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_PT_TYPE_PORTRAIT }, 0, 0, FLAGS, .unit = "passthrough" },
530 
531  { NULL },
532 };
533 
534 AVFILTER_DEFINE_CLASS(cudatranspose);
535 
537  {
538  .name = "default",
539  .type = AVMEDIA_TYPE_VIDEO,
540  .filter_frame = cudatranspose_filter_frame,
541  .get_buffer.video = cudatranspose_get_video_buffer,
542  },
543 };
544 
546  {
547  .name = "default",
548  .type = AVMEDIA_TYPE_VIDEO,
549  .config_props = cudatranspose_config_props,
550  },
551 };
552 
554  .p.name = "transpose_cuda",
555  .p.description = NULL_IF_CONFIG_SMALL("Transpose input video using CUDA"),
556  .p.priv_class = &cudatranspose_class,
557  .init = cudatranspose_init,
558  .uninit = cudatranspose_uninit,
559  .priv_size = sizeof(TransposeCUDAContext),
563  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
564 };
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:66
AVHWDeviceContext::hwctx
void * hwctx
The format-specific data, allocated and freed by libavutil along with this context.
Definition: hwcontext.h:88
call_kernel
static CUresult call_kernel(AVFilterContext *ctx, CUfunction cu_func, CUarray_format cu_format, int channels, int is_422_uv, CUdeviceptr dst0, CUdeviceptr dst1, int dst_width, int dst_height, int dst_pitch, CUdeviceptr src0, CUdeviceptr src1, int src_width, int src_height, int src_pitch)
Definition: vf_transpose_cuda.c:303
AV_PIX_FMT_CUDA
@ AV_PIX_FMT_CUDA
HW acceleration through CUDA.
Definition: pixfmt.h:260
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
format_is_supported
static int format_is_supported(enum AVPixelFormat fmt)
Definition: vf_transpose_cuda.c:159
AVERROR
Filter: the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format: for each input and each output, the list of supported formats. For video that means pixel format. For audio that means channel layout, sample format and sample rate. The lists are not just lists, they are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining list, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions.
opt.h
TransposeCUDAContext::cu_stream
CUstream cu_stream
Definition: vf_transpose_cuda.c:80
TransposeCUDAContext::cu_func_uchar
CUfunction cu_func_uchar
Definition: vf_transpose_cuda.c:75
hwcontext_cuda_internal.h
out
static FILE * out
Definition: movenc.c:55
cudatranspose_filter_frame
static int cudatranspose_filter_frame(AVFilterLink *link, AVFrame *in)
Definition: vf_transpose_cuda.c:463
comp
static void comp(unsigned char *dst, ptrdiff_t dst_stride, unsigned char *src, ptrdiff_t src_stride, int add)
Definition: eamad.c:79
TransposeCUDAContext::cu_module
CUmodule cu_module
Definition: vf_transpose_cuda.c:74
AV_PIX_FMT_BGR32
#define AV_PIX_FMT_BGR32
Definition: pixfmt.h:513
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1067
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3456
AVBufferRef::data
uint8_t * data
The data buffer.
Definition: buffer.h:90
TransposeCUDAContext::dir
int dir
TransposeDir.
Definition: vf_transpose_cuda.c:84
AVHWFramesContext::format
enum AVPixelFormat format
The pixel format identifying the underlying HW surface type.
Definition: hwcontext.h:200
src1
const pixel * src1
Definition: h264pred_template.c:420
ff_cuda_load_module
int ff_cuda_load_module(void *avctx, AVCUDADeviceContext *hwctx, CUmodule *cu_module, const unsigned char *data, const unsigned int length)
Loads a CUDA module and applies any decompression, if necessary.
Definition: load_helper.c:34
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
FF_FILTER_FLAG_HWFRAME_AWARE
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: filters.h:208
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:64
av_hwframe_ctx_init
int av_hwframe_ctx_init(AVBufferRef *ref)
Finalize the context before use.
Definition: hwcontext.c:337
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:435
pixdesc.h
AVFrame::width
int width
Definition: frame.h:507
av_hwframe_ctx_alloc
AVBufferRef * av_hwframe_ctx_alloc(AVBufferRef *device_ref_in)
Allocate an AVHWFramesContext tied to a given device context.
Definition: hwcontext.c:263
AVOption
AVOption.
Definition: opt.h:429
FILTER_SINGLE_PIXFMT
#define FILTER_SINGLE_PIXFMT(pix_fmt_)
Definition: filters.h:254
filters.h
AV_PIX_FMT_YUV420P10
#define AV_PIX_FMT_YUV420P10
Definition: pixfmt.h:539
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:226
TRANSPOSE_CLOCK_FLIP
@ TRANSPOSE_CLOCK_FLIP
Definition: transpose.h:34
cudatranspose_get_video_buffer
static AVFrame * cudatranspose_get_video_buffer(AVFilterLink *inlink, int w, int h)
Definition: vf_transpose_cuda.c:504
TransposeCUDAContext::frame
AVFrame * frame
Definition: vf_transpose_cuda.c:68
av_buffer_ref
AVBufferRef * av_buffer_ref(const AVBufferRef *buf)
Create a new reference to an AVBuffer.
Definition: buffer.c:103
TransposeCUDAContext::frames_ctx
AVBufferRef * frames_ctx
Definition: vf_transpose_cuda.c:67
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:220
AVHWFramesContext::width
int width
The allocated dimensions of the frames in this pool.
Definition: hwcontext.h:220
cudatranspose_outputs
static const AVFilterPad cudatranspose_outputs[]
Definition: vf_transpose_cuda.c:545
video.h
TransposeCUDAContext::flip_wh
int flip_wh
Definition: vf_transpose_cuda.c:82
dummy
static int dummy
Definition: ffplay.c:3751
CHECK_CU
#define CHECK_CU(x)
Definition: vf_transpose_cuda.c:41
cudatranspose_inputs
static const AVFilterPad cudatranspose_inputs[]
Definition: vf_transpose_cuda.c:536
AVFrame::data
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:456
TRANSPOSE_CCLOCK
@ TRANSPOSE_CCLOCK
Definition: transpose.h:33
ff_default_get_video_buffer
AVFrame * ff_default_get_video_buffer(AVFilterLink *link, int w, int h)
Definition: video.c:84
fail
#define fail()
Definition: checkasm.h:224
supported_formats
static enum AVPixelFormat supported_formats[]
Definition: vf_transpose_cuda.c:43
TransposeCUDAContext::cu_ctx
CUcontext cu_ctx
Definition: vf_transpose_cuda.c:73
cudatranspose_config_props
static int cudatranspose_config_props(AVFilterLink *outlink)
Definition: vf_transpose_cuda.c:208
AVRational::num
int num
Numerator.
Definition: rational.h:59
AVFilterPad
A filter pad used for either input or output.
Definition: filters.h:40
av_frame_alloc
AVFrame * av_frame_alloc(void)
Allocate an AVFrame and set its fields to default values.
Definition: frame.c:52
AV_PIX_FMT_YUV444P10
#define AV_PIX_FMT_YUV444P10
Definition: pixfmt.h:542
TRANSPOSE_HFLIP
@ TRANSPOSE_HFLIP
Definition: transpose.h:36
avassert.h
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:210
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
av_cold
#define av_cold
Definition: attributes.h:119
AVHWFramesContext::height
int height
Definition: hwcontext.h:220
FFFilter
Definition: filters.h:267
s
#define s(width, name)
Definition: cbs_vp9.c:198
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: filters.h:265
AV_PIX_FMT_YUV444P16
#define AV_PIX_FMT_YUV444P16
Definition: pixfmt.h:552
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:60
cudatranspose_rotate
static int cudatranspose_rotate(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
Definition: vf_transpose_cuda.c:369
ff_filter_link
static FilterLink * ff_filter_link(AVFilterLink *link)
Definition: filters.h:199
AV_PIX_FMT_0BGR32
#define AV_PIX_FMT_0BGR32
Definition: pixfmt.h:516
TransposeCUDAContext::cu_func_ushort2
CUfunction cu_func_ushort2
Definition: vf_transpose_cuda.c:78
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
channels
channels
Definition: aptx.h:31
load_helper.h
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:73
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
FLAGS
#define FLAGS
Definition: vf_transpose_cuda.c:514
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:76
NULL
#define NULL
Definition: coverity.c:32
AVHWFramesContext::sw_format
enum AVPixelFormat sw_format
The pixel format identifying the actual data layout of the hardware frames.
Definition: hwcontext.h:213
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:599
format
New swscale design to change SwsGraph is what coordinates multiple passes These can include cascaded scaling error diffusion and so on Or we could have separate passes for the vertical and horizontal scaling In between each SwsPass lies a fully allocated image buffer Graph passes may have different levels of e g we can have a single threaded error diffusion pass following a multi threaded scaling pass SwsGraph is internally recreated whenever the image format
Definition: swscale-v2.txt:14
av_buffer_unref
void av_buffer_unref(AVBufferRef **buf)
Free a given reference and automatically free the buffer if there are no more references to it.
Definition: buffer.c:139
AVComponentDescriptor
Definition: pixdesc.h:30
av_unreachable
#define av_unreachable(msg)
Asserts that are used as compiler optimization hints depending upon ASSERT_LEVEL and NBDEBUG.
Definition: avassert.h:116
AVHWFramesContext::device_ref
AVBufferRef * device_ref
A reference to the parent AVHWDeviceContext.
Definition: hwcontext.h:129
TransposeCUDAContext::hwctx
AVCUDADeviceContext * hwctx
Definition: vf_transpose_cuda.c:66
AV_PIX_FMT_YUV422P10
#define AV_PIX_FMT_YUV422P10
Definition: pixfmt.h:540
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
DIV_UP
#define DIV_UP(a, b)
Definition: vf_transpose_cuda.c:37
init_processing_chain
static int init_processing_chain(AVFilterContext *ctx, int out_width, int out_height)
Definition: vf_transpose_cuda.c:167
TRANSPOSE_PT_TYPE_PORTRAIT
@ TRANSPOSE_PT_TYPE_PORTRAIT
Definition: transpose.h:27
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:94
height
#define height
Definition: dsp.h:89
BLOCK_Y
#define BLOCK_Y
Definition: vf_transpose_cuda.c:39
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
TransposeCUDAContext::tmp_frame
AVFrame * tmp_frame
Definition: vf_transpose_cuda.c:69
TRANSPOSE_PT_TYPE_NONE
@ TRANSPOSE_PT_TYPE_NONE
Definition: transpose.h:25
AV_PIX_FMT_NV16
@ AV_PIX_FMT_NV16
interleaved chroma YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:198
ff_vf_transpose_cuda
const FFFilter ff_vf_transpose_cuda
Definition: vf_transpose_cuda.c:553
AV_PIX_FMT_RGB32
#define AV_PIX_FMT_RGB32
Definition: pixfmt.h:511
AV_PIX_FMT_P216
#define AV_PIX_FMT_P216
Definition: pixfmt.h:620
AV_PIX_FMT_P210
#define AV_PIX_FMT_P210
Definition: pixfmt.h:616
ff_null_get_video_buffer
AVFrame * ff_null_get_video_buffer(AVFilterLink *link, int w, int h)
Definition: video.c:44
cudatranspose_options
static const AVOption cudatranspose_options[]
Definition: vf_transpose_cuda.c:516
internal.h
common.h
av_frame_move_ref
void av_frame_move_ref(AVFrame *dst, AVFrame *src)
Move everything contained in src to dst and reset src.
Definition: frame.c:523
av_frame_unref
void av_frame_unref(AVFrame *frame)
Unreference all the buffers referenced by frame and reset the frame fields.
Definition: frame.c:496
BLOCK_X
#define BLOCK_X
Definition: vf_transpose_cuda.c:38
TRANSPOSE_CLOCK
@ TRANSPOSE_CLOCK
Definition: transpose.h:32
av_inv_q
static av_always_inline AVRational av_inv_q(AVRational q)
Invert a rational.
Definition: rational.h:159
TransposeCUDAContext::cu_func_ushort
CUfunction cu_func_ushort
Definition: vf_transpose_cuda.c:76
AVFilterPad::name
const char * name
Pad name.
Definition: filters.h:46
AV_PIX_FMT_P016
#define AV_PIX_FMT_P016
Definition: pixfmt.h:604
AVHWFramesContext
This struct describes a set or pool of "hardware" frames (i.e.
Definition: hwcontext.h:118
AVCUDADeviceContext
This struct is allocated as AVHWDeviceContext.hwctx.
Definition: hwcontext_cuda.h:42
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(cudatranspose)
ret
ret
Definition: filter_design.txt:187
AV_PIX_FMT_NV12
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:96
AV_PIX_FMT_0RGB32
#define AV_PIX_FMT_0RGB32
Definition: pixfmt.h:515
AVHWFramesContext::device_ctx
AVHWDeviceContext * device_ctx
The parent AVHWDeviceContext.
Definition: hwcontext.h:137
cuda_check.h
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: filters.h:264
AVFrame::sample_aspect_ratio
AVRational sample_aspect_ratio
Sample aspect ratio for the video frame, 0/1 if unknown/unspecified.
Definition: frame.h:532
OFFSET
#define OFFSET(x)
Definition: vf_transpose_cuda.c:513
AVFrame::height
int height
Definition: frame.h:507
TRANSPOSE_CCLOCK_FLIP
@ TRANSPOSE_CCLOCK_FLIP
Definition: transpose.h:31
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Underlying C type is int.
Definition: opt.h:259
avfilter.h
transpose.h
TRANSPOSE_REVERSAL
@ TRANSPOSE_REVERSAL
Definition: transpose.h:35
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
AV_PIX_FMT_YUV444P
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:78
src0
const pixel *const src0
Definition: h264pred_template.c:419
AVFilterContext
An instance of a filter.
Definition: avfilter.h:274
TRANSPOSE_PT_TYPE_LANDSCAPE
@ TRANSPOSE_PT_TYPE_LANDSCAPE
Definition: transpose.h:26
TRANSPOSE_VFLIP
@ TRANSPOSE_VFLIP
Definition: transpose.h:37
AV_PIX_FMT_P010
#define AV_PIX_FMT_P010
Definition: pixfmt.h:602
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:200
FFFilter::p
AVFilter p
The public AVFilter.
Definition: filters.h:271
TransposeCUDAContext::cu_func_uchar2
CUfunction cu_func_uchar2
Definition: vf_transpose_cuda.c:77
AV_PIX_FMT_YUV422P
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:77
AVBufferRef
A reference to a data buffer.
Definition: buffer.h:82
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
w
uint8_t w
Definition: llvidencdsp.c:39
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
TransposeCUDAContext::pix_desc
const AVPixFmtDescriptor * pix_desc
Definition: vf_transpose_cuda.c:71
cudatranspose_init
static av_cold int cudatranspose_init(AVFilterContext *ctx)
Definition: vf_transpose_cuda.c:87
cudatranspose_uninit
static av_cold void cudatranspose_uninit(AVFilterContext *ctx)
Definition: vf_transpose_cuda.c:102
hwcontext.h
AVFrame::linesize
int linesize[AV_NUM_DATA_POINTERS]
For video, a positive or negative value, which is typically indicating the size in bytes of each pict...
Definition: frame.h:480
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
TransposeCUDAContext::cu_func_uchar4
CUfunction cu_func_uchar4
Definition: vf_transpose_cuda.c:79
TransposeCUDAContext::passthrough
int passthrough
PassthroughType, landscape passthrough mode enabled.
Definition: vf_transpose_cuda.c:83
h
h
Definition: vp9dsp_template.c:2070
width
#define width
Definition: dsp.h:89
av_hwframe_get_buffer
int av_hwframe_get_buffer(AVBufferRef *hwframe_ref, AVFrame *frame, int flags)
Allocate a new frame attached to the given AVHWFramesContext.
Definition: hwcontext.c:506
AV_OPT_TYPE_CONST
@ AV_OPT_TYPE_CONST
Special option type for declaring named constants.
Definition: opt.h:299
cudatranspose_transpose
static int cudatranspose_transpose(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
Definition: vf_transpose_cuda.c:430
av_get_pix_fmt_name
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:3376
init_hwframe_ctx
static av_cold int init_hwframe_ctx(TransposeCUDAContext *s, AVBufferRef *device_ctx, int width, int height, enum AVPixelFormat sw_format)
Definition: vf_transpose_cuda.c:119
TransposeCUDAContext
Definition: vf_transpose_cuda.c:63