Go to the documentation of this file.
/* Integer ceiling division: (a + b - 1) / b rounds a/b up; used for CUDA grid sizing. */
37 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
/* Wrap a CUDA driver-API call x with FF_CUDA_CHECK_DL, using the filter's
 * logging context (ctx) and the dynamically loaded function table
 * (s->hwctx->internal->cuda_dl) for error reporting. */
41 #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
106 if (
s->hwctx &&
s->cu_module) {
108 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
109 CHECK_CU(cu->cuCtxPushCurrent(
s->cu_ctx));
110 CHECK_CU(cu->cuModuleUnload(
s->cu_module));
151 s->frames_ctx = out_ref;
168 int out_width,
int out_height)
194 out_width, out_height,
format);
199 s->cu_stream =
s->hwctx->stream;
/* Compiled CUDA kernel blob (PTX) and its byte length, embedded at build time;
 * loaded below via ff_cuda_load_module(). */
210 extern const unsigned char ff_vf_transpose_cuda_ptx_data[];
211 extern const unsigned int ff_vf_transpose_cuda_ptx_len;
217 CUcontext
dummy, cuda_ctx;
230 "w:%d h:%d -> w:%d h:%d (passthrough mode)\n",
253 if (
s->flip_wh &&
inlink->sample_aspect_ratio.num)
262 cuda_ctx =
s->cu_ctx =
s->hwctx->cuda_ctx;
263 cu =
s->hwctx->internal->cuda_dl;
270 ff_vf_transpose_cuda_ptx_data, ff_vf_transpose_cuda_ptx_len);
274 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_uchar,
s->cu_module,
"Transpose_Cuda_uchar"));
278 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_ushort,
s->cu_module,
"Transpose_Cuda_ushort"));
282 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_uchar2,
s->cu_module,
"Transpose_Cuda_uchar2"));
286 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_ushort2,
s->cu_module,
"Transpose_Cuda_ushort2"));
290 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_uchar4,
s->cu_module,
"Transpose_Cuda_uchar4"));
295 "w:%d h:%d dir:%d -> w:%d h:%d\n",
305 CUarray_format cu_format,
320 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
321 CUtexObject src0_tex = 0, src1_tex = 0;
324 void *kernel_args[] = {
325 &dst0, &dst1, &dst_width, &dst_height, &dst_pitch,
326 &src0_tex, &src1_tex, &
s->dir,
329 CUDA_TEXTURE_DESC tex_desc = {
330 .addressMode = { CU_TR_ADDRESS_MODE_CLAMP,
331 CU_TR_ADDRESS_MODE_CLAMP },
332 .filterMode = is_422_uv ? CU_TR_FILTER_MODE_LINEAR
333 : CU_TR_FILTER_MODE_POINT,
336 CUDA_RESOURCE_DESC res_desc = {
337 .resType = CU_RESOURCE_TYPE_PITCH2D,
338 .res.pitch2D.format = cu_format,
339 .res.pitch2D.numChannels =
channels,
340 .res.pitch2D.pitchInBytes = src_pitch,
341 .res.pitch2D.width = src_width,
342 .res.pitch2D.height = src_height
345 res_desc.res.pitch2D.devPtr = (CUdeviceptr)
src0;
346 ret =
CHECK_CU(cu->cuTexObjectCreate(&src0_tex, &res_desc, &tex_desc,
NULL));
351 res_desc.res.pitch2D.devPtr = (CUdeviceptr)
src1;
352 ret =
CHECK_CU(cu->cuTexObjectCreate(&src1_tex, &res_desc, &tex_desc,
NULL));
362 CHECK_CU(cu->cuTexObjectDestroy(src0_tex));
364 CHECK_CU(cu->cuTexObjectDestroy(src1_tex));
375 for (
int c = 0;
c <
s->pix_desc->nb_components;
c++) {
377 const int p =
comp->plane;
379 int is_planar_u, is_planar_v, is_422_uv;
383 pix_size = (
comp->depth + 7) / 8;
390 is_422_uv =
p &&
s->pix_desc->log2_chroma_w == 1 && !
s->pix_desc->log2_chroma_h;
392 if (
comp->plane <
c || is_planar_v) {
401 channels == 2 ?
s->cu_func_uchar2 :
s->cu_func_uchar;
402 format = CU_AD_FORMAT_UNSIGNED_INT8;
406 format = CU_AD_FORMAT_UNSIGNED_INT16;
413 (CUdeviceptr)
out->data[
p],
414 (CUdeviceptr)(is_planar_u ?
out->data[
p+1] :
NULL),
418 (CUdeviceptr)in->
data[
p],
419 (CUdeviceptr)(is_planar_u ? in->
data[
p+1] :
NULL),
448 s->frame->width = outlink->
w;
449 s->frame->height = outlink->
h;
482 cu =
s->hwctx->internal->cuda_dl;
508 return s->passthrough ?
/* Byte offset of an option field within TransposeCUDAContext, for the AVOption table. */
513 #define OFFSET(x) offsetof(TransposeCUDAContext, x)
/* Standard AVOption flags: option applies to video filtering parameters. */
514 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
554 .
p.
name =
"transpose_cuda",
556 .p.priv_class = &cudatranspose_class,
int(* func)(AVBPrint *dst, const char *in, const char *arg)
void * hwctx
The format-specific data, allocated and freed by libavutil along with this context.
static CUresult call_kernel(AVFilterContext *ctx, CUfunction cu_func, CUarray_format cu_format, int channels, int is_422_uv, CUdeviceptr dst0, CUdeviceptr dst1, int dst_width, int dst_height, int dst_pitch, CUdeviceptr src0, CUdeviceptr src1, int src_width, int src_height, int src_pitch)
@ AV_PIX_FMT_CUDA
HW acceleration through CUDA.
AVPixelFormat
Pixel format.
static int format_is_supported(enum AVPixelFormat fmt)
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
static int cudatranspose_filter_frame(AVFilterLink *link, AVFrame *in)
static void comp(unsigned char *dst, ptrdiff_t dst_stride, unsigned char *src, ptrdiff_t src_stride, int add)
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
uint8_t * data
The data buffer.
enum AVPixelFormat format
The pixel format identifying the underlying HW surface type.
int ff_cuda_load_module(void *avctx, AVCUDADeviceContext *hwctx, CUmodule *cu_module, const unsigned char *data, const unsigned int length)
Loads a CUDA module and applies any decompression, if necessary.
The exact code depends on how similar the blocks are and how related they are to the filter, and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is needed: inlink
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
int av_hwframe_ctx_init(AVBufferRef *ref)
Finalize the context before use.
This structure describes decoded (raw) audio or video data.
AVBufferRef * av_hwframe_ctx_alloc(AVBufferRef *device_ref_in)
Allocate an AVHWFramesContext tied to a given device context.
#define FILTER_SINGLE_PIXFMT(pix_fmt_)
#define AV_PIX_FMT_YUV420P10
#define AV_LOG_VERBOSE
Detailed information.
static AVFrame * cudatranspose_get_video_buffer(AVFilterLink *inlink, int w, int h)
AVBufferRef * av_buffer_ref(const AVBufferRef *buf)
Create a new reference to an AVBuffer.
const char * name
Filter name.
int width
The allocated dimensions of the frames in this pool.
static const AVFilterPad cudatranspose_outputs[]
A link between two filters.
static const AVFilterPad cudatranspose_inputs[]
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Link properties exposed to filter code, but not external callers.
AVFrame * ff_default_get_video_buffer(AVFilterLink *link, int w, int h)
static enum AVPixelFormat supported_formats[]
static int cudatranspose_config_props(AVFilterLink *outlink)
A filter pad used for either input or output.
AVFrame * av_frame_alloc(void)
Allocate an AVFrame and set its fields to default values.
#define AV_PIX_FMT_YUV444P10
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
#define FF_ARRAY_ELEMS(a)
#define FILTER_OUTPUTS(array)
#define AV_PIX_FMT_YUV444P16
#define AV_CEIL_RSHIFT(a, b)
static int cudatranspose_rotate(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
static FilterLink * ff_filter_link(AVFilterLink *link)
#define AV_PIX_FMT_0BGR32
AVRational sample_aspect_ratio
agreed upon sample aspect ratio
CUfunction cu_func_ushort2
static AVFormatContext * ctx
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Describe the class of an AVClass context structure.
enum AVPixelFormat sw_format
The pixel format identifying the actual data layout of the hardware frames.
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
New swscale design to change SwsGraph is what coordinates multiple passes These can include cascaded scaling error diffusion and so on Or we could have separate passes for the vertical and horizontal scaling In between each SwsPass lies a fully allocated image buffer Graph passes may have different levels of e g we can have a single threaded error diffusion pass following a multi threaded scaling pass SwsGraph is internally recreated whenever the image format
void av_buffer_unref(AVBufferRef **buf)
Free a given reference and automatically free the buffer if there are no more references to it.
#define av_unreachable(msg)
Asserts that are used as compiler optimization hints depending upon ASSERT_LEVEL and NBDEBUG.
AVBufferRef * device_ref
A reference to the parent AVHWDeviceContext.
AVCUDADeviceContext * hwctx
#define AV_PIX_FMT_YUV422P10
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
static int init_processing_chain(AVFilterContext *ctx, int out_width, int out_height)
@ TRANSPOSE_PT_TYPE_PORTRAIT
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
#define i(width, name, range_min, range_max)
AVFilterContext * src
source filter
@ AV_PIX_FMT_NV16
interleaved chroma YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
const FFFilter ff_vf_transpose_cuda
AVBufferRef * hw_frames_ctx
For hwaccel pixel formats, this should be a reference to the AVHWFramesContext describing the frames.
AVFrame * ff_null_get_video_buffer(AVFilterLink *link, int w, int h)
static const AVOption cudatranspose_options[]
int w
agreed upon image width
void av_frame_move_ref(AVFrame *dst, AVFrame *src)
Move everything contained in src to dst and reset src.
void av_frame_unref(AVFrame *frame)
Unreference all the buffers referenced by frame and reset the frame fields.
static av_always_inline AVRational av_inv_q(AVRational q)
Invert a rational.
CUfunction cu_func_ushort
const char * name
Pad name.
This struct describes a set or pool of "hardware" frames (i.e.
This struct is allocated as AVHWDeviceContext.hwctx.
AVFILTER_DEFINE_CLASS(cudatranspose)
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
#define AV_PIX_FMT_0RGB32
AVHWDeviceContext * device_ctx
The parent AVHWDeviceContext.
#define FILTER_INPUTS(array)
AVRational sample_aspect_ratio
Sample aspect ratio for the video frame, 0/1 if unknown/unspecified.
int h
agreed upon image height
@ AV_OPT_TYPE_INT
Underlying C type is int.
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
@ TRANSPOSE_PT_TYPE_LANDSCAPE
AVFilter p
The public AVFilter.
CUfunction cu_func_uchar2
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
A reference to a data buffer.
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
const AVPixFmtDescriptor * pix_desc
static av_cold int cudatranspose_init(AVFilterContext *ctx)
static av_cold void cudatranspose_uninit(AVFilterContext *ctx)
int linesize[AV_NUM_DATA_POINTERS]
For video, a positive or negative value, which is typically indicating the size in bytes of each pict...
CUfunction cu_func_uchar4
int passthrough
PassthroughType, landscape passthrough mode enabled.
int av_hwframe_get_buffer(AVBufferRef *hwframe_ref, AVFrame *frame, int flags)
Allocate a new frame attached to the given AVHWFramesContext.
@ AV_OPT_TYPE_CONST
Special option type for declaring named constants.
static int cudatranspose_transpose(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
static av_cold int init_hwframe_ctx(TransposeCUDAContext *s, AVBufferRef *device_ctx, int width, int height, enum AVPixelFormat sw_format)