Go to the documentation of this file.
/*
 * Declare a single put function named ff_vvc_put_<name>_<depth>_<opt>.
 *
 * The expansion carries its own terminating ';', so the macro is used
 * without one.
 */
#define PUT_PROTOTYPE(name, depth, opt)                                  \
void ff_vvc_put_ ## name ## _ ## depth ## _ ## opt(int16_t *dst,         \
                                                   const uint8_t *src,   \
                                                   ptrdiff_t srcstride,  \
                                                   int height,           \
                                                   const int8_t *hf,     \
                                                   const int8_t *vf,     \
                                                   int width);
/*
 * Expand PUT_PROTOTYPE once per size suffix, pasting the suffix directly
 * onto `name`: 2, 4, 8, 12, 16, 24, 32, 48, 64 and 128.
 */
#define PUT_PROTOTYPES(name, bitd, opt)      \
    PUT_PROTOTYPE(name ## 2,   bitd, opt)    \
    PUT_PROTOTYPE(name ## 4,   bitd, opt)    \
    PUT_PROTOTYPE(name ## 8,   bitd, opt)    \
    PUT_PROTOTYPE(name ## 12,  bitd, opt)    \
    PUT_PROTOTYPE(name ## 16,  bitd, opt)    \
    PUT_PROTOTYPE(name ## 24,  bitd, opt)    \
    PUT_PROTOTYPE(name ## 32,  bitd, opt)    \
    PUT_PROTOTYPE(name ## 48,  bitd, opt)    \
    PUT_PROTOTYPE(name ## 64,  bitd, opt)    \
    PUT_PROTOTYPE(name ## 128, bitd, opt)
/* Expand PUT_PROTOTYPES for each of the bit depths 8, 10 and 12. */
#define PUT_BPC_PROTOTYPES(name, opt)    \
    PUT_PROTOTYPES(name, 8,  opt)        \
    PUT_PROTOTYPES(name, 10, opt)        \
    PUT_PROTOTYPES(name, 12, opt)
/*
 * Expand PUT_BPC_PROTOTYPES for the three filter name stems built from the
 * tap count n: <n>tap_h, <n>tap_v and <n>tap_hv.
 */
#define PUT_TAP_PROTOTYPES(n, opt)            \
    PUT_BPC_PROTOTYPES(n ## tap_h,  opt)      \
    PUT_BPC_PROTOTYPES(n ## tap_v,  opt)      \
    PUT_BPC_PROTOTYPES(n ## tap_hv, opt)
/* Build the symbol <fn>_<bd>_<opt>, e.g. bf(foo, 10, avx2) -> foo_10_avx2. */
#define bf(fn, bd,  opt) fn ## _ ## bd ## _ ## opt
/* Build the symbol <fn>_<bpc>bpc_<opt>, e.g. BF(foo, 16, avx2) -> foo_16bpc_avx2. */
#define BF(fn, bpc, opt) fn ## _ ## bpc ## bpc_ ## opt
/*
 * Declare ff_vvc_avg_<bpc>bpc_<opt> and ff_vvc_w_avg_<bpc>bpc_<opt>.
 *
 * These variants take every scalar argument as intptr_t and receive the
 * maximum pixel value as their last argument.
 */
#define AVG_BPC_PROTOTYPES(bpc, opt)                                          \
void BF(ff_vvc_avg, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride,             \
                              const int16_t *src0, const int16_t *src1,       \
                              intptr_t width, intptr_t height,                \
                              intptr_t pixel_max);                            \
void BF(ff_vvc_w_avg, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride,           \
                                const int16_t *src0, const int16_t *src1,     \
                                intptr_t width, intptr_t height,              \
                                intptr_t denom, intptr_t w0, intptr_t w1,     \
                                intptr_t o0, intptr_t o1, intptr_t pixel_max);
/*
 * Declare ff_vvc_avg_<bd>_<opt> and ff_vvc_w_avg_<bd>_<opt>.
 *
 * These variants use plain int scalars and take no pixel_max argument.
 */
#define AVG_PROTOTYPES(bd, opt)                                               \
void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride,              \
                             const int16_t *src0, const int16_t *src1,        \
                             int width, int height);                          \
void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride,            \
                               const int16_t *src0, const int16_t *src1,      \
                               int width, int height,                         \
                               int denom, int w0, int w1, int o0, int o1);
/*
 * Declare the four <bpc>bpc ALF functions for instruction set <opt>:
 * filter_luma, filter_chroma, classify_grad and classify.
 *
 * The last declaration ends the macro; there is deliberately no trailing
 * line continuation, which would otherwise pull the following source line
 * into this definition.
 */
#define ALF_BPC_PROTOTYPES(bpc, opt)                                                        \
void BF(ff_vvc_alf_filter_luma, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride,               \
    const uint8_t *src, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height,            \
    const int16_t *filter, const int16_t *clip, ptrdiff_t stride, ptrdiff_t vb_pos,         \
    ptrdiff_t pixel_max);                                                                   \
void BF(ff_vvc_alf_filter_chroma, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride,             \
    const uint8_t *src, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height,            \
    const int16_t *filter, const int16_t *clip, ptrdiff_t stride, ptrdiff_t vb_pos,         \
    ptrdiff_t pixel_max);                                                                   \
void BF(ff_vvc_alf_classify_grad, bpc, opt)(int *gradient_sum,                              \
    const uint8_t *src, ptrdiff_t src_stride, intptr_t width, intptr_t height,              \
    intptr_t vb_pos);                                                                       \
void BF(ff_vvc_alf_classify, bpc, opt)(int *class_idx, int *transpose_idx,                  \
    const int *gradient_sum, intptr_t width, intptr_t height, intptr_t vb_pos,              \
    intptr_t bit_depth);
/*
 * Declare the bit-depth-specific ALF entry points ff_vvc_alf_filter_luma,
 * ff_vvc_alf_filter_chroma and ff_vvc_alf_classify, each suffixed
 * _<bd>_<opt>.
 *
 * `bpc` is accepted but not used by the expansion. The macro ends on the
 * last declaration, with no trailing line continuation.
 */
#define ALF_PROTOTYPES(bpc, bd, opt)                                                        \
void bf(ff_vvc_alf_filter_luma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride,                \
    const uint8_t *src, ptrdiff_t src_stride, int width, int height,                        \
    const int16_t *filter, const int16_t *clip, const int vb_pos);                          \
void bf(ff_vvc_alf_filter_chroma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride,              \
    const uint8_t *src, ptrdiff_t src_stride, int width, int height,                        \
    const int16_t *filter, const int16_t *clip, const int vb_pos);                          \
void bf(ff_vvc_alf_classify, bd, opt)(int *class_idx, int *transpose_idx,                   \
    const uint8_t *src, ptrdiff_t src_stride, int width, int height, int vb_pos,            \
    int *gradient_tmp);
118 #if HAVE_SSE4_EXTERNAL
/*
 * Define ff_vvc_put_<name>_<depth>_<opt> as a thin forwarder to
 * ff_h2656_put_<name>_<depth>_<opt>.
 *
 * All arguments are passed through unchanged; the only addition is the
 * second argument of the callee, fixed to 2 * MAX_PB_SIZE.
 *
 * NOTE(review): the function braces were absent from the listing this was
 * taken from and are restored here; confirm against the upstream file.
 */
#define FW_PUT(name, depth, opt)                                                      \
void ff_vvc_put_ ## name ## _ ## depth ## _ ## opt(int16_t *dst, const uint8_t *src,  \
    ptrdiff_t srcstride, int height, const int8_t *hf, const int8_t *vf, int width)   \
{                                                                                     \
    ff_h2656_put_ ## name ## _ ## depth ## _ ## opt(dst, 2 * MAX_PB_SIZE, src,        \
        srcstride, height, hf, vf, width);                                            \
}
/*
 * Expand FW_PUT for the size suffixes 4, 8, 16, 32, 64 and 128 of `fname`.
 *
 * The macro ends on the last entry, with no trailing line continuation.
 */
#define FW_PUT_TAP(fname, bitd, opt)    \
    FW_PUT(fname ## 4,   bitd, opt)     \
    FW_PUT(fname ## 8,   bitd, opt)     \
    FW_PUT(fname ## 16,  bitd, opt)     \
    FW_PUT(fname ## 32,  bitd, opt)     \
    FW_PUT(fname ## 64,  bitd, opt)     \
    FW_PUT(fname ## 128, bitd, opt)
/* Same set as FW_PUT_TAP, plus the additional size suffix 2. */
#define FW_PUT_4TAP(fname, bitd, opt)   \
    FW_PUT(fname ## 2, bitd, opt)       \
    FW_PUT_TAP(fname, bitd, opt)
/*
 * SSE4 forwarders that include the size-2 variant: plain copies ("pixels")
 * and the 4-tap h / v / hv filters.
 */
#define FW_PUT_4TAP_SSE4(bitd)           \
    FW_PUT_4TAP(pixels,  bitd, sse4)     \
    FW_PUT_4TAP(4tap_h,  bitd, sse4)     \
    FW_PUT_4TAP(4tap_v,  bitd, sse4)     \
    FW_PUT_4TAP(4tap_hv, bitd, sse4)

/* SSE4 forwarders for the 8-tap h / v / hv filters (sizes 4..128). */
#define FW_PUT_8TAP_SSE4(bitd)           \
    FW_PUT_TAP(8tap_h,  bitd, sse4)      \
    FW_PUT_TAP(8tap_v,  bitd, sse4)      \
    FW_PUT_TAP(8tap_hv, bitd, sse4)

/* Every SSE4 forwarder for one bit depth. */
#define FW_PUT_SSE4(bitd)                \
    FW_PUT_4TAP_SSE4(bitd)               \
    FW_PUT_8TAP_SSE4(bitd)
158 #if HAVE_AVX2_EXTERNAL
/*
 * AVX2 forwarders for the horizontal-only and vertical-only <n>-tap
 * filters, sizes 32, 64 and 128.
 */
#define FW_PUT_TAP_AVX2(n, bitd)            \
    FW_PUT(n ## tap_h32,  bitd, avx2)       \
    FW_PUT(n ## tap_h64,  bitd, avx2)       \
    FW_PUT(n ## tap_h128, bitd, avx2)       \
    FW_PUT(n ## tap_v32,  bitd, avx2)       \
    FW_PUT(n ## tap_v64,  bitd, avx2)       \
    FW_PUT(n ## tap_v128, bitd, avx2)
/*
 * AVX2 forwarders for one bit depth: plain copies of size 32, 64 and 128,
 * followed by the FW_PUT_TAP_AVX2 sets for 4 and 8 taps.
 *
 * The macro ends on the last entry, with no trailing line continuation.
 */
#define FW_PUT_AVX2(bitd)                \
    FW_PUT(pixels32,  bitd, avx2)        \
    FW_PUT(pixels64,  bitd, avx2)        \
    FW_PUT(pixels128, bitd, avx2)        \
    FW_PUT_TAP_AVX2(4, bitd)             \
    FW_PUT_TAP_AVX2(8, bitd)
/*
 * Additional AVX2 forwarders for the <n>-tap filters: h, v and hv at size
 * 16, plus hv at sizes 32, 64 and 128.
 */
#define FW_PUT_TAP_16BPC_AVX2(n, bitd)      \
    FW_PUT(n ## tap_h16,   bitd, avx2)      \
    FW_PUT(n ## tap_v16,   bitd, avx2)      \
    FW_PUT(n ## tap_hv16,  bitd, avx2)      \
    FW_PUT(n ## tap_hv32,  bitd, avx2)      \
    FW_PUT(n ## tap_hv64,  bitd, avx2)      \
    FW_PUT(n ## tap_hv128, bitd, avx2)
/*
 * The extra AVX2 forwarders for one bit depth: the size-16 plain copy and
 * the FW_PUT_TAP_16BPC_AVX2 sets for 4 and 8 taps.
 */
#define FW_PUT_16BPC_AVX2(bitd)             \
    FW_PUT(pixels16, bitd, avx2)            \
    FW_PUT_TAP_16BPC_AVX2(4, bitd)          \
    FW_PUT_TAP_16BPC_AVX2(8, bitd)
191 FW_PUT_16BPC_AVX2(10)
192 FW_PUT_16BPC_AVX2(12)
/*
 * Define the bit-depth-specific wrappers ff_vvc_avg_<bd>_<opt> and
 * ff_vvc_w_avg_<bd>_<opt>.
 *
 * Each wrapper forwards its arguments to the matching <bpc>bpc function
 * and appends the maximum pixel value for the bit depth, (1 << bd) - 1.
 *
 * NOTE(review): the function braces were absent from the listing this was
 * taken from and are restored here; confirm against the upstream file.
 */
#define AVG_FUNCS(bpc, bd, opt)                                                   \
void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride,                  \
    const int16_t *src0, const int16_t *src1, int width, int height)              \
{                                                                                 \
    BF(ff_vvc_avg, bpc, opt)(dst, dst_stride, src0, src1, width, height,          \
                             (1 << bd) - 1);                                      \
}                                                                                 \
void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride,                \
    const int16_t *src0, const int16_t *src1, int width, int height,              \
    int denom, int w0, int w1, int o0, int o1)                                    \
{                                                                                 \
    BF(ff_vvc_w_avg, bpc, opt)(dst, dst_stride, src0, src1, width, height,        \
                               denom, w0, w1, o0, o1, (1 << bd) - 1);             \
}
208 AVG_FUNCS(8, 8, avx2)
209 AVG_FUNCS(16, 10, avx2)
210 AVG_FUNCS(16, 12, avx2)
/*
 * Define the bit-depth-specific ALF wrappers, each suffixed _<bd>_<opt>:
 *
 *  - ff_vvc_alf_filter_luma: forwards to the <bpc>bpc luma filter, passing
 *    (width >> 2) * ALF_NUM_COEFF_LUMA as the stride argument and
 *    (1 << bd) - 1 as pixel_max.
 *  - ff_vvc_alf_filter_chroma: forwards to the <bpc>bpc chroma filter with
 *    a stride argument of 0 and (1 << bd) - 1 as pixel_max.
 *  - ff_vvc_alf_classify: calls the <bpc>bpc classify_grad function to
 *    fill gradient_tmp, then the <bpc>bpc classify function on that
 *    buffer, passing bd as the bit depth.
 *
 * NOTE(review): the function braces were absent from the listing this was
 * taken from and are restored here; confirm against the upstream file.
 */
#define ALF_FUNCS(bpc, bd, opt)                                                             \
void bf(ff_vvc_alf_filter_luma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride,                \
    const uint8_t *src, ptrdiff_t src_stride, int width, int height,                        \
    const int16_t *filter, const int16_t *clip, const int vb_pos)                           \
{                                                                                           \
    const int param_stride = (width >> 2) * ALF_NUM_COEFF_LUMA;                             \
    BF(ff_vvc_alf_filter_luma, bpc, opt)(dst, dst_stride, src, src_stride, width, height,   \
        filter, clip, param_stride, vb_pos, (1 << bd) - 1);                                 \
}                                                                                           \
void bf(ff_vvc_alf_filter_chroma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride,              \
    const uint8_t *src, ptrdiff_t src_stride, int width, int height,                        \
    const int16_t *filter, const int16_t *clip, const int vb_pos)                           \
{                                                                                           \
    BF(ff_vvc_alf_filter_chroma, bpc, opt)(dst, dst_stride, src, src_stride, width, height, \
        filter, clip, 0, vb_pos, (1 << bd) - 1);                                            \
}                                                                                           \
void bf(ff_vvc_alf_classify, bd, opt)(int *class_idx, int *transpose_idx,                   \
    const uint8_t *src, ptrdiff_t src_stride, int width, int height, int vb_pos,            \
    int *gradient_tmp)                                                                      \
{                                                                                           \
    BF(ff_vvc_alf_classify_grad, bpc, opt)(gradient_tmp, src, src_stride,                   \
        width, height, vb_pos);                                                             \
    BF(ff_vvc_alf_classify, bpc, opt)(class_idx, transpose_idx, gradient_tmp,               \
        width, height, vb_pos, bd);                                                         \
}
233 ALF_FUNCS(8, 8, avx2)
234 ALF_FUNCS(16, 10, avx2)
235 ALF_FUNCS(16, 12, avx2)
/*
 * Store two function pointers at index [C][W][idx1][idx2]:
 *   dst      <- ff_vvc_put_<name>_<D>_<opt>
 *   dst_uni  <- ff_h2656_put_uni_<name>_<D>_<opt>
 * where "dst_uni" is formed by pasting _uni onto the last token of `dst`.
 *
 * The expansion is two complete statements, each with its own ';'. Some
 * users rely on that and write no ';' after the invocation, so it must not
 * be used as the unbraced body of an if/else/loop. The macro ends on the
 * second statement, with no trailing line continuation.
 */
#define PEL_LINK(dst, C, W, idx1, idx2, name, D, opt)                                   \
    dst[C][W][idx1][idx2]         = ff_vvc_put_ ## name ## _ ## D ## _ ## opt;          \
    dst ## _uni[C][W][idx1][idx2] = ff_h2656_put_uni_ ## name ## _ ## D ## _ ## opt;
/*
 * Fill the [my][mx] slot of plane C for width indices 1..6, which map to
 * the function-name suffixes 4, 8, 16, 32, 64 and 128 of `fname`.
 */
#define MC_TAP_LINKS(pointer, C, my, mx, fname, bitd, opt)              \
    PEL_LINK(pointer, C, 1, my, mx, fname ## 4,   bitd, opt);           \
    PEL_LINK(pointer, C, 2, my, mx, fname ## 8,   bitd, opt);           \
    PEL_LINK(pointer, C, 3, my, mx, fname ## 16,  bitd, opt);           \
    PEL_LINK(pointer, C, 4, my, mx, fname ## 32,  bitd, opt);           \
    PEL_LINK(pointer, C, 5, my, mx, fname ## 64,  bitd, opt);           \
    PEL_LINK(pointer, C, 6, my, mx, fname ## 128, bitd, opt);
/* MC_TAP_LINKS applied to the LUMA plane. */
#define MC_8TAP_LINKS(pointer, my, mx, fname, bitd, opt)    \
    MC_TAP_LINKS(pointer, LUMA, my, mx, fname, bitd, opt)

/*
 * Register the SSE4 luma put functions on c->inter.put: plain copy at
 * [0][0], 8-tap h at [0][1], v at [1][0] and hv at [1][1].
 * Expects a variable `c` in scope at the point of use.
 */
#define MC_8TAP_LINKS_SSE4(bd)                                  \
    MC_8TAP_LINKS(c->inter.put, 0, 0, pixels,  bd, sse4);       \
    MC_8TAP_LINKS(c->inter.put, 0, 1, 8tap_h,  bd, sse4);       \
    MC_8TAP_LINKS(c->inter.put, 1, 0, 8tap_v,  bd, sse4);       \
    MC_8TAP_LINKS(c->inter.put, 1, 1, 8tap_hv, bd, sse4)
/*
 * CHROMA counterpart of MC_8TAP_LINKS: width index 0 is linked to the
 * size-2 function, then MC_TAP_LINKS covers indices 1..6.
 *
 * The macro ends on the MC_TAP_LINKS entry, with no trailing line
 * continuation.
 */
#define MC_4TAP_LINKS(pointer, my, mx, fname, bitd, opt)                \
    PEL_LINK(pointer, CHROMA, 0, my, mx, fname ## 2, bitd, opt);        \
    MC_TAP_LINKS(pointer, CHROMA, my, mx, fname, bitd, opt)

/*
 * Register the SSE4 chroma put functions on c->inter.put: plain copy at
 * [0][0], 4-tap h at [0][1], v at [1][0] and hv at [1][1].
 * Expects a variable `c` in scope at the point of use.
 */
#define MC_4TAP_LINKS_SSE4(bd)                                  \
    MC_4TAP_LINKS(c->inter.put, 0, 0, pixels,  bd, sse4);       \
    MC_4TAP_LINKS(c->inter.put, 0, 1, 4tap_h,  bd, sse4);       \
    MC_4TAP_LINKS(c->inter.put, 1, 0, 4tap_v,  bd, sse4);       \
    MC_4TAP_LINKS(c->inter.put, 1, 1, 4tap_hv, bd, sse4)
/*
 * Register every SSE4 put function for one bit depth: the chroma (4-tap)
 * set followed by the luma (8-tap) set.
 *
 * The two parts are separated by an explicit ';' so the expansion is valid
 * regardless of whether the first part happens to end in one. At worst
 * this adds an empty statement.
 */
#define MC_LINK_SSE4(bd)            \
    MC_4TAP_LINKS_SSE4(bd);         \
    MC_8TAP_LINKS_SSE4(bd)
/*
 * Register AVX2 put functions for plane C on c->inter.put, for the width
 * indices 4, 5 and 6 (name suffixes 32, 64 and 128): plain copies at
 * [0][0], <tap>-tap h at [0][1] and <tap>-tap v at [1][0].
 *
 * PEL_LINK supplies its own ';', hence none between the entries. The body
 * is wrapped in do { } while (0) so the macro acts as one statement.
 * Expects a variable `c` in scope at the point of use.
 *
 * NOTE(review): the closing "} while (0)" was absent from the listing this
 * was taken from and is restored here; confirm against the upstream file.
 */
#define MC_TAP_LINKS_AVX2(C, tap, bd) do {                                  \
        PEL_LINK(c->inter.put, C, 4, 0, 0, pixels32,          bd, avx2)     \
        PEL_LINK(c->inter.put, C, 5, 0, 0, pixels64,          bd, avx2)     \
        PEL_LINK(c->inter.put, C, 6, 0, 0, pixels128,         bd, avx2)     \
        PEL_LINK(c->inter.put, C, 4, 0, 1, tap ## tap_h32,    bd, avx2)     \
        PEL_LINK(c->inter.put, C, 5, 0, 1, tap ## tap_h64,    bd, avx2)     \
        PEL_LINK(c->inter.put, C, 6, 0, 1, tap ## tap_h128,   bd, avx2)     \
        PEL_LINK(c->inter.put, C, 4, 1, 0, tap ## tap_v32,    bd, avx2)     \
        PEL_LINK(c->inter.put, C, 5, 1, 0, tap ## tap_v64,    bd, avx2)     \
        PEL_LINK(c->inter.put, C, 6, 1, 0, tap ## tap_v128,   bd, avx2)     \
    } while (0)
/*
 * Apply MC_TAP_LINKS_AVX2 to LUMA with 8 taps and to CHROMA with 4 taps.
 *
 * The expansion is two statements and already ends in ';', so it must not
 * be used as the unbraced body of an if/else/loop.
 */
#define MC_LINKS_AVX2(bd)                   \
    MC_TAP_LINKS_AVX2(LUMA,   8, bd);       \
    MC_TAP_LINKS_AVX2(CHROMA, 4, bd);
/*
 * Register the additional AVX2 put functions for plane C on c->inter.put:
 * width index 3 (suffix 16) for plain copy, <tap>-tap h, v and hv, plus
 * the hv filter at [1][1] for width indices 4, 5 and 6 (32, 64, 128).
 *
 * PEL_LINK supplies its own ';', hence none between the entries. The body
 * is wrapped in do { } while (0) so the macro acts as one statement.
 * Expects a variable `c` in scope at the point of use.
 *
 * NOTE(review): the closing "} while (0)" was absent from the listing this
 * was taken from and is restored here; confirm against the upstream file.
 */
#define MC_TAP_LINKS_16BPC_AVX2(C, tap, bd) do {                            \
        PEL_LINK(c->inter.put, C, 3, 0, 0, pixels16,          bd, avx2)     \
        PEL_LINK(c->inter.put, C, 3, 0, 1, tap ## tap_h16,    bd, avx2)     \
        PEL_LINK(c->inter.put, C, 3, 1, 0, tap ## tap_v16,    bd, avx2)     \
        PEL_LINK(c->inter.put, C, 3, 1, 1, tap ## tap_hv16,   bd, avx2)     \
        PEL_LINK(c->inter.put, C, 4, 1, 1, tap ## tap_hv32,   bd, avx2)     \
        PEL_LINK(c->inter.put, C, 5, 1, 1, tap ## tap_hv64,   bd, avx2)     \
        PEL_LINK(c->inter.put, C, 6, 1, 1, tap ## tap_hv128,  bd, avx2)     \
    } while (0)
/*
 * Apply MC_TAP_LINKS_16BPC_AVX2 to LUMA with 8 taps and to CHROMA with
 * 4 taps.
 *
 * The expansion is two statements and already ends in ';', so it must not
 * be used as the unbraced body of an if/else/loop.
 */
#define MC_LINKS_16BPC_AVX2(bd)                     \
    MC_TAP_LINKS_16BPC_AVX2(LUMA,   8, bd);         \
    MC_TAP_LINKS_16BPC_AVX2(CHROMA, 4, bd);
/*
 * Point c->inter.avg and c->inter.w_avg at ff_vvc_avg_<bd>_<opt> and
 * ff_vvc_w_avg_<bd>_<opt>. Expects a variable `c` in scope.
 *
 * NOTE(review): the closing "} while (0)" was absent from the listing this
 * was taken from and is restored here; confirm against the upstream file.
 */
#define AVG_INIT(bd, opt) do {                              \
        c->inter.avg   = bf(ff_vvc_avg,   bd, opt);         \
        c->inter.w_avg = bf(ff_vvc_w_avg, bd, opt);         \
    } while (0)
/*
 * Point the ALF entries of `c` at the AVX2 wrappers for bit depth bd:
 * c->alf.filter[LUMA], c->alf.filter[CHROMA] and c->alf.classify.
 * Expects a variable `c` in scope.
 *
 * NOTE(review): the closing "} while (0)" was absent from the listing this
 * was taken from and is restored here; confirm against the upstream file.
 */
#define ALF_INIT(bd) do {                                                   \
        c->alf.filter[LUMA]   = ff_vvc_alf_filter_luma_ ## bd ## _avx2;     \
        c->alf.filter[CHROMA] = ff_vvc_alf_filter_chroma_ ## bd ## _avx2;   \
        c->alf.classify       = ff_vvc_alf_classify_ ## bd ## _avx2;        \
    } while (0)
340 MC_LINKS_16BPC_AVX2(10);
351 MC_LINKS_16BPC_AVX2(12);
void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
#define ALF_PROTOTYPES(bpc, bd, opt)
#define EXTERNAL_AVX2_FAST(flags)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static atomic_int cpu_flags
#define ALF_BPC_PROTOTYPES(bpc, opt)
#define AVG_PROTOTYPES(bd, opt)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c… [text truncated in the source]
#define PUT_BPC_PROTOTYPES(name, opt)
#define EXTERNAL_SSE4(flags)
#define AVG_BPC_PROTOTYPES(bpc, opt)
#define PUT_TAP_PROTOTYPES(n, opt)