Go to the documentation of this file.
/*
 * Declare a single "put" routine named ff_vvc_put_<name>_<depth>_<opt>.
 * Every routine declared this way has the same signature: an int16_t
 * destination, a const uint8_t source pointer with its stride, the height,
 * two int8_t filter pointers (hf, vf) and the width.
 */
#define PUT_PROTOTYPE(name, depth, opt)                                       \
void ff_vvc_put_ ## name ## _ ## depth ## _ ## opt(int16_t *dst,              \
    const uint8_t *src, ptrdiff_t srcstride, int height,                      \
    const int8_t *hf, const int8_t *vf, int width);

/*
 * Declare PUT_PROTOTYPE ten times, with <name> suffixed by
 * 2, 4, 8, 12, 16, 24, 32, 48, 64 and 128 (presumably the block widths).
 */
#define PUT_PROTOTYPES(name, bitd, opt)                                       \
    PUT_PROTOTYPE(name ## 2,   bitd, opt)                                     \
    PUT_PROTOTYPE(name ## 4,   bitd, opt)                                     \
    PUT_PROTOTYPE(name ## 8,   bitd, opt)                                     \
    PUT_PROTOTYPE(name ## 12,  bitd, opt)                                     \
    PUT_PROTOTYPE(name ## 16,  bitd, opt)                                     \
    PUT_PROTOTYPE(name ## 24,  bitd, opt)                                     \
    PUT_PROTOTYPE(name ## 32,  bitd, opt)                                     \
    PUT_PROTOTYPE(name ## 48,  bitd, opt)                                     \
    PUT_PROTOTYPE(name ## 64,  bitd, opt)                                     \
    PUT_PROTOTYPE(name ## 128, bitd, opt)

/* Repeat PUT_PROTOTYPES for the depth tags 8, 10 and 12. */
#define PUT_BPC_PROTOTYPES(name, opt)                                         \
    PUT_PROTOTYPES(name, 8,  opt)                                             \
    PUT_PROTOTYPES(name, 10, opt)                                             \
    PUT_PROTOTYPES(name, 12, opt)

/*
 * Declare the <n>tap_h, <n>tap_v and <n>tap_hv families for one tap count.
 * Pasting n with "tap_*" yields a pp-number such as 8tap_h; it only becomes
 * an identifier once PUT_PROTOTYPE pastes it after "ff_vvc_put_".
 */
#define PUT_TAP_PROTOTYPES(n, opt)                                            \
    PUT_BPC_PROTOTYPES(n ## tap_h,  opt)                                      \
    PUT_BPC_PROTOTYPES(n ## tap_v,  opt)                                      \
    PUT_BPC_PROTOTYPES(n ## tap_hv, opt)
/* Build the identifier <fn>_<bd>_<opt>, e.g. bf(foo, 10, avx2) -> foo_10_avx2. */
#define bf(fn, bd, opt)   fn ## _ ## bd ## _ ## opt
/* Build the identifier <fn>_<bpc>bpc_<opt>, e.g. BF(foo, 16, avx2) -> foo_16bpc_avx2. */
#define BF(fn, bpc, opt)  fn ## _ ## bpc ## bpc_ ## opt
/*
 * Declare the two averaging routines whose names are built with BF():
 * <ff_vvc_avg>_<bpc>bpc_<opt> and <ff_vvc_w_avg>_<bpc>bpc_<opt>.
 * All scalar arguments are intptr_t and the last one is pixel_max; the
 * w_avg variant additionally takes denom, w0, w1, o0 and o1.
 */
#define AVG_BPC_PROTOTYPES(bpc, opt)                                          \
void BF(ff_vvc_avg, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride,             \
    const int16_t *src0, const int16_t *src1,                                 \
    intptr_t width, intptr_t height, intptr_t pixel_max);                     \
void BF(ff_vvc_w_avg, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride,           \
    const int16_t *src0, const int16_t *src1,                                 \
    intptr_t width, intptr_t height,                                          \
    intptr_t denom, intptr_t w0, intptr_t w1,                                 \
    intptr_t o0, intptr_t o1, intptr_t pixel_max);
/*
 * Declare the two averaging entry points whose names are built with bf():
 * <ff_vvc_avg>_<bd>_<opt> and <ff_vvc_w_avg>_<bd>_<opt>.
 * These take plain int arguments and have no pixel_max parameter.
 */
#define AVG_PROTOTYPES(bd, opt)                                               \
void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride,              \
    const int16_t *src0, const int16_t *src1, int width, int height);         \
void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride,            \
    const int16_t *src0, const int16_t *src1, int width, int height,          \
    int denom, int w0, int w1, int o0, int o1);
/*
 * Declare the four DMVR routines for one depth tag and instruction set:
 * ff_vvc_dmvr_<bd>_<opt> plus the _h_, _v_ and _hv_ variants.
 * All four share one signature. The macro ends on its last declaration,
 * with no trailing line continuation.
 */
#define DMVR_PROTOTYPES(bd, opt)                                              \
void ff_vvc_dmvr_ ## bd ## _ ## opt(int16_t *dst, const uint8_t *src,         \
    ptrdiff_t src_stride, int height, intptr_t mx, intptr_t my, int width);   \
void ff_vvc_dmvr_h_ ## bd ## _ ## opt(int16_t *dst, const uint8_t *src,       \
    ptrdiff_t src_stride, int height, intptr_t mx, intptr_t my, int width);   \
void ff_vvc_dmvr_v_ ## bd ## _ ## opt(int16_t *dst, const uint8_t *src,       \
    ptrdiff_t src_stride, int height, intptr_t mx, intptr_t my, int width);   \
void ff_vvc_dmvr_hv_ ## bd ## _ ## opt(int16_t *dst, const uint8_t *src,      \
    ptrdiff_t src_stride, int height, intptr_t mx, intptr_t my, int width);
/*
 * Declare ff_vvc_apply_bdof_<bd>_<opt>, the per-depth BDOF entry point.
 * It takes no pixel_max argument. The macro ends on the declaration itself,
 * with no trailing line continuation.
 */
#define OF_PROTOTYPES(bd, opt)                                                \
void ff_vvc_apply_bdof_ ## bd ## _ ## opt(uint8_t *dst, ptrdiff_t dst_stride, \
    const int16_t *src0, const int16_t *src1, int w, int h);
113 #if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
/*
 * Depth-independent BDOF routine; this file only declares it (the
 * definition is elsewhere, presumably in assembly). Unlike the per-depth
 * wrappers it receives the maximum pixel value explicitly as pixel_max.
 *
 * This is an ordinary declaration, not a macro body, so it carries no
 * line-continuation backslashes.
 */
void ff_vvc_apply_bdof_avx2(uint8_t *dst, ptrdiff_t dst_stride,
                            const int16_t *src0, const int16_t *src1,
                            int w, int h, int pixel_max);
/*
 * Define ff_vvc_apply_bdof_<bd>_<opt> as a thin wrapper that forwards its
 * arguments to ff_vvc_apply_bdof_<opt> and appends the maximum pixel value
 * for the depth, (1 << bd) - 1.
 */
#define OF_FUNC(bd, opt)                                                      \
void ff_vvc_apply_bdof_ ## bd ## _ ## opt(uint8_t *dst, ptrdiff_t dst_stride, \
    const int16_t *src0, const int16_t *src1, int w, int h)                   \
{                                                                             \
    ff_vvc_apply_bdof ## _ ## opt(dst, dst_stride, src0, src1, w, h,          \
                                  (1 << bd) - 1);                             \
}
/*
 * Declare the four ALF routines whose names are built with BF():
 * filter_luma, filter_chroma, classify_grad and classify.
 * The two filter routines take ptrdiff_t scalars and end with
 * (stride, vb_pos, pixel_max); the classify routines take intptr_t scalars.
 * The macro ends on its last declaration, with no trailing continuation.
 */
#define ALF_BPC_PROTOTYPES(bpc, opt)                                          \
void BF(ff_vvc_alf_filter_luma, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
    const uint8_t *src, ptrdiff_t src_stride,                                 \
    ptrdiff_t width, ptrdiff_t height,                                        \
    const int16_t *filter, const int16_t *clip,                               \
    ptrdiff_t stride, ptrdiff_t vb_pos, ptrdiff_t pixel_max);                 \
void BF(ff_vvc_alf_filter_chroma, bpc, opt)(uint8_t *dst,                     \
    ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride,           \
    ptrdiff_t width, ptrdiff_t height,                                        \
    const int16_t *filter, const int16_t *clip,                               \
    ptrdiff_t stride, ptrdiff_t vb_pos, ptrdiff_t pixel_max);                 \
void BF(ff_vvc_alf_classify_grad, bpc, opt)(int *gradient_sum,                \
    const uint8_t *src, ptrdiff_t src_stride,                                 \
    intptr_t width, intptr_t height, intptr_t vb_pos);                        \
void BF(ff_vvc_alf_classify, bpc, opt)(int *class_idx, int *transpose_idx,    \
    const int *gradient_sum, intptr_t width, intptr_t height,                 \
    intptr_t vb_pos, intptr_t bit_depth);
141 #define ALF_PROTOTYPES(bpc, bd, opt) \
142 void bf(ff_vvc_alf_filter_luma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride, \
143 int width, int height, const int16_t *filter, const int16_t *clip, const int vb_pos); \
144 void bf(ff_vvc_alf_filter_chroma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride, \
145 int width, int height, const int16_t *filter, const int16_t *clip, const int vb_pos); \
146 void bf(ff_vvc_alf_classify, bd, opt)(int *class_idx, int *transpose_idx, \
147 const uint8_t *src, ptrdiff_t src_stride, int width, int height, int vb_pos, int *gradient_tmp); \
157 #if HAVE_SSE4_EXTERNAL
/*
 * Define ff_vvc_put_<name>_<depth>_<opt> as a forwarding wrapper around
 * ff_h2656_put_<name>_<depth>_<opt>. The wrapped routine takes one more
 * argument than the VVC entry point: 2 * MAX_PB_SIZE is inserted as its
 * second argument (presumably the destination stride in bytes - confirm
 * against the h2656 prototype).
 */
#define FW_PUT(name, depth, opt)                                              \
void ff_vvc_put_ ## name ## _ ## depth ## _ ## opt(int16_t *dst,              \
    const uint8_t *src, ptrdiff_t srcstride, int height,                      \
    const int8_t *hf, const int8_t *vf, int width)                            \
{                                                                             \
    ff_h2656_put_ ## name ## _ ## depth ## _ ## opt(dst, 2 * MAX_PB_SIZE,     \
        src, srcstride, height, hf, vf, width);                               \
}
/*
 * Emit FW_PUT wrappers for <fname> suffixed with 4, 8, 16, 32, 64 and 128.
 * The list ends on the last FW_PUT; a trailing line continuation here would
 * splice the following #define into this macro.
 */
#define FW_PUT_TAP(fname, bitd, opt)                                          \
    FW_PUT(fname ## 4,   bitd, opt)                                           \
    FW_PUT(fname ## 8,   bitd, opt)                                           \
    FW_PUT(fname ## 16,  bitd, opt)                                           \
    FW_PUT(fname ## 32,  bitd, opt)                                           \
    FW_PUT(fname ## 64,  bitd, opt)                                           \
    FW_PUT(fname ## 128, bitd, opt)

/* Same as FW_PUT_TAP, preceded by the extra <fname>2 wrapper. */
#define FW_PUT_4TAP(fname, bitd, opt)                                         \
    FW_PUT(fname ## 2, bitd, opt)                                             \
    FW_PUT_TAP(fname, bitd, opt)
/* SSE4 wrappers for the families that include the extra "2" variant: pixels and 4tap_{h,v,hv}. */
#define FW_PUT_4TAP_SSE4(bitd)                                                \
    FW_PUT_4TAP(pixels,  bitd, sse4)                                          \
    FW_PUT_4TAP(4tap_h,  bitd, sse4)                                          \
    FW_PUT_4TAP(4tap_v,  bitd, sse4)                                          \
    FW_PUT_4TAP(4tap_hv, bitd, sse4)

/* SSE4 wrappers for the 8tap_{h,v,hv} families (suffixes 4..128 only). */
#define FW_PUT_8TAP_SSE4(bitd)                                                \
    FW_PUT_TAP(8tap_h,  bitd, sse4)                                           \
    FW_PUT_TAP(8tap_v,  bitd, sse4)                                           \
    FW_PUT_TAP(8tap_hv, bitd, sse4)

/* All SSE4 forwarding wrappers for one depth tag. */
#define FW_PUT_SSE4(bitd)                                                     \
    FW_PUT_4TAP_SSE4(bitd)                                                    \
    FW_PUT_8TAP_SSE4(bitd)
197 #if HAVE_AVX2_EXTERNAL
/* AVX2 wrappers for <n>tap_h and <n>tap_v with suffixes 32, 64 and 128. */
#define FW_PUT_TAP_AVX2(n, bitd)                                              \
    FW_PUT(n ## tap_h32,  bitd, avx2)                                         \
    FW_PUT(n ## tap_h64,  bitd, avx2)                                         \
    FW_PUT(n ## tap_h128, bitd, avx2)                                         \
    FW_PUT(n ## tap_v32,  bitd, avx2)                                         \
    FW_PUT(n ## tap_v64,  bitd, avx2)                                         \
    FW_PUT(n ## tap_v128, bitd, avx2)

/*
 * AVX2 wrappers for one depth tag: pixels32/64/128 plus the 4-tap and 8-tap
 * h/v sets. The list ends on its last entry, with no trailing continuation.
 */
#define FW_PUT_AVX2(bitd)                                                     \
    FW_PUT(pixels32,  bitd, avx2)                                             \
    FW_PUT(pixels64,  bitd, avx2)                                             \
    FW_PUT(pixels128, bitd, avx2)                                             \
    FW_PUT_TAP_AVX2(4, bitd)                                                  \
    FW_PUT_TAP_AVX2(8, bitd)
/*
 * AVX2 wrappers for <n>tap_h16, <n>tap_v16 and <n>tap_hv with suffixes
 * 16, 32, 64 and 128.
 */
#define FW_PUT_TAP_16BPC_AVX2(n, bitd)                                        \
    FW_PUT(n ## tap_h16,   bitd, avx2)                                        \
    FW_PUT(n ## tap_v16,   bitd, avx2)                                        \
    FW_PUT(n ## tap_hv16,  bitd, avx2)                                        \
    FW_PUT(n ## tap_hv32,  bitd, avx2)                                        \
    FW_PUT(n ## tap_hv64,  bitd, avx2)                                        \
    FW_PUT(n ## tap_hv128, bitd, avx2)

/* pixels16 plus the 4-tap and 8-tap sets above, for one depth tag. */
#define FW_PUT_16BPC_AVX2(bitd)                                               \
    FW_PUT(pixels16, bitd, avx2)                                              \
    FW_PUT_TAP_16BPC_AVX2(4, bitd)                                            \
    FW_PUT_TAP_16BPC_AVX2(8, bitd)
/* Instantiate the FW_PUT_16BPC_AVX2 wrapper set for the depth tags 10 and 12. */
230 FW_PUT_16BPC_AVX2(10)
231 FW_PUT_16BPC_AVX2(12)
/*
 * Define the two averaging entry points named by bf() as wrappers around
 * the routines named by BF(). Each wrapper forwards its arguments unchanged
 * and appends the maximum pixel value for the depth, (1 << bd) - 1, as the
 * final argument.
 */
#define AVG_FUNCS(bpc, bd, opt)                                               \
void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride,              \
    const int16_t *src0, const int16_t *src1, int width, int height)          \
{                                                                             \
    BF(ff_vvc_avg, bpc, opt)(dst, dst_stride, src0, src1, width, height,      \
                             (1 << bd) - 1);                                  \
}                                                                             \
void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride,            \
    const int16_t *src0, const int16_t *src1, int width, int height,          \
    int denom, int w0, int w1, int o0, int o1)                                \
{                                                                             \
    BF(ff_vvc_w_avg, bpc, opt)(dst, dst_stride, src0, src1, width, height,    \
                               denom, w0, w1, o0, o1, (1 << bd) - 1);         \
}
/* Instantiate the AVX2 avg/w_avg wrappers for (bpc, bd) = (8, 8), (16, 10) and (16, 12). */
247 AVG_FUNCS(8, 8, avx2)
248 AVG_FUNCS(16, 10, avx2)
249 AVG_FUNCS(16, 12, avx2)
/*
 * Define the three ALF entry points named by bf() as wrappers around the
 * routines named by BF():
 *  - filter_luma:   passes (width >> 2) * ALF_NUM_COEFF_LUMA as the stride
 *                   argument and (1 << bd) - 1 as pixel_max;
 *  - filter_chroma: passes 0 as the stride argument and (1 << bd) - 1 as
 *                   pixel_max;
 *  - classify:      first calls classify_grad to fill gradient_tmp, then
 *                   calls classify with that buffer and bd as bit depth.
 */
#define ALF_FUNCS(bpc, bd, opt)                                               \
void bf(ff_vvc_alf_filter_luma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride,  \
    const uint8_t *src, ptrdiff_t src_stride, int width, int height,          \
    const int16_t *filter, const int16_t *clip, const int vb_pos)             \
{                                                                             \
    const int param_stride = (width >> 2) * ALF_NUM_COEFF_LUMA;               \
    BF(ff_vvc_alf_filter_luma, bpc, opt)(dst, dst_stride, src, src_stride,    \
        width, height, filter, clip, param_stride, vb_pos, (1 << bd) - 1);    \
}                                                                             \
void bf(ff_vvc_alf_filter_chroma, bd, opt)(uint8_t *dst,                      \
    ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride,           \
    int width, int height,                                                    \
    const int16_t *filter, const int16_t *clip, const int vb_pos)             \
{                                                                             \
    BF(ff_vvc_alf_filter_chroma, bpc, opt)(dst, dst_stride, src, src_stride,  \
        width, height, filter, clip, 0, vb_pos, (1 << bd) - 1);               \
}                                                                             \
void bf(ff_vvc_alf_classify, bd, opt)(int *class_idx, int *transpose_idx,     \
    const uint8_t *src, ptrdiff_t src_stride, int width, int height,          \
    int vb_pos, int *gradient_tmp)                                            \
{                                                                             \
    BF(ff_vvc_alf_classify_grad, bpc, opt)(gradient_tmp, src, src_stride,     \
        width, height, vb_pos);                                               \
    BF(ff_vvc_alf_classify, bpc, opt)(class_idx, transpose_idx, gradient_tmp, \
        width, height, vb_pos, bd);                                           \
}
/* Instantiate the AVX2 ALF wrappers for (bpc, bd) = (8, 8), (16, 10) and (16, 12). */
272 ALF_FUNCS(8, 8, avx2)
273 ALF_FUNCS(16, 10, avx2)
274 ALF_FUNCS(16, 12, avx2)
/*
 * Fill one slot in two parallel function tables:
 *   dst[C][W][idx1][idx2]      <- ff_vvc_put_<name>_<D>_<opt>
 *   dst_uni[C][W][idx1][idx2]  <- ff_h2656_put_uni_<name>_<D>_<opt>
 * "_uni" is pasted onto the last token of dst, so c->inter.put becomes
 * c->inter.put_uni.
 *
 * The expansion is two complete statements, each with its own ';'. It is
 * deliberately not wrapped in do { } while (0): some users in this file
 * invoke PEL_LINK(...) without a trailing semicolon.
 */
#define PEL_LINK(dst, C, W, idx1, idx2, name, D, opt)                                  \
    dst[C][W][idx1][idx2]         = ff_vvc_put_ ## name ## _ ## D ## _ ## opt;         \
    dst ## _uni[C][W][idx1][idx2] = ff_h2656_put_uni_ ## name ## _ ## D ## _ ## opt;
/*
 * Link six table slots for one (my, mx) pair: W indices 1..6 receive the
 * routines <fname>4, 8, 16, 32, 64 and 128 respectively.
 * Each PEL_LINK is followed by ';', so the expansion ends with a semicolon.
 */
#define MC_TAP_LINKS(pointer, C, my, mx, fname, bitd, opt)                    \
    PEL_LINK(pointer, C, 1, my, mx, fname ## 4,   bitd, opt);                 \
    PEL_LINK(pointer, C, 2, my, mx, fname ## 8,   bitd, opt);                 \
    PEL_LINK(pointer, C, 3, my, mx, fname ## 16,  bitd, opt);                 \
    PEL_LINK(pointer, C, 4, my, mx, fname ## 32,  bitd, opt);                 \
    PEL_LINK(pointer, C, 5, my, mx, fname ## 64,  bitd, opt);                 \
    PEL_LINK(pointer, C, 6, my, mx, fname ## 128, bitd, opt);

/* MC_TAP_LINKS with the table's first index fixed to LUMA. */
#define MC_8TAP_LINKS(pointer, my, mx, fname, bitd, opt)                      \
    MC_TAP_LINKS(pointer, LUMA, my, mx, fname, bitd, opt)
/*
 * Link the SSE4 luma routines into c->inter.put for all four (my, mx)
 * combinations: (0,0) pixels, (0,1) 8tap_h, (1,0) 8tap_v, (1,1) 8tap_hv.
 * Expects a variable named c in scope at the point of use.
 */
#define MC_8TAP_LINKS_SSE4(bd)                                                \
    MC_8TAP_LINKS(c->inter.put, 0, 0, pixels,  bd, sse4);                     \
    MC_8TAP_LINKS(c->inter.put, 0, 1, 8tap_h,  bd, sse4);                     \
    MC_8TAP_LINKS(c->inter.put, 1, 0, 8tap_v,  bd, sse4);                     \
    MC_8TAP_LINKS(c->inter.put, 1, 1, 8tap_hv, bd, sse4)
/*
 * Chroma counterpart of MC_8TAP_LINKS: links the extra <fname>2 routine at
 * W index 0, then the six MC_TAP_LINKS slots, all with the first index fixed
 * to CHROMA. The macro ends on the MC_TAP_LINKS call, with no trailing
 * continuation.
 */
#define MC_4TAP_LINKS(pointer, my, mx, fname, bitd, opt)                      \
    PEL_LINK(pointer, CHROMA, 0, my, mx, fname ## 2, bitd, opt);              \
    MC_TAP_LINKS(pointer, CHROMA, my, mx, fname, bitd, opt)
/*
 * Link the SSE4 chroma routines into c->inter.put for all four (my, mx)
 * combinations: (0,0) pixels, (0,1) 4tap_h, (1,0) 4tap_v, (1,1) 4tap_hv.
 * Expects a variable named c in scope at the point of use.
 */
#define MC_4TAP_LINKS_SSE4(bd)                                                \
    MC_4TAP_LINKS(c->inter.put, 0, 0, pixels,  bd, sse4);                     \
    MC_4TAP_LINKS(c->inter.put, 0, 1, 4tap_h,  bd, sse4);                     \
    MC_4TAP_LINKS(c->inter.put, 1, 0, 4tap_v,  bd, sse4);                     \
    MC_4TAP_LINKS(c->inter.put, 1, 1, 4tap_hv, bd, sse4)

/* Link both the chroma (4-tap) and luma (8-tap) SSE4 sets for one depth tag. */
#define MC_LINK_SSE4(bd)                                                      \
    MC_4TAP_LINKS_SSE4(bd)                                                    \
    MC_8TAP_LINKS_SSE4(bd)
/*
 * Link the AVX2 routines with suffixes 32/64/128 (W indices 4..6) into
 * c->inter.put for one table C: pixels at (0,0), <tap>tap_h at (0,1) and
 * <tap>tap_v at (1,0). The (1,1) slots are not touched here.
 *
 * PEL_LINK already ends each assignment with ';', so the entries carry no
 * separators. The whole list is one do { } while (0) statement and the
 * caller supplies the final ';'.
 */
#define MC_TAP_LINKS_AVX2(C, tap, bd) do {                                    \
        PEL_LINK(c->inter.put, C, 4, 0, 0, pixels32,         bd, avx2)        \
        PEL_LINK(c->inter.put, C, 5, 0, 0, pixels64,         bd, avx2)        \
        PEL_LINK(c->inter.put, C, 6, 0, 0, pixels128,        bd, avx2)        \
        PEL_LINK(c->inter.put, C, 4, 0, 1, tap ## tap_h32,   bd, avx2)        \
        PEL_LINK(c->inter.put, C, 5, 0, 1, tap ## tap_h64,   bd, avx2)        \
        PEL_LINK(c->inter.put, C, 6, 0, 1, tap ## tap_h128,  bd, avx2)        \
        PEL_LINK(c->inter.put, C, 4, 1, 0, tap ## tap_v32,   bd, avx2)        \
        PEL_LINK(c->inter.put, C, 5, 1, 0, tap ## tap_v64,   bd, avx2)        \
        PEL_LINK(c->inter.put, C, 6, 1, 0, tap ## tap_v128,  bd, avx2)        \
    } while (0)

/* Apply MC_TAP_LINKS_AVX2 to LUMA with 8 taps and to CHROMA with 4 taps. */
#define MC_LINKS_AVX2(bd)                                                     \
    MC_TAP_LINKS_AVX2(LUMA,   8, bd);                                         \
    MC_TAP_LINKS_AVX2(CHROMA, 4, bd);
/*
 * Link the AVX2 routines instantiated by FW_PUT_16BPC_AVX2 into
 * c->inter.put for one table C: the "16" variants (W index 3) of pixels,
 * <tap>tap_h, <tap>tap_v and <tap>tap_hv, plus <tap>tap_hv32/64/128 at
 * W indices 4..6 in the (1,1) slot.
 *
 * PEL_LINK already ends each assignment with ';'. The whole list is one
 * do { } while (0) statement and the caller supplies the final ';'.
 */
#define MC_TAP_LINKS_16BPC_AVX2(C, tap, bd) do {                              \
        PEL_LINK(c->inter.put, C, 3, 0, 0, pixels16,          bd, avx2)       \
        PEL_LINK(c->inter.put, C, 3, 0, 1, tap ## tap_h16,    bd, avx2)       \
        PEL_LINK(c->inter.put, C, 3, 1, 0, tap ## tap_v16,    bd, avx2)       \
        PEL_LINK(c->inter.put, C, 3, 1, 1, tap ## tap_hv16,   bd, avx2)       \
        PEL_LINK(c->inter.put, C, 4, 1, 1, tap ## tap_hv32,   bd, avx2)       \
        PEL_LINK(c->inter.put, C, 5, 1, 1, tap ## tap_hv64,   bd, avx2)       \
        PEL_LINK(c->inter.put, C, 6, 1, 1, tap ## tap_hv128,  bd, avx2)       \
    } while (0)

/* Apply MC_TAP_LINKS_16BPC_AVX2 to LUMA with 8 taps and to CHROMA with 4 taps. */
#define MC_LINKS_16BPC_AVX2(bd)                                               \
    MC_TAP_LINKS_16BPC_AVX2(LUMA,   8, bd);                                   \
    MC_TAP_LINKS_16BPC_AVX2(CHROMA, 4, bd);
/*
 * Context-initialisation helpers. Each expands to a single
 * do { } while (0) statement that stores function pointers into the
 * context pointed to by c, which must be in scope at the point of use;
 * the caller supplies the trailing ';'.
 */

/* Install the avg / w_avg wrappers named by bf() for <bd>, <opt>. */
#define AVG_INIT(bd, opt) do {                                                \
    c->inter.avg   = bf(ff_vvc_avg,   bd, opt);                               \
    c->inter.w_avg = bf(ff_vvc_w_avg, bd, opt);                               \
} while (0)

/* Install the four AVX2 DMVR routines; the dmvr table is indexed [0..1][0..1]. */
#define DMVR_INIT(bd) do {                                                    \
    c->inter.dmvr[0][0] = ff_vvc_dmvr_    ## bd ## _avx2;                     \
    c->inter.dmvr[0][1] = ff_vvc_dmvr_h_  ## bd ## _avx2;                     \
    c->inter.dmvr[1][0] = ff_vvc_dmvr_v_  ## bd ## _avx2;                     \
    c->inter.dmvr[1][1] = ff_vvc_dmvr_hv_ ## bd ## _avx2;                     \
} while (0)

/* Install the AVX2 BDOF wrapper for <bd>. */
#define OF_INIT(bd) do {                                                      \
    c->inter.apply_bdof = ff_vvc_apply_bdof_ ## bd ## _avx2;                  \
} while (0)

/* Install the AVX2 ALF luma/chroma filters and the classifier for <bd>. */
#define ALF_INIT(bd) do {                                                     \
    c->alf.filter[LUMA]   = ff_vvc_alf_filter_luma_   ## bd ## _avx2;         \
    c->alf.filter[CHROMA] = ff_vvc_alf_filter_chroma_ ## bd ## _avx2;         \
    c->alf.classify       = ff_vvc_alf_classify_      ## bd ## _avx2;         \
} while (0)
/*
 * AVX2 SAD routine; this file only declares it (the definition is
 * elsewhere, presumably in assembly). It takes two int16_t sample pointers,
 * a (dx, dy) pair and the block dimensions, and returns an int.
 */
int ff_vvc_sad_avx2(const int16_t *src0, const int16_t *src1,
                    int dx, int dy, int block_w, int block_h);

/* Install ff_vvc_sad_avx2 into the context pointed to by c (must be in scope). */
#define SAD_INIT() c->inter.sad = ff_vvc_sad_avx2
396 MC_LINKS_16BPC_AVX2(10);
410 MC_LINKS_16BPC_AVX2(12);
void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
#define ALF_PROTOTYPES(bpc, bd, opt)
#define EXTERNAL_AVX2_FAST(flags)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static atomic_int cpu_flags
#define ALF_BPC_PROTOTYPES(bpc, opt)
#define AVG_PROTOTYPES(bd, opt)
#define OF_PROTOTYPES(bd, opt)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c…
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
#define PUT_BPC_PROTOTYPES(name, opt)
#define DMVR_PROTOTYPES(bd, opt)
#define EXTERNAL_SSE4(flags)
#define AVG_BPC_PROTOTYPES(bpc, opt)
#define PUT_TAP_PROTOTYPES(n, opt)