FFmpeg
vvc_alf.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2023-2024 Nuo Mi <nuomi2021@gmail.com>
3  * Copyright (c) 2023-2024 Wu Jianhua <toqsxw@outlook.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21 
22 #include <string.h>
23 
24 #include "checkasm.h"
25 #include "libavcodec/vvc/ctu.h"
26 #include "libavcodec/vvc/data.h"
27 #include "libavcodec/vvc/dsp.h"
28 
29 #include "libavutil/common.h"
30 #include "libavutil/intreadwrite.h"
31 #include "libavutil/mem_internal.h"
32 
/*
 * Per-bit-depth mask applied to each random 32-bit word, indexed by
 * (bit_depth - 8) >> 1. The 10- and 12-bit entries keep only the low
 * 10 / 12 bits of each 16-bit half of the word.
 */
static const uint32_t pixel_mask[3] = {
    0xffffffff, /*  8 bit */
    0x03ff03ff, /* 10 bit */
    0x0fff0fff, /* 12 bit */
};

/* Bytes per pixel for the bit depth in scope at the point of use. */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)

/* Source row length in pixels: a full CTU plus ALF padding on both sides. */
#define SRC_PIXEL_STRIDE (MAX_CTU_SIZE + 2 * ALF_PADDING_SIZE)

/* Destination row length in pixels: 4 pixels longer than a source row. */
#define DST_PIXEL_STRIDE (SRC_PIXEL_STRIDE + 4)

/*
 * Buffer sizes in bytes.
 * "+ 3 * 2" accounts for the extra rows above and below the block,
 * the trailing "* 2" leaves room for high bit depth (2 bytes per pixel).
 */
#define SRC_BUF_SIZE (SRC_PIXEL_STRIDE * (MAX_CTU_SIZE + 3 * 2) * 2)
#define DST_BUF_SIZE (DST_PIXEL_STRIDE * (MAX_CTU_SIZE + 3 * 2) * 2)

/* Number of int16_t entries in the filter / clip tables: ALF_NUM_COEFF_LUMA per ALF block of a full CTU. */
#define LUMA_PARAMS_SIZE (MAX_CTU_SIZE * MAX_CTU_SIZE / ALF_BLOCK_SIZE / ALF_BLOCK_SIZE * ALF_NUM_COEFF_LUMA)
41 
/*
 * Fill buf0 and buf1 with identical random data, 4 bytes at a time.
 *
 * Every random 32-bit word is masked with the pixel_mask entry selected by
 * the bit_depth variable in scope at the call site, then stored at the same
 * byte offset in both buffers. size is a byte count; both buffers are
 * written with aligned 32-bit stores (AV_WN32A).
 */
#define randomize_buffers(buf0, buf1, size)                             \
    do {                                                                \
        const uint32_t px_mask = pixel_mask[(bit_depth - 8) >> 1];      \
        for (int pos = 0; pos < size; pos += 4) {                       \
            const uint32_t word = rnd() & px_mask;                      \
            AV_WN32A(buf0 + pos, word);                                 \
            AV_WN32A(buf1 + pos, word);                                 \
        }                                                               \
    } while (0)
52 
/*
 * Fill the first size elements of buf with random parameters.
 *
 * When filter is non-zero each element gets a random value truncated to
 * int8_t; otherwise each element gets a randomly chosen entry of the
 * clip_set array that must be in scope at the call site.
 * Exactly one rnd() call is made per element in both modes.
 */
#define randomize_buffers2(buf, size, filter)                           \
    do {                                                                \
        for (int idx = 0; idx < size; idx++) {                          \
            if (filter) {                                               \
                const int8_t coeff = rnd();                             \
                buf[idx] = coeff;                                       \
            } else {                                                    \
                const int sel = rnd() % FF_ARRAY_ELEMS(clip_set);       \
                buf[idx] = clip_set[sel];                               \
            }                                                           \
        }                                                               \
    } while (0)
68 
69 static int get_alf_vb_pos(const int h, const int vb_pos_above)
70 {
71  if (h == MAX_CTU_SIZE)
72  return MAX_CTU_SIZE - vb_pos_above;
73  // If h < MAX_CTU_SIZE and picture virtual boundaries are involved, ALF virtual boundaries can either be within or outside this ALF block.
74  return ((rnd() & 1) ? h : MAX_CTU_SIZE) - vb_pos_above;
75 }
76 
77 static void check_alf_filter(VVCDSPContext *c, const int bit_depth)
78 {
79  LOCAL_ALIGNED_32(uint8_t, dst0, [DST_BUF_SIZE]);
80  LOCAL_ALIGNED_32(uint8_t, dst1, [DST_BUF_SIZE]);
81  LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]);
82  LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]);
83  int16_t filter[LUMA_PARAMS_SIZE];
84  int16_t clip[LUMA_PARAMS_SIZE];
85 
86  const int16_t clip_set[] = {
87  1 << bit_depth, 1 << (bit_depth - 3), 1 << (bit_depth - 5), 1 << (bit_depth - 7)
88  };
89 
90  ptrdiff_t src_stride = SRC_PIXEL_STRIDE * SIZEOF_PIXEL;
91  ptrdiff_t dst_stride = DST_PIXEL_STRIDE * SIZEOF_PIXEL;
92  int offset = (3 * SRC_PIXEL_STRIDE + 3) * SIZEOF_PIXEL;
93 
94  declare_func(void, uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride,
95  int width, int height, const int16_t *filter, const int16_t *clip, const int vb_pos);
96 
100 
101  for (int h = 4; h <= MAX_CTU_SIZE; h += 4) {
102  for (int w = 4; w <= MAX_CTU_SIZE; w += 4) {
103  //Both picture size and virtual boundaries are 8-aligned. For luma, we only need to check 8-aligned sizes.
104  if (!(w % 8) && !(h % 8)) {
105  if (check_func(c->alf.filter[LUMA], "vvc_alf_filter_luma_%dx%d_%d", w, h, bit_depth)) {
106  const int vb_pos = get_alf_vb_pos(h, ALF_VB_POS_ABOVE_LUMA);
107  memset(dst0, 0, DST_BUF_SIZE);
108  memset(dst1, 0, DST_BUF_SIZE);
109  call_ref(dst0, dst_stride, src0 + offset, src_stride, w, h, filter, clip, vb_pos);
110  call_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
111  checkasm_check_pixel(dst0, dst_stride, dst1, dst_stride, w + 1, h + 1, "dst");
112  // Bench only square sizes, and ones with dimensions being a power of two.
113  if (w == h && (w & (w - 1)) == 0)
114  bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
115  }
116  }
117  //For chroma, once it exceeds 64, it's not a 4:2:0 format, so we only need to check 8-aligned sizes as well.
118  if ((w <= 64 || !(w % 8)) && (h <= 64 || !(h % 8))) {
119  if (check_func(c->alf.filter[CHROMA], "vvc_alf_filter_chroma_%dx%d_%d", w, h, bit_depth)) {
120  const int vb_pos = get_alf_vb_pos(h, ALF_VB_POS_ABOVE_CHROMA);
121  memset(dst0, 0, DST_BUF_SIZE);
122  memset(dst1, 0, DST_BUF_SIZE);
123  call_ref(dst0, dst_stride, src0 + offset, src_stride, w, h, filter, clip, vb_pos);
124  call_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
125  checkasm_check_pixel(dst0, dst_stride, dst1, dst_stride, w + 1, h + 1, "dst");
126  if (w == h && (w & (w - 1)) == 0)
127  bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
128  }
129  }
130  }
131  }
132 }
133 
134 static void check_alf_classify(VVCDSPContext *c, const int bit_depth)
135 {
136  LOCAL_ALIGNED_32(int, class_idx0, [SRC_BUF_SIZE]);
137  LOCAL_ALIGNED_32(int, transpose_idx0, [SRC_BUF_SIZE]);
138  LOCAL_ALIGNED_32(int, class_idx1, [SRC_BUF_SIZE]);
139  LOCAL_ALIGNED_32(int, transpose_idx1, [SRC_BUF_SIZE]);
140  LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]);
141  LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]);
143 
144  ptrdiff_t stride = SRC_PIXEL_STRIDE * SIZEOF_PIXEL;
145  int offset = (3 * SRC_PIXEL_STRIDE + 3) * SIZEOF_PIXEL;
146 
147  declare_func(void, int *class_idx, int *transpose_idx,
148  const uint8_t *src, ptrdiff_t src_stride, int width, int height, int vb_pos, int *gradient_tmp);
149 
151 
152  //Both picture size and virtual boundaries are 8-aligned. Classify is luma only, we only need to check 8-aligned sizes.
153  for (int h = 8; h <= MAX_CTU_SIZE; h += 8) {
154  for (int w = 8; w <= MAX_CTU_SIZE; w += 8) {
155  const int id_size = w * h / ALF_BLOCK_SIZE / ALF_BLOCK_SIZE * sizeof(int);
156  const int vb_pos = get_alf_vb_pos(h, ALF_VB_POS_ABOVE_LUMA);
157  if (check_func(c->alf.classify, "vvc_alf_classify_%dx%d_%d", w, h, bit_depth)) {
158  memset(class_idx0, 0, id_size);
159  memset(class_idx1, 0, id_size);
160  memset(transpose_idx0, 0, id_size);
161  memset(transpose_idx1, 0, id_size);
162  call_ref(class_idx0, transpose_idx0, src0 + offset, stride, w, h, vb_pos, alf_gradient_tmp);
163 
164  call_new(class_idx1, transpose_idx1, src1 + offset, stride, w, h, vb_pos, alf_gradient_tmp);
165 
166  if (memcmp(class_idx0, class_idx1, id_size))
167  fail();
168  if (memcmp(transpose_idx0, transpose_idx1, id_size))
169  fail();
170  // Bench only square sizes, and ones with dimensions being a power of two.
171  if (w == h && (w & (w - 1)) == 0)
172  bench_new(class_idx1, transpose_idx1, src1 + offset, stride, w, h, vb_pos, alf_gradient_tmp);
173  }
174  }
175  }
176 }
177 
179 {
180  int bit_depth;
182  for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
185  }
186  report("alf_filter");
187 
188  for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
191  }
192  report("alf_classify");
193 }
LUMA
#define LUMA
Definition: filter.c:31
mem_internal.h
ALF_VB_POS_ABOVE_LUMA
#define ALF_VB_POS_ABOVE_LUMA
Definition: ctu.h:81
src1
const pixel * src1
Definition: h264pred_template.c:420
data.h
w
uint8_t w
Definition: llviddspenc.c:38
ALF_GRADIENT_SIZE
#define ALF_GRADIENT_SIZE
Definition: ctu.h:86
check_func
#define check_func(func,...)
Definition: checkasm.h:184
filter
void(* filter)(uint8_t *src, int stride, int qscale)
Definition: h263dsp.c:29
call_ref
#define call_ref(...)
Definition: checkasm.h:199
DST_PIXEL_STRIDE
#define DST_PIXEL_STRIDE
Definition: vvc_alf.c:37
bit_depth
static void bit_depth(AudioStatsContext *s, const uint64_t *const mask, uint8_t *depth)
Definition: af_astats.c:246
randomize_buffers2
#define randomize_buffers2(buf, size, filter)
Definition: vvc_alf.c:53
fail
#define fail()
Definition: checkasm.h:193
randomize_buffers
#define randomize_buffers(buf0, buf1, size)
Definition: vvc_alf.c:42
get_alf_vb_pos
static int get_alf_vb_pos(const int h, const int vb_pos_above)
Definition: vvc_alf.c:69
checkasm.h
check_alf_classify
static void check_alf_classify(VVCDSPContext *c, const int bit_depth)
Definition: vvc_alf.c:134
rnd
#define rnd()
Definition: checkasm.h:177
dsp.h
check_alf_filter
static void check_alf_filter(VVCDSPContext *c, const int bit_depth)
Definition: vvc_alf.c:77
clip
clip
Definition: af_crystalizer.c:122
intreadwrite.h
pixel_mask
static const uint32_t pixel_mask[3]
Definition: vvc_alf.c:33
call_new
#define call_new(...)
Definition: checkasm.h:302
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:132
SRC_BUF_SIZE
#define SRC_BUF_SIZE
Definition: vvc_alf.c:38
ALF_NUM_DIR
#define ALF_NUM_DIR
Definition: ctu.h:87
MAX_CTU_SIZE
#define MAX_CTU_SIZE
Definition: ctu.h:33
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
height
#define height
Definition: dsp.h:85
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:83
checkasm_check_vvc_alf
void checkasm_check_vvc_alf(void)
Definition: vvc_alf.c:178
ALF_BLOCK_SIZE
#define ALF_BLOCK_SIZE
Definition: ctu.h:76
LUMA_PARAMS_SIZE
#define LUMA_PARAMS_SIZE
Definition: vvc_alf.c:40
ff_vvc_dsp_init
void ff_vvc_dsp_init(VVCDSPContext *vvcdsp, int bit_depth)
Definition: dsp.c:86
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
report
#define report
Definition: checkasm.h:196
checkasm_check_pixel
#define checkasm_check_pixel(buf1, stride1, buf2, stride2,...)
Definition: checkasm.h:398
bench_new
#define bench_new(...)
Definition: checkasm.h:373
common.h
stride
#define stride
Definition: h264pred_template.c:536
CHROMA
@ CHROMA
Definition: vf_waveform.c:49
SRC_PIXEL_STRIDE
#define SRC_PIXEL_STRIDE
Definition: vvc_alf.c:36
src0
const pixel *const src0
Definition: h264pred_template.c:419
DST_BUF_SIZE
#define DST_BUF_SIZE
Definition: vvc_alf.c:39
declare_func
#define declare_func(ret,...)
Definition: checkasm.h:188
int32_t
int32_t
Definition: audioconvert.c:56
h
h
Definition: vp9dsp_template.c:2070
ctu.h
ALF_VB_POS_ABOVE_CHROMA
#define ALF_VB_POS_ABOVE_CHROMA
Definition: ctu.h:82
width
#define width
Definition: dsp.h:85
SIZEOF_PIXEL
#define SIZEOF_PIXEL
Definition: vvc_alf.c:35
src
#define src
Definition: vp8dsp.c:248
VVCDSPContext
Definition: dsp.h:169