FFmpeg
vp8dsp.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016 Martin Storsjo
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20 
21 #include <string.h>
22 
23 #include "libavcodec/avcodec.h"
24 #include "libavcodec/vp8dsp.h"
25 
26 #include "libavutil/common.h"
27 #include "libavutil/intreadwrite.h"
28 #include "libavutil/mem_internal.h"
29 
30 #include "checkasm.h"
31 
#define PIXEL_STRIDE 16

/*
 * Fill a 4x4 pixel block in both src and dst with random bytes (one aligned
 * 32-bit store per row and per buffer) and store the per-pixel difference
 * src - dst into the 16-entry coefficient array coef, in row-major order.
 * Both pointers must be suitably aligned for AV_WN32A at every row.
 */
#define randomize_buffers(src, dst, stride, coef)                         \
    do {                                                                  \
        int row, col;                                                     \
        for (row = 0; row < 4; row++) {                                   \
            AV_WN32A((src) + row * (stride), rnd());                      \
            AV_WN32A((dst) + row * (stride), rnd());                      \
            for (col = 0; col < 4; col++) {                               \
                (coef)[row * 4 + col] = (src)[row * (stride) + col] -     \
                                        (dst)[row * (stride) + col];      \
            }                                                             \
        }                                                                 \
    } while (0)
45 
/*
 * In-place 4x4 transform of a row-major coefficient block: a horizontal pass
 * over each row followed by a vertical pass over each column. The tests use
 * it to turn pixel differences into coefficients for the inverse transforms.
 */
static void dct4x4(int16_t *coef)
{
    int16_t *row;
    int col;

    /* Horizontal pass: inputs are pre-scaled by 8. */
    for (row = coef; row < coef + 16; row += 4) {
        const int sum03  = (row[0] + row[3]) * 8;
        const int sum12  = (row[1] + row[2]) * 8;
        const int diff12 = (row[1] - row[2]) * 8;
        const int diff03 = (row[0] - row[3]) * 8;

        row[0] = sum03 + sum12;
        row[1] = (diff12 * 2217 + diff03 * 5352 + 14500) >> 12;
        row[2] = sum03 - sum12;
        row[3] = (diff03 * 2217 - diff12 * 5352 + 7500) >> 12;
    }

    /* Vertical pass: rounds and scales the result back down. */
    for (col = 0; col < 4; col++) {
        int16_t *c = coef + col;
        const int sum03  = c[0] + c[12];
        const int sum12  = c[4] + c[8];
        const int diff12 = c[4] - c[8];
        const int diff03 = c[0] - c[12];

        c[0]  = (sum03 + sum12 + 7) >> 4;
        /* A nonzero outer difference bumps this coefficient by one. */
        c[4]  = ((diff12 * 2217 + diff03 * 5352 + 12000) >> 16) + (diff03 != 0);
        c[8]  = (sum03 - sum12 + 7) >> 4;
        c[12] = (diff03 * 2217 - diff12 * 5352 + 51000) >> 16;
    }
}
70 
/*
 * One 4-point butterfly of the transform, applied in place to the four
 * values v[0], v[step], v[2 * step] and v[3 * step]; every output is
 * multiplied by scale before being stored.
 */
static void wht4x4_pass(int16_t *v, int step, int scale)
{
    int a = v[0 * step];
    int b = v[1 * step];
    int c = v[2 * step];
    int d = v[3 * step];
    int e;

    a += b;
    d -= c;
    e  = (a - d) >> 1;
    b  = e - b;
    c  = e - c;
    a -= c;
    d += b;

    /* Note the output order: a, c, d, b. */
    v[0 * step] = a * scale;
    v[1 * step] = c * scale;
    v[2 * step] = d * scale;
    v[3 * step] = b * scale;
}

/*
 * In-place 4x4 Walsh-Hadamard style transform of a row-major block:
 * first down every column (unscaled), then along every row (outputs
 * doubled).
 */
static void wht4x4(int16_t *coef)
{
    int i;

    for (i = 0; i < 4; i++)
        wht4x4_pass(coef + i, 4, 1);

    for (i = 0; i < 4; i++)
        wht4x4_pass(coef + 4 * i, 1, 2);
}
111 
112 static void check_idct(void)
113 {
114  LOCAL_ALIGNED_16(uint8_t, src, [4 * 4]);
115  LOCAL_ALIGNED_16(uint8_t, dst, [4 * 4]);
116  LOCAL_ALIGNED_16(uint8_t, dst0, [4 * 4]);
117  LOCAL_ALIGNED_16(uint8_t, dst1, [4 * 4]);
118  LOCAL_ALIGNED_16(int16_t, coef, [4 * 4]);
119  LOCAL_ALIGNED_16(int16_t, subcoef0, [4 * 4]);
120  LOCAL_ALIGNED_16(int16_t, subcoef1, [4 * 4]);
122  int dc;
123  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *block, ptrdiff_t stride);
124 
125  ff_vp8dsp_init(&d);
126  randomize_buffers(src, dst, 4, coef);
127 
128  dct4x4(coef);
129 
130  for (dc = 0; dc <= 1; dc++) {
131  void (*idct)(uint8_t *, int16_t *, ptrdiff_t) = dc ? d.vp8_idct_dc_add : d.vp8_idct_add;
132 
133  if (check_func(idct, "vp8_idct_%sadd", dc ? "dc_" : "")) {
134  if (dc) {
135  memset(subcoef0, 0, 4 * 4 * sizeof(int16_t));
136  subcoef0[0] = coef[0];
137  } else {
138  memcpy(subcoef0, coef, 4 * 4 * sizeof(int16_t));
139  }
140  memcpy(dst0, dst, 4 * 4);
141  memcpy(dst1, dst, 4 * 4);
142  memcpy(subcoef1, subcoef0, 4 * 4 * sizeof(int16_t));
143  // Note, this uses a pixel stride of 4, even though the real decoder uses a stride as a
144  // multiple of 16. If optimizations want to take advantage of that, this test needs to be
145  // updated to make it more like the h264dsp tests.
146  call_ref(dst0, subcoef0, 4);
147  call_new(dst1, subcoef1, 4);
148  if (memcmp(dst0, dst1, 4 * 4) ||
149  memcmp(subcoef0, subcoef1, 4 * 4 * sizeof(int16_t)))
150  fail();
151 
152  bench_new(dst1, subcoef1, 4);
153  }
154  }
155 }
156 
157 static void check_idct_dc4(void)
158 {
159  LOCAL_ALIGNED_16(uint8_t, src, [4 * 4 * 4]);
160  LOCAL_ALIGNED_16(uint8_t, dst, [4 * 4 * 4]);
161  LOCAL_ALIGNED_16(uint8_t, dst0, [4 * 4 * 4]);
162  LOCAL_ALIGNED_16(uint8_t, dst1, [4 * 4 * 4]);
163  LOCAL_ALIGNED_16(int16_t, coef, [4], [4 * 4]);
164  LOCAL_ALIGNED_16(int16_t, subcoef0, [4], [4 * 4]);
165  LOCAL_ALIGNED_16(int16_t, subcoef1, [4], [4 * 4]);
167  int i, chroma;
168  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
169 
170  ff_vp8dsp_init(&d);
171 
172  for (chroma = 0; chroma <= 1; chroma++) {
173  void (*idct4dc)(uint8_t *, int16_t[4][16], ptrdiff_t) = chroma ? d.vp8_idct_dc_add4uv : d.vp8_idct_dc_add4y;
174  if (check_func(idct4dc, "vp8_idct_dc_add4%s", chroma ? "uv" : "y")) {
175  ptrdiff_t stride = chroma ? 8 : 16;
176  int w = chroma ? 2 : 4;
177  for (i = 0; i < 4; i++) {
178  int blockx = 4 * (i % w);
179  int blocky = 4 * (i / w);
180  randomize_buffers(src + stride * blocky + blockx, dst + stride * blocky + blockx, stride, coef[i]);
181  dct4x4(coef[i]);
182  memset(&coef[i][1], 0, 15 * sizeof(int16_t));
183  }
184 
185  memcpy(dst0, dst, 4 * 4 * 4);
186  memcpy(dst1, dst, 4 * 4 * 4);
187  memcpy(subcoef0, coef, 4 * 4 * 4 * sizeof(int16_t));
188  memcpy(subcoef1, coef, 4 * 4 * 4 * sizeof(int16_t));
189  call_ref(dst0, subcoef0, stride);
190  call_new(dst1, subcoef1, stride);
191  if (memcmp(dst0, dst1, 4 * 4 * 4) ||
192  memcmp(subcoef0, subcoef1, 4 * 4 * 4 * sizeof(int16_t)))
193  fail();
194  bench_new(dst1, subcoef1, stride);
195  }
196  }
197 
198 }
199 
200 static void check_luma_dc_wht(void)
201 {
202  LOCAL_ALIGNED_16(int16_t, dc, [4 * 4]);
203  LOCAL_ALIGNED_16(int16_t, dc0, [4 * 4]);
204  LOCAL_ALIGNED_16(int16_t, dc1, [4 * 4]);
205  int16_t block[4][4][16];
206  LOCAL_ALIGNED_16(int16_t, block0, [4], [4][16]);
207  LOCAL_ALIGNED_16(int16_t, block1, [4], [4][16]);
209  int dc_only;
210  int blockx, blocky;
211  declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t block[4][4][16], int16_t dc[16]);
212 
213  ff_vp8dsp_init(&d);
214 
215  for (blocky = 0; blocky < 4; blocky++) {
216  for (blockx = 0; blockx < 4; blockx++) {
217  uint8_t src[16], dst[16];
218  randomize_buffers(src, dst, 4, block[blocky][blockx]);
219 
220  dct4x4(block[blocky][blockx]);
221  dc[blocky * 4 + blockx] = block[blocky][blockx][0];
222  block[blocky][blockx][0] = rnd();
223  }
224  }
225  wht4x4(dc);
226 
227  for (dc_only = 0; dc_only <= 1; dc_only++) {
228  void (*idct)(int16_t [4][4][16], int16_t [16]) = dc_only ? d.vp8_luma_dc_wht_dc : d.vp8_luma_dc_wht;
229 
230  if (check_func(idct, "vp8_luma_dc_wht%s", dc_only ? "_dc" : "")) {
231  if (dc_only) {
232  memset(dc0, 0, 16 * sizeof(int16_t));
233  dc0[0] = dc[0];
234  } else {
235  memcpy(dc0, dc, 16 * sizeof(int16_t));
236  }
237  memcpy(dc1, dc0, 16 * sizeof(int16_t));
238  memcpy(block0, block, 4 * 4 * 16 * sizeof(int16_t));
239  memcpy(block1, block, 4 * 4 * 16 * sizeof(int16_t));
240  call_ref(block0, dc0);
241  call_new(block1, dc1);
242  if (memcmp(block0, block1, 4 * 4 * 16 * sizeof(int16_t)) ||
243  memcmp(dc0, dc1, 16 * sizeof(int16_t)))
244  fail();
245  bench_new(block1, dc1);
246  }
247  }
248 }
249 
250 #define SRC_BUF_STRIDE 32
251 #define SRC_BUF_SIZE (((size << (size < 16)) + 5) * SRC_BUF_STRIDE)
252 // The mc subpixel interpolation filter needs the 2 previous pixels in either
253 // direction, the +1 is to make sure the actual load addresses always are
254 // unaligned.
255 #define src (buf + 2 * SRC_BUF_STRIDE + 2 + 1)
256 
257 #undef randomize_buffers
258 #define randomize_buffers() \
259  do { \
260  int k; \
261  for (k = 0; k < SRC_BUF_SIZE; k += 4) { \
262  AV_WN32A(buf + k, rnd()); \
263  } \
264  } while (0)
265 
266 static void check_mc(void)
267 {
268  LOCAL_ALIGNED_16(uint8_t, buf, [32 * 32]);
269  LOCAL_ALIGNED_16(uint8_t, dst0, [16 * 16]);
270  LOCAL_ALIGNED_16(uint8_t, dst1, [16 * 16]);
272  int type, k, dx, dy;
273  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, uint8_t *, ptrdiff_t, int, int, int);
274 
275  ff_vp78dsp_init(&d);
276 
277  for (type = 0; type < 2; type++) {
278  vp8_mc_func (*tab)[3][3] = type ? d.put_vp8_bilinear_pixels_tab : d.put_vp8_epel_pixels_tab;
279  for (k = 1; k < 8; k++) {
280  int hsize = k / 3;
281  int size = 16 >> hsize;
282  int height = (size << 1) >> (k % 3);
283  for (dy = 0; dy < 3; dy++) {
284  for (dx = 0; dx < 3; dx++) {
285  char str[100];
286  if (dx || dy) {
287  if (type == 0) {
288  static const char *dx_names[] = { "", "h4", "h6" };
289  static const char *dy_names[] = { "", "v4", "v6" };
290  snprintf(str, sizeof(str), "epel%d_%s%s", size, dx_names[dx], dy_names[dy]);
291  } else {
292  snprintf(str, sizeof(str), "bilin%d_%s%s", size, dx ? "h" : "", dy ? "v" : "");
293  }
294  } else {
295  snprintf(str, sizeof(str), "pixels%d", size);
296  }
297  if (check_func(tab[hsize][dy][dx], "vp8_put_%s", str)) {
298  int mx, my;
299  int i;
300  if (type == 0) {
301  mx = dx == 2 ? 2 + 2 * (rnd() % 3) : dx == 1 ? 1 + 2 * (rnd() % 4) : 0;
302  my = dy == 2 ? 2 + 2 * (rnd() % 3) : dy == 1 ? 1 + 2 * (rnd() % 4) : 0;
303  } else {
304  mx = dx ? 1 + (rnd() % 7) : 0;
305  my = dy ? 1 + (rnd() % 7) : 0;
306  }
308  for (i = -2; i <= 3; i++) {
309  int val = (i == -1 || i == 2) ? 0 : 0xff;
310  // Set pixels in the first row and column to the maximum pattern,
311  // to test for potential overflows in the filter.
312  src[i ] = val;
313  src[i * SRC_BUF_STRIDE] = val;
314  }
315  call_ref(dst0, size, src, SRC_BUF_STRIDE, height, mx, my);
316  call_new(dst1, size, src, SRC_BUF_STRIDE, height, mx, my);
317  if (memcmp(dst0, dst1, size * height))
318  fail();
319  bench_new(dst1, size, src, SRC_BUF_STRIDE, height, mx, my);
320  }
321  }
322  }
323  }
324  }
325 }
326 
327 #undef randomize_buffers
328 
// Store c, passed through av_clip_uint8(), at buf[a + b * jstride]. Uses the
// `buf` and `jstride` variables of the enclosing function.
//
// In all three macros the value argument c is deliberately NOT parenthesized:
// callers pass an assignment such as `q1 = q0`, so the jitter added by the
// macro becomes part of the value that is stored into the variable.
#define setpx(a, b, c) buf[(a) + (b) * jstride] = av_clip_uint8(c)
// Set the pixel to c plus a random offset in [-d, d]
#define setdx(a, b, c, d) setpx(a, b, c - (d) + (rnd() % ((d) * 2 + 1)))
// Set the pixel to c moved by d to d+e-1 towards mid-grey (downwards when
// c >= 128, upwards otherwise, so that it won't be clipped); the result is
// also stored in o
#define setdx2(a, b, o, c, d, e) setpx(a, b, o = c + ((d) + (rnd() % (e))) * (c >= 128 ? -1 : 1))

/*
 * Write a synthetic edge into buf for four of eight consecutive lines
 * (every second one, starting at lineoff), touching the 4 pixels on each
 * side of the edge position.
 *
 * dir selects the layout: with dir == 0 the lines advance by str and the
 * pixels across the edge by 1; with dir != 0 it is the other way round.
 * Pixel differences are chosen relative to flim_E, flim_I and hev_thresh.
 *
 * force_hev: 0 makes only the first line exceed hev_thresh on the q side and
 * only the second on the p side; 1 makes every line exceed it on both sides;
 * -1 makes no line exceed it.
 */
static void randomize_loopfilter_buffers(int lineoff, int str,
                                         int dir, int flim_E, int flim_I,
                                         int hev_thresh, uint8_t *buf,
                                         int force_hev)
{
    const uint32_t mask = 0xff;
    const int istride = dir ? 1 : str;   // step between lines
    const int jstride = dir ? str : 1;   // step across the edge (used by setpx)
    const int off     = dir ? lineoff : lineoff * str;
    int i;

    for (i = 0; i < 8; i += 2) {
        const int hev_q = force_hev > 0 || (force_hev >= 0 && i == 0);
        const int hev_p = force_hev > 0 || (force_hev >= 0 && i == 2);
        int idx = off + i * istride;
        int p2, p1, p0, q0, q1, q2;

        // q side: start from a random q0 and walk outwards.
        setpx(idx, 0, q0 = rnd() & mask);
        if (hev_q) {
            setdx2(idx, 1, q1, q0, hev_thresh + 1, flim_I - hev_thresh - 1);
        } else {
            setdx(idx, 1, q1 = q0, hev_thresh);
        }
        setdx(idx, 2, q2 = q1, flim_I);
        setdx(idx, 3, q2, flim_I);

        // p side: p0 is derived from q0, then walk outwards the same way.
        setdx(idx, -1, p0 = q0, flim_E >> 2);
        if (hev_p) {
            setdx2(idx, -2, p1, p0, hev_thresh + 1, flim_I - hev_thresh - 1);
        } else {
            setdx(idx, -2, p1 = p0, hev_thresh);
        }
        setdx(idx, -3, p2 = p1, flim_I);
        setdx(idx, -4, p2, flim_I);
    }
}
367 
// Fill a w x h pixel area, whose rows are stride bytes apart, with random
// byte values.
static void fill_loopfilter_buffers(uint8_t *buf, ptrdiff_t stride, int w, int h)
{
    int row, col;

    for (row = 0; row < h; row++) {
        uint8_t *line = buf + row * stride;

        for (col = 0; col < w; col++)
            line[col] = rnd() & 0xff;
    }
}
376 
377 #define randomize_buffers(buf, lineoff, str, force_hev) \
378  randomize_loopfilter_buffers(lineoff, str, dir, flim_E, flim_I, hev_thresh, buf, force_hev)
379 
380 static void check_loopfilter_16y(void)
381 {
382  LOCAL_ALIGNED_16(uint8_t, base0, [32 + 16 * 16]);
383  LOCAL_ALIGNED_16(uint8_t, base1, [32 + 16 * 16]);
385  int dir, edge, force_hev;
386  int flim_E = 20, flim_I = 10, hev_thresh = 7;
387  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int, int, int);
388 
389  ff_vp8dsp_init(&d);
390 
391  for (dir = 0; dir < 2; dir++) {
392  int midoff = dir ? 4 * 16 : 4;
393  int midoff_aligned = dir ? 4 * 16 : 16;
394  uint8_t *buf0 = base0 + midoff_aligned;
395  uint8_t *buf1 = base1 + midoff_aligned;
396  for (edge = 0; edge < 2; edge++) {
397  void (*func)(uint8_t *, ptrdiff_t, int, int, int) = NULL;
398  switch (dir << 1 | edge) {
399  case (0 << 1) | 0: func = d.vp8_h_loop_filter16y; break;
400  case (1 << 1) | 0: func = d.vp8_v_loop_filter16y; break;
401  case (0 << 1) | 1: func = d.vp8_h_loop_filter16y_inner; break;
402  case (1 << 1) | 1: func = d.vp8_v_loop_filter16y_inner; break;
403  }
404  if (check_func(func, "vp8_loop_filter16y%s_%s", edge ? "_inner" : "", dir ? "v" : "h")) {
405  for (force_hev = -1; force_hev <= 1; force_hev++) {
406  fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16);
407  randomize_buffers(buf0, 0, 16, force_hev);
408  randomize_buffers(buf0, 8, 16, force_hev);
409  memcpy(buf1 - midoff, buf0 - midoff, 16 * 16);
410  call_ref(buf0, 16, flim_E, flim_I, hev_thresh);
411  call_new(buf1, 16, flim_E, flim_I, hev_thresh);
412  if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16))
413  fail();
414  }
415  fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16);
416  randomize_buffers(buf0, 0, 16, 0);
417  randomize_buffers(buf0, 8, 16, 0);
418  bench_new(buf0, 16, flim_E, flim_I, hev_thresh);
419  }
420  }
421  }
422 }
423 
424 static void check_loopfilter_8uv(void)
425 {
426  LOCAL_ALIGNED_16(uint8_t, base0u, [32 + 16 * 16]);
427  LOCAL_ALIGNED_16(uint8_t, base0v, [32 + 16 * 16]);
428  LOCAL_ALIGNED_16(uint8_t, base1u, [32 + 16 * 16]);
429  LOCAL_ALIGNED_16(uint8_t, base1v, [32 + 16 * 16]);
431  int dir, edge, force_hev;
432  int flim_E = 20, flim_I = 10, hev_thresh = 7;
433  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, uint8_t *, ptrdiff_t, int, int, int);
434 
435  ff_vp8dsp_init(&d);
436 
437  for (dir = 0; dir < 2; dir++) {
438  int midoff = dir ? 4 * 16 : 4;
439  int midoff_aligned = dir ? 4 * 16 : 16;
440  uint8_t *buf0u = base0u + midoff_aligned;
441  uint8_t *buf0v = base0v + midoff_aligned;
442  uint8_t *buf1u = base1u + midoff_aligned;
443  uint8_t *buf1v = base1v + midoff_aligned;
444  for (edge = 0; edge < 2; edge++) {
445  void (*func)(uint8_t *, uint8_t *, ptrdiff_t, int, int, int) = NULL;
446  switch (dir << 1 | edge) {
447  case (0 << 1) | 0: func = d.vp8_h_loop_filter8uv; break;
448  case (1 << 1) | 0: func = d.vp8_v_loop_filter8uv; break;
449  case (0 << 1) | 1: func = d.vp8_h_loop_filter8uv_inner; break;
450  case (1 << 1) | 1: func = d.vp8_v_loop_filter8uv_inner; break;
451  }
452  if (check_func(func, "vp8_loop_filter8uv%s_%s", edge ? "_inner" : "", dir ? "v" : "h")) {
453  for (force_hev = -1; force_hev <= 1; force_hev++) {
454  fill_loopfilter_buffers(buf0u - midoff, 16, 16, 16);
455  fill_loopfilter_buffers(buf0v - midoff, 16, 16, 16);
456  randomize_buffers(buf0u, 0, 16, force_hev);
457  randomize_buffers(buf0v, 0, 16, force_hev);
458  memcpy(buf1u - midoff, buf0u - midoff, 16 * 16);
459  memcpy(buf1v - midoff, buf0v - midoff, 16 * 16);
460 
461  call_ref(buf0u, buf0v, 16, flim_E, flim_I, hev_thresh);
462  call_new(buf1u, buf1v, 16, flim_E, flim_I, hev_thresh);
463  if (memcmp(buf0u - midoff, buf1u - midoff, 16 * 16) ||
464  memcmp(buf0v - midoff, buf1v - midoff, 16 * 16))
465  fail();
466  }
467  fill_loopfilter_buffers(buf0u - midoff, 16, 16, 16);
468  fill_loopfilter_buffers(buf0v - midoff, 16, 16, 16);
469  randomize_buffers(buf0u, 0, 16, 0);
470  randomize_buffers(buf0v, 0, 16, 0);
471  bench_new(buf0u, buf0v, 16, flim_E, flim_I, hev_thresh);
472  }
473  }
474  }
475 }
476 
477 static void check_loopfilter_simple(void)
478 {
479  LOCAL_ALIGNED_16(uint8_t, base0, [32 + 16 * 16]);
480  LOCAL_ALIGNED_16(uint8_t, base1, [32 + 16 * 16]);
482  int dir;
483  int flim_E = 20, flim_I = 30, hev_thresh = 0;
484  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int);
485 
486  ff_vp8dsp_init(&d);
487 
488  for (dir = 0; dir < 2; dir++) {
489  int midoff = dir ? 4 * 16 : 4;
490  int midoff_aligned = dir ? 4 * 16 : 16;
491  uint8_t *buf0 = base0 + midoff_aligned;
492  uint8_t *buf1 = base1 + midoff_aligned;
493  void (*func)(uint8_t *, ptrdiff_t, int) = dir ? d.vp8_v_loop_filter_simple : d.vp8_h_loop_filter_simple;
494  if (check_func(func, "vp8_loop_filter_simple_%s", dir ? "v" : "h")) {
495  fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16);
496  randomize_buffers(buf0, 0, 16, -1);
497  randomize_buffers(buf0, 8, 16, -1);
498  memcpy(buf1 - midoff, buf0 - midoff, 16 * 16);
499  call_ref(buf0, 16, flim_E);
500  call_new(buf1, 16, flim_E);
501  if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16))
502  fail();
503  bench_new(buf0, 16, flim_E);
504  }
505  }
506 }
507 
/*
 * checkasm entry point for the VP8 DSP tests: runs every test in this file,
 * grouped under the report names "idct", "mc" and "loopfilter".
 */
void checkasm_check_vp8dsp(void)
{
    check_idct();
    check_idct_dc4();
    check_luma_dc_wht();
    report("idct");
    check_mc();
    report("mc");
    check_loopfilter_16y();
    check_loopfilter_8uv();
    check_loopfilter_simple();
    report("loopfilter");
}
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:68
declare_func_emms
#define declare_func_emms(cpu_flags, ret,...)
Definition: checkasm.h:128
q1
static const uint8_t q1[256]
Definition: twofish.c:100
mem_internal.h
check_loopfilter_simple
static void check_loopfilter_simple(void)
Definition: vp8dsp.c:477
w
uint8_t w
Definition: llviddspenc.c:38
check_func
#define check_func(func,...)
Definition: checkasm.h:122
chroma
static av_always_inline void chroma(WaveformContext *s, AVFrame *in, AVFrame *out, int component, int intensity, int offset_y, int offset_x, int column, int mirror, int jobnr, int nb_jobs)
Definition: vf_waveform.c:1635
wht4x4
static void wht4x4(int16_t *coef)
Definition: vp8dsp.c:71
c1
static const uint64_t c1
Definition: murmur3.c:51
call_ref
#define call_ref(...)
Definition: checkasm.h:137
b1
static double b1(void *priv, double x, double y)
Definition: vf_xfade.c:1771
randomize_buffers
#define randomize_buffers(buf, lineoff, str, force_hev)
Definition: vp8dsp.c:377
fail
#define fail()
Definition: checkasm.h:131
tab
static const struct twinvq_data tab
Definition: twinvq_data.h:10345
checkasm.h
val
static double val(void *priv, double ch)
Definition: aeval.c:77
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
idct
static void idct(int16_t block[64])
Definition: 4xm.c:166
a1
#define a1
Definition: regdef.h:47
rnd
#define rnd()
Definition: checkasm.h:115
ff_vp8dsp_init
void ff_vp8dsp_init(VP8DSPContext *c)
vp8dsp.h
mask
static const uint16_t mask[17]
Definition: lzw.c:38
intreadwrite.h
randomize_loopfilter_buffers
static void randomize_loopfilter_buffers(int lineoff, int str, int dir, int flim_E, int flim_I, int hev_thresh, uint8_t *buf, int force_hev)
Definition: vp8dsp.c:335
LOCAL_ALIGNED_16
#define LOCAL_ALIGNED_16(t, v,...)
Definition: mem_internal.h:130
q0
static const uint8_t q0[256]
Definition: twofish.c:81
check_idct_dc4
static void check_idct_dc4(void)
Definition: vp8dsp.c:157
if
if(ret)
Definition: filter_design.txt:179
checkasm_check_vp8dsp
void checkasm_check_vp8dsp(void)
Definition: vp8dsp.c:508
call_new
#define call_new(...)
Definition: checkasm.h:209
NULL
#define NULL
Definition: coverity.c:32
src
#define src
Definition: vp8dsp.c:255
setdx
#define setdx(a, b, c, d)
Definition: vp8dsp.c:331
vp8_mc_func
void(* vp8_mc_func)(uint8_t *dst, ptrdiff_t dstStride, uint8_t *src, ptrdiff_t srcStride, int h, int x, int y)
Definition: vp8dsp.h:33
VP8DSPContext
Definition: vp8dsp.h:37
setdx2
#define setdx2(a, b, o, c, d, e)
Definition: vp8dsp.c:333
dc
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]
Definition: snow.txt:400
size
int size
Definition: twinvq_data.h:10344
check_luma_dc_wht
static void check_luma_dc_wht(void)
Definition: vp8dsp.c:200
height
#define height
report
#define report
Definition: checkasm.h:134
bench_new
#define bench_new(...)
Definition: checkasm.h:272
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
common.h
check_mc
static void check_mc(void)
Definition: vp8dsp.c:266
avcodec.h
stride
#define stride
Definition: h264pred_template.c:537
dct4x4
static void dct4x4(int16_t *coef)
Definition: vp8dsp.c:46
fill_loopfilter_buffers
static void fill_loopfilter_buffers(uint8_t *buf, ptrdiff_t stride, int w, int h)
Definition: vp8dsp.c:369
SRC_BUF_STRIDE
#define SRC_BUF_STRIDE
Definition: vp8dsp.c:250
AV_CPU_FLAG_MMX
#define AV_CPU_FLAG_MMX
standard MMX
Definition: cpu.h:29
check_idct
static void check_idct(void)
Definition: vp8dsp.c:112
check_loopfilter_16y
static void check_loopfilter_16y(void)
Definition: vp8dsp.c:380
setpx
#define setpx(a, b, c)
Definition: vp8dsp.c:329
d
d
Definition: ffmpeg_filter.c:153
convert_header.str
string str
Definition: convert_header.py:20
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
h
h
Definition: vp9dsp_template.c:2038
check_loopfilter_8uv
static void check_loopfilter_8uv(void)
Definition: vp8dsp.c:424
int
int
Definition: ffmpeg_filter.c:153
snprintf
#define snprintf
Definition: snprintf.h:34
block1
static int16_t block1[64]
Definition: dct.c:118
ff_vp78dsp_init
av_cold void ff_vp78dsp_init(VP8DSPContext *dsp)
Definition: vp8dsp.c:668