/*
 * Build a VP56mv compound literal from two motion vectors: .x is
 * ROUNDED_DIV() of the sum of the two x members with divisor 2, and .y is
 * the same for the y members.
 *
 * a and b must be expressions of a type with x and y members. Each argument
 * is expanded twice, so it must be free of side effects. The parameters are
 * parenthesized so that arguments such as *ptr or cond ? u : v bind to the
 * member access as intended.
 */
#define ROUNDED_DIV_MVx2(a, b) \
    (VP56mv) { .x = ROUNDED_DIV((a).x + (b).x, 2), \
               .y = ROUNDED_DIV((a).y + (b).y, 2) }
/*
 * Build a VP56mv compound literal from four motion vectors: .x is
 * ROUNDED_DIV() of the sum of the four x members with divisor 4, and .y is
 * the same for the y members.
 *
 * a, b, c and d must be expressions of a type with x and y members. Each
 * argument is expanded twice, so it must be free of side effects. The
 * parameters are parenthesized so that arguments such as *ptr or
 * cond ? u : v bind to the member access as intended.
 */
#define ROUNDED_DIV_MVx4(a, b, c, d) \
    (VP56mv) { .x = ROUNDED_DIV((a).x + (b).x + (c).x + (d).x, 4), \
               .y = ROUNDED_DIV((a).y + (b).y + (c).y + (d).y, 4) }
33 { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
34 { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
38 int row =
td->row, col =
td->col;
42 ptrdiff_t ls_y =
td->y_stride, ls_uv =
td->uv_stride;
46 tref2 = &
s->s.refs[
s->s.h.refidx[
b->ref[1]]];
60 row << 3, col << 3, &
b->mv[0][0],,,,, 8, 4, w1, h1, 0);
62 td->dst[0] + 4 * ls_y, ls_y,
64 (row << 3) + 4, col << 3, &
b->mv[2][0],,,,, 8, 4, w1, h1, 0);
65 w1 = (w1 +
s->ss_h) >>
s->ss_h;
70 td->dst[1],
td->dst[2], ls_uv,
73 row << 2, col << (3 -
s->ss_h),
74 &uvmv,,,,, 8 >>
s->ss_h, 4, w1, h1, 0);
77 td->dst[1],
td->dst[2], ls_uv,
80 row << 3, col << (3 -
s->ss_h),
81 &
b->mv[0][0],,,,, 8 >>
s->ss_h, 4, w1, h1, 0);
91 td->dst[1] + 4 * ls_uv,
td->dst[2] + 4 * ls_uv, ls_uv,
94 (row << 3) + 4, col << (3 -
s->ss_h),
95 &uvmv,,,,, 8 >>
s->ss_h, 4, w1, h1, 0);
100 ref2->data[0], ref2->linesize[0], tref2,
101 row << 3, col << 3, &
b->mv[0][1],,,,, 8, 4, w2, h2, 1);
103 td->dst[0] + 4 * ls_y, ls_y,
104 ref2->data[0], ref2->linesize[0], tref2,
105 (row << 3) + 4, col << 3, &
b->mv[2][1],,,,, 8, 4, w2, h2, 1);
106 w2 = (w2 +
s->ss_h) >>
s->ss_h;
111 td->dst[1],
td->dst[2], ls_uv,
112 ref2->data[1], ref2->linesize[1],
113 ref2->data[2], ref2->linesize[2], tref2,
114 row << 2, col << (3 -
s->ss_h),
115 &uvmv,,,,, 8 >>
s->ss_h, 4, w2, h2, 1);
118 td->dst[1],
td->dst[2], ls_uv,
119 ref2->data[1], ref2->linesize[1],
120 ref2->data[2], ref2->linesize[2], tref2,
121 row << 3, col << (3 -
s->ss_h),
122 &
b->mv[0][1],,,,, 8 >>
s->ss_h, 4, w2, h2, 1);
132 td->dst[1] + 4 * ls_uv,
td->dst[2] + 4 * ls_uv, ls_uv,
133 ref2->data[1], ref2->linesize[1],
134 ref2->data[2], ref2->linesize[2], tref2,
135 (row << 3) + 4, col << (3 -
s->ss_h),
136 &uvmv,,,,, 8 >>
s->ss_h, 4, w2, h2, 1);
142 row << 3, col << 3, &
b->mv[0][0],,,,, 4, 8, w1, h1, 0);
145 row << 3, (col << 3) + 4, &
b->mv[1][0],,,,, 4, 8, w1, h1, 0);
146 h1 = (h1 +
s->ss_v) >>
s->ss_v;
151 td->dst[1],
td->dst[2], ls_uv,
154 row << (3 -
s->ss_v), col << 2,
155 &uvmv,,,,, 4, 8 >>
s->ss_v, w1, h1, 0);
158 td->dst[1],
td->dst[2], ls_uv,
161 row << (3 -
s->ss_v), col << 3,
162 &
b->mv[0][0],,,,, 4, 8 >>
s->ss_v, w1, h1, 0);
164 td->dst[1] + 4 * bytesperpixel,
165 td->dst[2] + 4 * bytesperpixel, ls_uv,
168 row << (3 -
s->ss_v), (col << 3) + 4,
169 &
b->mv[1][0],,,,, 4, 8 >>
s->ss_v, w1, h1, 0);
174 ref2->data[0], ref2->linesize[0], tref2,
175 row << 3, col << 3, &
b->mv[0][1],,,,, 4, 8, w2, h2, 1);
177 ref2->data[0], ref2->linesize[0], tref2,
178 row << 3, (col << 3) + 4, &
b->mv[1][1],,,,, 4, 8, w2, h2, 1);
179 h2 = (h2 +
s->ss_v) >>
s->ss_v;
184 td->dst[1],
td->dst[2], ls_uv,
185 ref2->data[1], ref2->linesize[1],
186 ref2->data[2], ref2->linesize[2], tref2,
187 row << (3 -
s->ss_v), col << 2,
188 &uvmv,,,,, 4, 8 >>
s->ss_v, w2, h2, 1);
191 td->dst[1],
td->dst[2], ls_uv,
192 ref2->data[1], ref2->linesize[1],
193 ref2->data[2], ref2->linesize[2], tref2,
194 row << (3 -
s->ss_v), col << 3,
195 &
b->mv[0][1],,,,, 4, 8 >>
s->ss_v, w2, h2, 1);
197 td->dst[1] + 4 * bytesperpixel,
198 td->dst[2] + 4 * bytesperpixel, ls_uv,
199 ref2->data[1], ref2->linesize[1],
200 ref2->data[2], ref2->linesize[2], tref2,
201 row << (3 -
s->ss_v), (col << 3) + 4,
202 &
b->mv[1][1],,,,, 4, 8 >>
s->ss_v, w2, h2, 1);
216 row << 3, col << 3, &
b->mv[0][0],
217 0, 0, 8, 8, 4, 4, w1, h1, 0);
220 row << 3, (col << 3) + 4, &
b->mv[1][0],
221 4, 0, 8, 8, 4, 4, w1, h1, 0);
223 td->dst[0] + 4 * ls_y, ls_y,
225 (row << 3) + 4, col << 3, &
b->mv[2][0],
226 0, 4, 8, 8, 4, 4, w1, h1, 0);
228 td->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
230 (row << 3) + 4, (col << 3) + 4, &
b->mv[3][0],
231 4, 4, 8, 8, 4, 4, w1, h1, 0);
237 b->mv[2][0],
b->mv[3][0]);
239 td->dst[1],
td->dst[2], ls_uv,
243 &uvmv, 0, 0, 4, 4, 4, 4, w1, h1, 0);
247 td->dst[1],
td->dst[2], ls_uv,
251 &uvmv, 0, 0, 8, 4, 4, 4, w1, h1, 0);
254 td->dst[1] + 4 * bytesperpixel,
255 td->dst[2] + 4 * bytesperpixel, ls_uv,
258 row << 2, (col << 3) + 4,
259 &uvmv, 4, 0, 8, 4, 4, 4, w1, h1, 0);
266 td->dst[1],
td->dst[2], ls_uv,
270 &uvmv, 0, 0, 4, 8, 4, 4, w1, h1, 0);
276 td->dst[1] + 4 * ls_uv,
td->dst[2] + 4 * ls_uv, ls_uv,
279 (row << 3) + 4, col << 2,
280 &uvmv, 0, 4, 4, 8, 4, 4, w1, h1, 0);
283 td->dst[1],
td->dst[2], ls_uv,
287 &
b->mv[0][0], 0, 0, 8, 8, 4, 4, w1, h1, 0);
289 td->dst[1] + 4 * bytesperpixel,
290 td->dst[2] + 4 * bytesperpixel, ls_uv,
293 row << 3, (col << 3) + 4,
294 &
b->mv[1][0], 4, 0, 8, 8, 4, 4, w1, h1, 0);
296 td->dst[1] + 4 * ls_uv,
td->dst[2] + 4 * ls_uv, ls_uv,
299 (row << 3) + 4, col << 3,
300 &
b->mv[2][0], 0, 4, 8, 8, 4, 4, w1, h1, 0);
302 td->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
303 td->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
306 (row << 3) + 4, (col << 3) + 4,
307 &
b->mv[3][0], 4, 4, 8, 8, 4, 4, w1, h1, 0);
313 ref2->data[0], ref2->linesize[0], tref2,
314 row << 3, col << 3, &
b->mv[0][1], 0, 0, 8, 8, 4, 4, w2, h2, 1);
316 ref2->data[0], ref2->linesize[0], tref2,
317 row << 3, (col << 3) + 4, &
b->mv[1][1], 4, 0, 8, 8, 4, 4, w2, h2, 1);
319 td->dst[0] + 4 * ls_y, ls_y,
320 ref2->data[0], ref2->linesize[0], tref2,
321 (row << 3) + 4, col << 3, &
b->mv[2][1], 0, 4, 8, 8, 4, 4, w2, h2, 1);
323 td->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
324 ref2->data[0], ref2->linesize[0], tref2,
325 (row << 3) + 4, (col << 3) + 4, &
b->mv[3][1], 4, 4, 8, 8, 4, 4, w2, h2, 1);
331 b->mv[2][1],
b->mv[3][1]);
333 td->dst[1],
td->dst[2], ls_uv,
334 ref2->data[1], ref2->linesize[1],
335 ref2->data[2], ref2->linesize[2], tref2,
337 &uvmv, 0, 0, 4, 4, 4, 4, w2, h2, 1);
341 td->dst[1],
td->dst[2], ls_uv,
342 ref2->data[1], ref2->linesize[1],
343 ref2->data[2], ref2->linesize[2], tref2,
345 &uvmv, 0, 0, 8, 4, 4, 4, w2, h2, 1);
348 td->dst[1] + 4 * bytesperpixel,
349 td->dst[2] + 4 * bytesperpixel, ls_uv,
350 ref2->data[1], ref2->linesize[1],
351 ref2->data[2], ref2->linesize[2], tref2,
352 row << 2, (col << 3) + 4,
353 &uvmv, 4, 0, 8, 4, 4, 4, w2, h2, 1);
360 td->dst[1],
td->dst[2], ls_uv,
361 ref2->data[1], ref2->linesize[1],
362 ref2->data[2], ref2->linesize[2], tref2,
364 &uvmv, 0, 0, 4, 8, 4, 4, w2, h2, 1);
370 td->dst[1] + 4 * ls_uv,
td->dst[2] + 4 * ls_uv, ls_uv,
371 ref2->data[1], ref2->linesize[1],
372 ref2->data[2], ref2->linesize[2], tref2,
373 (row << 3) + 4, col << 2,
374 &uvmv, 0, 4, 4, 8, 4, 4, w2, h2, 1);
377 td->dst[1],
td->dst[2], ls_uv,
378 ref2->data[1], ref2->linesize[1],
379 ref2->data[2], ref2->linesize[2], tref2,
381 &
b->mv[0][1], 0, 0, 8, 8, 4, 4, w2, h2, 1);
383 td->dst[1] + 4 * bytesperpixel,
384 td->dst[2] + 4 * bytesperpixel, ls_uv,
385 ref2->data[1], ref2->linesize[1],
386 ref2->data[2], ref2->linesize[2], tref2,
387 row << 3, (col << 3) + 4,
388 &
b->mv[1][1], 4, 0, 8, 8, 4, 4, w2, h2, 1);
390 td->dst[1] + 4 * ls_uv,
td->dst[2] + 4 * ls_uv, ls_uv,
391 ref2->data[1], ref2->linesize[1],
392 ref2->data[2], ref2->linesize[2], tref2,
393 (row << 3) + 4, col << 3,
394 &
b->mv[2][1], 0, 4, 8, 8, 4, 4, w2, h2, 1);
396 td->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
397 td->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
398 ref2->data[1], ref2->linesize[1],
399 ref2->data[2], ref2->linesize[2], tref2,
400 (row << 3) + 4, (col << 3) + 4,
401 &
b->mv[3][1], 4, 4, 8, 8, 4, 4, w2, h2, 1);
407 int bwl = bwlog_tab[0][
b->bs];
415 row << 3, col << 3, &
b->mv[0][0], 0, 0, bw, bh, bw, bh, w1, h1, 0);
416 w1 = (w1 +
s->ss_h) >>
s->ss_h;
417 h1 = (h1 +
s->ss_v) >>
s->ss_v;
419 td->dst[1],
td->dst[2], ls_uv,
422 row << (3 -
s->ss_v), col << (3 -
s->ss_h),
423 &
b->mv[0][0], 0, 0, uvbw, uvbh, uvbw, uvbh, w1, h1, 0);
427 ref2->data[0], ref2->linesize[0], tref2,
428 row << 3, col << 3, &
b->mv[0][1], 0, 0, bw, bh, bw, bh, w2, h2, 1);
429 w2 = (w2 +
s->ss_h) >>
s->ss_h;
430 h2 = (h2 +
s->ss_v) >>
s->ss_v;
432 td->dst[1],
td->dst[2], ls_uv,
433 ref2->data[1], ref2->linesize[1],
434 ref2->data[2], ref2->linesize[2], tref2,
435 row << (3 -
s->ss_v), col << (3 -
s->ss_h),
436 &
b->mv[0][1], 0, 0, uvbw, uvbh, uvbw, uvbh, w2, h2, 1);