// Bit-depth configuration macros. bitdepth_max and SCALING_SIZE are each
// defined twice below, so the two groups presumably sit in alternative
// preprocessor branches; the selecting condition is not visible in this excerpt.
// First group: `entry` aliases int16_t, the pixel maximum is derived from a
// `bitdepth` value in scope, and the scaling table has 4096 entries.
60 # define entry int16_t
61 # define bitdepth_max ((1 << bitdepth) - 1)
// HBD_DECL appends a `bitdepth` parameter to a parameter list; HBD_CALL
// appends the matching argument at a call site.
62 # define HBD_DECL , const int bitdepth
63 # define HBD_CALL , bitdepth
64 # define SCALING_SIZE 4096
// Second group: fixed pixel maximum of UINT8_MAX and a 256-entry scaling table.
68 # define bitdepth_max UINT8_MAX
71 # define SCALING_SIZE 256
// Fragment of a grain-generation routine (its header and several body lines
// are outside this excerpt). Visible parts: range setup and an
// autoregressive filter over a 2-D grain buffer `buf`.
// Number of bits above 8; the grain range and shift below scale with it.
79 const int bitdepth_min_8 =
bitdepth - 8;
// Mutable working copy of the seed supplied in params.
80 unsigned seed = params->seed;
// Shift that shrinks as bit depth grows and grows with grain_scale_shift.
// NOTE(review): the statement that consumes `shift` is not visible here.
81 const int shift = 4 - bitdepth_min_8 +
data->grain_scale_shift;
// Grain samples are confined to [-grain_ctr, grain_ctr - 1], i.e.
// [-128, 127] scaled up by the extra bit depth.
82 const int grain_ctr = 128 << bitdepth_min_8;
83 const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
// Extent of the filter neighbourhood, in samples, in each direction.
86 const int ar_lag =
data->ar_coeff_lag;
// Only columns ar_pad .. GRAIN_WIDTH - ar_pad - 1 are filtered.
// NOTE(review): x + dx stays inside that margin only if ar_lag <= ar_pad —
// ar_pad's definition is not visible in this fragment; confirm.
96 for (
int x = ar_pad; x <
GRAIN_WIDTH - ar_pad; x++) {
// Neighbourhood walk: rows dy = -ar_lag..0, columns dx = -ar_lag..ar_lag.
99 for (
int dy = -ar_lag; dy <= 0; dy++) {
100 for (
int dx = -ar_lag; dx <= ar_lag; dx++) {
// Accumulate coefficient * neighbour; `coeff` advances one entry per use.
103 sum += *(
coeff++) * buf[y + dy][x + dx];
// Add the scaled sum to the current sample (round2 is presumably a rounding
// right shift — defined elsewhere) and clamp to the grain range.
107 grain = buf[y][x] +
round2(sum,
data->ar_coeff_shift);
108 buf[y][x] =
av_clip(grain, grain_min, grain_max);
// Tail of a parameter list: horizontal/vertical subsampling shifts, followed
// by the optional bit-depth parameter injected by HBD_DECL.
117 const int subx,
const int suby
HBD_DECL)
// Number of bits above 8; the grain range and shift below scale with it.
120 const int bitdepth_min_8 =
bitdepth - 8;
// The seed is perturbed with a different constant depending on `uv`, so the
// two values of `uv` start from different seeds.
121 unsigned seed = params->seed ^ (uv ? 0x49d8 : 0xb524);
// NOTE(review): the statement that consumes `shift` is not visible here.
122 const int shift = 4 - bitdepth_min_8 +
data->grain_scale_shift;
// Grain samples are confined to [-grain_ctr, grain_ctr - 1].
123 const int grain_ctr = 128 << bitdepth_min_8;
124 const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
// Margin (in samples) skipped at the top, left and right by the filter loop
// below; ar_lag is the neighbourhood extent taken from the grain parameters.
129 const int ar_pad = 3;
130 const int ar_lag =
data->ar_coeff_lag;
// First pass over the whole chromaH x chromaW area (loop body not visible in
// this excerpt).
132 for (
int y = 0; y < chromaH; y++) {
133 for (
int x = 0; x < chromaW; x++) {
// Second pass: filter rows from ar_pad downward and columns inside the
// left/right margins.
139 for (
int y = ar_pad; y < chromaH; y++) {
140 for (
int x = ar_pad; x < chromaW - ar_pad; x++) {
// Coefficient cursor is reset to the start of this plane's table for every
// output sample.
141 const int8_t *
coeff =
data->ar_coeffs_uv[uv];
// Neighbourhood walk: rows dy = -ar_lag..0, columns dx = -ar_lag..ar_lag.
143 for (
int dy = -ar_lag; dy <= 0; dy++) {
144 for (
int dx = -ar_lag; dx <= ar_lag; dx++) {
// Map the chroma position to the matching position in the luma grain buffer:
// strip the margin, scale by the subsampling shift, re-add the margin.
// NOTE(review): the condition guarding this luma path is not visible here.
148 const int lumaX = ((x - ar_pad) << subx) + ar_pad;
149 const int lumaY = ((y - ar_pad) << suby) + ar_pad;
// Special case when no luma scaling points are present (its body is not
// visible in this excerpt).
151 if (!
data->num_y_points)
// Sum the (suby + 1) x (subx + 1) luma grain samples covered by this chroma
// sample.
153 for (
int i = 0;
i <= suby;
i++) {
154 for (
int j = 0; j <= subx; j++) {
155 luma += buf_y[lumaY +
i][lumaX + j];
// Scale the sum down by subx + suby bits (an average when subx and suby are
// each 0 or 1 — assumed; confirm against callers), then weight it with the
// coefficient currently under the cursor.
158 luma =
round2(luma, subx + suby);
159 sum += luma * (*coeff);
// Ordinary neighbour: accumulate coefficient * chroma grain and advance.
163 sum += *(
coeff++) * buf[y + dy][x + dx];
// Add the scaled sum to the current sample and clamp to the grain range.
167 grain = buf[y][x] +
round2(sum,
data->ar_coeff_shift);
168 buf[y][x] =
av_clip(grain, grain_min, grain_max);
// Tail of a parameter list: subsampling shifts, a block index pair (bx, by)
// and a sample position (x, y). The enclosing header is not visible.
177 const int subx,
const int suby,
178 const int bx,
const int by,
179 const int x,
const int y)
// Random value stored for block (bx, by).
181 const int randval =
offsets[bx][by];
// Derive the x offset from the bits above the low nibble and the y offset
// from the low nibble. The step (2 >> sub) is 2 without subsampling and 1
// with a subsampling shift of 1; both offsets start at 3.
182 const int offx = 3 + (2 >> subx) * (3 + (randval >> 4));
183 const int offy = 3 + (2 >> suby) * (3 + (randval & 0xF));
// Tail of a parameter list: block height, index of the block row, and the
// optional bit-depth parameter. The enclosing header is not visible.
193 const int bh,
const int row_num
HBD_DECL)
// Two seeds/rows are tracked when overlap is enabled and this is not the
// first block row; otherwise one.
196 const int rows = 1 + (
data->overlap_flag && row_num > 0);
197 const int bitdepth_min_8 =
bitdepth - 8;
// Grain samples are confined to [-grain_ctr, grain_ctr - 1].
198 const int grain_ctr = 128 << bitdepth_min_8;
199 const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
// Output clamp bounds. With limit_output_range set they are 16 and 235
// scaled to the bit depth; the alternative branch is not visible here.
203 int min_value, max_value;
204 if (
data->limit_output_range) {
205 min_value = 16 << bitdepth_min_8;
206 max_value = 235 << bitdepth_min_8;
// Per-row seeds: start from params->seed, then XOR a byte derived from
// (row_num - i) into bits 8..15 and another into bits 0..7. Because only
// row_num - i enters the formula, seed[1] equals what seed[0] would be for
// the previous block row.
213 for (
int i = 0;
i < rows;
i++) {
214 seed[
i] = params->seed;
215 seed[
i] ^= (((row_num -
i) * 37 + 178) & 0xFF) << 8;
216 seed[
i] ^= (((row_num -
i) * 173 + 105) & 0xFF);
// Number of leading rows/columns of this block that are blended with the
// neighbouring block: up to 2 when overlap is on and a neighbour exists
// (row_num != 0 / bx != 0), otherwise 0.
229 const int ystart =
data->overlap_flag && row_num ?
FFMIN(2, bh) : 0;
230 const int xstart =
data->overlap_flag && bx ?
FFMIN(2, bw) : 0;
// Blend weights per overlap position: {old/top weight, current weight}.
// Results are scaled back with round2(..., 5).
232 static const int w[2][2] = { { 27, 17 }, { 17, 27 } };
// The two loops over `rows` below differ by whether a left neighbour exists;
// their bodies are not visible in this excerpt.
234 if (
data->overlap_flag && bx) {
236 for (
int i = 0;
i < rows;
i++)
241 for (
int i = 0;
i < rows;
i++)
// add_noise_y(x, y, grain): locate source/destination pixels at column
// x + bx of row y (row stepping is done in bytes via `stride`), scale the
// grain by scaling[*src] with a scaling_shift-bit round2, and store the
// clamped sum. The macro expands to several statements and relies on
// src, dst, noise, src_row, dst_row, scaling and the clamp bounds being in
// scope at the expansion site.
244 #define add_noise_y(x, y, grain) \
245 src = (const pixel*)((const char*)src_row + (y) * stride) + (x) + bx; \
246 dst = (pixel*)((char*)dst_row + (y) * stride) + (x) + bx; \
247 noise = round2(scaling[ *src ] * (grain), data->scaling_shift); \
248 *dst = av_clip(*src + noise, min_value, max_value);
// Rows below the vertical overlap region.
250 for (
int y = ystart; y < bh; y++) {
// Columns right of the horizontal overlap region (body not visible here).
252 for (
int x = xstart; x < bw; x++) {
// Horizontal overlap columns: blend `old` with the current grain using the
// weights for column x, then clamp to the grain range.
// NOTE(review): where `old` and `grain` are fetched is not visible here.
258 for (
int x = 0; x < xstart; x++) {
261 grain =
round2(old *
w[x][0] + grain *
w[x][1], 5);
262 grain =
av_clip(grain, grain_min, grain_max);
// Rows inside the vertical overlap region.
267 for (
int y = 0; y < ystart; y++) {
// Columns outside the horizontal overlap: blend vertically with the weights
// for row y, then clamp.
269 for (
int x = xstart; x < bw; x++) {
272 grain =
round2(old *
w[y][0] + grain *
w[y][1], 5);
273 grain =
av_clip(grain, grain_min, grain_max);
// Corner region (both overlaps apply): blend `top` horizontally, blend the
// current grain horizontally, then blend the two results vertically,
// clamping after each step.
278 for (
int x = 0; x < xstart; x++) {
284 top =
round2(old *
w[x][0] + top *
w[x][1], 5);
285 top =
av_clip(top, grain_min, grain_max);
289 grain =
round2(old *
w[x][0] + grain *
w[x][1], 5);
290 grain =
av_clip(grain, grain_min, grain_max);
293 grain =
round2(top *
w[y][0] + grain *
w[y][1], 5);
294 grain =
av_clip(grain, grain_min, grain_max);
// Tail of a parameter list: block-row index, pointer to and byte stride of
// the luma row, chroma plane selector `uv`, the `is_id` flag, subsampling
// shifts, and the optional bit-depth parameter. The header is not visible.
306 const int row_num,
const pixel *
const luma_row,
307 const ptrdiff_t luma_stride,
const int uv,
const int is_id,
308 const int sx,
const int sy
HBD_DECL)
// Two seeds/rows are tracked when overlap is enabled and this is not the
// first block row; otherwise one.
311 const int rows = 1 + (
data->overlap_flag && row_num > 0);
312 const int bitdepth_min_8 =
bitdepth - 8;
// Grain samples are confined to [-grain_ctr, grain_ctr - 1].
313 const int grain_ctr = 128 << bitdepth_min_8;
314 const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
// Output clamp bounds. With limit_output_range set, the upper bound is 235
// when is_id is set and 240 otherwise (both scaled to the bit depth); the
// alternative branch is not visible here.
318 int min_value, max_value;
319 if (
data->limit_output_range) {
320 min_value = 16 << bitdepth_min_8;
321 max_value = (is_id ? 235 : 240) << bitdepth_min_8;
// Per-row seeds: start from params->seed, then XOR a byte derived from
// (row_num - i) into bits 8..15 and another into bits 0..7.
328 for (
int i = 0;
i < rows;
i++) {
329 seed[
i] = params->seed;
330 seed[
i] ^= (((row_num -
i) * 37 + 178) & 0xFF) << 8;
331 seed[
i] ^= (((row_num -
i) * 173 + 105) & 0xFF);
// Walk the row in blocks; the horizontal step is FG_BLOCK_SIZE reduced by
// the horizontal subsampling shift.
337 for (
unsigned bx = 0; bx < pw; bx += FG_BLOCK_SIZE >> sx) {
// Overlap extent in chroma samples: 2 without subsampling, 1 with a shift of
// 1, and 0 when overlap is off or there is no neighbour on that side.
344 const int ystart =
data->overlap_flag && row_num ?
FFMIN(2 >> sy, bh) : 0;
345 const int xstart =
data->overlap_flag && bx ?
FFMIN(2 >> sx, bw) : 0;
// Blend weights indexed by [subsampling shift][overlap position][old/new].
// Only the first (non-subsampled) table is visible in this excerpt.
347 static const int w[2 ][2 ][2] = {
348 { { 27, 17 }, { 17, 27 } },
// The two loops over `rows` below differ by whether a left neighbour exists;
// their bodies are not visible in this excerpt.
352 if (
data->overlap_flag && bx) {
354 for (
int i = 0;
i < rows;
i++)
359 for (
int i = 0;
i < rows;
i++)
// add_noise_uv(x, y, grain): compute the luma column for this chroma sample,
// optionally average with the next luma pixel, derive the scaling index
// `val` (when chroma_scaling_from_luma is off, from a weighted mix of the
// luma average and the chroma source, shifted right by 6, plus a
// depth-scaled offset), then add the scaled grain to the source pixel and
// clamp. Several continuation lines of this macro are missing from the
// excerpt, so the description covers only the visible statements.
362 #define add_noise_uv(x, y, grain) \
363 lx = (bx + x) << sx; \
365 luma = (const pixel*)((const char*)luma_row + ly * luma_stride) + lx;\
368 avg = (avg + luma[1] + 1) >> 1; \
369 src = (const pixel*)((const char *)src_row + (y) * stride) + bx + (x);\
370 dst = (pixel *) ((char *) dst_row + (y) * stride) + bx + (x); \
372 if (!data->chroma_scaling_from_luma) { \
373 const int combined = avg * data->uv_mult_luma[uv] + \
374 *src * data->uv_mult[uv]; \
375 val = av_clip( (combined >> 6) + \
376 (data->uv_offset[uv] * (1 << bitdepth_min_8)), \
379 noise = round2(scaling[ val ] * (grain), data->scaling_shift); \
380 *dst = av_clip(*src + noise, min_value, max_value);
// Rows below the vertical overlap region.
382 for (
int y = ystart; y < bh; y++) {
// Columns right of the horizontal overlap region (body not visible here).
384 for (
int x = xstart; x < bw; x++) {
// Horizontal overlap columns: blend `old` with the current grain using the
// weights for this subsampling and column, then clamp.
390 for (
int x = 0; x < xstart; x++) {
393 grain =
round2(old *
w[sx][x][0] + grain *
w[sx][x][1], 5);
394 grain =
av_clip(grain, grain_min, grain_max);
// Rows inside the vertical overlap region.
399 for (
int y = 0; y < ystart; y++) {
// Columns outside the horizontal overlap: blend vertically, then clamp.
401 for (
int x = xstart; x < bw; x++) {
404 grain =
round2(old *
w[sy][y][0] + grain *
w[sy][y][1], 5);
405 grain =
av_clip(grain, grain_min, grain_max);
// Corner region: blend `top` horizontally, blend the current grain
// horizontally, then blend the two results vertically, clamping each time.
410 for (
int x = 0; x < xstart; x++) {
416 top =
round2(old *
w[sx][x][0] + top *
w[sx][x][1], 5);
417 top =
av_clip(top, grain_min, grain_max);
421 grain =
round2(old *
w[sx][x][0] + grain *
w[sx][x][1], 5);
422 grain =
av_clip(grain, grain_min, grain_max);
425 grain =
round2(top *
w[sy][y][0] + grain *
w[sy][y][1], 5);
426 grain =
av_clip(grain, grain_min, grain_max);
// Fragment that fills a `scaling` lookup table from a list of (x, y) control
// points. The enclosing header and the definition of shift_x are not visible.
437 const int scaling_size = 1 <<
bitdepth;
// Table index of the last control point.
// NOTE(review): this initializer reads points[num - 1]. The zero-fill just
// below looks like the "no points" path; if that path is guarded by
// num == 0 on a line not shown, the read here happens first and indexes
// points[-1] — confirm and, if so, move this after the guard.
438 const int max_value = points[num - 1][0] << shift_x;
// Clear the whole table (guarding condition not visible in this excerpt).
442 memset(scaling, 0, scaling_size);
// Entries before the first control point take the first point's value.
447 memset(scaling, points[0][1], points[0][0] << shift_x);
// Linear interpolation between each pair of consecutive control points.
450 for (
int i = 0;
i < num - 1;
i++) {
451 const int bx = points[
i][0];
452 const int by = points[
i][1];
453 const int ex = points[
i+1][0];
454 const int ey = points[
i+1][1];
455 const int dx = ex - bx;
456 const int dy = ey - by;
// Slope in 16.16 fixed point, with the reciprocal of dx rounded to nearest.
// NOTE(review): divides by dx — assumes consecutive points never share the
// same x; confirm that callers guarantee this.
457 const int delta = dy * ((0x10000 + (dx >> 1)) / dx);
// `d` starts at 0x8000 (one half in 16.16) so that d >> 16 rounds to
// nearest; the statement advancing `d` is not visible in this excerpt.
459 for (
int x = 0, d = 0x8000; x < dx; x++) {
460 scaling[(bx + x) << shift_x] = by + (d >> 16);
// Entries from the last control point to the end take the last point's value.
466 memset(&scaling[max_value], points[num - 1][1], scaling_size - max_value);
// Second pass: the loop above only wrote every (1 << shift_x)-th entry, so
// fill the entries in between by interpolating across each gap.
469 for (
int i = 0;
i < num - 1;
i++) {
// pad: spacing between entries written by the first pass; rnd: half of it,
// used as the rounding bias.
470 const int pad = 1 << shift_x,
rnd = pad >> 1;
471 const int bx = points[
i][0] << shift_x;
472 const int ex = points[
i+1][0] << shift_x;
473 const int dx = ex - bx;
474 for (
int x = 0; x < dx; x += pad) {
// Difference between the two already-written endpoints of this gap.
475 const int range = scaling[bx + x + pad] - scaling[bx + x];
// `r` starts at the rounding bias; the statement advancing it is not visible
// in this excerpt (presumably it adds `range` per step — confirm).
476 for (
int n = 1,
r =
rnd; n < pad; n++) {
478 scaling[bx + x + n] = scaling[bx + x] + (
r >> shift_x);
// Fragment that applies grain to one block row of a frame: luma first, then
// the two chroma planes. The enclosing header is not visible; ss_x / ss_y are
// the chroma subsampling shifts.
487 const int ss_x,
const int ss_y,
// Chroma plane width: output width divided by 2^ss_x, rounded up.
495 const int cpw = (
out->width + ss_x) >> ss_x;
// Address of this block row within input plane 0: `row` blocks of
// FG_BLOCK_SIZE lines each, stepped in bytes via linesize[0].
500 ((
char *) in->data[0] + row *
FG_BLOCK_SIZE * in->linesize[0]);
// Luma is only processed when luma scaling points exist (the call itself is
// only partially visible; it uses scaling[0] and the full output width).
502 if (
data->num_y_points) {
506 out->linesize[0], params,
out->width, scaling[0],
// Nothing to do for chroma when neither plane has scaling points and chroma
// scaling is not derived from luma (the branch body is not visible here).
510 if (!
data->num_uv_points[0] && !
data->num_uv_points[1] &&
511 !
data->chroma_scaling_from_luma)
// Odd width with horizontal subsampling: replicate the last luma pixel of
// each visited row into column out->width.
// NOTE(review): this writes one pixel past the visible width into the luma
// source buffer — assumes each row has at least one pixel of padding; confirm.
517 if (
out->width & ss_x) {
518 pixel *ptr = luma_src;
519 for (
int y = 0; y < bh; y++) {
520 ptr[
out->width] = ptr[
out->width - 1];
// Step by one line, or two lines when vertically subsampled.
521 ptr = (
pixel *) ((
char *) ptr + (in->linesize[0] << ss_y));
// Chroma scaling derived from luma: both chroma planes use the luma scaling
// table scaling[0].
// NOTE(review): in->linesize[1] is passed for both planes — assumes planes 1
// and 2 share a stride; confirm.
525 if (
data->chroma_scaling_from_luma) {
526 for (
int pl = 0; pl < 2; pl++)
528 (
const pixel *) ((
const char *) in->data[1 + pl] + uv_off),
529 in->linesize[1], params, cpw, scaling[0],
530 grain_lut[1 + pl], bh, row, luma_src,
531 in->linesize[0], pl, is_id, ss_x, ss_y
HBD_CALL);
// Otherwise each chroma plane is processed only if it has its own scaling
// points, using its own table scaling[1 + pl].
533 for (
int pl = 0; pl < 2; pl++) {
534 if (
data->num_uv_points[pl]) {
536 (
const pixel *) ((
const char *) in->data[1 + pl] + uv_off),
537 in->linesize[1], params, cpw, scaling[1 + pl],
538 grain_lut[1 + pl], bh, row, luma_src,
539 in->linesize[0], pl, is_id, ss_x, ss_y
HBD_CALL);
// Fragment of the top-level driver. Only the conditions are visible; the
// statements they guard are missing from this excerpt.
// Chroma subsampling shifts taken from the pixel format descriptor.
554 const int subx =
desc->log2_chroma_w, suby =
desc->log2_chroma_h;
// Per-plane work needed when the plane has scaling points or chroma scaling
// is derived from luma (presumably chroma grain generation — confirm).
558 if (
data->num_uv_points[0] ||
data->chroma_scaling_from_luma)
560 if (
data->num_uv_points[1] ||
data->chroma_scaling_from_luma)
// Luma-related work is needed either for luma itself or because chroma
// scaling is derived from luma (presumably the luma scaling table — confirm).
564 if (
data->num_y_points ||
data->chroma_scaling_from_luma)
// Per-plane work needed only when that chroma plane has its own scaling
// points (presumably that plane's scaling table — confirm).
566 if (
data->num_uv_points[0])
568 if (
data->num_uv_points[1])
// Process the frame one block row at a time.
571 for (
int row = 0; row < rows; row++) {