/* Patch size constant used for BRIEF descriptors.
 * NOTE(review): presumably the side length, in pixels, of the square patch
 * sampled around each feature point - confirm against the kernel source. */
#define BRIEF_PATCH_SIZE 31
/* Half of BRIEF_PATCH_SIZE; integer division truncates, so this is 15. */
#define BRIEF_PATCH_SIZE_HALF (BRIEF_PATCH_SIZE / 2)

/* NOTE(review): looks like the capacity of the contiguous matches array -
 * confirm against the allocation site. */
#define MATCHES_CONTIG_SIZE 2000

/* Integer division of a by b, rounded up instead of truncated.
 * Every argument use and the whole expansion are parenthesized so that
 * expression arguments (e.g. ROUNDED_UP_DIV(w, n + 1)) group as written.
 * b is still evaluated twice: do not pass arguments with side effects. */
#define ROUNDED_UP_DIV(a, b) (((a) + ((b) - 1)) / (b))
314 return (
av_lfg_get(alfg) % (high - low)) + low;
320 return (
double)total_time / (double)num_frames / 1000000.0;
332 double x1 = point_pairs[0].
p.
p1.s[0];
333 double y1 = point_pairs[0].
p.
p1.s[1];
334 double x2 = point_pairs[1].
p.
p1.s[0];
335 double y2 = point_pairs[1].
p.
p1.s[1];
336 double x3 = point_pairs[2].
p.
p1.s[0];
337 double y3 = point_pairs[2].
p.
p1.s[1];
340 double X1 = point_pairs[0].
p.
p2.s[0];
341 double Y1 = point_pairs[0].
p.
p2.s[1];
342 double X2 = point_pairs[1].
p.
p2.s[0];
343 double Y2 = point_pairs[1].
p.
p2.s[1];
344 double X3 = point_pairs[2].
p.
p2.s[0];
345 double Y3 = point_pairs[2].
p.
p2.s[1];
347 double d = 1.0 / ( x1*(y2-y3) + x2*(y3-y1) + x3*(y1-y2) );
349 model[0] = d * ( X1*(y2-y3) + X2*(y3-y1) + X3*(y1-y2) );
350 model[1] = d * ( X1*(x3-x2) + X2*(x1-x3) + X3*(x2-x1) );
351 model[2] = d * ( X1*(x2*y3 - x3*y2) + X2*(x3*y1 - x1*y3) + X3*(x1*y2 - x2*y1) );
353 model[3] = d * ( Y1*(y2-y3) + Y2*(y3-y1) + Y3*(y1-y2) );
354 model[4] = d * ( Y1*(x3-x2) + Y2*(x1-x3) + Y3*(x2-x1) );
355 model[5] = d * ( Y1*(x2*y3 - x3*y2) + Y2*(x3*y1 - x1*y3) + Y3*(x1*y2 - x2*y1) );
363 for (j = 0; j <
i; j++) {
364 double dx1 = points[j]->s[0] - points[
i]->s[0];
365 double dy1 = points[j]->s[1] - points[
i]->s[1];
367 for (k = 0; k < j; k++) {
368 double dx2 = points[k]->s[0] - points[
i]->s[0];
369 double dy2 = points[k]->s[1] - points[
i]->s[1];
374 if (
fabs(dx2*dy1 - dy2*dx1) <= 1.0) {
387 const cl_float2 *prev_points[] = {
388 &pairs_subset[0].
p.
p1,
389 &pairs_subset[1].
p.
p1,
390 &pairs_subset[2].
p.
p1
393 const cl_float2 *curr_points[] = {
394 &pairs_subset[0].
p.
p2,
395 &pairs_subset[1].
p.
p2,
396 &pairs_subset[2].
p.
p2
406 const int num_point_pairs,
411 int i = 0, j, iters = 0;
413 for (; iters < max_attempts; iters++) {
414 for (
i = 0;
i < 3 && iters < max_attempts;) {
418 idx_i = idx[
i] =
rand_in(0, num_point_pairs, alfg);
420 for (j = 0; j <
i; j++) {
421 if (idx_i == idx[j]) {
431 pairs_subset[
i] = point_pairs[idx[
i]];
441 return i == 3 && iters < max_attempts;
447 const int num_point_pairs,
451 double F0 = model[0],
F1 = model[1],
F2 = model[2];
452 double F3 = model[3], F4 = model[4], F5 = model[5];
454 for (
int i = 0;
i < num_point_pairs;
i++) {
455 const cl_float2 *
f = &point_pairs[
i].
p.
p1;
456 const cl_float2 *t = &point_pairs[
i].
p.
p2;
458 double a = F0*
f->s[0] +
F1*
f->s[1] +
F2 - t->s[0];
459 double b =
F3*
f->s[0] + F4*
f->s[1] + F5 - t->s[1];
471 const int num_point_pairs,
476 float t = (float)(thresh * thresh);
477 int i, n = num_point_pairs, num_inliers = 0;
481 for (
i = 0;
i < n;
i++) {
509 confidence =
av_clipd(confidence, 0.0, 1.0);
510 num_outliers =
av_clipd(num_outliers, 0.0, 1.0);
513 num =
FFMAX(1.0 - confidence, DBL_MIN);
514 denom = 1.0 - pow(1.0 - num_outliers, 3);
515 if (denom < DBL_MIN) {
522 return denom >= 0 || -num >= max_iters * (-denom) ? max_iters : (
int)
round(num / denom);
531 const int num_point_pairs,
533 const double threshold,
535 const double confidence
538 double best_model[6], model[6];
541 int iter, niters =
FFMAX(max_iters, 1);
542 int good_count, max_good_count = 0;
545 if (num_point_pairs < 3) {
547 }
else if (num_point_pairs == 3) {
551 for (
int i = 0;
i < 3; ++
i) {
558 for (iter = 0; iter < niters; ++iter) {
559 int found =
get_subset(&deshake_ctx->
alfg, point_pairs, num_point_pairs, pairs_subset, 10000);
572 if (good_count >
FFMAX(max_good_count, 2)) {
573 for (
int mi = 0;
mi < 6; ++
mi) {
574 best_model[
mi] = model[
mi];
577 for (
int pi = 0; pi < 3; pi++) {
578 best_pairs[pi] = pairs_subset[pi];
581 max_good_count = good_count;
584 (
double)(num_point_pairs - good_count) / num_point_pairs,
590 if (max_good_count > 0) {
591 for (
int mi = 0;
mi < 6; ++
mi) {
592 model_out[
mi] = best_model[
mi];
595 for (
int pi = 0; pi < 3; ++pi) {
614 const int num_inliers,
618 float move_x_val = 0.01;
619 float move_y_val = 0.01;
621 float old_move_x_val = 0;
623 int last_changed = 0;
625 for (
int iters = 0; iters < 200; iters++) {
629 best_pairs[0].
p.
p2.s[0] += move_x_val;
631 best_pairs[0].
p.
p2.s[0] += move_y_val;
637 for (
int j = 0; j < num_inliers; j++) {
641 if (total_err < best_err) {
642 for (
int mi = 0;
mi < 6; ++
mi) {
643 model_out[
mi] = model[
mi];
646 best_err = total_err;
647 last_changed = iters;
651 best_pairs[0].
p.
p2.s[0] -= move_x_val;
653 best_pairs[0].
p.
p2.s[0] -= move_y_val;
656 if (iters - last_changed > 4) {
661 old_move_x_val = move_x_val;
669 if (old_move_x_val < 0) {
687 const int num_inliers,
692 float best_err = FLT_MAX;
693 double best_model[6], model[6];
696 for (
int i = 0;
i < max_iters;
i++) {
698 int found =
get_subset(&deshake_ctx->
alfg, inliers, num_inliers, pairs_subset, 10000);
711 for (
int j = 0; j < num_inliers; j++) {
715 if (total_err < best_err) {
716 for (
int mi = 0;
mi < 6; ++
mi) {
717 best_model[
mi] = model[
mi];
720 for (
int pi = 0; pi < 3; pi++) {
721 best_pairs[pi] = pairs_subset[pi];
724 best_err = total_err;
728 for (
int mi = 0;
mi < 6; ++
mi) {
729 model_out[
mi] = best_model[
mi];
732 for (
int pi = 0; pi < 3; ++pi) {
738 optimize_model(deshake_ctx, best_pairs, inliers, num_inliers, best_err, model_out);
759 memset(&
ret, 0,
sizeof(
ret));
761 ret.translation.s[0] = e;
762 ret.translation.s[1] =
f;
765 if (
a != 0 ||
b != 0) {
771 ret.skew.s[0] = atan((
a *
c +
b * d) / (
r *
r));
773 }
else if (
c != 0 || d != 0) {
774 double s = sqrt(
c *
c + d * d);
780 ret.skew.s[1] = atan((
a *
c +
b * d) / (
s *
s));
794 for (
int i = 0;
i < size_y; ++
i) {
795 for (
int j = 0; j < size_x; ++j) {
814 return 1.0f /
expf(((
float)x * (
float)x) / (2.0
f * sigma * sigma));
822 int window_half = length / 2;
824 for (
int i = 0;
i < length; ++
i) {
828 gauss_kernel[
i] =
val;
832 for (
int i = 0;
i < length; ++
i) {
833 gauss_kernel[
i] /= gauss_sum;
860 int clip_start, clip_end, offset_clipped;
887 offset_clipped *
sizeof(
float),
910 float new_large_s = 0, new_small_s = 0, new_best = 0, old, diff_between,
911 percent_of_max, inverted_percent;
913 float large_sigma = 40.0f;
914 float small_sigma = 2.0f;
918 best_sigma = (large_sigma - 0.5f) * deshake_ctx->
smooth_percent + 0.5f;
930 for (
int i = indices.
start, j = 0;
i < indices.
end; ++
i, ++j) {
932 new_large_s += old * gauss_kernel[j];
936 for (
int i = indices.
start, j = 0;
i < indices.
end; ++
i, ++j) {
938 new_small_s += old * gauss_kernel[j];
941 diff_between =
fabsf(new_large_s - new_small_s);
942 percent_of_max = diff_between / max_val;
943 inverted_percent = 1 - percent_of_max;
944 best_sigma = large_sigma *
powf(inverted_percent, 40);
948 for (
int i = indices.
start, j = 0;
i < indices.
end; ++
i, ++j) {
950 new_best += old * gauss_kernel[j];
978 float center_s_w, center_s_h;
990 center_s_w = center_w - center_s.s[0];
991 center_s_h = center_h - center_s.s[1];
994 x_shift + center_s_w,
995 y_shift + center_s_h,
1011 float new_width, new_height, adjusted_width, adjusted_height, adjusted_x, adjusted_y;
1017 float ar_h = frame_height / frame_width;
1018 float ar_w = frame_width / frame_height;
1054 adjusted_width = new_height * ar_w;
1057 if (adjusted_x >= crop->
top_left.s[0]) {
1060 adjusted_height = new_width * ar_h;
1061 adjusted_y = crop->
bottom_right.s[1] - adjusted_height;
1077 if (
ctx->gauss_kernel)
1080 if (
ctx->ransac_err)
1083 if (
ctx->matches_host)
1086 if (
ctx->matches_contig_host)
1117 if (
ctx->debug_on) {
1135 cl_ulong8 zeroed_ulong8;
1137 cl_image_format grayscale_format;
1138 cl_image_desc grayscale_desc;
1139 cl_command_queue_properties queue_props;
1161 const int descriptor_buf_size = image_grid_32 * (
BREIFN / 8);
1162 const int features_buf_size = image_grid_32 *
sizeof(cl_float2);
1174 ctx->curr_frame = 0;
1176 memset(&zeroed_ulong8, 0,
sizeof(cl_ulong8));
1179 if (!
ctx->gauss_kernel) {
1185 if (!
ctx->ransac_err) {
1196 if (!
ctx->abs_motion.ringbuffers[
i]) {
1202 if (
ctx->debug_on) {
1204 ctx->smooth_window / 2,
1208 if (!
ctx->abs_motion.debug_matches) {
1214 ctx->abs_motion.curr_frame_offset = 0;
1215 ctx->abs_motion.data_start_offset = -1;
1216 ctx->abs_motion.data_end_offset = -1;
1219 if (!pattern_host) {
1225 if (!
ctx->matches_host) {
1231 if (!
ctx->matches_contig_host) {
1237 if (!
ctx->inliers) {
1247 for (
int j = 0; j < 2; ++j) {
1252 pattern_host[
i] = pair;
1255 for (
int i = 0;
i < 14;
i++) {
1256 if (
ctx->sw_format == disallowed_formats[
i]) {
1268 ctx->sw_format = hw_frames_ctx->sw_format;
1274 if (
ctx->debug_on) {
1275 queue_props = CL_QUEUE_PROFILING_ENABLE;
1279 ctx->command_queue = clCreateCommandQueue(
1280 ctx->ocf.hwctx->context,
1281 ctx->ocf.hwctx->device_id,
1298 grayscale_format.image_channel_order = CL_R;
1299 grayscale_format.image_channel_data_type = CL_FLOAT;
1301 grayscale_desc = (cl_image_desc) {
1302 .image_type = CL_MEM_OBJECT_IMAGE2D,
1303 .image_width = outlink->
w,
1304 .image_height = outlink->
h,
1306 .image_array_size = 0,
1307 .image_row_pitch = 0,
1308 .image_slice_pitch = 0,
1309 .num_mip_levels = 0,
1314 ctx->grayscale = clCreateImage(
1315 ctx->ocf.hwctx->context,
1331 CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1341 if (
ctx->debug_on) {
1346 ctx->initialized = 1;
1360 "\tframe moved from: %f x, %f y\n"
1361 "\t to: %f x, %f y\n"
1362 "\t rotated from: %f degrees\n"
1363 "\t to: %f degrees\n"
1364 "\t scaled from: %f x, %f y\n"
1365 "\t to: %f x, %f y\n"
1367 "\tframe moved by: %f x, %f y\n"
1368 "\t rotated by: %f degrees\n"
1369 "\t scaled by: %f x, %f y\n",
1396 float transform_y[9];
1398 float transform_uv[9];
1400 float transform_crop_y[9];
1402 float transform_crop_uv[9];
1403 float transform_debug_rgb[9];
1404 size_t global_work[2];
1406 cl_mem
src, transformed, dst;
1409 cl_event transform_event, crop_upscale_event;
1411 cl_int num_model_matches;
1413 const float center_w = (float)input_frame->
width / 2;
1414 const float center_h = (
float)input_frame->
height / 2;
1420 const float center_w_chroma = (float)chroma_width / 2;
1421 const float center_h_chroma = (float)chroma_height / 2;
1423 const float luma_w_over_chroma_w = ((float)input_frame->
width / (
float)chroma_width);
1424 const float luma_h_over_chroma_h = ((float)input_frame->
height / (
float)chroma_height);
1531 if (!cropped_frame) {
1537 if (!transformed_frame) {
1545 for (
int p = 0; p <
FF_ARRAY_ELEMS(transformed_frame->data); p++) {
1547 src = (cl_mem)input_frame->
data[p];
1548 transformed = (cl_mem)transformed_frame->data[p];
1563 { sizeof(cl_mem), &src },
1564 { sizeof(cl_mem), &transformed },
1565 { sizeof(cl_mem), &transforms[p] },
1602 transformed = (cl_mem)transformed_frame->data[0];
1609 {
sizeof(cl_mem), &transformed },
1612 {
sizeof(cl_int), &num_model_matches },
1643 crops[0] = deshake_ctx->
crop_y;
1644 crops[1] = crops[2] = deshake_ctx->
crop_uv;
1648 dst = (cl_mem)cropped_frame->
data[p];
1649 transformed = (cl_mem)transformed_frame->data[p];
1663 &crop_upscale_event,
1664 { sizeof(cl_mem), &transformed },
1665 { sizeof(cl_mem), &dst },
1666 { sizeof(cl_float2), &crops[p].top_left },
1667 { sizeof(cl_float2), &crops[p].bottom_right },
1755 int num_inliers = 0;
1759 size_t global_work[2];
1760 size_t harris_global_work[2];
1761 size_t grid_32_global_work[2];
1762 int grid_32_h, grid_32_w;
1763 size_t local_work[2];
1767 cl_event grayscale_event, harris_response_event, refine_features_event,
1768 brief_event, match_descriptors_event, read_buf_event;
1789 grid_32_global_work[0] /= 32;
1790 grid_32_global_work[1] /= 32;
1795 if (deshake_ctx->
is_yuv) {
1798 src = (cl_mem)input_frame->
data[0];
1806 {
sizeof(cl_mem), &
src },
1807 {
sizeof(cl_mem), &deshake_ctx->
grayscale }
1812 deshake_ctx->command_queue,
1813 deshake_ctx->kernel_harris_response,
1816 &harris_response_event,
1817 { sizeof(cl_mem), &deshake_ctx->grayscale },
1818 { sizeof(cl_mem), &deshake_ctx->harris_buf }
1822 deshake_ctx->command_queue,
1823 deshake_ctx->kernel_refine_features,
1824 grid_32_global_work,
1826 &refine_features_event,
1827 { sizeof(cl_mem), &deshake_ctx->grayscale },
1828 { sizeof(cl_mem), &deshake_ctx->harris_buf },
1829 { sizeof(cl_mem), &deshake_ctx->refined_features },
1830 { sizeof(cl_int), &deshake_ctx->refine_features }
1834 deshake_ctx->command_queue,
1835 deshake_ctx->kernel_brief_descriptors,
1836 grid_32_global_work,
1839 { sizeof(cl_mem), &deshake_ctx->grayscale },
1840 { sizeof(cl_mem), &deshake_ctx->refined_features },
1841 { sizeof(cl_mem), &deshake_ctx->descriptors },
1842 { sizeof(cl_mem), &deshake_ctx->brief_pattern}
1849 goto no_motion_data;
1853 deshake_ctx->command_queue,
1854 deshake_ctx->kernel_match_descriptors,
1855 grid_32_global_work,
1857 &match_descriptors_event,
1858 { sizeof(cl_mem), &deshake_ctx->prev_refined_features },
1859 { sizeof(cl_mem), &deshake_ctx->refined_features },
1860 { sizeof(cl_mem), &deshake_ctx->descriptors },
1861 { sizeof(cl_mem), &deshake_ctx->prev_descriptors },
1862 { sizeof(cl_mem), &deshake_ctx->matches }
1865 cle = clEnqueueReadBuffer(
1866 deshake_ctx->command_queue,
1867 deshake_ctx->matches,
1871 deshake_ctx->matches_host,
1880 if (num_vectors < 10) {
1893 if (deshake_ctx->abs_motion.data_end_offset == -1) {
1894 deshake_ctx->abs_motion.data_end_offset =
1898 goto no_motion_data;
1903 deshake_ctx->matches_contig_host,
1911 goto no_motion_data;
1914 for (
int i = 0;
i < num_vectors;
i++) {
1915 if (deshake_ctx->matches_contig_host[
i].should_consider) {
1916 deshake_ctx->inliers[num_inliers] = deshake_ctx->matches_contig_host[
i];
1923 deshake_ctx->inliers,
1929 goto no_motion_data;
1938 deshake_ctx->abs_motion.ringbuffers[
i],
1940 av_fifo_size(deshake_ctx->abs_motion.ringbuffers[
i]) -
sizeof(
float),
1952 if (deshake_ctx->debug_on) {
1953 if (!deshake_ctx->is_yuv) {
1972 for (
int i = 0;
i < num_vectors;
i++) {
1973 deshake_ctx->matches_contig_host[
i].should_consider = 0;
1975 debug_matches.num_model_matches = 0;
1977 if (deshake_ctx->debug_on) {
1979 "\n[ALERT] No motion data found in queue_frame, motion reset to 0\n\n"
1988 temp = deshake_ctx->prev_descriptors;
1989 deshake_ctx->prev_descriptors = deshake_ctx->descriptors;
1990 deshake_ctx->descriptors =
temp;
1993 temp = deshake_ctx->prev_refined_features;
1994 deshake_ctx->prev_refined_features = deshake_ctx->refined_features;
1995 deshake_ctx->refined_features =
temp;
1997 if (deshake_ctx->debug_on) {
1998 if (num_vectors == 0) {
1999 debug_matches.matches =
NULL;
2003 if (!debug_matches.matches) {
2009 for (
int i = 0;
i < num_vectors;
i++) {
2010 debug_matches.matches[
i] = deshake_ctx->matches_contig_host[
i];
2012 debug_matches.num_matches = num_vectors;
2015 deshake_ctx->abs_motion.debug_matches,
2024 deshake_ctx->abs_motion.ringbuffers[
i],
2034 clFinish(deshake_ctx->command_queue);
2050 if (!deshake_ctx->
eof) {
2055 if (!
frame->hw_frames_ctx)
2087 deshake_ctx->
eof = 1;
2091 if (deshake_ctx->
eof) {
2106 "Average kernel execution times:\n"
2107 "\t grayscale: %0.3f ms\n"
2108 "\t harris_response: %0.3f ms\n"
2109 "\t refine_features: %0.3f ms\n"
2110 "\tbrief_descriptors: %0.3f ms\n"
2111 "\tmatch_descriptors: %0.3f ms\n"
2112 "\t transform: %0.3f ms\n"
2113 "\t crop_upscale: %0.3f ms\n"
2114 "Average buffer read times:\n"
2115 "\t features buf: %0.3f ms\n",
2131 if (!deshake_ctx->
eof) {
/* Byte offset of member x inside DeshakeOpenCLContext. */
#define OFFSET(x) (offsetof(DeshakeOpenCLContext, x))
/* Combination of the filtering-parameter and video-parameter option flags.
 * Parenthesized so the bitwise OR stays one unit when FLAGS is used inside
 * a larger expression (e.g. FLAGS & mask). */
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
2161 "tripod",
"simulates a tripod by preventing any camera movement whatsoever "
2162 "from the original frame",
2166 "debug",
"turn on additional debugging information",
2170 "adaptive_crop",
"attempt to subtly crop borders to reduce mirrored content",
2174 "refine_features",
"refine feature point locations at a sub-pixel level",
2178 "smooth_strength",
"smoothing strength (0 attempts to adaptively determine optimal strength)",
2182 "smooth_window_multiplier",
"multiplier for number of frames to buffer for motion data",
2191 .
name =
"deshake_opencl",
2194 .priv_class = &deshake_opencl_class,