89 #define BRIEF_PATCH_SIZE 31 
   90 #define BRIEF_PATCH_SIZE_HALF (BRIEF_PATCH_SIZE / 2) 
   92 #define MATCHES_CONTIG_SIZE 2000 
   94 #define ROUNDED_UP_DIV(a, b) ((a + (b - 1)) / b) 
  305     return (
av_lfg_get(alfg) % (high - low)) + low;
 
  311     return (
double)total_time / (
double)num_frames / 1000000.0;
 
  323     double x1 = point_pairs[0].
p.
p1.s[0];
 
  324     double y1 = point_pairs[0].
p.
p1.s[1];
 
  325     double x2 = point_pairs[1].
p.
p1.s[0];
 
  326     double y2 = point_pairs[1].
p.
p1.s[1];
 
  327     double x3 = point_pairs[2].
p.
p1.s[0];
 
  328     double y3 = point_pairs[2].
p.
p1.s[1];
 
  331     double X1 = point_pairs[0].
p.
p2.s[0];
 
  332     double Y1 = point_pairs[0].
p.
p2.s[1];
 
  333     double X2 = point_pairs[1].
p.
p2.s[0];
 
  334     double Y2 = point_pairs[1].
p.
p2.s[1];
 
  335     double X3 = point_pairs[2].
p.
p2.s[0];
 
  336     double Y3 = point_pairs[2].
p.
p2.s[1];
 
  338     double d = 1.0 / ( x1*(y2-y3) + x2*(y3-y1) + x3*(y1-y2) );
 
  340     model[0] = 
d * ( X1*(y2-y3) + X2*(y3-y1) + X3*(y1-y2) );
 
  341     model[1] = 
d * ( X1*(x3-x2) + X2*(x1-x3) + X3*(x2-x1) );
 
  342     model[2] = 
d * ( X1*(x2*y3 - x3*y2) + X2*(x3*y1 - x1*y3) + X3*(x1*y2 - x2*y1) );
 
  344     model[3] = 
d * ( Y1*(y2-y3) + Y2*(y3-y1) + Y3*(y1-y2) );
 
  345     model[4] = 
d * ( Y1*(x3-x2) + Y2*(x1-x3) + Y3*(x2-x1) );
 
  346     model[5] = 
d * ( Y1*(x2*y3 - x3*y2) + Y2*(x3*y1 - x1*y3) + Y3*(x1*y2 - x2*y1) );
 
  354     for (j = 0; j < 
i; j++) {
 
  355         double dx1 = points[j]->s[0] - points[
i]->s[0];
 
  356         double dy1 = points[j]->s[1] - points[
i]->s[1];
 
  358         for (k = 0; k < j; k++) {
 
  359             double dx2 = points[k]->s[0] - points[
i]->s[0];
 
  360             double dy2 = points[k]->s[1] - points[
i]->s[1];
 
  365             if (
fabs(dx2*dy1 - dy2*dx1) <= 1.0) {
 
  378     const cl_float2 *prev_points[] = {
 
  379         &pairs_subset[0].
p.
p1,
 
  380         &pairs_subset[1].
p.
p1,
 
  381         &pairs_subset[2].
p.
p1 
  384     const cl_float2 *curr_points[] = {
 
  385         &pairs_subset[0].
p.
p2,
 
  386         &pairs_subset[1].
p.
p2,
 
  387         &pairs_subset[2].
p.
p2 
  397     const int num_point_pairs,
 
  402     int i = 0, j, iters = 0;
 
  404     for (; iters < max_attempts; iters++) {
 
  405         for (
i = 0; 
i < 3 && iters < max_attempts;) {
 
  409                 idx_i = idx[
i] = 
rand_in(0, num_point_pairs, alfg);
 
  411                 for (j = 0; j < 
i; j++) {
 
  412                     if (idx_i == idx[j]) {
 
  422             pairs_subset[
i] = point_pairs[idx[
i]];
 
  432     return i == 3 && iters < max_attempts;
 
  438     const int num_point_pairs,
 
  442     double F0 = model[0], 
F1 = model[1], 
F2 = model[2];
 
  443     double F3 = model[3], F4 = model[4], F5 = model[5];
 
  445     for (
int i = 0; 
i < num_point_pairs; 
i++) {
 
  446         const cl_float2 *
f = &point_pairs[
i].
p.
p1;
 
  447         const cl_float2 *t = &point_pairs[
i].
p.
p2;
 
  449         double a = F0*
f->s[0] + 
F1*
f->s[1] + 
F2 - t->s[0];
 
  450         double b = 
F3*
f->s[0] + F4*
f->s[1] + F5 - t->s[1];
 
  462     const int num_point_pairs,
 
  467     float t = (
float)(thresh * thresh);
 
  468     int i, n = num_point_pairs, num_inliers = 0;
 
  472     for (
i = 0; 
i < n; 
i++) {
 
  500     confidence   = 
av_clipd(confidence, 0.0, 1.0);
 
  501     num_outliers = 
av_clipd(num_outliers, 0.0, 1.0);
 
  504     num = 
FFMAX(1.0 - confidence, DBL_MIN);
 
  505     denom = 1.0 - pow(1.0 - num_outliers, 3);
 
  506     if (denom < DBL_MIN) {
 
  513     return denom >= 0 || -num >= max_iters * (-denom) ? max_iters : (
int)
round(num / denom);
 
  522     const int num_point_pairs,
 
  524     const double threshold,
 
  526     const double confidence
 
  529     double best_model[6], model[6];
 
  532     int iter, niters = 
FFMAX(max_iters, 1);
 
  533     int good_count, max_good_count = 0;
 
  536     if (num_point_pairs < 3) {
 
  538     } 
else if (num_point_pairs == 3) {
 
  542         for (
int i = 0; 
i < 3; ++
i) {
 
  549     for (iter = 0; iter < niters; ++iter) {
 
  550         int found = 
get_subset(&deshake_ctx->
alfg, point_pairs, num_point_pairs, pairs_subset, 10000);
 
  563         if (good_count > 
FFMAX(max_good_count, 2)) {
 
  564             for (
int mi = 0; 
mi < 6; ++
mi) {
 
  565                 best_model[
mi] = model[
mi];
 
  568             for (
int pi = 0; pi < 3; pi++) {
 
  569                 best_pairs[pi] = pairs_subset[pi];
 
  572             max_good_count = good_count;
 
  575                 (
double)(num_point_pairs - good_count) / num_point_pairs,
 
  581     if (max_good_count > 0) {
 
  582         for (
int mi = 0; 
mi < 6; ++
mi) {
 
  583             model_out[
mi] = best_model[
mi];
 
  586         for (
int pi = 0; pi < 3; ++pi) {
 
  605     const int num_inliers,
 
  609     float move_x_val = 0.01;
 
  610     float move_y_val = 0.01;
 
  612     float old_move_x_val = 0;
 
  614     int last_changed = 0;
 
  616     for (
int iters = 0; iters < 200; iters++) {
 
  620             best_pairs[0].
p.
p2.s[0] += move_x_val;
 
  622             best_pairs[0].
p.
p2.s[0] += move_y_val;
 
  628         for (
int j = 0; j < num_inliers; j++) {
 
  632         if (total_err < best_err) {
 
  633             for (
int mi = 0; 
mi < 6; ++
mi) {
 
  634                 model_out[
mi] = model[
mi];
 
  637             best_err = total_err;
 
  638             last_changed = iters;
 
  642                 best_pairs[0].
p.
p2.s[0] -= move_x_val;
 
  644                 best_pairs[0].
p.
p2.s[0] -= move_y_val;
 
  647             if (iters - last_changed > 4) {
 
  652             old_move_x_val = move_x_val;
 
  660             if (old_move_x_val < 0) {
 
  678     const int num_inliers,
 
  683     float best_err = FLT_MAX;
 
  684     double best_model[6], model[6];
 
  687     for (
int i = 0; 
i < max_iters; 
i++) {
 
  689         int found = 
get_subset(&deshake_ctx->
alfg, inliers, num_inliers, pairs_subset, 10000);
 
  702         for (
int j = 0; j < num_inliers; j++) {
 
  706         if (total_err < best_err) {
 
  707             for (
int mi = 0; 
mi < 6; ++
mi) {
 
  708                 best_model[
mi] = model[
mi];
 
  711             for (
int pi = 0; pi < 3; pi++) {
 
  712                 best_pairs[pi] = pairs_subset[pi];
 
  715             best_err = total_err;
 
  719     for (
int mi = 0; 
mi < 6; ++
mi) {
 
  720         model_out[
mi] = best_model[
mi];
 
  723     for (
int pi = 0; pi < 3; ++pi) {
 
  729     optimize_model(deshake_ctx, best_pairs, inliers, num_inliers, best_err, model_out);
 
  750     memset(&
ret, 0, 
sizeof(
ret));
 
  752     ret.translation.s[0] = e;
 
  753     ret.translation.s[1] = 
f;
 
  756     if (
a != 0 || 
b != 0) {
 
  762         ret.skew.s[0] = atan((
a * 
c + 
b * 
d) / (
r * 
r));
 
  764     } 
else if (
c != 0 || 
d != 0) {
 
  765         double s = sqrt(
c * 
c + 
d * 
d);
 
  771         ret.skew.s[1] = atan((
a * 
c + 
b * 
d) / (
s * 
s));
 
  785     for (
int i = 0; 
i < size_y; ++
i) {
 
  786         for (
int j = 0; j < size_x; ++j) {
 
  805     return 1.0f / 
expf(((
float)x * (
float)x) / (2.0
f * sigma * sigma));
 
  813     int window_half = length / 2;
 
  815     for (
int i = 0; 
i < length; ++
i) {
 
  819         gauss_kernel[
i] = 
val;
 
  823     for (
int i = 0; 
i < length; ++
i) {
 
  824         gauss_kernel[
i] /= gauss_sum;
 
  851     int clip_start, clip_end, offset_clipped;
 
  895     float new_large_s = 0, new_small_s = 0, new_best = 0, old, diff_between,
 
  896           percent_of_max, inverted_percent;
 
  898     float large_sigma = 40.0f;
 
  899     float small_sigma = 2.0f;
 
  903         best_sigma = (large_sigma - 0.5f) * deshake_ctx->
smooth_percent + 0.5f;
 
  915         for (
int i = indices.
start, j = 0; 
i < indices.
end; ++
i, ++j) {
 
  917             new_large_s += old * gauss_kernel[j];
 
  921         for (
int i = indices.
start, j = 0; 
i < indices.
end; ++
i, ++j) {
 
  923             new_small_s += old * gauss_kernel[j];
 
  926         diff_between = 
fabsf(new_large_s - new_small_s);
 
  927         percent_of_max = diff_between / max_val;
 
  928         inverted_percent = 1 - percent_of_max;
 
  929         best_sigma = large_sigma * 
powf(inverted_percent, 40);
 
  933     for (
int i = indices.
start, j = 0; 
i < indices.
end; ++
i, ++j) {
 
  935         new_best += old * gauss_kernel[j];
 
  963     float center_s_w, center_s_h;
 
  975     center_s_w = center_w - center_s.s[0];
 
  976     center_s_h = center_h - center_s.s[1];
 
  979         x_shift + center_s_w,
 
  980         y_shift + center_s_h,
 
  996     float new_width, new_height, adjusted_width, adjusted_height, adjusted_x, adjusted_y;
 
 1002     float ar_h = frame_height / frame_width;
 
 1003     float ar_w = frame_width / frame_height;
 
 1039     adjusted_width = new_height * ar_w;
 
 1042     if (adjusted_x >= crop->
top_left.s[0]) {
 
 1045         adjusted_height = new_width * ar_h;
 
 1046         adjusted_y = crop->
bottom_right.s[1] - adjusted_height;
 
 1062     if (
ctx->gauss_kernel)
 
 1065     if (
ctx->ransac_err)
 
 1068     if (
ctx->matches_host)
 
 1071     if (
ctx->matches_contig_host)
 
 1102     if (
ctx->debug_on) {
 
 1120     cl_ulong8 zeroed_ulong8;
 
 1122     cl_image_format grayscale_format;
 
 1123     cl_image_desc grayscale_desc;
 
 1124     cl_command_queue_properties queue_props;
 
 1146     const int descriptor_buf_size = image_grid_32 * (
BREIFN / 8);
 
 1147     const int features_buf_size = image_grid_32 * 
sizeof(cl_float2);
 
 1159     ctx->curr_frame = 0;
 
 1161     memset(&zeroed_ulong8, 0, 
sizeof(cl_ulong8));
 
 1164     if (!
ctx->gauss_kernel) {
 
 1170     if (!
ctx->ransac_err) {
 
 1179         if (!
ctx->abs_motion.ringbuffers[
i]) {
 
 1185     if (
ctx->debug_on) {
 
 1187             ctx->smooth_window / 2,
 
 1191         if (!
ctx->abs_motion.debug_matches) {
 
 1197     ctx->abs_motion.curr_frame_offset = 0;
 
 1198     ctx->abs_motion.data_start_offset = -1;
 
 1199     ctx->abs_motion.data_end_offset = -1;
 
 1202     if (!pattern_host) {
 
 1208     if (!
ctx->matches_host) {
 
 1214     if (!
ctx->matches_contig_host) {
 
 1220     if (!
ctx->inliers) {
 
 1230         for (
int j = 0; j < 2; ++j) {
 
 1235         pattern_host[
i] = pair;
 
 1238     for (
int i = 0; 
i < 14; 
i++) {
 
 1239         if (
ctx->sw_format == disallowed_formats[
i]) {
 
 1251     ctx->sw_format = hw_frames_ctx->sw_format;
 
 1257     if (
ctx->debug_on) {
 
 1258         queue_props = CL_QUEUE_PROFILING_ENABLE;
 
 1262     ctx->command_queue = clCreateCommandQueue(
 
 1263         ctx->ocf.hwctx->context,
 
 1264         ctx->ocf.hwctx->device_id,
 
 1281         grayscale_format.image_channel_order = CL_R;
 
 1282         grayscale_format.image_channel_data_type = CL_FLOAT;
 
 1284         grayscale_desc = (cl_image_desc) {
 
 1285             .image_type = CL_MEM_OBJECT_IMAGE2D,
 
 1286             .image_width = outlink->
w,
 
 1287             .image_height = outlink->
h,
 
 1289             .image_array_size = 0,
 
 1290             .image_row_pitch = 0,
 
 1291             .image_slice_pitch = 0,
 
 1292             .num_mip_levels = 0,
 
 1297         ctx->grayscale = clCreateImage(
 
 1298             ctx->ocf.hwctx->context,
 
 1314         CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
 
 1324     if (
ctx->debug_on) {
 
 1329     ctx->initialized = 1;
 
 1343         "\tframe moved from: %f x, %f y\n" 
 1344         "\t              to: %f x, %f y\n" 
 1345         "\t    rotated from: %f degrees\n" 
 1346         "\t              to: %f degrees\n" 
 1347         "\t     scaled from: %f x, %f y\n" 
 1348         "\t              to: %f x, %f y\n" 
 1350         "\tframe moved by: %f x, %f y\n" 
 1351         "\t    rotated by: %f degrees\n" 
 1352         "\t     scaled by: %f x, %f y\n",
 
 1379     float transform_y[9];
 
 1381     float transform_uv[9];
 
 1383     float transform_crop_y[9];
 
 1385     float transform_crop_uv[9];
 
 1386     float transform_debug_rgb[9];
 
 1387     size_t global_work[2];
 
 1389     cl_mem 
src, transformed, dst;
 
 1392     cl_event transform_event, crop_upscale_event;
 
 1394     cl_int num_model_matches;
 
 1396     const float center_w = (
float)input_frame->
width / 2;
 
 1397     const float center_h = (
float)input_frame->
height / 2;
 
 1403     const float center_w_chroma = (
float)chroma_width / 2;
 
 1404     const float center_h_chroma = (
float)chroma_height / 2;
 
 1406     const float luma_w_over_chroma_w = ((
float)input_frame->
width / (
float)chroma_width);
 
 1407     const float luma_h_over_chroma_h = ((
float)input_frame->
height / (
float)chroma_height);
 
 1415 #if FF_API_PKT_DURATION 
 1514     if (!cropped_frame) {
 
 1520     if (!transformed_frame) {
 
 1528     for (
int p = 0; p < 
FF_ARRAY_ELEMS(transformed_frame->data); p++) {
 
 1530         src = (cl_mem)input_frame->
data[p];
 
 1531         transformed = (cl_mem)transformed_frame->data[p];
 
 1546             { sizeof(cl_mem), &src },
 
 1547             { sizeof(cl_mem), &transformed },
 
 1548             { sizeof(cl_mem), &transforms[p] },
 
 1585         transformed = (cl_mem)transformed_frame->data[0];
 
 1592             { 
sizeof(cl_mem), &transformed },
 
 1595             { 
sizeof(cl_int), &num_model_matches },
 
 1626         crops[0] = deshake_ctx->
crop_y;
 
 1627         crops[1] = crops[2] = deshake_ctx->
crop_uv;
 
 1631             dst = (cl_mem)cropped_frame->
data[p];
 
 1632             transformed = (cl_mem)transformed_frame->data[p];
 
 1646                 &crop_upscale_event,
 
 1647                 { sizeof(cl_mem), &transformed },
 
 1648                 { sizeof(cl_mem), &dst },
 
 1649                 { sizeof(cl_float2), &crops[p].top_left },
 
 1650                 { sizeof(cl_float2), &crops[p].bottom_right },
 
 1738     int num_inliers = 0;
 
 1742     size_t global_work[2];
 
 1743     size_t harris_global_work[2];
 
 1744     size_t grid_32_global_work[2];
 
 1745     int grid_32_h, grid_32_w;
 
 1746     size_t local_work[2];
 
 1750     cl_event grayscale_event, harris_response_event, refine_features_event,
 
 1751              brief_event, match_descriptors_event, read_buf_event;
 
 1772     grid_32_global_work[0] /= 32;
 
 1773     grid_32_global_work[1] /= 32;
 
 1778     if (deshake_ctx->
is_yuv) {
 
 1781         src = (cl_mem)input_frame->
data[0];
 
 1789             { 
sizeof(cl_mem), &
src },
 
 1790             { 
sizeof(cl_mem), &deshake_ctx->
grayscale }
 
 1795         deshake_ctx->command_queue,
 
 1796         deshake_ctx->kernel_harris_response,
 
 1799         &harris_response_event,
 
 1800         { sizeof(cl_mem), &deshake_ctx->grayscale },
 
 1801         { sizeof(cl_mem), &deshake_ctx->harris_buf }
 
 1805         deshake_ctx->command_queue,
 
 1806         deshake_ctx->kernel_refine_features,
 
 1807         grid_32_global_work,
 
 1809         &refine_features_event,
 
 1810         { sizeof(cl_mem), &deshake_ctx->grayscale },
 
 1811         { sizeof(cl_mem), &deshake_ctx->harris_buf },
 
 1812         { sizeof(cl_mem), &deshake_ctx->refined_features },
 
 1813         { sizeof(cl_int), &deshake_ctx->refine_features }
 
 1817         deshake_ctx->command_queue,
 
 1818         deshake_ctx->kernel_brief_descriptors,
 
 1819         grid_32_global_work,
 
 1822         { sizeof(cl_mem), &deshake_ctx->grayscale },
 
 1823         { sizeof(cl_mem), &deshake_ctx->refined_features },
 
 1824         { sizeof(cl_mem), &deshake_ctx->descriptors },
 
 1825         { sizeof(cl_mem), &deshake_ctx->brief_pattern}
 
 1832         goto no_motion_data;
 
 1836         deshake_ctx->command_queue,
 
 1837         deshake_ctx->kernel_match_descriptors,
 
 1838         grid_32_global_work,
 
 1840         &match_descriptors_event,
 
 1841         { sizeof(cl_mem), &deshake_ctx->prev_refined_features },
 
 1842         { sizeof(cl_mem), &deshake_ctx->refined_features },
 
 1843         { sizeof(cl_mem), &deshake_ctx->descriptors },
 
 1844         { sizeof(cl_mem), &deshake_ctx->prev_descriptors },
 
 1845         { sizeof(cl_mem), &deshake_ctx->matches }
 
 1848     cle = clEnqueueReadBuffer(
 
 1849         deshake_ctx->command_queue,
 
 1850         deshake_ctx->matches,
 
 1854         deshake_ctx->matches_host,
 
 1863     if (num_vectors < 10) {
 
 1876         if (deshake_ctx->abs_motion.data_end_offset == -1) {
 
 1877             deshake_ctx->abs_motion.data_end_offset =
 
 1881         goto no_motion_data;
 
 1886         deshake_ctx->matches_contig_host,
 
 1894         goto no_motion_data;
 
 1897     for (
int i = 0; 
i < num_vectors; 
i++) {
 
 1898         if (deshake_ctx->matches_contig_host[
i].should_consider) {
 
 1899             deshake_ctx->inliers[num_inliers] = deshake_ctx->matches_contig_host[
i];
 
 1906         deshake_ctx->inliers,
 
 1912         goto no_motion_data;
 
 1921             deshake_ctx->abs_motion.ringbuffers[
i],
 
 1932     if (deshake_ctx->debug_on) {
 
 1933         if (!deshake_ctx->is_yuv) {
 
 1952     for (
int i = 0; 
i < num_vectors; 
i++) {
 
 1953         deshake_ctx->matches_contig_host[
i].should_consider = 0;
 
 1955     debug_matches.num_model_matches = 0;
 
 1957     if (deshake_ctx->debug_on) {
 
 1959             "\n[ALERT] No motion data found in queue_frame, motion reset to 0\n\n" 
 1968     temp = deshake_ctx->prev_descriptors;
 
 1969     deshake_ctx->prev_descriptors = deshake_ctx->descriptors;
 
 1970     deshake_ctx->descriptors = 
temp;
 
 1973     temp = deshake_ctx->prev_refined_features;
 
 1974     deshake_ctx->prev_refined_features = deshake_ctx->refined_features;
 
 1975     deshake_ctx->refined_features = 
temp;
 
 1977     if (deshake_ctx->debug_on) {
 
 1978         if (num_vectors == 0) {
 
 1979             debug_matches.matches = 
NULL;
 
 1983             if (!debug_matches.matches) {
 
 1989         for (
int i = 0; 
i < num_vectors; 
i++) {
 
 1990             debug_matches.matches[
i] = deshake_ctx->matches_contig_host[
i];
 
 1992         debug_matches.num_matches = num_vectors;
 
 1995             deshake_ctx->abs_motion.debug_matches,
 
 2000         av_fifo_write(deshake_ctx->abs_motion.ringbuffers[
i], &new_vals[
i], 1);
 
 2006     clFinish(deshake_ctx->command_queue);
 
 2022     if (!deshake_ctx->
eof) {
 
 2059             deshake_ctx->
eof = 1;
 
 2063     if (deshake_ctx->
eof) {
 
 2078                 "Average kernel execution times:\n" 
 2079                 "\t        grayscale: %0.3f ms\n" 
 2080                 "\t  harris_response: %0.3f ms\n" 
 2081                 "\t  refine_features: %0.3f ms\n" 
 2082                 "\tbrief_descriptors: %0.3f ms\n" 
 2083                 "\tmatch_descriptors: %0.3f ms\n" 
 2084                 "\t        transform: %0.3f ms\n" 
 2085                 "\t     crop_upscale: %0.3f ms\n" 
 2086                 "Average buffer read times:\n" 
 2087                 "\t     features buf: %0.3f ms\n",
 
 2103     if (!deshake_ctx->
eof) {
 
 2126 #define OFFSET(x) offsetof(DeshakeOpenCLContext, x) 
 2127 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM 
 2131         "tripod", 
"simulates a tripod by preventing any camera movement whatsoever " 
 2132         "from the original frame",
 
 2136         "debug", 
"turn on additional debugging information",
 
 2140         "adaptive_crop", 
"attempt to subtly crop borders to reduce mirrored content",
 
 2144         "refine_features", 
"refine feature point locations at a sub-pixel level",
 
 2148         "smooth_strength", 
"smoothing strength (0 attempts to adaptively determine optimal strength)",
 
 2152         "smooth_window_multiplier", 
"multiplier for number of frames to buffer for motion data",
 
 2161     .
name           = 
"deshake_opencl",
 
 2164     .priv_class     = &deshake_opencl_class,