FFmpeg
vf_deshake_opencl.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  *
18  * Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
19  * Copyright (C) 2009, Willow Garage Inc., all rights reserved.
20  * Copyright (C) 2013, OpenCV Foundation, all rights reserved.
21  * Third party copyrights are property of their respective owners.
22  *
23  * Redistribution and use in source and binary forms, with or without modification,
24  * are permitted provided that the following conditions are met:
25  *
26  * * Redistribution's of source code must retain the above copyright notice,
27  * this list of conditions and the following disclaimer.
28  *
29  * * Redistribution's in binary form must reproduce the above copyright notice,
30  * this list of conditions and the following disclaimer in the documentation
31  * and/or other materials provided with the distribution.
32  *
33  * * The name of the copyright holders may not be used to endorse or promote products
34  * derived from this software without specific prior written permission.
35  *
36  * This software is provided by the copyright holders and contributors "as is" and
37  * any express or implied warranties, including, but not limited to, the implied
38  * warranties of merchantability and fitness for a particular purpose are disclaimed.
39  * In no event shall the Intel Corporation or contributors be liable for any direct,
40  * indirect, incidental, special, exemplary, or consequential damages
41  * (including, but not limited to, procurement of substitute goods or services;
42  * loss of use, data, or profits; or business interruption) however caused
43  * and on any theory of liability, whether in contract, strict liability,
44  * or tort (including negligence or otherwise) arising in any way out of
45  * the use of this software, even if advised of the possibility of such damage.
46  */
47 
48 #include <float.h>
49 #include <libavutil/lfg.h>
50 #include "libavutil/opt.h"
51 #include "libavutil/imgutils.h"
52 #include "libavutil/mem.h"
53 #include "libavutil/fifo.h"
54 #include "libavutil/common.h"
55 #include "libavutil/avassert.h"
56 #include "libavutil/pixfmt.h"
57 #include "avfilter.h"
58 #include "framequeue.h"
59 #include "filters.h"
60 #include "transform.h"
61 #include "formats.h"
62 #include "internal.h"
63 #include "opencl.h"
64 #include "opencl_source.h"
65 #include "video.h"
66 
67 /*
68 This filter matches feature points between frames (dealing with outliers) and then
69 uses the matches to estimate an affine transform between frames. This transform is
70 decomposed into various values (translation, scale, rotation) and the values are
71 summed relative to the start of the video to obtain on absolute camera position
72 for each frame. This "camera path" is then smoothed via a gaussian filter, resulting
73 in a new path that is turned back into an affine transform and applied to each
74 frame to render it.
75 
76 High-level overview:
77 
78 All of the work to extract motion data from frames occurs in queue_frame. Motion data
79 is buffered in a smoothing window, so queue_frame simply computes the absolute camera
80 positions and places them in ringbuffers.
81 
82 filter_frame is responsible for looking at the absolute camera positions currently
83 in the ringbuffers, applying the gaussian filter, and then transforming the frames.
84 */
85 
86 // Number of bits for BRIEF descriptors
87 #define BREIFN 512
88 // Size of the patch from which a BRIEF descriptor is extracted
89 // This is the size used in OpenCV
90 #define BRIEF_PATCH_SIZE 31
91 #define BRIEF_PATCH_SIZE_HALF (BRIEF_PATCH_SIZE / 2)
92 
93 #define MATCHES_CONTIG_SIZE 2000
94 
95 #define ROUNDED_UP_DIV(a, b) ((a + (b - 1)) / b)
96 
97 typedef struct PointPair {
98  // Previous frame
99  cl_float2 p1;
100  // Current frame
101  cl_float2 p2;
102 } PointPair;
103 
104 typedef struct MotionVector {
106  // Used to mark vectors as potential outliers
108 } MotionVector;
109 
110 // Denotes the indices for the different types of motion in the ringbuffers array
117 
118  // Should always be last
120 };
121 
122 // Struct that holds data for drawing point match debug data
123 typedef struct DebugMatches {
125  // The points used to calculate the affine transform for a frame
127 
129  // For cases where we couldn't calculate a model
131 } DebugMatches;
132 
133 // Groups together the ringbuffers that store absolute distortion / position values
134 // for each frame
135 typedef struct AbsoluteFrameMotion {
136  // Array with the various ringbuffers, indexed via the RingbufferIndices enum
138 
139  // Offset to get to the current frame being processed
140  // (not in bytes)
142  // Keeps track of where the start and end of contiguous motion data is (to
143  // deal with cases where no motion data is found between two frames)
146 
149 
150 // Takes care of freeing the arrays within the DebugMatches inside of the
151 // debug_matches ringbuffer and then freeing the buffer itself.
153  DebugMatches dm;
154 
155  if (!afm->debug_matches) {
156  return;
157  }
158 
159  while (av_fifo_size(afm->debug_matches) > 0) {
161  afm->debug_matches,
162  &dm,
163  sizeof(DebugMatches),
164  NULL
165  );
166 
167  av_freep(&dm.matches);
168  }
169 
171 }
172 
173 // Stores the translation, scale, rotation, and skew deltas between two frames
174 typedef struct FrameDelta {
175  cl_float2 translation;
176  float rotation;
177  cl_float2 scale;
178  cl_float2 skew;
179 } FrameDelta;
180 
181 typedef struct SimilarityMatrix {
182  // The 2x3 similarity matrix
183  double matrix[6];
185 
186 typedef struct CropInfo {
187  // The top left corner of the bounding box for the crop
188  cl_float2 top_left;
189  // The bottom right corner of the bounding box for the crop
190  cl_float2 bottom_right;
191 } CropInfo;
192 
193 // Returned from function that determines start and end values for iteration
194 // around the current frame in a ringbuffer
195 typedef struct IterIndices {
196  int start;
197  int end;
198 } IterIndices;
199 
200 typedef struct DeshakeOpenCLContext {
202  // Whether or not the above `OpenCLFilterContext` has been initialized
204 
205  // These variables are used in the activate callback
206  int64_t duration;
207  int eof;
208 
209  // State for random number generation
211 
212  // FIFO frame queue used to buffer future frames for processing
214  // Ringbuffers for frame positions
216 
217  // The number of frames' motion to consider before and after the frame we are
218  // smoothing
220  // The number of the frame we are currently processing
222 
223  // Stores a 1d array of normalised gaussian kernel values for convolution
224  float *gauss_kernel;
225 
226  // Buffer for error values used in RANSAC code
227  float *ransac_err;
228 
229  // Information regarding how to crop the smoothed luminance (or RGB) planes
231  // Information regarding how to crop the smoothed chroma planes
233 
234  // Whether or not we are processing YUV input (as oppposed to RGB)
235  int is_yuv;
236  // The underlying format of the hardware surfaces
238 
239  // Buffer to copy `matches` into for the CPU to work with
242 
244 
245  cl_command_queue command_queue;
246  cl_kernel kernel_grayscale;
251  cl_kernel kernel_transform;
253 
254  // Stores a frame converted to grayscale
255  cl_mem grayscale;
256  // Stores the harris response for a frame (measure of "cornerness" for each pixel)
257  cl_mem harris_buf;
258 
259  // Detected features after non-maximum suppression and sub-pixel refinement
261  // Saved from the previous frame
263 
264  // BRIEF sampling pattern that is randomly initialized
266  // Feature point descriptors for the current frame
267  cl_mem descriptors;
268  // Feature point descriptors for the previous frame
270  // Vectors between points in current and previous frame
271  cl_mem matches;
273  // Holds the matrix to transform luminance (or RGB) with
274  cl_mem transform_y;
275  // Holds the matrix to transform chroma with
276  cl_mem transform_uv;
277 
278  // Configurable options
279 
281  int debug_on;
283 
284  // Whether or not feature points should be refined at a sub-pixel level
286  // If the user sets a value other than the default, 0, this percentage is
287  // translated into a sigma value ranging from 0.5 to 40.0
289  // This number is multiplied by the video frame rate to determine the size
290  // of the smooth window
292 
293  // Debug stuff
294 
298 
299  // These store the total time spent executing the different kernels in nanoseconds
300  unsigned long long grayscale_time;
301  unsigned long long harris_response_time;
302  unsigned long long refine_features_time;
303  unsigned long long brief_descriptors_time;
304  unsigned long long match_descriptors_time;
305  unsigned long long transform_time;
306  unsigned long long crop_upscale_time;
307 
308  // Time spent copying matched features from the device to the host
309  unsigned long long read_buf_time;
311 
312 // Returns a random uniformly-distributed number in [low, high]
313 static int rand_in(int low, int high, AVLFG *alfg) {
314  return (av_lfg_get(alfg) % (high - low)) + low;
315 }
316 
317 // Returns the average execution time for an event given the total time and the
318 // number of frames processed.
319 static double averaged_event_time_ms(unsigned long long total_time, int num_frames) {
320  return (double)total_time / (double)num_frames / 1000000.0;
321 }
322 
323 // The following code is loosely ported from OpenCV
324 
325 // Estimates affine transform from 3 point pairs
326 // model is a 2x3 matrix:
327 // a b c
328 // d e f
329 static void run_estimate_kernel(const MotionVector *point_pairs, double *model)
330 {
331  // src points
332  double x1 = point_pairs[0].p.p1.s[0];
333  double y1 = point_pairs[0].p.p1.s[1];
334  double x2 = point_pairs[1].p.p1.s[0];
335  double y2 = point_pairs[1].p.p1.s[1];
336  double x3 = point_pairs[2].p.p1.s[0];
337  double y3 = point_pairs[2].p.p1.s[1];
338 
339  // dest points
340  double X1 = point_pairs[0].p.p2.s[0];
341  double Y1 = point_pairs[0].p.p2.s[1];
342  double X2 = point_pairs[1].p.p2.s[0];
343  double Y2 = point_pairs[1].p.p2.s[1];
344  double X3 = point_pairs[2].p.p2.s[0];
345  double Y3 = point_pairs[2].p.p2.s[1];
346 
347  double d = 1.0 / ( x1*(y2-y3) + x2*(y3-y1) + x3*(y1-y2) );
348 
349  model[0] = d * ( X1*(y2-y3) + X2*(y3-y1) + X3*(y1-y2) );
350  model[1] = d * ( X1*(x3-x2) + X2*(x1-x3) + X3*(x2-x1) );
351  model[2] = d * ( X1*(x2*y3 - x3*y2) + X2*(x3*y1 - x1*y3) + X3*(x1*y2 - x2*y1) );
352 
353  model[3] = d * ( Y1*(y2-y3) + Y2*(y3-y1) + Y3*(y1-y2) );
354  model[4] = d * ( Y1*(x3-x2) + Y2*(x1-x3) + Y3*(x2-x1) );
355  model[5] = d * ( Y1*(x2*y3 - x3*y2) + Y2*(x3*y1 - x1*y3) + Y3*(x1*y2 - x2*y1) );
356 }
357 
358 // Checks that the 3 points in the given array are not collinear
359 static int points_not_collinear(const cl_float2 **points)
360 {
361  int j, k, i = 2;
362 
363  for (j = 0; j < i; j++) {
364  double dx1 = points[j]->s[0] - points[i]->s[0];
365  double dy1 = points[j]->s[1] - points[i]->s[1];
366 
367  for (k = 0; k < j; k++) {
368  double dx2 = points[k]->s[0] - points[i]->s[0];
369  double dy2 = points[k]->s[1] - points[i]->s[1];
370 
371  // Assuming a 3840 x 2160 video with a point at (0, 0) and one at
372  // (3839, 2159), this prevents a third point from being within roughly
373  // 0.5 of a pixel of the line connecting the two on both axes
374  if (fabs(dx2*dy1 - dy2*dx1) <= 1.0) {
375  return 0;
376  }
377  }
378  }
379 
380  return 1;
381 }
382 
383 // Checks a subset of 3 point pairs to make sure that the points are not collinear
384 // and not too close to each other
385 static int check_subset(const MotionVector *pairs_subset)
386 {
387  const cl_float2 *prev_points[] = {
388  &pairs_subset[0].p.p1,
389  &pairs_subset[1].p.p1,
390  &pairs_subset[2].p.p1
391  };
392 
393  const cl_float2 *curr_points[] = {
394  &pairs_subset[0].p.p2,
395  &pairs_subset[1].p.p2,
396  &pairs_subset[2].p.p2
397  };
398 
399  return points_not_collinear(prev_points) && points_not_collinear(curr_points);
400 }
401 
402 // Selects a random subset of 3 points from point_pairs and places them in pairs_subset
403 static int get_subset(
404  AVLFG *alfg,
405  const MotionVector *point_pairs,
406  const int num_point_pairs,
407  MotionVector *pairs_subset,
408  int max_attempts
409 ) {
410  int idx[3];
411  int i = 0, j, iters = 0;
412 
413  for (; iters < max_attempts; iters++) {
414  for (i = 0; i < 3 && iters < max_attempts;) {
415  int idx_i = 0;
416 
417  for (;;) {
418  idx_i = idx[i] = rand_in(0, num_point_pairs, alfg);
419 
420  for (j = 0; j < i; j++) {
421  if (idx_i == idx[j]) {
422  break;
423  }
424  }
425 
426  if (j == i) {
427  break;
428  }
429  }
430 
431  pairs_subset[i] = point_pairs[idx[i]];
432  i++;
433  }
434 
435  if (i == 3 && !check_subset(pairs_subset)) {
436  continue;
437  }
438  break;
439  }
440 
441  return i == 3 && iters < max_attempts;
442 }
443 
444 // Computes the error for each of the given points based on the given model.
445 static void compute_error(
446  const MotionVector *point_pairs,
447  const int num_point_pairs,
448  const double *model,
449  float *err
450 ) {
451  double F0 = model[0], F1 = model[1], F2 = model[2];
452  double F3 = model[3], F4 = model[4], F5 = model[5];
453 
454  for (int i = 0; i < num_point_pairs; i++) {
455  const cl_float2 *f = &point_pairs[i].p.p1;
456  const cl_float2 *t = &point_pairs[i].p.p2;
457 
458  double a = F0*f->s[0] + F1*f->s[1] + F2 - t->s[0];
459  double b = F3*f->s[0] + F4*f->s[1] + F5 - t->s[1];
460 
461  err[i] = a*a + b*b;
462  }
463 }
464 
465 // Determines which of the given point matches are inliers for the given model
466 // based on the specified threshold.
467 //
468 // err must be an array of num_point_pairs length
469 static int find_inliers(
470  MotionVector *point_pairs,
471  const int num_point_pairs,
472  const double *model,
473  float *err,
474  double thresh
475 ) {
476  float t = (float)(thresh * thresh);
477  int i, n = num_point_pairs, num_inliers = 0;
478 
479  compute_error(point_pairs, num_point_pairs, model, err);
480 
481  for (i = 0; i < n; i++) {
482  if (err[i] <= t) {
483  // This is an inlier
484  point_pairs[i].should_consider = 1;
485  num_inliers += 1;
486  } else {
487  point_pairs[i].should_consider = 0;
488  }
489  }
490 
491  return num_inliers;
492 }
493 
494 // Determines the number of iterations required to achieve the desired confidence level.
495 //
496 // The equation used to determine the number of iterations to do is:
497 // 1 - confidence = (1 - inlier_probability^num_points)^num_iters
498 //
499 // Solving for num_iters:
500 //
501 // num_iters = log(1 - confidence) / log(1 - inlier_probability^num_points)
502 //
503 // A more in-depth explanation can be found at https://en.wikipedia.org/wiki/Random_sample_consensus
504 // under the 'Parameters' heading
505 static int ransac_update_num_iters(double confidence, double num_outliers, int max_iters)
506 {
507  double num, denom;
508 
509  confidence = av_clipd(confidence, 0.0, 1.0);
510  num_outliers = av_clipd(num_outliers, 0.0, 1.0);
511 
512  // avoid inf's & nan's
513  num = FFMAX(1.0 - confidence, DBL_MIN);
514  denom = 1.0 - pow(1.0 - num_outliers, 3);
515  if (denom < DBL_MIN) {
516  return 0;
517  }
518 
519  num = log(num);
520  denom = log(denom);
521 
522  return denom >= 0 || -num >= max_iters * (-denom) ? max_iters : (int)round(num / denom);
523 }
524 
525 // Estimates an affine transform between the given pairs of points using RANdom
526 // SAmple Consensus
528  DeshakeOpenCLContext *deshake_ctx,
529  MotionVector *point_pairs,
530  DebugMatches *debug_matches,
531  const int num_point_pairs,
532  double *model_out,
533  const double threshold,
534  const int max_iters,
535  const double confidence
536 ) {
537  int result = 0;
538  double best_model[6], model[6];
539  MotionVector pairs_subset[3], best_pairs[3];
540 
541  int iter, niters = FFMAX(max_iters, 1);
542  int good_count, max_good_count = 0;
543 
544  // We need at least 3 points to build a model from
545  if (num_point_pairs < 3) {
546  return 0;
547  } else if (num_point_pairs == 3) {
548  // There are only 3 points, so RANSAC doesn't apply here
549  run_estimate_kernel(point_pairs, model_out);
550 
551  for (int i = 0; i < 3; ++i) {
552  point_pairs[i].should_consider = 1;
553  }
554 
555  return 1;
556  }
557 
558  for (iter = 0; iter < niters; ++iter) {
559  int found = get_subset(&deshake_ctx->alfg, point_pairs, num_point_pairs, pairs_subset, 10000);
560 
561  if (!found) {
562  if (iter == 0) {
563  return 0;
564  }
565 
566  break;
567  }
568 
569  run_estimate_kernel(pairs_subset, model);
570  good_count = find_inliers(point_pairs, num_point_pairs, model, deshake_ctx->ransac_err, threshold);
571 
572  if (good_count > FFMAX(max_good_count, 2)) {
573  for (int mi = 0; mi < 6; ++mi) {
574  best_model[mi] = model[mi];
575  }
576 
577  for (int pi = 0; pi < 3; pi++) {
578  best_pairs[pi] = pairs_subset[pi];
579  }
580 
581  max_good_count = good_count;
582  niters = ransac_update_num_iters(
583  confidence,
584  (double)(num_point_pairs - good_count) / num_point_pairs,
585  niters
586  );
587  }
588  }
589 
590  if (max_good_count > 0) {
591  for (int mi = 0; mi < 6; ++mi) {
592  model_out[mi] = best_model[mi];
593  }
594 
595  for (int pi = 0; pi < 3; ++pi) {
596  debug_matches->model_matches[pi] = best_pairs[pi];
597  }
598  debug_matches->num_model_matches = 3;
599 
600  // Find the inliers again for the best model for debugging
601  find_inliers(point_pairs, num_point_pairs, best_model, deshake_ctx->ransac_err, threshold);
602  result = 1;
603  }
604 
605  return result;
606 }
607 
608 // "Wiggles" the first point in best_pairs around a tiny bit in order to decrease the
609 // total error
610 static void optimize_model(
611  DeshakeOpenCLContext *deshake_ctx,
612  MotionVector *best_pairs,
613  MotionVector *inliers,
614  const int num_inliers,
615  float best_err,
616  double *model_out
617 ) {
618  float move_x_val = 0.01;
619  float move_y_val = 0.01;
620  int move_x = 1;
621  float old_move_x_val = 0;
622  double model[6];
623  int last_changed = 0;
624 
625  for (int iters = 0; iters < 200; iters++) {
626  float total_err = 0;
627 
628  if (move_x) {
629  best_pairs[0].p.p2.s[0] += move_x_val;
630  } else {
631  best_pairs[0].p.p2.s[0] += move_y_val;
632  }
633 
634  run_estimate_kernel(best_pairs, model);
635  compute_error(inliers, num_inliers, model, deshake_ctx->ransac_err);
636 
637  for (int j = 0; j < num_inliers; j++) {
638  total_err += deshake_ctx->ransac_err[j];
639  }
640 
641  if (total_err < best_err) {
642  for (int mi = 0; mi < 6; ++mi) {
643  model_out[mi] = model[mi];
644  }
645 
646  best_err = total_err;
647  last_changed = iters;
648  } else {
649  // Undo the change
650  if (move_x) {
651  best_pairs[0].p.p2.s[0] -= move_x_val;
652  } else {
653  best_pairs[0].p.p2.s[0] -= move_y_val;
654  }
655 
656  if (iters - last_changed > 4) {
657  // We've already improved the model as much as we can
658  break;
659  }
660 
661  old_move_x_val = move_x_val;
662 
663  if (move_x) {
664  move_x_val *= -1;
665  } else {
666  move_y_val *= -1;
667  }
668 
669  if (old_move_x_val < 0) {
670  move_x = 0;
671  } else {
672  move_x = 1;
673  }
674  }
675  }
676 }
677 
678 // Uses a process similar to that of RANSAC to find a transform that minimizes
679 // the total error for a set of point matches determined to be inliers
680 //
681 // (Pick random subsets, compute model, find total error, iterate until error
682 // is minimized.)
683 static int minimize_error(
684  DeshakeOpenCLContext *deshake_ctx,
685  MotionVector *inliers,
686  DebugMatches *debug_matches,
687  const int num_inliers,
688  double *model_out,
689  const int max_iters
690 ) {
691  int result = 0;
692  float best_err = FLT_MAX;
693  double best_model[6], model[6];
694  MotionVector pairs_subset[3], best_pairs[3];
695 
696  for (int i = 0; i < max_iters; i++) {
697  float total_err = 0;
698  int found = get_subset(&deshake_ctx->alfg, inliers, num_inliers, pairs_subset, 10000);
699 
700  if (!found) {
701  if (i == 0) {
702  return 0;
703  }
704 
705  break;
706  }
707 
708  run_estimate_kernel(pairs_subset, model);
709  compute_error(inliers, num_inliers, model, deshake_ctx->ransac_err);
710 
711  for (int j = 0; j < num_inliers; j++) {
712  total_err += deshake_ctx->ransac_err[j];
713  }
714 
715  if (total_err < best_err) {
716  for (int mi = 0; mi < 6; ++mi) {
717  best_model[mi] = model[mi];
718  }
719 
720  for (int pi = 0; pi < 3; pi++) {
721  best_pairs[pi] = pairs_subset[pi];
722  }
723 
724  best_err = total_err;
725  }
726  }
727 
728  for (int mi = 0; mi < 6; ++mi) {
729  model_out[mi] = best_model[mi];
730  }
731 
732  for (int pi = 0; pi < 3; ++pi) {
733  debug_matches->model_matches[pi] = best_pairs[pi];
734  }
735  debug_matches->num_model_matches = 3;
736  result = 1;
737 
738  optimize_model(deshake_ctx, best_pairs, inliers, num_inliers, best_err, model_out);
739  return result;
740 }
741 
742 // End code from OpenCV
743 
744 // Decomposes a similarity matrix into translation, rotation, scale, and skew
745 //
746 // See http://frederic-wang.fr/decomposition-of-2d-transform-matrices.html
747 static FrameDelta decompose_transform(double *model)
748 {
749  FrameDelta ret;
750 
751  double a = model[0];
752  double c = model[1];
753  double e = model[2];
754  double b = model[3];
755  double d = model[4];
756  double f = model[5];
757  double delta = a * d - b * c;
758 
759  memset(&ret, 0, sizeof(ret));
760 
761  ret.translation.s[0] = e;
762  ret.translation.s[1] = f;
763 
764  // This is the QR method
765  if (a != 0 || b != 0) {
766  double r = hypot(a, b);
767 
768  ret.rotation = FFSIGN(b) * acos(a / r);
769  ret.scale.s[0] = r;
770  ret.scale.s[1] = delta / r;
771  ret.skew.s[0] = atan((a * c + b * d) / (r * r));
772  ret.skew.s[1] = 0;
773  } else if (c != 0 || d != 0) {
774  double s = sqrt(c * c + d * d);
775 
776  ret.rotation = M_PI / 2 - FFSIGN(d) * acos(-c / s);
777  ret.scale.s[0] = delta / s;
778  ret.scale.s[1] = s;
779  ret.skew.s[0] = 0;
780  ret.skew.s[1] = atan((a * c + b * d) / (s * s));
781  } // otherwise there is only translation
782 
783  return ret;
784 }
785 
786 // Move valid vectors from the 2d buffer into a 1d buffer where they are contiguous
788  DeshakeOpenCLContext *deshake_ctx,
789  int size_y,
790  int size_x
791 ) {
792  int num_vectors = 0;
793 
794  for (int i = 0; i < size_y; ++i) {
795  for (int j = 0; j < size_x; ++j) {
796  MotionVector v = deshake_ctx->matches_host[j + i * size_x];
797 
798  if (v.should_consider) {
799  deshake_ctx->matches_contig_host[num_vectors] = v;
800  ++num_vectors;
801  }
802 
803  // Make sure we do not exceed the amount of space we allocated for these vectors
804  if (num_vectors == MATCHES_CONTIG_SIZE - 1) {
805  return num_vectors;
806  }
807  }
808  }
809  return num_vectors;
810 }
811 
812 // Returns the gaussian kernel value for the given x coordinate and sigma value
813 static float gaussian_for(int x, float sigma) {
814  return 1.0f / expf(((float)x * (float)x) / (2.0f * sigma * sigma));
815 }
816 
817 // Makes a normalized gaussian kernel of the given length for the given sigma
818 // and places it in gauss_kernel
819 static void make_gauss_kernel(float *gauss_kernel, float length, float sigma)
820 {
821  float gauss_sum = 0;
822  int window_half = length / 2;
823 
824  for (int i = 0; i < length; ++i) {
825  float val = gaussian_for(i - window_half, sigma);
826 
827  gauss_sum += val;
828  gauss_kernel[i] = val;
829  }
830 
831  // Normalize the gaussian values
832  for (int i = 0; i < length; ++i) {
833  gauss_kernel[i] /= gauss_sum;
834  }
835 }
836 
837 // Returns indices to start and end iteration at in order to iterate over a window
838 // of length size centered at the current frame in a ringbuffer
839 //
840 // Always returns numbers that result in a window of length size, even if that
841 // means specifying negative indices or indices past the end of the values in the
842 // ringbuffers. Make sure you clip indices appropriately within your loop.
843 static IterIndices start_end_for(DeshakeOpenCLContext *deshake_ctx, int length) {
844  IterIndices indices;
845 
846  indices.start = deshake_ctx->abs_motion.curr_frame_offset - (length / 2);
847  indices.end = deshake_ctx->abs_motion.curr_frame_offset + (length / 2) + (length % 2);
848 
849  return indices;
850 }
851 
852 // Sets val to the value in the given ringbuffer at the given offset, taking care of
853 // clipping the offset into the appropriate range
854 static void ringbuf_float_at(
855  DeshakeOpenCLContext *deshake_ctx,
857  float *val,
858  int offset
859 ) {
860  int clip_start, clip_end, offset_clipped;
861  if (deshake_ctx->abs_motion.data_end_offset != -1) {
862  clip_end = deshake_ctx->abs_motion.data_end_offset;
863  } else {
864  // This expression represents the last valid index in the buffer,
865  // which we use repeatedly at the end of the video.
866  clip_end = deshake_ctx->smooth_window - (av_fifo_space(values) / sizeof(float)) - 1;
867  }
868 
869  if (deshake_ctx->abs_motion.data_start_offset != -1) {
870  clip_start = deshake_ctx->abs_motion.data_start_offset;
871  } else {
872  // Negative indices will occur at the start of the video, and we want
873  // them to be clipped to 0 in order to repeatedly use the position of
874  // the first frame.
875  clip_start = 0;
876  }
877 
878  offset_clipped = av_clip(
879  offset,
880  clip_start,
881  clip_end
882  );
883 
885  values,
886  val,
887  offset_clipped * sizeof(float),
888  sizeof(float),
889  NULL
890  );
891 }
892 
893 // Returns smoothed current frame value of the given buffer of floats based on the
894 // given Gaussian kernel and its length (also the window length, centered around the
895 // current frame) and the "maximum value" of the motion.
896 //
897 // This "maximum value" should be the width / height of the image in the case of
898 // translation and an empirically chosen constant for rotation / scale.
899 //
900 // The sigma chosen to generate the final gaussian kernel with used to smooth the
901 // camera path is either hardcoded (set by user, deshake_ctx->smooth_percent) or
902 // adaptively chosen.
903 static float smooth(
904  DeshakeOpenCLContext *deshake_ctx,
905  float *gauss_kernel,
906  int length,
907  float max_val,
909 ) {
910  float new_large_s = 0, new_small_s = 0, new_best = 0, old, diff_between,
911  percent_of_max, inverted_percent;
912  IterIndices indices = start_end_for(deshake_ctx, length);
913  float large_sigma = 40.0f;
914  float small_sigma = 2.0f;
915  float best_sigma;
916 
917  if (deshake_ctx->smooth_percent) {
918  best_sigma = (large_sigma - 0.5f) * deshake_ctx->smooth_percent + 0.5f;
919  } else {
920  // Strategy to adaptively smooth trajectory:
921  //
922  // 1. Smooth path with large and small sigma values
923  // 2. Take the absolute value of the difference between them
924  // 3. Get a percentage by putting the difference over the "max value"
925  // 4, Invert the percentage
926  // 5. Calculate a new sigma value weighted towards the larger sigma value
927  // 6. Determine final smoothed trajectory value using that sigma
928 
929  make_gauss_kernel(gauss_kernel, length, large_sigma);
930  for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {
931  ringbuf_float_at(deshake_ctx, values, &old, i);
932  new_large_s += old * gauss_kernel[j];
933  }
934 
935  make_gauss_kernel(gauss_kernel, length, small_sigma);
936  for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {
937  ringbuf_float_at(deshake_ctx, values, &old, i);
938  new_small_s += old * gauss_kernel[j];
939  }
940 
941  diff_between = fabsf(new_large_s - new_small_s);
942  percent_of_max = diff_between / max_val;
943  inverted_percent = 1 - percent_of_max;
944  best_sigma = large_sigma * powf(inverted_percent, 40);
945  }
946 
947  make_gauss_kernel(gauss_kernel, length, best_sigma);
948  for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {
949  ringbuf_float_at(deshake_ctx, values, &old, i);
950  new_best += old * gauss_kernel[j];
951  }
952 
953  return new_best;
954 }
955 
956 // Returns the position of the given point after the transform is applied
957 static cl_float2 transformed_point(float x, float y, float *transform) {
958  cl_float2 ret;
959 
960  ret.s[0] = x * transform[0] + y * transform[1] + transform[2];
961  ret.s[1] = x * transform[3] + y * transform[4] + transform[5];
962 
963  return ret;
964 }
965 
966 // Creates an affine transform that scales from the center of a frame
968  float x_shift,
969  float y_shift,
970  float angle,
971  float scale_x,
972  float scale_y,
973  float center_w,
974  float center_h,
975  float *matrix
976 ) {
977  cl_float2 center_s;
978  float center_s_w, center_s_h;
979 
981  0,
982  0,
983  0,
984  scale_x,
985  scale_y,
986  matrix
987  );
988 
989  center_s = transformed_point(center_w, center_h, matrix);
990  center_s_w = center_w - center_s.s[0];
991  center_s_h = center_h - center_s.s[1];
992 
994  x_shift + center_s_w,
995  y_shift + center_s_h,
996  angle,
997  scale_x,
998  scale_y,
999  matrix
1000  );
1001 }
1002 
1003 // Determines the crop necessary to eliminate black borders from a smoothed frame
1004 // and updates target crop accordingly
1006  CropInfo* crop,
1007  float *transform,
1008  float frame_width,
1009  float frame_height
1010 ) {
1011  float new_width, new_height, adjusted_width, adjusted_height, adjusted_x, adjusted_y;
1012 
1013  cl_float2 top_left = transformed_point(0, 0, transform);
1014  cl_float2 top_right = transformed_point(frame_width, 0, transform);
1015  cl_float2 bottom_left = transformed_point(0, frame_height, transform);
1016  cl_float2 bottom_right = transformed_point(frame_width, frame_height, transform);
1017  float ar_h = frame_height / frame_width;
1018  float ar_w = frame_width / frame_height;
1019 
1020  if (crop->bottom_right.s[0] == 0) {
1021  // The crop hasn't been set to the original size of the plane
1022  crop->bottom_right.s[0] = frame_width;
1023  crop->bottom_right.s[1] = frame_height;
1024  }
1025 
1026  crop->top_left.s[0] = FFMAX3(
1027  crop->top_left.s[0],
1028  top_left.s[0],
1029  bottom_left.s[0]
1030  );
1031 
1032  crop->top_left.s[1] = FFMAX3(
1033  crop->top_left.s[1],
1034  top_left.s[1],
1035  top_right.s[1]
1036  );
1037 
1038  crop->bottom_right.s[0] = FFMIN3(
1039  crop->bottom_right.s[0],
1040  bottom_right.s[0],
1041  top_right.s[0]
1042  );
1043 
1044  crop->bottom_right.s[1] = FFMIN3(
1045  crop->bottom_right.s[1],
1046  bottom_right.s[1],
1047  bottom_left.s[1]
1048  );
1049 
1050  // Make sure our potentially new bounding box has the same aspect ratio
1051  new_height = crop->bottom_right.s[1] - crop->top_left.s[1];
1052  new_width = crop->bottom_right.s[0] - crop->top_left.s[0];
1053 
1054  adjusted_width = new_height * ar_w;
1055  adjusted_x = crop->bottom_right.s[0] - adjusted_width;
1056 
1057  if (adjusted_x >= crop->top_left.s[0]) {
1058  crop->top_left.s[0] = adjusted_x;
1059  } else {
1060  adjusted_height = new_width * ar_h;
1061  adjusted_y = crop->bottom_right.s[1] - adjusted_height;
1062  crop->top_left.s[1] = adjusted_y;
1063  }
1064 }
1065 
1067 {
1068  DeshakeOpenCLContext *ctx = avctx->priv;
1069  cl_int cle;
1070 
1071  for (int i = 0; i < RingbufCount; i++)
1072  av_fifo_freep(&ctx->abs_motion.ringbuffers[i]);
1073 
1074  if (ctx->debug_on)
1075  free_debug_matches(&ctx->abs_motion);
1076 
1077  if (ctx->gauss_kernel)
1078  av_freep(&ctx->gauss_kernel);
1079 
1080  if (ctx->ransac_err)
1081  av_freep(&ctx->ransac_err);
1082 
1083  if (ctx->matches_host)
1084  av_freep(&ctx->matches_host);
1085 
1086  if (ctx->matches_contig_host)
1087  av_freep(&ctx->matches_contig_host);
1088 
1089  if (ctx->inliers)
1090  av_freep(&ctx->inliers);
1091 
1092  ff_framequeue_free(&ctx->fq);
1093 
1094  CL_RELEASE_KERNEL(ctx->kernel_grayscale);
1095  CL_RELEASE_KERNEL(ctx->kernel_harris_response);
1096  CL_RELEASE_KERNEL(ctx->kernel_refine_features);
1097  CL_RELEASE_KERNEL(ctx->kernel_brief_descriptors);
1098  CL_RELEASE_KERNEL(ctx->kernel_match_descriptors);
1099  CL_RELEASE_KERNEL(ctx->kernel_crop_upscale);
1100  if (ctx->debug_on)
1101  CL_RELEASE_KERNEL(ctx->kernel_draw_debug_info);
1102 
1103  CL_RELEASE_QUEUE(ctx->command_queue);
1104 
1105  if (!ctx->is_yuv)
1106  CL_RELEASE_MEMORY(ctx->grayscale);
1107  CL_RELEASE_MEMORY(ctx->harris_buf);
1108  CL_RELEASE_MEMORY(ctx->refined_features);
1109  CL_RELEASE_MEMORY(ctx->prev_refined_features);
1110  CL_RELEASE_MEMORY(ctx->brief_pattern);
1111  CL_RELEASE_MEMORY(ctx->descriptors);
1112  CL_RELEASE_MEMORY(ctx->prev_descriptors);
1113  CL_RELEASE_MEMORY(ctx->matches);
1114  CL_RELEASE_MEMORY(ctx->matches_contig);
1115  CL_RELEASE_MEMORY(ctx->transform_y);
1116  CL_RELEASE_MEMORY(ctx->transform_uv);
1117  if (ctx->debug_on) {
1118  CL_RELEASE_MEMORY(ctx->debug_matches);
1119  CL_RELEASE_MEMORY(ctx->debug_model_matches);
1120  }
1121 
1122  ff_opencl_filter_uninit(avctx);
1123 }
1124 
1126 {
1127  DeshakeOpenCLContext *ctx = avctx->priv;
1128  AVFilterLink *outlink = avctx->outputs[0];
1129  AVFilterLink *inlink = avctx->inputs[0];
1130  // Pointer to the host-side pattern buffer to be initialized and then copied
1131  // to the GPU
1132  PointPair *pattern_host = NULL;
1133  cl_int cle;
1134  int err;
1135  cl_ulong8 zeroed_ulong8;
1136  FFFrameQueueGlobal fqg;
1137  cl_image_format grayscale_format;
1138  cl_image_desc grayscale_desc;
1139  cl_command_queue_properties queue_props;
1140 
1141  const enum AVPixelFormat disallowed_formats[14] = {
1156  };
1157 
1158  // Number of elements for an array
1159  const int image_grid_32 = ROUNDED_UP_DIV(outlink->h, 32) * ROUNDED_UP_DIV(outlink->w, 32);
1160 
1161  const int descriptor_buf_size = image_grid_32 * (BREIFN / 8);
1162  const int features_buf_size = image_grid_32 * sizeof(cl_float2);
1163 
1164  const AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
1165  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hw_frames_ctx->sw_format);
1166 
1167  av_assert0(hw_frames_ctx);
1168  av_assert0(desc);
1169 
1171  ff_framequeue_init(&ctx->fq, &fqg);
1172  ctx->eof = 0;
1173  ctx->smooth_window = (int)(av_q2d(avctx->inputs[0]->frame_rate) * ctx->smooth_window_multiplier);
1174  ctx->curr_frame = 0;
1175 
1176  memset(&zeroed_ulong8, 0, sizeof(cl_ulong8));
1177 
1178  ctx->gauss_kernel = av_malloc_array(ctx->smooth_window, sizeof(float));
1179  if (!ctx->gauss_kernel) {
1180  err = AVERROR(ENOMEM);
1181  goto fail;
1182  }
1183 
1184  ctx->ransac_err = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(float));
1185  if (!ctx->ransac_err) {
1186  err = AVERROR(ENOMEM);
1187  goto fail;
1188  }
1189 
1190  for (int i = 0; i < RingbufCount; i++) {
1191  ctx->abs_motion.ringbuffers[i] = av_fifo_alloc_array(
1192  ctx->smooth_window,
1193  sizeof(float)
1194  );
1195 
1196  if (!ctx->abs_motion.ringbuffers[i]) {
1197  err = AVERROR(ENOMEM);
1198  goto fail;
1199  }
1200  }
1201 
1202  if (ctx->debug_on) {
1203  ctx->abs_motion.debug_matches = av_fifo_alloc_array(
1204  ctx->smooth_window / 2,
1205  sizeof(DebugMatches)
1206  );
1207 
1208  if (!ctx->abs_motion.debug_matches) {
1209  err = AVERROR(ENOMEM);
1210  goto fail;
1211  }
1212  }
1213 
1214  ctx->abs_motion.curr_frame_offset = 0;
1215  ctx->abs_motion.data_start_offset = -1;
1216  ctx->abs_motion.data_end_offset = -1;
1217 
1218  pattern_host = av_malloc_array(BREIFN, sizeof(PointPair));
1219  if (!pattern_host) {
1220  err = AVERROR(ENOMEM);
1221  goto fail;
1222  }
1223 
1224  ctx->matches_host = av_malloc_array(image_grid_32, sizeof(MotionVector));
1225  if (!ctx->matches_host) {
1226  err = AVERROR(ENOMEM);
1227  goto fail;
1228  }
1229 
1230  ctx->matches_contig_host = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(MotionVector));
1231  if (!ctx->matches_contig_host) {
1232  err = AVERROR(ENOMEM);
1233  goto fail;
1234  }
1235 
1236  ctx->inliers = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(MotionVector));
1237  if (!ctx->inliers) {
1238  err = AVERROR(ENOMEM);
1239  goto fail;
1240  }
1241 
1242  // Initializing the patch pattern for building BREIF descriptors with
1243  av_lfg_init(&ctx->alfg, 234342424);
1244  for (int i = 0; i < BREIFN; ++i) {
1245  PointPair pair;
1246 
1247  for (int j = 0; j < 2; ++j) {
1248  pair.p1.s[j] = rand_in(-BRIEF_PATCH_SIZE_HALF, BRIEF_PATCH_SIZE_HALF + 1, &ctx->alfg);
1249  pair.p2.s[j] = rand_in(-BRIEF_PATCH_SIZE_HALF, BRIEF_PATCH_SIZE_HALF + 1, &ctx->alfg);
1250  }
1251 
1252  pattern_host[i] = pair;
1253  }
1254 
1255  for (int i = 0; i < 14; i++) {
1256  if (ctx->sw_format == disallowed_formats[i]) {
1257  av_log(avctx, AV_LOG_ERROR, "unsupported format in deshake_opencl.\n");
1258  err = AVERROR(ENOSYS);
1259  goto fail;
1260  }
1261  }
1262 
1263  if (desc->flags & AV_PIX_FMT_FLAG_RGB) {
1264  ctx->is_yuv = 0;
1265  } else {
1266  ctx->is_yuv = 1;
1267  }
1268  ctx->sw_format = hw_frames_ctx->sw_format;
1269 
1271  if (err < 0)
1272  goto fail;
1273 
1274  if (ctx->debug_on) {
1275  queue_props = CL_QUEUE_PROFILING_ENABLE;
1276  } else {
1277  queue_props = 0;
1278  }
1279  ctx->command_queue = clCreateCommandQueue(
1280  ctx->ocf.hwctx->context,
1281  ctx->ocf.hwctx->device_id,
1282  queue_props,
1283  &cle
1284  );
1285  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL command queue %d.\n", cle);
1286 
1287  CL_CREATE_KERNEL(ctx, grayscale);
1288  CL_CREATE_KERNEL(ctx, harris_response);
1289  CL_CREATE_KERNEL(ctx, refine_features);
1290  CL_CREATE_KERNEL(ctx, brief_descriptors);
1291  CL_CREATE_KERNEL(ctx, match_descriptors);
1293  CL_CREATE_KERNEL(ctx, crop_upscale);
1294  if (ctx->debug_on)
1295  CL_CREATE_KERNEL(ctx, draw_debug_info);
1296 
1297  if (!ctx->is_yuv) {
1298  grayscale_format.image_channel_order = CL_R;
1299  grayscale_format.image_channel_data_type = CL_FLOAT;
1300 
1301  grayscale_desc = (cl_image_desc) {
1302  .image_type = CL_MEM_OBJECT_IMAGE2D,
1303  .image_width = outlink->w,
1304  .image_height = outlink->h,
1305  .image_depth = 0,
1306  .image_array_size = 0,
1307  .image_row_pitch = 0,
1308  .image_slice_pitch = 0,
1309  .num_mip_levels = 0,
1310  .num_samples = 0,
1311  .buffer = NULL,
1312  };
1313 
1314  ctx->grayscale = clCreateImage(
1315  ctx->ocf.hwctx->context,
1316  0,
1317  &grayscale_format,
1318  &grayscale_desc,
1319  NULL,
1320  &cle
1321  );
1322  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create grayscale image: %d.\n", cle);
1323  }
1324 
1325  CL_CREATE_BUFFER(ctx, harris_buf, outlink->h * outlink->w * sizeof(float));
1326  CL_CREATE_BUFFER(ctx, refined_features, features_buf_size);
1327  CL_CREATE_BUFFER(ctx, prev_refined_features, features_buf_size);
1329  ctx,
1330  brief_pattern,
1331  CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1332  BREIFN * sizeof(PointPair),
1333  pattern_host
1334  );
1335  CL_CREATE_BUFFER(ctx, descriptors, descriptor_buf_size);
1336  CL_CREATE_BUFFER(ctx, prev_descriptors, descriptor_buf_size);
1337  CL_CREATE_BUFFER(ctx, matches, image_grid_32 * sizeof(MotionVector));
1338  CL_CREATE_BUFFER(ctx, matches_contig, MATCHES_CONTIG_SIZE * sizeof(MotionVector));
1339  CL_CREATE_BUFFER(ctx, transform_y, 9 * sizeof(float));
1340  CL_CREATE_BUFFER(ctx, transform_uv, 9 * sizeof(float));
1341  if (ctx->debug_on) {
1342  CL_CREATE_BUFFER(ctx, debug_matches, MATCHES_CONTIG_SIZE * sizeof(MotionVector));
1343  CL_CREATE_BUFFER(ctx, debug_model_matches, 3 * sizeof(MotionVector));
1344  }
1345 
1346  ctx->initialized = 1;
1347  av_freep(&pattern_host);
1348 
1349  return 0;
1350 
1351 fail:
1352  av_freep(&pattern_host);
1353  return err;
1354 }
1355 
1356 // Logs debug information about the transform data
1357 static void transform_debug(AVFilterContext *avctx, float *new_vals, float *old_vals, int curr_frame) {
1358  av_log(avctx, AV_LOG_VERBOSE,
1359  "Frame %d:\n"
1360  "\tframe moved from: %f x, %f y\n"
1361  "\t to: %f x, %f y\n"
1362  "\t rotated from: %f degrees\n"
1363  "\t to: %f degrees\n"
1364  "\t scaled from: %f x, %f y\n"
1365  "\t to: %f x, %f y\n"
1366  "\n"
1367  "\tframe moved by: %f x, %f y\n"
1368  "\t rotated by: %f degrees\n"
1369  "\t scaled by: %f x, %f y\n",
1370  curr_frame,
1371  old_vals[RingbufX], old_vals[RingbufY],
1372  new_vals[RingbufX], new_vals[RingbufY],
1373  old_vals[RingbufRot] * (180.0 / M_PI),
1374  new_vals[RingbufRot] * (180.0 / M_PI),
1375  old_vals[RingbufScaleX], old_vals[RingbufScaleY],
1376  new_vals[RingbufScaleX], new_vals[RingbufScaleY],
1377  old_vals[RingbufX] - new_vals[RingbufX], old_vals[RingbufY] - new_vals[RingbufY],
1378  old_vals[RingbufRot] * (180.0 / M_PI) - new_vals[RingbufRot] * (180.0 / M_PI),
1379  new_vals[RingbufScaleX] / old_vals[RingbufScaleX], new_vals[RingbufScaleY] / old_vals[RingbufScaleY]
1380  );
1381 }
1382 
1383 // Uses the buffered motion information to determine a transform that smooths the
1384 // given frame and applies it
1385 static int filter_frame(AVFilterLink *link, AVFrame *input_frame)
1386 {
1387  AVFilterContext *avctx = link->dst;
1388  AVFilterLink *outlink = avctx->outputs[0];
1389  DeshakeOpenCLContext *deshake_ctx = avctx->priv;
1390  AVFrame *cropped_frame = NULL, *transformed_frame = NULL;
1391  int err;
1392  cl_int cle;
1393  float new_vals[RingbufCount];
1394  float old_vals[RingbufCount];
1395  // Luma (in the case of YUV) transform, or just the transform in the case of RGB
1396  float transform_y[9];
1397  // Chroma transform
1398  float transform_uv[9];
1399  // Luma crop transform (or RGB)
1400  float transform_crop_y[9];
1401  // Chroma crop transform
1402  float transform_crop_uv[9];
1403  float transform_debug_rgb[9];
1404  size_t global_work[2];
1405  int64_t duration;
1406  cl_mem src, transformed, dst;
1407  cl_mem transforms[3];
1408  CropInfo crops[3];
1409  cl_event transform_event, crop_upscale_event;
1410  DebugMatches debug_matches;
1411  cl_int num_model_matches;
1412 
1413  const float center_w = (float)input_frame->width / 2;
1414  const float center_h = (float)input_frame->height / 2;
1415 
1416  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(deshake_ctx->sw_format);
1417  const int chroma_width = AV_CEIL_RSHIFT(input_frame->width, desc->log2_chroma_w);
1418  const int chroma_height = AV_CEIL_RSHIFT(input_frame->height, desc->log2_chroma_h);
1419 
1420  const float center_w_chroma = (float)chroma_width / 2;
1421  const float center_h_chroma = (float)chroma_height / 2;
1422 
1423  const float luma_w_over_chroma_w = ((float)input_frame->width / (float)chroma_width);
1424  const float luma_h_over_chroma_h = ((float)input_frame->height / (float)chroma_height);
1425 
1426  if (deshake_ctx->debug_on) {
1428  deshake_ctx->abs_motion.debug_matches,
1429  &debug_matches,
1430  sizeof(DebugMatches),
1431  NULL
1432  );
1433  }
1434 
1435  if (input_frame->pkt_duration) {
1436  duration = input_frame->pkt_duration;
1437  } else {
1438  duration = av_rescale_q(1, av_inv_q(outlink->frame_rate), outlink->time_base);
1439  }
1440  deshake_ctx->duration = input_frame->pts + duration;
1441 
1442  // Get the absolute transform data for this frame
1443  for (int i = 0; i < RingbufCount; i++) {
1445  deshake_ctx->abs_motion.ringbuffers[i],
1446  &old_vals[i],
1447  deshake_ctx->abs_motion.curr_frame_offset * sizeof(float),
1448  sizeof(float),
1449  NULL
1450  );
1451  }
1452 
1453  if (deshake_ctx->tripod_mode) {
1454  // If tripod mode is turned on we simply undo all motion relative to the
1455  // first frame
1456 
1457  new_vals[RingbufX] = 0.0f;
1458  new_vals[RingbufY] = 0.0f;
1459  new_vals[RingbufRot] = 0.0f;
1460  new_vals[RingbufScaleX] = 1.0f;
1461  new_vals[RingbufScaleY] = 1.0f;
1462  } else {
1463  // Tripod mode is off and we need to smooth a moving camera
1464 
1465  new_vals[RingbufX] = smooth(
1466  deshake_ctx,
1467  deshake_ctx->gauss_kernel,
1468  deshake_ctx->smooth_window,
1469  input_frame->width,
1470  deshake_ctx->abs_motion.ringbuffers[RingbufX]
1471  );
1472  new_vals[RingbufY] = smooth(
1473  deshake_ctx,
1474  deshake_ctx->gauss_kernel,
1475  deshake_ctx->smooth_window,
1476  input_frame->height,
1477  deshake_ctx->abs_motion.ringbuffers[RingbufY]
1478  );
1479  new_vals[RingbufRot] = smooth(
1480  deshake_ctx,
1481  deshake_ctx->gauss_kernel,
1482  deshake_ctx->smooth_window,
1483  M_PI / 4,
1484  deshake_ctx->abs_motion.ringbuffers[RingbufRot]
1485  );
1486  new_vals[RingbufScaleX] = smooth(
1487  deshake_ctx,
1488  deshake_ctx->gauss_kernel,
1489  deshake_ctx->smooth_window,
1490  2.0f,
1491  deshake_ctx->abs_motion.ringbuffers[RingbufScaleX]
1492  );
1493  new_vals[RingbufScaleY] = smooth(
1494  deshake_ctx,
1495  deshake_ctx->gauss_kernel,
1496  deshake_ctx->smooth_window,
1497  2.0f,
1498  deshake_ctx->abs_motion.ringbuffers[RingbufScaleY]
1499  );
1500  }
1501 
1503  old_vals[RingbufX] - new_vals[RingbufX],
1504  old_vals[RingbufY] - new_vals[RingbufY],
1505  old_vals[RingbufRot] - new_vals[RingbufRot],
1506  new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
1507  new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
1508  center_w,
1509  center_h,
1510  transform_y
1511  );
1512 
1514  (old_vals[RingbufX] - new_vals[RingbufX]) / luma_w_over_chroma_w,
1515  (old_vals[RingbufY] - new_vals[RingbufY]) / luma_h_over_chroma_h,
1516  old_vals[RingbufRot] - new_vals[RingbufRot],
1517  new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
1518  new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
1519  center_w_chroma,
1520  center_h_chroma,
1521  transform_uv
1522  );
1523 
1524  CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_y, 9 * sizeof(float), transform_y, NULL);
1525  CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_uv, 9 * sizeof(float), transform_uv, NULL);
1526 
1527  if (deshake_ctx->debug_on)
1528  transform_debug(avctx, new_vals, old_vals, deshake_ctx->curr_frame);
1529 
1530  cropped_frame = ff_get_video_buffer(outlink, outlink->w, outlink->h);
1531  if (!cropped_frame) {
1532  err = AVERROR(ENOMEM);
1533  goto fail;
1534  }
1535 
1536  transformed_frame = ff_get_video_buffer(outlink, outlink->w, outlink->h);
1537  if (!transformed_frame) {
1538  err = AVERROR(ENOMEM);
1539  goto fail;
1540  }
1541 
1542  transforms[0] = deshake_ctx->transform_y;
1543  transforms[1] = transforms[2] = deshake_ctx->transform_uv;
1544 
1545  for (int p = 0; p < FF_ARRAY_ELEMS(transformed_frame->data); p++) {
1546  // Transform all of the planes appropriately
1547  src = (cl_mem)input_frame->data[p];
1548  transformed = (cl_mem)transformed_frame->data[p];
1549 
1550  if (!transformed)
1551  break;
1552 
1553  err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, p, 0);
1554  if (err < 0)
1555  goto fail;
1556 
1558  deshake_ctx->command_queue,
1559  deshake_ctx->kernel_transform,
1560  global_work,
1561  NULL,
1562  &transform_event,
1563  { sizeof(cl_mem), &src },
1564  { sizeof(cl_mem), &transformed },
1565  { sizeof(cl_mem), &transforms[p] },
1566  );
1567  }
1568 
1569  if (deshake_ctx->debug_on && !deshake_ctx->is_yuv && debug_matches.num_matches > 0) {
1571  deshake_ctx->command_queue,
1572  deshake_ctx->debug_matches,
1573  debug_matches.num_matches * sizeof(MotionVector),
1574  debug_matches.matches,
1575  NULL
1576  );
1577 
1579  deshake_ctx->command_queue,
1580  deshake_ctx->debug_model_matches,
1581  debug_matches.num_model_matches * sizeof(MotionVector),
1582  debug_matches.model_matches,
1583  NULL
1584  );
1585 
1586  num_model_matches = debug_matches.num_model_matches;
1587 
1588  // Invert the transform
1590  new_vals[RingbufX] - old_vals[RingbufX],
1591  new_vals[RingbufY] - old_vals[RingbufY],
1592  new_vals[RingbufRot] - old_vals[RingbufRot],
1593  old_vals[RingbufScaleX] / new_vals[RingbufScaleX],
1594  old_vals[RingbufScaleY] / new_vals[RingbufScaleY],
1595  center_w,
1596  center_h,
1597  transform_debug_rgb
1598  );
1599 
1600  CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_y, 9 * sizeof(float), transform_debug_rgb, NULL);
1601 
1602  transformed = (cl_mem)transformed_frame->data[0];
1604  deshake_ctx->command_queue,
1605  deshake_ctx->kernel_draw_debug_info,
1606  (size_t[]){ debug_matches.num_matches },
1607  NULL,
1608  NULL,
1609  { sizeof(cl_mem), &transformed },
1610  { sizeof(cl_mem), &deshake_ctx->debug_matches },
1611  { sizeof(cl_mem), &deshake_ctx->debug_model_matches },
1612  { sizeof(cl_int), &num_model_matches },
1613  { sizeof(cl_mem), &deshake_ctx->transform_y }
1614  );
1615  }
1616 
1617  if (deshake_ctx->should_crop) {
1618  // Generate transforms for cropping
1620  (old_vals[RingbufX] - new_vals[RingbufX]) / 5,
1621  (old_vals[RingbufY] - new_vals[RingbufY]) / 5,
1622  (old_vals[RingbufRot] - new_vals[RingbufRot]) / 5,
1623  new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
1624  new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
1625  center_w,
1626  center_h,
1627  transform_crop_y
1628  );
1629  update_needed_crop(&deshake_ctx->crop_y, transform_crop_y, input_frame->width, input_frame->height);
1630 
1632  (old_vals[RingbufX] - new_vals[RingbufX]) / (5 * luma_w_over_chroma_w),
1633  (old_vals[RingbufY] - new_vals[RingbufY]) / (5 * luma_h_over_chroma_h),
1634  (old_vals[RingbufRot] - new_vals[RingbufRot]) / 5,
1635  new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
1636  new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
1637  center_w_chroma,
1638  center_h_chroma,
1639  transform_crop_uv
1640  );
1641  update_needed_crop(&deshake_ctx->crop_uv, transform_crop_uv, chroma_width, chroma_height);
1642 
1643  crops[0] = deshake_ctx->crop_y;
1644  crops[1] = crops[2] = deshake_ctx->crop_uv;
1645 
1646  for (int p = 0; p < FF_ARRAY_ELEMS(cropped_frame->data); p++) {
1647  // Crop all of the planes appropriately
1648  dst = (cl_mem)cropped_frame->data[p];
1649  transformed = (cl_mem)transformed_frame->data[p];
1650 
1651  if (!dst)
1652  break;
1653 
1654  err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, p, 0);
1655  if (err < 0)
1656  goto fail;
1657 
1659  deshake_ctx->command_queue,
1660  deshake_ctx->kernel_crop_upscale,
1661  global_work,
1662  NULL,
1663  &crop_upscale_event,
1664  { sizeof(cl_mem), &transformed },
1665  { sizeof(cl_mem), &dst },
1666  { sizeof(cl_float2), &crops[p].top_left },
1667  { sizeof(cl_float2), &crops[p].bottom_right },
1668  );
1669  }
1670  }
1671 
1672  if (deshake_ctx->curr_frame < deshake_ctx->smooth_window / 2) {
1673  // This means we are somewhere at the start of the video. We need to
1674  // increment the current frame offset until it reaches the center of
1675  // the ringbuffers (as the current frame will be located there for
1676  // the rest of the video).
1677  //
1678  // The end of the video is taken care of by draining motion data
1679  // one-by-one out of the buffer, causing the (at that point fixed)
1680  // offset to move towards later frames' data.
1681  ++deshake_ctx->abs_motion.curr_frame_offset;
1682  }
1683 
1684  if (deshake_ctx->abs_motion.data_end_offset != -1) {
1685  // Keep the end offset in sync with the frame it's supposed to be
1686  // positioned at
1687  --deshake_ctx->abs_motion.data_end_offset;
1688 
1689  if (deshake_ctx->abs_motion.data_end_offset == deshake_ctx->abs_motion.curr_frame_offset - 1) {
1690  // The end offset would be the start of the new video sequence; flip to
1691  // start offset
1692  deshake_ctx->abs_motion.data_end_offset = -1;
1693  deshake_ctx->abs_motion.data_start_offset = deshake_ctx->abs_motion.curr_frame_offset;
1694  }
1695  } else if (deshake_ctx->abs_motion.data_start_offset != -1) {
1696  // Keep the start offset in sync with the frame it's supposed to be
1697  // positioned at
1698  --deshake_ctx->abs_motion.data_start_offset;
1699  }
1700 
1701  if (deshake_ctx->debug_on) {
1702  deshake_ctx->transform_time += ff_opencl_get_event_time(transform_event);
1703  if (deshake_ctx->should_crop) {
1704  deshake_ctx->crop_upscale_time += ff_opencl_get_event_time(crop_upscale_event);
1705  }
1706  }
1707 
1708  ++deshake_ctx->curr_frame;
1709 
1710  if (deshake_ctx->debug_on)
1711  av_freep(&debug_matches.matches);
1712 
1713  if (deshake_ctx->should_crop) {
1714  err = av_frame_copy_props(cropped_frame, input_frame);
1715  if (err < 0)
1716  goto fail;
1717 
1718  av_frame_free(&transformed_frame);
1719  av_frame_free(&input_frame);
1720  return ff_filter_frame(outlink, cropped_frame);
1721 
1722  } else {
1723  err = av_frame_copy_props(transformed_frame, input_frame);
1724  if (err < 0)
1725  goto fail;
1726 
1727  av_frame_free(&cropped_frame);
1728  av_frame_free(&input_frame);
1729  return ff_filter_frame(outlink, transformed_frame);
1730  }
1731 
1732 fail:
1733  clFinish(deshake_ctx->command_queue);
1734 
1735  if (deshake_ctx->debug_on)
1736  if (debug_matches.matches)
1737  av_freep(&debug_matches.matches);
1738 
1739  av_frame_free(&input_frame);
1740  av_frame_free(&transformed_frame);
1741  av_frame_free(&cropped_frame);
1742  return err;
1743 }
1744 
1745 // Add the given frame to the frame queue to eventually be processed.
1746 //
1747 // Also determines the motion from the previous frame and updates the stored
1748 // motion information accordingly.
1749 static int queue_frame(AVFilterLink *link, AVFrame *input_frame)
1750 {
1751  AVFilterContext *avctx = link->dst;
1752  DeshakeOpenCLContext *deshake_ctx = avctx->priv;
1753  int err;
1754  int num_vectors;
1755  int num_inliers = 0;
1756  cl_int cle;
1757  FrameDelta relative;
1758  SimilarityMatrix model;
1759  size_t global_work[2];
1760  size_t harris_global_work[2];
1761  size_t grid_32_global_work[2];
1762  int grid_32_h, grid_32_w;
1763  size_t local_work[2];
1764  cl_mem src, temp;
1765  float prev_vals[5];
1766  float new_vals[5];
1767  cl_event grayscale_event, harris_response_event, refine_features_event,
1768  brief_event, match_descriptors_event, read_buf_event;
1769  DebugMatches debug_matches;
1770 
1771  num_vectors = 0;
1772 
1773  local_work[0] = 8;
1774  local_work[1] = 8;
1775 
1776  err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, 0, 0);
1777  if (err < 0)
1778  goto fail;
1779 
1780  err = ff_opencl_filter_work_size_from_image(avctx, harris_global_work, input_frame, 0, 8);
1781  if (err < 0)
1782  goto fail;
1783 
1784  err = ff_opencl_filter_work_size_from_image(avctx, grid_32_global_work, input_frame, 0, 32);
1785  if (err < 0)
1786  goto fail;
1787 
1788  // We want a single work-item for each 32x32 block of pixels in the input frame
1789  grid_32_global_work[0] /= 32;
1790  grid_32_global_work[1] /= 32;
1791 
1792  grid_32_h = ROUNDED_UP_DIV(input_frame->height, 32);
1793  grid_32_w = ROUNDED_UP_DIV(input_frame->width, 32);
1794 
1795  if (deshake_ctx->is_yuv) {
1796  deshake_ctx->grayscale = (cl_mem)input_frame->data[0];
1797  } else {
1798  src = (cl_mem)input_frame->data[0];
1799 
1801  deshake_ctx->command_queue,
1802  deshake_ctx->kernel_grayscale,
1803  global_work,
1804  NULL,
1805  &grayscale_event,
1806  { sizeof(cl_mem), &src },
1807  { sizeof(cl_mem), &deshake_ctx->grayscale }
1808  );
1809  }
1810 
1812  deshake_ctx->command_queue,
1813  deshake_ctx->kernel_harris_response,
1814  harris_global_work,
1815  local_work,
1816  &harris_response_event,
1817  { sizeof(cl_mem), &deshake_ctx->grayscale },
1818  { sizeof(cl_mem), &deshake_ctx->harris_buf }
1819  );
1820 
1822  deshake_ctx->command_queue,
1823  deshake_ctx->kernel_refine_features,
1824  grid_32_global_work,
1825  NULL,
1826  &refine_features_event,
1827  { sizeof(cl_mem), &deshake_ctx->grayscale },
1828  { sizeof(cl_mem), &deshake_ctx->harris_buf },
1829  { sizeof(cl_mem), &deshake_ctx->refined_features },
1830  { sizeof(cl_int), &deshake_ctx->refine_features }
1831  );
1832 
1834  deshake_ctx->command_queue,
1835  deshake_ctx->kernel_brief_descriptors,
1836  grid_32_global_work,
1837  NULL,
1838  &brief_event,
1839  { sizeof(cl_mem), &deshake_ctx->grayscale },
1840  { sizeof(cl_mem), &deshake_ctx->refined_features },
1841  { sizeof(cl_mem), &deshake_ctx->descriptors },
1842  { sizeof(cl_mem), &deshake_ctx->brief_pattern}
1843  );
1844 
1845  if (av_fifo_size(deshake_ctx->abs_motion.ringbuffers[RingbufX]) == 0) {
1846  // This is the first frame we've been given to queue, meaning there is
1847  // no previous frame to match descriptors to
1848 
1849  goto no_motion_data;
1850  }
1851 
1853  deshake_ctx->command_queue,
1854  deshake_ctx->kernel_match_descriptors,
1855  grid_32_global_work,
1856  NULL,
1857  &match_descriptors_event,
1858  { sizeof(cl_mem), &deshake_ctx->prev_refined_features },
1859  { sizeof(cl_mem), &deshake_ctx->refined_features },
1860  { sizeof(cl_mem), &deshake_ctx->descriptors },
1861  { sizeof(cl_mem), &deshake_ctx->prev_descriptors },
1862  { sizeof(cl_mem), &deshake_ctx->matches }
1863  );
1864 
1865  cle = clEnqueueReadBuffer(
1866  deshake_ctx->command_queue,
1867  deshake_ctx->matches,
1868  CL_TRUE,
1869  0,
1870  grid_32_h * grid_32_w * sizeof(MotionVector),
1871  deshake_ctx->matches_host,
1872  0,
1873  NULL,
1874  &read_buf_event
1875  );
1876  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to read matches to host: %d.\n", cle);
1877 
1878  num_vectors = make_vectors_contig(deshake_ctx, grid_32_h, grid_32_w);
1879 
1880  if (num_vectors < 10) {
1881  // Not enough matches to get reliable motion data for this frame
1882  //
1883  // From this point on all data is relative to this frame rather than the
1884  // original frame. We have to make sure that we don't mix values that were
1885  // relative to the original frame with the new values relative to this
1886  // frame when doing the gaussian smoothing. We keep track of where the old
1887  // values end using this data_end_offset field in order to accomplish
1888  // that goal.
1889  //
1890  // If no motion data is present for multiple frames in a short window of
1891  // time, we leave the end where it was to avoid mixing 0s in with the
1892  // old data (and just treat them all as part of the new values)
1893  if (deshake_ctx->abs_motion.data_end_offset == -1) {
1894  deshake_ctx->abs_motion.data_end_offset =
1895  av_fifo_size(deshake_ctx->abs_motion.ringbuffers[RingbufX]) / sizeof(float) - 1;
1896  }
1897 
1898  goto no_motion_data;
1899  }
1900 
1901  if (!estimate_affine_2d(
1902  deshake_ctx,
1903  deshake_ctx->matches_contig_host,
1904  &debug_matches,
1905  num_vectors,
1906  model.matrix,
1907  10.0,
1908  3000,
1909  0.999999999999
1910  )) {
1911  goto no_motion_data;
1912  }
1913 
1914  for (int i = 0; i < num_vectors; i++) {
1915  if (deshake_ctx->matches_contig_host[i].should_consider) {
1916  deshake_ctx->inliers[num_inliers] = deshake_ctx->matches_contig_host[i];
1917  num_inliers++;
1918  }
1919  }
1920 
1921  if (!minimize_error(
1922  deshake_ctx,
1923  deshake_ctx->inliers,
1924  &debug_matches,
1925  num_inliers,
1926  model.matrix,
1927  400
1928  )) {
1929  goto no_motion_data;
1930  }
1931 
1932 
1933  relative = decompose_transform(model.matrix);
1934 
1935  // Get the absolute transform data for the previous frame
1936  for (int i = 0; i < RingbufCount; i++) {
1938  deshake_ctx->abs_motion.ringbuffers[i],
1939  &prev_vals[i],
1940  av_fifo_size(deshake_ctx->abs_motion.ringbuffers[i]) - sizeof(float),
1941  sizeof(float),
1942  NULL
1943  );
1944  }
1945 
1946  new_vals[RingbufX] = prev_vals[RingbufX] + relative.translation.s[0];
1947  new_vals[RingbufY] = prev_vals[RingbufY] + relative.translation.s[1];
1948  new_vals[RingbufRot] = prev_vals[RingbufRot] + relative.rotation;
1949  new_vals[RingbufScaleX] = prev_vals[RingbufScaleX] / relative.scale.s[0];
1950  new_vals[RingbufScaleY] = prev_vals[RingbufScaleY] / relative.scale.s[1];
1951 
1952  if (deshake_ctx->debug_on) {
1953  if (!deshake_ctx->is_yuv) {
1954  deshake_ctx->grayscale_time += ff_opencl_get_event_time(grayscale_event);
1955  }
1956  deshake_ctx->harris_response_time += ff_opencl_get_event_time(harris_response_event);
1957  deshake_ctx->refine_features_time += ff_opencl_get_event_time(refine_features_event);
1958  deshake_ctx->brief_descriptors_time += ff_opencl_get_event_time(brief_event);
1959  deshake_ctx->match_descriptors_time += ff_opencl_get_event_time(match_descriptors_event);
1960  deshake_ctx->read_buf_time += ff_opencl_get_event_time(read_buf_event);
1961  }
1962 
1963  goto end;
1964 
1965 no_motion_data:
1966  new_vals[RingbufX] = 0.0f;
1967  new_vals[RingbufY] = 0.0f;
1968  new_vals[RingbufRot] = 0.0f;
1969  new_vals[RingbufScaleX] = 1.0f;
1970  new_vals[RingbufScaleY] = 1.0f;
1971 
1972  for (int i = 0; i < num_vectors; i++) {
1973  deshake_ctx->matches_contig_host[i].should_consider = 0;
1974  }
1975  debug_matches.num_model_matches = 0;
1976 
1977  if (deshake_ctx->debug_on) {
1978  av_log(avctx, AV_LOG_VERBOSE,
1979  "\n[ALERT] No motion data found in queue_frame, motion reset to 0\n\n"
1980  );
1981  }
1982 
1983  goto end;
1984 
1985 end:
1986  // Swap the descriptor buffers (we don't need the previous frame's descriptors
1987  // again so we will use that space for the next frame's descriptors)
1988  temp = deshake_ctx->prev_descriptors;
1989  deshake_ctx->prev_descriptors = deshake_ctx->descriptors;
1990  deshake_ctx->descriptors = temp;
1991 
1992  // Same for the refined features
1993  temp = deshake_ctx->prev_refined_features;
1994  deshake_ctx->prev_refined_features = deshake_ctx->refined_features;
1995  deshake_ctx->refined_features = temp;
1996 
1997  if (deshake_ctx->debug_on) {
1998  if (num_vectors == 0) {
1999  debug_matches.matches = NULL;
2000  } else {
2001  debug_matches.matches = av_malloc_array(num_vectors, sizeof(MotionVector));
2002 
2003  if (!debug_matches.matches) {
2004  err = AVERROR(ENOMEM);
2005  goto fail;
2006  }
2007  }
2008 
2009  for (int i = 0; i < num_vectors; i++) {
2010  debug_matches.matches[i] = deshake_ctx->matches_contig_host[i];
2011  }
2012  debug_matches.num_matches = num_vectors;
2013 
2015  deshake_ctx->abs_motion.debug_matches,
2016  &debug_matches,
2017  sizeof(DebugMatches),
2018  NULL
2019  );
2020  }
2021 
2022  for (int i = 0; i < RingbufCount; i++) {
2024  deshake_ctx->abs_motion.ringbuffers[i],
2025  &new_vals[i],
2026  sizeof(float),
2027  NULL
2028  );
2029  }
2030 
2031  return ff_framequeue_add(&deshake_ctx->fq, input_frame);
2032 
2033 fail:
2034  clFinish(deshake_ctx->command_queue);
2035  av_frame_free(&input_frame);
2036  return err;
2037 }
2038 
2040 {
2041  AVFilterLink *inlink = ctx->inputs[0];
2042  AVFilterLink *outlink = ctx->outputs[0];
2043  DeshakeOpenCLContext *deshake_ctx = ctx->priv;
2044  AVFrame *frame = NULL;
2045  int ret, status;
2046  int64_t pts;
2047 
2049 
2050  if (!deshake_ctx->eof) {
2052  if (ret < 0)
2053  return ret;
2054  if (ret > 0) {
2055  if (!frame->hw_frames_ctx)
2056  return AVERROR(EINVAL);
2057 
2058  if (!deshake_ctx->initialized) {
2060  if (ret < 0)
2061  return ret;
2062  }
2063 
2064  // If there is no more space in the ringbuffers, remove the oldest
2065  // values to make room for the new ones
2066  if (av_fifo_space(deshake_ctx->abs_motion.ringbuffers[RingbufX]) == 0) {
2067  for (int i = 0; i < RingbufCount; i++) {
2068  av_fifo_drain(deshake_ctx->abs_motion.ringbuffers[i], sizeof(float));
2069  }
2070  }
2072  if (ret < 0)
2073  return ret;
2074  if (ret >= 0) {
2075  // See if we have enough buffered frames to process one
2076  //
2077  // "enough" is half the smooth window of queued frames into the future
2078  if (ff_framequeue_queued_frames(&deshake_ctx->fq) >= deshake_ctx->smooth_window / 2) {
2079  return filter_frame(inlink, ff_framequeue_take(&deshake_ctx->fq));
2080  }
2081  }
2082  }
2083  }
2084 
2085  if (!deshake_ctx->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
2086  if (status == AVERROR_EOF) {
2087  deshake_ctx->eof = 1;
2088  }
2089  }
2090 
2091  if (deshake_ctx->eof) {
2092  // Finish processing the rest of the frames in the queue.
2093  while(ff_framequeue_queued_frames(&deshake_ctx->fq) != 0) {
2094  for (int i = 0; i < RingbufCount; i++) {
2095  av_fifo_drain(deshake_ctx->abs_motion.ringbuffers[i], sizeof(float));
2096  }
2097 
2098  ret = filter_frame(inlink, ff_framequeue_take(&deshake_ctx->fq));
2099  if (ret < 0) {
2100  return ret;
2101  }
2102  }
2103 
2104  if (deshake_ctx->debug_on) {
2106  "Average kernel execution times:\n"
2107  "\t grayscale: %0.3f ms\n"
2108  "\t harris_response: %0.3f ms\n"
2109  "\t refine_features: %0.3f ms\n"
2110  "\tbrief_descriptors: %0.3f ms\n"
2111  "\tmatch_descriptors: %0.3f ms\n"
2112  "\t transform: %0.3f ms\n"
2113  "\t crop_upscale: %0.3f ms\n"
2114  "Average buffer read times:\n"
2115  "\t features buf: %0.3f ms\n",
2116  averaged_event_time_ms(deshake_ctx->grayscale_time, deshake_ctx->curr_frame),
2117  averaged_event_time_ms(deshake_ctx->harris_response_time, deshake_ctx->curr_frame),
2118  averaged_event_time_ms(deshake_ctx->refine_features_time, deshake_ctx->curr_frame),
2119  averaged_event_time_ms(deshake_ctx->brief_descriptors_time, deshake_ctx->curr_frame),
2120  averaged_event_time_ms(deshake_ctx->match_descriptors_time, deshake_ctx->curr_frame),
2121  averaged_event_time_ms(deshake_ctx->transform_time, deshake_ctx->curr_frame),
2122  averaged_event_time_ms(deshake_ctx->crop_upscale_time, deshake_ctx->curr_frame),
2123  averaged_event_time_ms(deshake_ctx->read_buf_time, deshake_ctx->curr_frame)
2124  );
2125  }
2126 
2127  ff_outlink_set_status(outlink, AVERROR_EOF, deshake_ctx->duration);
2128  return 0;
2129  }
2130 
2131  if (!deshake_ctx->eof) {
2132  FF_FILTER_FORWARD_WANTED(outlink, inlink);
2133  }
2134 
2135  return FFERROR_NOT_READY;
2136 }
2137 
2139  {
2140  .name = "default",
2141  .type = AVMEDIA_TYPE_VIDEO,
2142  .config_props = &ff_opencl_filter_config_input,
2143  },
2144  { NULL }
2145 };
2146 
2148  {
2149  .name = "default",
2150  .type = AVMEDIA_TYPE_VIDEO,
2151  .config_props = &ff_opencl_filter_config_output,
2152  },
2153  { NULL }
2154 };
2155 
2156 #define OFFSET(x) offsetof(DeshakeOpenCLContext, x)
2157 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
2158 
2160  {
2161  "tripod", "simulates a tripod by preventing any camera movement whatsoever "
2162  "from the original frame",
2163  OFFSET(tripod_mode), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS
2164  },
2165  {
2166  "debug", "turn on additional debugging information",
2167  OFFSET(debug_on), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS
2168  },
2169  {
2170  "adaptive_crop", "attempt to subtly crop borders to reduce mirrored content",
2171  OFFSET(should_crop), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS
2172  },
2173  {
2174  "refine_features", "refine feature point locations at a sub-pixel level",
2175  OFFSET(refine_features), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS
2176  },
2177  {
2178  "smooth_strength", "smoothing strength (0 attempts to adaptively determine optimal strength)",
2179  OFFSET(smooth_percent), AV_OPT_TYPE_FLOAT, {.dbl = 0.0f}, 0.0f, 1.0f, FLAGS
2180  },
2181  {
2182  "smooth_window_multiplier", "multiplier for number of frames to buffer for motion data",
2183  OFFSET(smooth_window_multiplier), AV_OPT_TYPE_FLOAT, {.dbl = 2.0}, 0.1, 10.0, FLAGS
2184  },
2185  { NULL }
2186 };
2187 
2188 AVFILTER_DEFINE_CLASS(deshake_opencl);
2189 
2191  .name = "deshake_opencl",
2192  .description = NULL_IF_CONFIG_SMALL("Feature-point based video stabilization filter"),
2193  .priv_size = sizeof(DeshakeOpenCLContext),
2194  .priv_class = &deshake_opencl_class,
2198  .activate = activate,
2201  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE
2202 };
deshake_opencl_inputs
static const AVFilterPad deshake_opencl_inputs[]
Definition: vf_deshake_opencl.c:2138
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(deshake_opencl)
MATCHES_CONTIG_SIZE
#define MATCHES_CONTIG_SIZE
Definition: vf_deshake_opencl.c:93
ff_get_video_buffer
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:99
CL_RUN_KERNEL_WITH_ARGS
#define CL_RUN_KERNEL_WITH_ARGS(queue, kernel, global_work_size, local_work_size, event,...)
Uses the above macro to enqueue the given kernel and then additionally runs it to completion via clFi...
Definition: opencl.h:180
F2
#define F2(l, r, i)
Definition: cast5.c:40
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
free_debug_matches
static void free_debug_matches(AbsoluteFrameMotion *afm)
Definition: vf_deshake_opencl.c:152
status
they must not be accessed directly The fifo field contains the frames that are queued in the input for processing by the filter The status_in and status_out fields contains the queued status(EOF or error) of the link
FrameDelta::translation
cl_float2 translation
Definition: vf_deshake_opencl.c:175
init
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
IterIndices
Definition: vf_deshake_opencl.c:195
r
const char * r
Definition: vf_curves.c:114
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
DeshakeOpenCLContext::brief_pattern
cl_mem brief_pattern
Definition: vf_deshake_opencl.c:265
deshake_opencl_init
static int deshake_opencl_init(AVFilterContext *avctx)
Definition: vf_deshake_opencl.c:1125
estimate_affine_2d
static int estimate_affine_2d(DeshakeOpenCLContext *deshake_ctx, MotionVector *point_pairs, DebugMatches *debug_matches, const int num_point_pairs, double *model_out, const double threshold, const int max_iters, const double confidence)
Definition: vf_deshake_opencl.c:527
av_fifo_generic_write
int av_fifo_generic_write(AVFifoBuffer *f, void *src, int size, int(*func)(void *, void *, int))
Feed data from a user-supplied callback to an AVFifoBuffer.
Definition: fifo.c:122
minimize_error
static int minimize_error(DeshakeOpenCLContext *deshake_ctx, MotionVector *inliers, DebugMatches *debug_matches, const int num_inliers, double *model_out, const int max_iters)
Definition: vf_deshake_opencl.c:683
OFFSET
#define OFFSET(x)
Definition: vf_deshake_opencl.c:2156
av_lfg_init
av_cold void av_lfg_init(AVLFG *c, unsigned int seed)
Definition: lfg.c:32
FF_FILTER_FLAG_HWFRAME_AWARE
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: internal.h:365
AV_PIX_FMT_GBRP16BE
@ AV_PIX_FMT_GBRP16BE
planar GBR 4:4:4 48bpp, big-endian
Definition: pixfmt.h:174
DeshakeOpenCLContext::refine_features_time
unsigned long long refine_features_time
Definition: vf_deshake_opencl.c:302
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1075
AbsoluteFrameMotion
Definition: vf_deshake_opencl.c:135
AV_PIX_FMT_GBRP10BE
@ AV_PIX_FMT_GBRP10BE
planar GBR 4:4:4 30bpp, big-endian
Definition: pixfmt.h:172
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2549
AVERROR_EOF
#define AVERROR_EOF
End of file.
Definition: error.h:55
FFERROR_NOT_READY
return FFERROR_NOT_READY
Definition: filter_design.txt:204
DeshakeOpenCLContext::kernel_draw_debug_info
cl_kernel kernel_draw_debug_info
Definition: vf_deshake_opencl.c:295
PointPair::p2
cl_float2 p2
Definition: vf_deshake_opencl.c:101
DeshakeOpenCLContext::transform_uv
cl_mem transform_uv
Definition: vf_deshake_opencl.c:276
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:203
DeshakeOpenCLContext::should_crop
int should_crop
Definition: vf_deshake_opencl.c:282
end
static av_cold int end(AVCodecContext *avctx)
Definition: avrndec.c:92
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:300
AVFrame::pts
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:393
AVFrame::width
int width
Definition: frame.h:358
DeshakeOpenCLContext::prev_descriptors
cl_mem prev_descriptors
Definition: vf_deshake_opencl.c:269
opencl.h
AVOption
AVOption.
Definition: opt.h:246
b
#define b
Definition: input.c:41
RingbufX
@ RingbufX
Definition: vf_deshake_opencl.c:112
RingbufRot
@ RingbufRot
Definition: vf_deshake_opencl.c:114
DeshakeOpenCLContext::abs_motion
AbsoluteFrameMotion abs_motion
Definition: vf_deshake_opencl.c:215
expf
#define expf(x)
Definition: libm.h:283
AVFrame::pkt_duration
int64_t pkt_duration
duration of the corresponding packet, expressed in AVStream->time_base units, 0 if unknown.
Definition: frame.h:579
AbsoluteFrameMotion::curr_frame_offset
int curr_frame_offset
Definition: vf_deshake_opencl.c:141
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:192
float.h
ff_opencl_filter_load_program
int ff_opencl_filter_load_program(AVFilterContext *avctx, const char **program_source_array, int nb_strings)
Load a new OpenCL program from strings in memory.
Definition: opencl.c:171
update_needed_crop
static void update_needed_crop(CropInfo *crop, float *transform, float frame_width, float frame_height)
Definition: vf_deshake_opencl.c:1005
ff_framequeue_init
void ff_framequeue_init(FFFrameQueue *fq, FFFrameQueueGlobal *fqg)
Init a frame queue and attach it to a global structure.
Definition: framequeue.c:47
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:148
av_fifo_generic_read
int av_fifo_generic_read(AVFifoBuffer *f, void *dest, int buf_size, void(*func)(void *, void *, int))
Feed data from an AVFifoBuffer to a user-supplied callback.
Definition: fifo.c:213
points_not_collinear
static int points_not_collinear(const cl_float2 **points)
Definition: vf_deshake_opencl.c:359
video.h
FF_FILTER_FORWARD_STATUS_BACK
#define FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink)
Forward the status on an output link to an input link.
Definition: filters.h:199
CL_CREATE_KERNEL
#define CL_CREATE_KERNEL(ctx, kernel_name)
Create a kernel with the given name.
Definition: opencl.h:93
av_fifo_drain
void av_fifo_drain(AVFifoBuffer *f, int size)
Discard data from the FIFO.
Definition: fifo.c:233
CL_CREATE_BUFFER
#define CL_CREATE_BUFFER(ctx, buffer_name, size)
Create a buffer with the given information.
Definition: opencl.h:237
AVFrame::data
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:314
BRIEF_PATCH_SIZE_HALF
#define BRIEF_PATCH_SIZE_HALF
Definition: vf_deshake_opencl.c:91
DeshakeOpenCLContext::harris_buf
cl_mem harris_buf
Definition: vf_deshake_opencl.c:257
DeshakeOpenCLContext::smooth_window_multiplier
float smooth_window_multiplier
Definition: vf_deshake_opencl.c:291
formats.h
AVFifoBuffer
Definition: fifo.h:31
AV_PIX_FMT_GBRAP12LE
@ AV_PIX_FMT_GBRAP12LE
planar GBR 4:4:4:4 48bpp, little-endian
Definition: pixfmt.h:288
ff_inlink_consume_frame
int ff_inlink_consume_frame(AVFilterLink *link, AVFrame **rframe)
Take a frame from the link's FIFO and update the link's stats.
Definition: avfilter.c:1476
ff_opencl_filter_work_size_from_image
int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx, size_t *work_size, AVFrame *frame, int plane, int block_alignment)
Find the work size needed needed for a given plane of an image.
Definition: opencl.c:278
AbsoluteFrameMotion::ringbuffers
AVFifoBuffer * ringbuffers[RingbufCount]
Definition: vf_deshake_opencl.c:137
fifo.h
DeshakeOpenCLContext::debug_matches
cl_mem debug_matches
Definition: vf_deshake_opencl.c:296
AbsoluteFrameMotion::data_end_offset
int data_end_offset
Definition: vf_deshake_opencl.c:145
AV_PIX_FMT_GBRAP
@ AV_PIX_FMT_GBRAP
planar GBRA 4:4:4:4 32bpp
Definition: pixfmt.h:215
AVFilterContext::priv
void * priv
private data for use by the filter
Definition: avfilter.h:353
IterIndices::start
int start
Definition: vf_deshake_opencl.c:196
fail
#define fail()
Definition: checkasm.h:123
FFSIGN
#define FFSIGN(a)
Definition: common.h:73
DeshakeOpenCLContext::curr_frame
int curr_frame
Definition: vf_deshake_opencl.c:221
DeshakeOpenCLContext::kernel_transform
cl_kernel kernel_transform
Definition: vf_deshake_opencl.c:251
RingbufScaleY
@ RingbufScaleY
Definition: vf_deshake_opencl.c:116
IterIndices::end
int end
Definition: vf_deshake_opencl.c:197
AbsoluteFrameMotion::debug_matches
AVFifoBuffer * debug_matches
Definition: vf_deshake_opencl.c:147
val
static double val(void *priv, double ch)
Definition: aeval.c:76
deshake_opencl_outputs
static const AVFilterPad deshake_opencl_outputs[]
Definition: vf_deshake_opencl.c:2147
pts
static int64_t pts
Definition: transcode_aac.c:647
ff_vf_deshake_opencl
AVFilter ff_vf_deshake_opencl
Definition: vf_deshake_opencl.c:2190
FFMIN3
#define FFMIN3(a, b, c)
Definition: common.h:97
DeshakeOpenCLContext::kernel_crop_upscale
cl_kernel kernel_crop_upscale
Definition: vf_deshake_opencl.c:252
DeshakeOpenCLContext::initialized
int initialized
Definition: vf_deshake_opencl.c:203
DeshakeOpenCLContext::smooth_window
int smooth_window
Definition: vf_deshake_opencl.c:219
FrameDelta::skew
cl_float2 skew
Definition: vf_deshake_opencl.c:178
RingbufScaleX
@ RingbufScaleX
Definition: vf_deshake_opencl.c:115
ff_opencl_filter_config_output
int ff_opencl_filter_config_output(AVFilterLink *outlink)
Create a suitable hardware frames context for the output.
Definition: opencl.c:96
AVFilterPad
A filter pad used for either input or output.
Definition: internal.h:54
DeshakeOpenCLContext::eof
int eof
Definition: vf_deshake_opencl.c:207
avassert.h
DeshakeOpenCLContext::read_buf_time
unsigned long long read_buf_time
Definition: vf_deshake_opencl.c:309
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
av_fifo_space
int av_fifo_space(const AVFifoBuffer *f)
Return the amount of space in bytes in the AVFifoBuffer, that is the amount of data you can write int...
Definition: fifo.c:82
av_cold
#define av_cold
Definition: attributes.h:90
DeshakeOpenCLContext::debug_model_matches
cl_mem debug_model_matches
Definition: vf_deshake_opencl.c:297
DebugMatches::num_model_matches
int num_model_matches
Definition: vf_deshake_opencl.c:130
duration
int64_t duration
Definition: movenc.c:63
AV_PIX_FMT_GBRAP16BE
@ AV_PIX_FMT_GBRAP16BE
planar GBRA 4:4:4:4 64bpp, big-endian
Definition: pixfmt.h:216
ff_outlink_set_status
static void ff_outlink_set_status(AVFilterLink *link, int status, int64_t pts)
Set the status field of a link from the source filter.
Definition: filters.h:189
FFMAX3
#define FFMAX3(a, b, c)
Definition: common.h:95
s
#define s(width, name)
Definition: cbs_vp9.c:257
AV_PIX_FMT_GBRP16LE
@ AV_PIX_FMT_GBRP16LE
planar GBR 4:4:4 48bpp, little-endian
Definition: pixfmt.h:175
RingbufY
@ RingbufY
Definition: vf_deshake_opencl.c:113
av_lfg_get
static unsigned int av_lfg_get(AVLFG *c)
Get the next random unsigned 32-bit number using an ALFG.
Definition: lfg.h:53
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:58
DeshakeOpenCLContext::smooth_percent
float smooth_percent
Definition: vf_deshake_opencl.c:288
FFFrameQueueGlobal
Structure to hold global options and statistics for frame queues.
Definition: framequeue.h:46
mi
#define mi
Definition: vf_colormatrix.c:108
DeshakeOpenCLContext::grayscale_time
unsigned long long grayscale_time
Definition: vf_deshake_opencl.c:300
DeshakeOpenCLContext::kernel_brief_descriptors
cl_kernel kernel_brief_descriptors
Definition: vf_deshake_opencl.c:249
av_q2d
static double av_q2d(AVRational a)
Convert an AVRational to a double.
Definition: rational.h:104
lfg.h
deshake_opencl_uninit
static av_cold void deshake_opencl_uninit(AVFilterContext *avctx)
Definition: vf_deshake_opencl.c:1066
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
outputs
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
filters.h
DeshakeOpenCLContext::kernel_match_descriptors
cl_kernel kernel_match_descriptors
Definition: vf_deshake_opencl.c:250
ctx
AVFormatContext * ctx
Definition: movenc.c:48
av_rescale_q
int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq)
Rescale a 64-bit integer by 2 rational numbers.
Definition: mathematics.c:142
AV_PIX_FMT_GBRP10LE
@ AV_PIX_FMT_GBRP10LE
planar GBR 4:4:4 30bpp, little-endian
Definition: pixfmt.h:173
f
#define f(width, name)
Definition: cbs_vp9.c:255
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
CL_RELEASE_KERNEL
#define CL_RELEASE_KERNEL(k)
release an OpenCL Kernel
Definition: opencl.h:101
ff_framequeue_take
AVFrame * ff_framequeue_take(FFFrameQueue *fq)
Take the first frame in the queue.
Definition: framequeue.c:98
RingbufferIndices
RingbufferIndices
Definition: vf_deshake_opencl.c:111
rand_in
static int rand_in(int low, int high, AVLFG *alfg)
Definition: vf_deshake_opencl.c:313
averaged_event_time_ms
static double averaged_event_time_ms(unsigned long long total_time, int num_frames)
Definition: vf_deshake_opencl.c:319
filter_frame
static int filter_frame(AVFilterLink *link, AVFrame *input_frame)
Definition: vf_deshake_opencl.c:1385
result
and forward the result(frame or status change) to the corresponding input. If nothing is possible
AV_PIX_FMT_GBRAP12BE
@ AV_PIX_FMT_GBRAP12BE
planar GBR 4:4:4:4 48bpp, big-endian
Definition: pixfmt.h:287
NULL
#define NULL
Definition: coverity.c:32
DeshakeOpenCLContext::alfg
AVLFG alfg
Definition: vf_deshake_opencl.c:210
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:659
transform_center_scale
static void transform_center_scale(float x_shift, float y_shift, float angle, float scale_x, float scale_y, float center_w, float center_h, float *matrix)
Definition: vf_deshake_opencl.c:967
ff_framequeue_add
int ff_framequeue_add(FFFrameQueue *fq, AVFrame *frame)
Add a frame.
Definition: framequeue.c:63
ff_framequeue_free
void ff_framequeue_free(FFFrameQueue *fq)
Free the queue and all queued frames.
Definition: framequeue.c:53
DeshakeOpenCLContext::ransac_err
float * ransac_err
Definition: vf_deshake_opencl.c:227
DeshakeOpenCLContext::kernel_harris_response
cl_kernel kernel_harris_response
Definition: vf_deshake_opencl.c:247
framequeue.h
transform
static const int8_t transform[32][32]
Definition: hevcdsp.c:27
AVFilterContext::inputs
AVFilterLink ** inputs
array of pointers to input links
Definition: avfilter.h:346
src
#define src
Definition: vp8dsp.c:254
start_end_for
static IterIndices start_end_for(DeshakeOpenCLContext *deshake_ctx, int length)
Definition: vf_deshake_opencl.c:843
DeshakeOpenCLContext::matches_contig
cl_mem matches_contig
Definition: vf_deshake_opencl.c:272
DebugMatches::model_matches
MotionVector model_matches[3]
Definition: vf_deshake_opencl.c:126
inputs
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several inputs
Definition: filter_design.txt:243
DeshakeOpenCLContext::duration
int64_t duration
Definition: vf_deshake_opencl.c:206
DeshakeOpenCLContext
Definition: vf_deshake_opencl.c:200
ff_inlink_acknowledge_status
int ff_inlink_acknowledge_status(AVFilterLink *link, int *rstatus, int64_t *rpts)
Test and acknowledge the change of status on the link.
Definition: avfilter.c:1431
DeshakeOpenCLContext::crop_uv
CropInfo crop_uv
Definition: vf_deshake_opencl.c:232
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
transformed_point
static cl_float2 transformed_point(float x, float y, float *transform)
Definition: vf_deshake_opencl.c:957
DeshakeOpenCLContext::harris_response_time
unsigned long long harris_response_time
Definition: vf_deshake_opencl.c:301
make_gauss_kernel
static void make_gauss_kernel(float *gauss_kernel, float length, float sigma)
Definition: vf_deshake_opencl.c:819
FFFrameQueue
Queue of AVFrame pointers.
Definition: framequeue.h:53
DeshakeOpenCLContext::crop_upscale_time
unsigned long long crop_upscale_time
Definition: vf_deshake_opencl.c:306
AVLFG
Context structure for the Lagged Fibonacci PRNG.
Definition: lfg.h:33
desc
const char * desc
Definition: nvenc.c:79
F3
#define F3(l, r, i)
Definition: cast5.c:33
DeshakeOpenCLContext::matches_contig_host
MotionVector * matches_contig_host
Definition: vf_deshake_opencl.c:241
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:188
powf
#define powf(x, y)
Definition: libm.h:50
CL_RELEASE_MEMORY
#define CL_RELEASE_MEMORY(m)
release an OpenCL Memory Object
Definition: opencl.h:114
AV_PIX_FMT_FLAG_RGB
#define AV_PIX_FMT_FLAG_RGB
The pixel format contains RGB-like data (as opposed to YUV/grayscale).
Definition: pixdesc.h:148
FFMAX
#define FFMAX(a, b)
Definition: common.h:94
hypot
static av_const double hypot(double x, double y)
Definition: libm.h:366
CropInfo::bottom_right
cl_float2 bottom_right
Definition: vf_deshake_opencl.c:190
AV_PIX_FMT_GBRP9BE
@ AV_PIX_FMT_GBRP9BE
planar GBR 4:4:4 27bpp, big-endian
Definition: pixfmt.h:170
get_subset
static int get_subset(AVLFG *alfg, const MotionVector *point_pairs, const int num_point_pairs, MotionVector *pairs_subset, int max_attempts)
Definition: vf_deshake_opencl.c:403
ROUNDED_UP_DIV
#define ROUNDED_UP_DIV(a, b)
Definition: vf_deshake_opencl.c:95
AV_PIX_FMT_GBRP9LE
@ AV_PIX_FMT_GBRP9LE
planar GBR 4:4:4 27bpp, little-endian
Definition: pixfmt.h:171
FLAGS
#define FLAGS
Definition: vf_deshake_opencl.c:2157
MotionVector::should_consider
cl_int should_consider
Definition: vf_deshake_opencl.c:107
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
AV_PIX_FMT_GBRAP10LE
@ AV_PIX_FMT_GBRAP10LE
planar GBR 4:4:4:4 40bpp, little-endian
Definition: pixfmt.h:291
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
FF_FILTER_FORWARD_WANTED
FF_FILTER_FORWARD_WANTED(outlink, inlink)
MotionVector
Definition: agm.c:54
opencl_source.h
M_PI
#define M_PI
Definition: mathematics.h:52
ff_opencl_source_deshake
const char * ff_opencl_source_deshake
ff_opencl_filter_config_input
int ff_opencl_filter_config_input(AVFilterLink *inlink)
Check that the input link contains a suitable hardware frames context and extract the device from it.
Definition: opencl.c:60
internal.h
AV_OPT_TYPE_FLOAT
@ AV_OPT_TYPE_FLOAT
Definition: opt.h:226
DeshakeOpenCLContext::inliers
MotionVector * inliers
Definition: vf_deshake_opencl.c:243
FrameDelta::rotation
float rotation
Definition: vf_deshake_opencl.c:176
ff_get_matrix
void ff_get_matrix(float x_shift, float y_shift, float angle, float scale_x, float scale_y, float *matrix)
Get an affine transformation matrix from given translation, rotation, and zoom factors.
Definition: transform.c:106
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
queue_frame
static int queue_frame(AVFilterLink *link, AVFrame *input_frame)
Definition: vf_deshake_opencl.c:1749
round
static av_always_inline av_const double round(double x)
Definition: libm.h:444
DeshakeOpenCLContext::transform_y
cl_mem transform_y
Definition: vf_deshake_opencl.c:274
transform_debug
static void transform_debug(AVFilterContext *avctx, float *new_vals, float *old_vals, int curr_frame)
Definition: vf_deshake_opencl.c:1357
av_malloc_array
#define av_malloc_array(a, b)
Definition: tableprint_vlc.h:32
common.h
av_fifo_alloc_array
AVFifoBuffer * av_fifo_alloc_array(size_t nmemb, size_t size)
Initialize an AVFifoBuffer.
Definition: fifo.c:49
make_vectors_contig
static int make_vectors_contig(DeshakeOpenCLContext *deshake_ctx, int size_y, int size_x)
Definition: vf_deshake_opencl.c:787
delta
float delta
Definition: vorbis_enc_data.h:457
activate
static int activate(AVFilterContext *ctx)
Definition: vf_deshake_opencl.c:2039
DeshakeOpenCLContext::is_yuv
int is_yuv
Definition: vf_deshake_opencl.c:235
DeshakeOpenCLContext::fq
FFFrameQueue fq
Definition: vf_deshake_opencl.c:213
av_inv_q
static av_always_inline AVRational av_inv_q(AVRational q)
Invert a rational.
Definition: rational.h:159
DeshakeOpenCLContext::grayscale
cl_mem grayscale
Definition: vf_deshake_opencl.c:255
AVFilterPad::name
const char * name
Pad name.
Definition: internal.h:60
DeshakeOpenCLContext::transform_time
unsigned long long transform_time
Definition: vf_deshake_opencl.c:305
RingbufCount
@ RingbufCount
Definition: vf_deshake_opencl.c:119
smooth
static float smooth(DeshakeOpenCLContext *deshake_ctx, float *gauss_kernel, int length, float max_val, AVFifoBuffer *values)
Definition: vf_deshake_opencl.c:903
AVFilter
Filter definition.
Definition: avfilter.h:144
AVHWFramesContext
This struct describes a set or pool of "hardware" frames (i.e.
Definition: hwcontext.h:124
DeshakeOpenCLContext::ocf
OpenCLFilterContext ocf
Definition: vf_deshake_opencl.c:201
AV_PIX_FMT_GBRAP16LE
@ AV_PIX_FMT_GBRAP16LE
planar GBRA 4:4:4:4 64bpp, little-endian
Definition: pixfmt.h:217
ff_opencl_filter_init
int ff_opencl_filter_init(AVFilterContext *avctx)
Initialise an OpenCL filter context.
Definition: opencl.c:147
CL_BLOCKING_WRITE_BUFFER
#define CL_BLOCKING_WRITE_BUFFER(queue, buffer, size, host_ptr, event)
Perform a blocking write to a buffer.
Definition: opencl.h:214
ret
ret
Definition: filter_design.txt:187
FrameDelta
Definition: vf_deshake_opencl.c:174
DeshakeOpenCLContext::matches_host
MotionVector * matches_host
Definition: vf_deshake_opencl.c:240
compute_error
static void compute_error(const MotionVector *point_pairs, const int num_point_pairs, const double *model, float *err)
Definition: vf_deshake_opencl.c:445
ransac_update_num_iters
static int ransac_update_num_iters(double confidence, double num_outliers, int max_iters)
Definition: vf_deshake_opencl.c:505
pixfmt.h
frame
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
Definition: filter_design.txt:264
DeshakeOpenCLContext::prev_refined_features
cl_mem prev_refined_features
Definition: vf_deshake_opencl.c:262
FrameDelta::scale
cl_float2 scale
Definition: vf_deshake_opencl.c:177
AbsoluteFrameMotion::data_start_offset
int data_start_offset
Definition: vf_deshake_opencl.c:144
DeshakeOpenCLContext::refined_features
cl_mem refined_features
Definition: vf_deshake_opencl.c:260
transform.h
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen_template.c:38
check_subset
static int check_subset(const MotionVector *pairs_subset)
Definition: vf_deshake_opencl.c:385
ff_framequeue_queued_frames
static size_t ff_framequeue_queued_frames(const FFFrameQueue *fq)
Get the number of queued frames.
Definition: framequeue.h:146
transforms
static const struct @78 transforms[18]
run_estimate_kernel
static void run_estimate_kernel(const MotionVector *point_pairs, double *model)
Definition: vf_deshake_opencl.c:329
CropInfo
Definition: vf_deshake_opencl.c:186
SimilarityMatrix::matrix
double matrix[6]
Definition: vf_deshake_opencl.c:183
AVFrame::height
int height
Definition: frame.h:358
DebugMatches::matches
MotionVector * matches
Definition: vf_deshake_opencl.c:124
DeshakeOpenCLContext::tripod_mode
int tripod_mode
Definition: vf_deshake_opencl.c:280
DeshakeOpenCLContext::match_descriptors_time
unsigned long long match_descriptors_time
Definition: vf_deshake_opencl.c:304
ff_framequeue_global_init
void ff_framequeue_global_init(FFFrameQueueGlobal *fqg)
Init a global structure.
Definition: framequeue.c:30
avfilter.h
gaussian_for
static float gaussian_for(int x, float sigma)
Definition: vf_deshake_opencl.c:813
values
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return values
Definition: filter_design.txt:263
temp
else temp
Definition: vf_mcdeint.c:256
OpenCLFilterContext
Definition: opencl.h:36
ff_opencl_filter_uninit
void ff_opencl_filter_uninit(AVFilterContext *avctx)
Uninitialise an OpenCL filter context.
Definition: opencl.c:156
DeshakeOpenCLContext::debug_on
int debug_on
Definition: vf_deshake_opencl.c:281
DebugMatches::num_matches
int num_matches
Definition: vf_deshake_opencl.c:128
CL_RELEASE_QUEUE
#define CL_RELEASE_QUEUE(q)
release an OpenCL Command Queue
Definition: opencl.h:127
DeshakeOpenCLContext::kernel_refine_features
cl_kernel kernel_refine_features
Definition: vf_deshake_opencl.c:248
AVFilterContext
An instance of a filter.
Definition: avfilter.h:338
AV_PIX_FMT_GBRP
@ AV_PIX_FMT_GBRP
planar GBR 4:4:4 24bpp
Definition: pixfmt.h:168
find_inliers
static int find_inliers(MotionVector *point_pairs, const int num_point_pairs, const double *model, float *err, double thresh)
Definition: vf_deshake_opencl.c:469
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
deshake_opencl_options
static const AVOption deshake_opencl_options[]
Definition: vf_deshake_opencl.c:2159
mem.h
av_fifo_size
int av_fifo_size(const AVFifoBuffer *f)
Return the amount of data in bytes in the AVFifoBuffer, that is the amount of data you can read from ...
Definition: fifo.c:77
av_fifo_generic_peek_at
int av_fifo_generic_peek_at(AVFifoBuffer *f, void *dest, int offset, int buf_size, void(*func)(void *, void *, int))
Feed data at specific position from an AVFifoBuffer to a user-supplied callback.
Definition: fifo.c:151
PointPair::p1
cl_float2 p1
Definition: vf_deshake_opencl.c:99
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:81
ff_opencl_get_event_time
cl_ulong ff_opencl_get_event_time(cl_event event)
Gets the command start and end times for the given event and returns the difference (the time that th...
Definition: opencl.c:354
av_fifo_freep
void av_fifo_freep(AVFifoBuffer **f)
Free an AVFifoBuffer and reset pointer to NULL.
Definition: fifo.c:63
ringbuf_float_at
static void ringbuf_float_at(DeshakeOpenCLContext *deshake_ctx, AVFifoBuffer *values, float *val, int offset)
Definition: vf_deshake_opencl.c:854
DeshakeOpenCLContext::kernel_grayscale
cl_kernel kernel_grayscale
Definition: vf_deshake_opencl.c:246
AV_OPT_TYPE_BOOL
@ AV_OPT_TYPE_BOOL
Definition: opt.h:240
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
DebugMatches
Definition: vf_deshake_opencl.c:123
query_formats
static int query_formats(AVFilterContext *ctx)
Definition: aeval.c:244
imgutils.h
CL_CREATE_BUFFER_FLAGS
#define CL_CREATE_BUFFER_FLAGS(ctx, buffer_name, flags, size, host_ptr)
Create a buffer with the given information.
Definition: opencl.h:197
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:28
CL_FAIL_ON_ERROR
#define CL_FAIL_ON_ERROR(errcode,...)
A helper macro to handle OpenCL errors.
Definition: opencl.h:74
AV_PIX_FMT_GBRAP10BE
@ AV_PIX_FMT_GBRAP10BE
planar GBR 4:4:4:4 40bpp, big-endian
Definition: pixfmt.h:290
uninit
static av_cold int uninit(AVCodecContext *avctx)
Definition: crystalhd.c:279
DeshakeOpenCLContext::descriptors
cl_mem descriptors
Definition: vf_deshake_opencl.c:267
DeshakeOpenCLContext::matches
cl_mem matches
Definition: vf_deshake_opencl.c:271
int
int
Definition: ffmpeg_filter.c:192
MotionVector::p
PointPair p
Definition: vf_deshake_opencl.c:105
DeshakeOpenCLContext::brief_descriptors_time
unsigned long long brief_descriptors_time
Definition: vf_deshake_opencl.c:303
DeshakeOpenCLContext::command_queue
cl_command_queue command_queue
Definition: vf_deshake_opencl.c:245
PointPair
Definition: vf_deshake_opencl.c:97
BREIFN
#define BREIFN
Definition: vf_deshake_opencl.c:87
DeshakeOpenCLContext::sw_format
int sw_format
Definition: vf_deshake_opencl.c:237
DeshakeOpenCLContext::gauss_kernel
float * gauss_kernel
Definition: vf_deshake_opencl.c:224
DeshakeOpenCLContext::refine_features
cl_int refine_features
Definition: vf_deshake_opencl.c:285
SimilarityMatrix
Definition: vf_deshake_opencl.c:181
optimize_model
static void optimize_model(DeshakeOpenCLContext *deshake_ctx, MotionVector *best_pairs, MotionVector *inliers, const int num_inliers, float best_err, double *model_out)
Definition: vf_deshake_opencl.c:610
DeshakeOpenCLContext::crop_y
CropInfo crop_y
Definition: vf_deshake_opencl.c:230
ff_opencl_filter_query_formats
int ff_opencl_filter_query_formats(AVFilterContext *avctx)
Return that all inputs and outputs support only AV_PIX_FMT_OPENCL.
Definition: opencl.c:28
CropInfo::top_left
cl_float2 top_left
Definition: vf_deshake_opencl.c:188
decompose_transform
static FrameDelta decompose_transform(double *model)
Definition: vf_deshake_opencl.c:747
AVFilterContext::outputs
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:350
F1
#define F1(l, r, i)
Definition: cast5.c:47