88 #define BRIEF_PATCH_SIZE 31
89 #define BRIEF_PATCH_SIZE_HALF (BRIEF_PATCH_SIZE / 2)
91 #define MATCHES_CONTIG_SIZE 2000
93 #define ROUNDED_UP_DIV(a, b) ((a + (b - 1)) / b)
310 return (
double)total_time / (
double)num_frames / 1000000.0;
322 double x1 = point_pairs[0].
p.
p1.s[0];
323 double y1 = point_pairs[0].
p.
p1.s[1];
324 double x2 = point_pairs[1].
p.
p1.s[0];
325 double y2 = point_pairs[1].
p.
p1.s[1];
326 double x3 = point_pairs[2].
p.
p1.s[0];
327 double y3 = point_pairs[2].
p.
p1.s[1];
330 double X1 = point_pairs[0].
p.
p2.s[0];
331 double Y1 = point_pairs[0].
p.
p2.s[1];
332 double X2 = point_pairs[1].
p.
p2.s[0];
333 double Y2 = point_pairs[1].
p.
p2.s[1];
334 double X3 = point_pairs[2].
p.
p2.s[0];
335 double Y3 = point_pairs[2].
p.
p2.s[1];
337 double d = 1.0 / ( x1*(y2-y3) + x2*(y3-y1) + x3*(y1-y2) );
339 model[0] = d * ( X1*(y2-y3) + X2*(y3-y1) + X3*(y1-y2) );
340 model[1] = d * ( X1*(x3-x2) + X2*(x1-x3) + X3*(x2-x1) );
341 model[2] = d * ( X1*(x2*y3 - x3*y2) + X2*(x3*y1 - x1*y3) + X3*(x1*y2 - x2*y1) );
343 model[3] = d * ( Y1*(y2-y3) + Y2*(y3-y1) + Y3*(y1-y2) );
344 model[4] = d * ( Y1*(x3-x2) + Y2*(x1-x3) + Y3*(x2-x1) );
345 model[5] = d * ( Y1*(x2*y3 - x3*y2) + Y2*(x3*y1 - x1*y3) + Y3*(x1*y2 - x2*y1) );
353 for (j = 0; j <
i; j++) {
354 double dx1 = points[j]->s[0] - points[
i]->s[0];
355 double dy1 = points[j]->s[1] - points[
i]->s[1];
357 for (k = 0; k < j; k++) {
358 double dx2 = points[k]->s[0] - points[
i]->s[0];
359 double dy2 = points[k]->s[1] - points[
i]->s[1];
364 if (
fabs(dx2*dy1 - dy2*dx1) <= 1.0) {
377 const cl_float2 *prev_points[] = {
378 &pairs_subset[0].
p.
p1,
379 &pairs_subset[1].
p.
p1,
380 &pairs_subset[2].
p.
p1
383 const cl_float2 *curr_points[] = {
384 &pairs_subset[0].
p.
p2,
385 &pairs_subset[1].
p.
p2,
386 &pairs_subset[2].
p.
p2
396 const int num_point_pairs,
401 int i = 0, j, iters = 0;
403 for (; iters < max_attempts; iters++) {
404 for (
i = 0;
i < 3 && iters < max_attempts;) {
408 idx_i = idx[
i] =
rand_in(0, num_point_pairs, alfg);
410 for (j = 0; j <
i; j++) {
411 if (idx_i == idx[j]) {
421 pairs_subset[
i] = point_pairs[idx[
i]];
431 return i == 3 && iters < max_attempts;
437 const int num_point_pairs,
441 double F0 = model[0],
F1 = model[1],
F2 = model[2];
442 double F3 = model[3], F4 = model[4], F5 = model[5];
444 for (
int i = 0;
i < num_point_pairs;
i++) {
445 const cl_float2 *
f = &point_pairs[
i].
p.
p1;
446 const cl_float2 *t = &point_pairs[
i].
p.
p2;
448 double a = F0*
f->s[0] +
F1*
f->s[1] +
F2 - t->s[0];
449 double b =
F3*
f->s[0] + F4*
f->s[1] + F5 - t->s[1];
461 const int num_point_pairs,
466 float t = (
float)(thresh * thresh);
467 int i, n = num_point_pairs, num_inliers = 0;
471 for (
i = 0;
i < n;
i++) {
499 confidence =
av_clipd(confidence, 0.0, 1.0);
500 num_outliers =
av_clipd(num_outliers, 0.0, 1.0);
503 num =
FFMAX(1.0 - confidence, DBL_MIN);
504 denom = 1.0 - pow(1.0 - num_outliers, 3);
505 if (denom < DBL_MIN) {
512 return denom >= 0 || -num >= max_iters * (-denom) ? max_iters : (
int)
round(num / denom);
521 const int num_point_pairs,
523 const double threshold,
525 const double confidence
528 double best_model[6], model[6];
531 int iter, niters =
FFMAX(max_iters, 1);
532 int good_count, max_good_count = 0;
535 if (num_point_pairs < 3) {
537 }
else if (num_point_pairs == 3) {
541 for (
int i = 0;
i < 3; ++
i) {
548 for (iter = 0; iter < niters; ++iter) {
549 int found =
get_subset(&deshake_ctx->
alfg, point_pairs, num_point_pairs, pairs_subset, 10000);
562 if (good_count >
FFMAX(max_good_count, 2)) {
563 for (
int mi = 0;
mi < 6; ++
mi) {
564 best_model[
mi] = model[
mi];
567 for (
int pi = 0; pi < 3; pi++) {
568 best_pairs[pi] = pairs_subset[pi];
571 max_good_count = good_count;
574 (
double)(num_point_pairs - good_count) / num_point_pairs,
580 if (max_good_count > 0) {
581 for (
int mi = 0;
mi < 6; ++
mi) {
582 model_out[
mi] = best_model[
mi];
585 for (
int pi = 0; pi < 3; ++pi) {
604 const int num_inliers,
608 float move_x_val = 0.01;
609 float move_y_val = 0.01;
611 float old_move_x_val = 0;
613 int last_changed = 0;
615 for (
int iters = 0; iters < 200; iters++) {
619 best_pairs[0].
p.
p2.s[0] += move_x_val;
621 best_pairs[0].
p.
p2.s[0] += move_y_val;
627 for (
int j = 0; j < num_inliers; j++) {
631 if (total_err < best_err) {
632 for (
int mi = 0;
mi < 6; ++
mi) {
633 model_out[
mi] = model[
mi];
636 best_err = total_err;
637 last_changed = iters;
641 best_pairs[0].
p.
p2.s[0] -= move_x_val;
643 best_pairs[0].
p.
p2.s[0] -= move_y_val;
646 if (iters - last_changed > 4) {
651 old_move_x_val = move_x_val;
659 if (old_move_x_val < 0) {
677 const int num_inliers,
682 float best_err = FLT_MAX;
683 double best_model[6], model[6];
686 for (
int i = 0;
i < max_iters;
i++) {
688 int found =
get_subset(&deshake_ctx->
alfg, inliers, num_inliers, pairs_subset, 10000);
701 for (
int j = 0; j < num_inliers; j++) {
705 if (
i == 0 || total_err < best_err) {
706 for (
int mi = 0;
mi < 6; ++
mi) {
707 best_model[
mi] = model[
mi];
710 for (
int pi = 0; pi < 3; pi++) {
711 best_pairs[pi] = pairs_subset[pi];
714 best_err = total_err;
718 for (
int mi = 0;
mi < 6; ++
mi) {
719 model_out[
mi] = best_model[
mi];
722 for (
int pi = 0; pi < 3; ++pi) {
728 optimize_model(deshake_ctx, best_pairs, inliers, num_inliers, best_err, model_out);
749 memset(&
ret, 0,
sizeof(
ret));
751 ret.translation.s[0] = e;
752 ret.translation.s[1] =
f;
755 if (
a != 0 ||
b != 0) {
761 ret.skew.s[0] = atan((
a *
c +
b * d) / (
r *
r));
763 }
else if (
c != 0 || d != 0) {
764 double s = sqrt(
c *
c + d * d);
770 ret.skew.s[1] = atan((
a *
c +
b * d) / (
s *
s));
784 for (
int i = 0;
i < size_y; ++
i) {
785 for (
int j = 0; j < size_x; ++j) {
804 return 1.0f /
expf(((
float)x * (
float)x) / (2.0
f * sigma * sigma));
812 int window_half = length / 2;
814 for (
int i = 0;
i < length; ++
i) {
818 gauss_kernel[
i] =
val;
822 for (
int i = 0;
i < length; ++
i) {
823 gauss_kernel[
i] /= gauss_sum;
850 int clip_start, clip_end, offset_clipped;
894 float new_large_s = 0, new_small_s = 0, new_best = 0, old, diff_between,
895 percent_of_max, inverted_percent;
897 float large_sigma = 40.0f;
898 float small_sigma = 2.0f;
902 best_sigma = (large_sigma - 0.5f) * deshake_ctx->
smooth_percent + 0.5f;
914 for (
int i = indices.
start, j = 0;
i < indices.
end; ++
i, ++j) {
916 new_large_s += old * gauss_kernel[j];
920 for (
int i = indices.
start, j = 0;
i < indices.
end; ++
i, ++j) {
922 new_small_s += old * gauss_kernel[j];
925 diff_between =
fabsf(new_large_s - new_small_s);
926 percent_of_max = diff_between / max_val;
927 inverted_percent = 1 - percent_of_max;
928 best_sigma = large_sigma *
powf(inverted_percent, 40);
932 for (
int i = indices.
start, j = 0;
i < indices.
end; ++
i, ++j) {
934 new_best += old * gauss_kernel[j];
962 float center_s_w, center_s_h;
974 center_s_w = center_w - center_s.s[0];
975 center_s_h = center_h - center_s.s[1];
978 x_shift + center_s_w,
979 y_shift + center_s_h,
995 float new_width, new_height, adjusted_width, adjusted_height, adjusted_x, adjusted_y;
1001 float ar_h = frame_height / frame_width;
1002 float ar_w = frame_width / frame_height;
1038 adjusted_width = new_height * ar_w;
1041 if (adjusted_x >= crop->
top_left.s[0]) {
1044 adjusted_height = new_width * ar_h;
1045 adjusted_y = crop->
bottom_right.s[1] - adjusted_height;
1061 if (
ctx->gauss_kernel)
1064 if (
ctx->ransac_err)
1067 if (
ctx->matches_host)
1070 if (
ctx->matches_contig_host)
1101 if (
ctx->debug_on) {
1120 cl_ulong8 zeroed_ulong8;
1122 cl_image_format grayscale_format;
1123 cl_image_desc grayscale_desc;
1124 cl_command_queue_properties queue_props;
1146 const int descriptor_buf_size = image_grid_32 * (
BREIFN / 8);
1147 const int features_buf_size = image_grid_32 *
sizeof(cl_float2);
1159 ctx->curr_frame = 0;
1161 memset(&zeroed_ulong8, 0,
sizeof(cl_ulong8));
1164 if (!
ctx->gauss_kernel) {
1170 if (!
ctx->ransac_err) {
1179 if (!
ctx->abs_motion.ringbuffers[
i]) {
1185 if (
ctx->debug_on) {
1187 ctx->smooth_window / 2,
1191 if (!
ctx->abs_motion.debug_matches) {
1197 ctx->abs_motion.curr_frame_offset = 0;
1198 ctx->abs_motion.data_start_offset = -1;
1199 ctx->abs_motion.data_end_offset = -1;
1202 if (!pattern_host) {
1208 if (!
ctx->matches_host) {
1214 if (!
ctx->matches_contig_host) {
1220 if (!
ctx->inliers) {
1230 for (
int j = 0; j < 2; ++j) {
1235 pattern_host[
i] = pair;
1238 for (
int i = 0;
i < 14;
i++) {
1239 if (
ctx->sw_format == disallowed_formats[
i]) {
1251 ctx->sw_format = hw_frames_ctx->sw_format;
1257 if (
ctx->debug_on) {
1258 queue_props = CL_QUEUE_PROFILING_ENABLE;
1262 ctx->command_queue = clCreateCommandQueue(
1263 ctx->ocf.hwctx->context,
1264 ctx->ocf.hwctx->device_id,
1281 grayscale_format.image_channel_order = CL_R;
1282 grayscale_format.image_channel_data_type = CL_FLOAT;
1284 grayscale_desc = (cl_image_desc) {
1285 .image_type = CL_MEM_OBJECT_IMAGE2D,
1286 .image_width = outlink->
w,
1287 .image_height = outlink->
h,
1289 .image_array_size = 0,
1290 .image_row_pitch = 0,
1291 .image_slice_pitch = 0,
1292 .num_mip_levels = 0,
1297 ctx->grayscale = clCreateImage(
1298 ctx->ocf.hwctx->context,
1314 CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1324 if (
ctx->debug_on) {
1329 ctx->initialized = 1;
1343 "\tframe moved from: %f x, %f y\n"
1344 "\t to: %f x, %f y\n"
1345 "\t rotated from: %f degrees\n"
1346 "\t to: %f degrees\n"
1347 "\t scaled from: %f x, %f y\n"
1348 "\t to: %f x, %f y\n"
1350 "\tframe moved by: %f x, %f y\n"
1351 "\t rotated by: %f degrees\n"
1352 "\t scaled by: %f x, %f y\n",
1380 float transform_y[9];
1382 float transform_uv[9];
1384 float transform_crop_y[9];
1386 float transform_crop_uv[9];
1387 float transform_debug_rgb[9];
1388 size_t global_work[2];
1390 cl_mem
src, transformed,
dst;
1393 cl_event transform_event, crop_upscale_event;
1395 cl_int num_model_matches;
1397 const float center_w = (
float)input_frame->
width / 2;
1398 const float center_h = (
float)input_frame->
height / 2;
1404 const float center_w_chroma = (
float)chroma_width / 2;
1405 const float center_h_chroma = (
float)chroma_height / 2;
1407 const float luma_w_over_chroma_w = ((
float)input_frame->
width / (
float)chroma_width);
1408 const float luma_h_over_chroma_h = ((
float)input_frame->
height / (
float)chroma_height);
1508 if (!cropped_frame) {
1514 if (!transformed_frame) {
1524 src = (cl_mem)input_frame->
data[p];
1525 transformed = (cl_mem)transformed_frame->data[p];
1540 { sizeof(cl_mem), &src },
1541 { sizeof(cl_mem), &transformed },
1542 { sizeof(cl_mem), &transforms[p] },
1579 transformed = (cl_mem)transformed_frame->data[0];
1586 {
sizeof(cl_mem), &transformed },
1589 {
sizeof(cl_int), &num_model_matches },
1620 crops[0] = deshake_ctx->
crop_y;
1621 crops[1] = crops[2] = deshake_ctx->
crop_uv;
1625 dst = (cl_mem)cropped_frame->
data[p];
1626 transformed = (cl_mem)transformed_frame->data[p];
1640 &crop_upscale_event,
1641 { sizeof(cl_mem), &transformed },
1642 { sizeof(cl_mem), &dst },
1643 { sizeof(cl_float2), &crops[p].top_left },
1644 { sizeof(cl_float2), &crops[p].bottom_right },
1732 int num_inliers = 0;
1736 size_t global_work[2];
1737 size_t harris_global_work[2];
1738 size_t grid_32_global_work[2];
1739 int grid_32_h, grid_32_w;
1740 size_t local_work[2];
1744 cl_event grayscale_event, harris_response_event, refine_features_event,
1745 brief_event, match_descriptors_event, read_buf_event;
1766 grid_32_global_work[0] /= 32;
1767 grid_32_global_work[1] /= 32;
1772 if (deshake_ctx->
is_yuv) {
1775 src = (cl_mem)input_frame->
data[0];
1783 {
sizeof(cl_mem), &
src },
1784 {
sizeof(cl_mem), &deshake_ctx->
grayscale }
1789 deshake_ctx->command_queue,
1790 deshake_ctx->kernel_harris_response,
1793 &harris_response_event,
1794 { sizeof(cl_mem), &deshake_ctx->grayscale },
1795 { sizeof(cl_mem), &deshake_ctx->harris_buf }
1799 deshake_ctx->command_queue,
1800 deshake_ctx->kernel_refine_features,
1801 grid_32_global_work,
1803 &refine_features_event,
1804 { sizeof(cl_mem), &deshake_ctx->grayscale },
1805 { sizeof(cl_mem), &deshake_ctx->harris_buf },
1806 { sizeof(cl_mem), &deshake_ctx->refined_features },
1807 { sizeof(cl_int), &deshake_ctx->refine_features }
1811 deshake_ctx->command_queue,
1812 deshake_ctx->kernel_brief_descriptors,
1813 grid_32_global_work,
1816 { sizeof(cl_mem), &deshake_ctx->grayscale },
1817 { sizeof(cl_mem), &deshake_ctx->refined_features },
1818 { sizeof(cl_mem), &deshake_ctx->descriptors },
1819 { sizeof(cl_mem), &deshake_ctx->brief_pattern}
1826 goto no_motion_data;
1830 deshake_ctx->command_queue,
1831 deshake_ctx->kernel_match_descriptors,
1832 grid_32_global_work,
1834 &match_descriptors_event,
1835 { sizeof(cl_mem), &deshake_ctx->prev_refined_features },
1836 { sizeof(cl_mem), &deshake_ctx->refined_features },
1837 { sizeof(cl_mem), &deshake_ctx->descriptors },
1838 { sizeof(cl_mem), &deshake_ctx->prev_descriptors },
1839 { sizeof(cl_mem), &deshake_ctx->matches }
1842 cle = clEnqueueReadBuffer(
1843 deshake_ctx->command_queue,
1844 deshake_ctx->matches,
1848 deshake_ctx->matches_host,
1857 if (num_vectors < 10) {
1870 if (deshake_ctx->abs_motion.data_end_offset == -1) {
1871 deshake_ctx->abs_motion.data_end_offset =
1875 goto no_motion_data;
1880 deshake_ctx->matches_contig_host,
1888 goto no_motion_data;
1891 for (
int i = 0;
i < num_vectors;
i++) {
1892 if (deshake_ctx->matches_contig_host[
i].should_consider) {
1893 deshake_ctx->inliers[num_inliers] = deshake_ctx->matches_contig_host[
i];
1900 deshake_ctx->inliers,
1906 goto no_motion_data;
1915 deshake_ctx->abs_motion.ringbuffers[
i],
1926 if (deshake_ctx->debug_on) {
1927 if (!deshake_ctx->is_yuv) {
1946 for (
int i = 0;
i < num_vectors;
i++) {
1947 deshake_ctx->matches_contig_host[
i].should_consider = 0;
1949 debug_matches.num_model_matches = 0;
1951 if (deshake_ctx->debug_on) {
1953 "\n[ALERT] No motion data found in queue_frame, motion reset to 0\n\n"
1962 temp = deshake_ctx->prev_descriptors;
1963 deshake_ctx->prev_descriptors = deshake_ctx->descriptors;
1964 deshake_ctx->descriptors =
temp;
1967 temp = deshake_ctx->prev_refined_features;
1968 deshake_ctx->prev_refined_features = deshake_ctx->refined_features;
1969 deshake_ctx->refined_features =
temp;
1971 if (deshake_ctx->debug_on) {
1972 if (num_vectors == 0) {
1973 debug_matches.matches =
NULL;
1977 if (!debug_matches.matches) {
1983 for (
int i = 0;
i < num_vectors;
i++) {
1984 debug_matches.matches[
i] = deshake_ctx->matches_contig_host[
i];
1986 debug_matches.num_matches = num_vectors;
1989 deshake_ctx->abs_motion.debug_matches,
1994 av_fifo_write(deshake_ctx->abs_motion.ringbuffers[
i], &new_vals[
i], 1);
2000 clFinish(deshake_ctx->command_queue);
2016 if (!deshake_ctx->
eof) {
2021 if (!
frame->hw_frames_ctx)
2053 deshake_ctx->
eof = 1;
2057 if (deshake_ctx->
eof) {
2072 "Average kernel execution times:\n"
2073 "\t grayscale: %0.3f ms\n"
2074 "\t harris_response: %0.3f ms\n"
2075 "\t refine_features: %0.3f ms\n"
2076 "\tbrief_descriptors: %0.3f ms\n"
2077 "\tmatch_descriptors: %0.3f ms\n"
2078 "\t transform: %0.3f ms\n"
2079 "\t crop_upscale: %0.3f ms\n"
2080 "Average buffer read times:\n"
2081 "\t features buf: %0.3f ms\n",
2097 if (!deshake_ctx->
eof) {
2120 #define OFFSET(x) offsetof(DeshakeOpenCLContext, x)
2121 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
2125 "tripod",
"simulates a tripod by preventing any camera movement whatsoever "
2126 "from the original frame",
2130 "debug",
"turn on additional debugging information",
2134 "adaptive_crop",
"attempt to subtly crop borders to reduce mirrored content",
2138 "refine_features",
"refine feature point locations at a sub-pixel level",
2142 "smooth_strength",
"smoothing strength (0 attempts to adaptively determine optimal strength)",
2146 "smooth_window_multiplier",
"multiplier for number of frames to buffer for motion data",
2155 .
name =
"deshake_opencl",
2158 .priv_class = &deshake_opencl_class,