Diffstat (limited to 'tensorflow/examples/android/jni/object_tracking/optical_flow.cc')
-rw-r--r--  tensorflow/examples/android/jni/object_tracking/optical_flow.cc | 490
1 file changed, 490 insertions(+), 0 deletions(-)
diff --git a/tensorflow/examples/android/jni/object_tracking/optical_flow.cc b/tensorflow/examples/android/jni/object_tracking/optical_flow.cc
new file mode 100644
index 0000000000..fab0a3155d
--- /dev/null
+++ b/tensorflow/examples/android/jni/object_tracking/optical_flow.cc
@@ -0,0 +1,490 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <math.h>
+
+#include "tensorflow/examples/android/jni/object_tracking/geom.h"
+#include "tensorflow/examples/android/jni/object_tracking/image-inl.h"
+#include "tensorflow/examples/android/jni/object_tracking/image.h"
+#include "tensorflow/examples/android/jni/object_tracking/time_log.h"
+#include "tensorflow/examples/android/jni/object_tracking/utils.h"
+
+#include "tensorflow/examples/android/jni/object_tracking/config.h"
+#include "tensorflow/examples/android/jni/object_tracking/flow_cache.h"
+#include "tensorflow/examples/android/jni/object_tracking/frame_pair.h"
+#include "tensorflow/examples/android/jni/object_tracking/image_data.h"
+#include "tensorflow/examples/android/jni/object_tracking/keypoint.h"
+#include "tensorflow/examples/android/jni/object_tracking/keypoint_detector.h"
+#include "tensorflow/examples/android/jni/object_tracking/optical_flow.h"
+
+namespace tf_tracking {
+
+OpticalFlow::OpticalFlow(const OpticalFlowConfig* const config)
+ : config_(config),
+ frame1_(NULL),
+ frame2_(NULL),
+ working_size_(config->image_size) {}
+
+
+void OpticalFlow::NextFrame(const ImageData* const image_data) {
+ // Special case for the first frame: make sure the image ends up in
+ // frame1_ so that keypoint detection can be done on it if desired.
+ frame1_ = (frame1_ == NULL) ? image_data : frame2_;
+ frame2_ = image_data;
+}
+
+
+// Static heart of the optical flow computation.
+// Lucas-Kanade algorithm.
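+// Given a prior flow guess (g_x, g_y), each iteration solves the 2x2 linear
+// system G * n = b for the incremental displacement n and accumulates it into
+// the guess, stopping early once the update falls below the abort threshold.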
+bool OpticalFlow::FindFlowAtPoint_LK(const Image<uint8>& img_I,
+ const Image<uint8>& img_J,
+ const Image<int32>& I_x,
+ const Image<int32>& I_y,
+ const float p_x,
+ const float p_y,
+ float* out_g_x,
+ float* out_g_y) {
+ float g_x = *out_g_x;
+ float g_y = *out_g_y;
+ // Get values for frame 1. They remain constant through the inner
+ // iteration loop.
+ float vals_I[kFlowArraySize];
+ float vals_I_x[kFlowArraySize];
+ float vals_I_y[kFlowArraySize];
+
+ const int kPatchSize = 2 * kFlowIntegrationWindowSize + 1;
+ const float kWindowSizeFloat = static_cast<float>(kFlowIntegrationWindowSize);
+
+#if USE_FIXED_POINT_FLOW
+ const int fixed_x_max = RealToFixed1616(img_I.width_less_one_) - 1;
+ const int fixed_y_max = RealToFixed1616(img_I.height_less_one_) - 1;
+#else
+ const float real_x_max = I_x.width_less_one_ - EPSILON;
+ const float real_y_max = I_x.height_less_one_ - EPSILON;
+#endif
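+
+  // Note: 16.16 fixed point represents a real value v as the integer
+  // v * 65536, so (x << 16) steps by whole pixels and (fixed >> 16) truncates
+  // back to an integer pixel coordinate. For example, assuming round-to-
+  // nearest conversion, RealToFixed1616(1.5f) == 0x00018000.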
+
+ // Get the window around the original point.
+ const float src_left_real = p_x - kWindowSizeFloat;
+ const float src_top_real = p_y - kWindowSizeFloat;
+ float* vals_I_ptr = vals_I;
+ float* vals_I_x_ptr = vals_I_x;
+ float* vals_I_y_ptr = vals_I_y;
+#if USE_FIXED_POINT_FLOW
+ // Source integer coordinates.
+ const int src_left_fixed = RealToFixed1616(src_left_real);
+ const int src_top_fixed = RealToFixed1616(src_top_real);
+
+ for (int y = 0; y < kPatchSize; ++y) {
+ const int fp_y = Clip(src_top_fixed + (y << 16), 0, fixed_y_max);
+
+ for (int x = 0; x < kPatchSize; ++x) {
+ const int fp_x = Clip(src_left_fixed + (x << 16), 0, fixed_x_max);
+
+ *vals_I_ptr++ = img_I.GetPixelInterpFixed1616(fp_x, fp_y);
+ *vals_I_x_ptr++ = I_x.GetPixelInterpFixed1616(fp_x, fp_y);
+ *vals_I_y_ptr++ = I_y.GetPixelInterpFixed1616(fp_x, fp_y);
+ }
+ }
+#else
+ for (int y = 0; y < kPatchSize; ++y) {
+ const float y_pos = Clip(src_top_real + y, 0.0f, real_y_max);
+
+ for (int x = 0; x < kPatchSize; ++x) {
+ const float x_pos = Clip(src_left_real + x, 0.0f, real_x_max);
+
+ *vals_I_ptr++ = img_I.GetPixelInterp(x_pos, y_pos);
+ *vals_I_x_ptr++ = I_x.GetPixelInterp(x_pos, y_pos);
+ *vals_I_y_ptr++ = I_y.GetPixelInterp(x_pos, y_pos);
+ }
+ }
+#endif
+
+ // Compute the spatial gradient matrix about point p.
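+  // Per the standard Lucas-Kanade formulation, CalculateG is expected to
+  // accumulate the 2x2 structure tensor over the patch:
+  //   G = sum_patch [ I_x*I_x, I_x*I_y; I_x*I_y, I_y*I_y ].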
+ float G[] = { 0, 0, 0, 0 };
+ CalculateG(vals_I_x, vals_I_y, kFlowArraySize, G);
+
+ // Find the inverse of G.
+ float G_inv[4];
+ if (!Invert2x2(G, G_inv)) {
+ return false;
+ }
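+
+  // Inversion fails when det(G) is near zero, i.e. when the patch lacks
+  // gradient structure in two independent directions (the aperture problem),
+  // so no reliable flow can be found for this point.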
+
+#if NORMALIZE
+ const float mean_I = ComputeMean(vals_I, kFlowArraySize);
+ const float std_dev_I = ComputeStdDev(vals_I, kFlowArraySize, mean_I);
+#endif
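+
+  // With NORMALIZE enabled, patches are compared after subtracting their
+  // means and matching their standard deviations, making the mismatch robust
+  // to uniform brightness and contrast changes between frames.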
+
+ // Iterate kNumIterations times or until we converge.
+ for (int iteration = 0; iteration < kNumIterations; ++iteration) {
+ // Get values for frame 2.
+ float vals_J[kFlowArraySize];
+
+ // Get the window around the destination point.
+ const float left_real = p_x + g_x - kWindowSizeFloat;
+ const float top_real = p_y + g_y - kWindowSizeFloat;
+ float* vals_J_ptr = vals_J;
+#if USE_FIXED_POINT_FLOW
+    // Top-left sub-pixel position of the patch for the current iteration
+    // (in 16.16 fixed point). This is constant over one iteration.
+ const int left_fixed = RealToFixed1616(left_real);
+ const int top_fixed = RealToFixed1616(top_real);
+
+ for (int win_y = 0; win_y < kPatchSize; ++win_y) {
+ const int fp_y = Clip(top_fixed + (win_y << 16), 0, fixed_y_max);
+ for (int win_x = 0; win_x < kPatchSize; ++win_x) {
+ const int fp_x = Clip(left_fixed + (win_x << 16), 0, fixed_x_max);
+ *vals_J_ptr++ = img_J.GetPixelInterpFixed1616(fp_x, fp_y);
+ }
+ }
+#else
+ for (int win_y = 0; win_y < kPatchSize; ++win_y) {
+ const float y_pos = Clip(top_real + win_y, 0.0f, real_y_max);
+ for (int win_x = 0; win_x < kPatchSize; ++win_x) {
+ const float x_pos = Clip(left_real + win_x, 0.0f, real_x_max);
+ *vals_J_ptr++ = img_J.GetPixelInterp(x_pos, y_pos);
+ }
+ }
+#endif
+
+#if NORMALIZE
+ const float mean_J = ComputeMean(vals_J, kFlowArraySize);
+ const float std_dev_J = ComputeStdDev(vals_J, kFlowArraySize, mean_J);
+
+ // TODO(andrewharp): Probably better to completely detect and handle the
+ // "corner case" where the patch is fully outside the image diagonally.
+ const float std_dev_ratio = std_dev_J > 0.0f ? std_dev_I / std_dev_J : 1.0f;
+#endif
+
+ // Compute image mismatch vector.
+ float b_x = 0.0f;
+ float b_y = 0.0f;
+
+ vals_I_ptr = vals_I;
+ vals_J_ptr = vals_J;
+ vals_I_x_ptr = vals_I_x;
+ vals_I_y_ptr = vals_I_y;
+
+ for (int win_y = 0; win_y < kPatchSize; ++win_y) {
+ for (int win_x = 0; win_x < kPatchSize; ++win_x) {
+#if NORMALIZE
+ // Normalized Image difference.
+ const float dI =
+ (*vals_I_ptr++ - mean_I) - (*vals_J_ptr++ - mean_J) * std_dev_ratio;
+#else
+ const float dI = *vals_I_ptr++ - *vals_J_ptr++;
+#endif
+ b_x += dI * *vals_I_x_ptr++;
+ b_y += dI * *vals_I_y_ptr++;
+ }
+ }
+
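+    // The accumulated b = sum_patch dI * (I_x, I_y) projects the intensity
+    // mismatch onto the source gradients.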
+    // Optical flow: solve n = G^-1 * b for the incremental displacement.
+ const float n_x = (G_inv[0] * b_x) + (G_inv[1] * b_y);
+ const float n_y = (G_inv[2] * b_x) + (G_inv[3] * b_y);
+
+ // Update best guess with residual displacement from this level and
+ // iteration.
+ g_x += n_x;
+ g_y += n_y;
+
+ // LOGV("Iteration %d: delta (%.3f, %.3f)", iteration, n_x, n_y);
+
+ // Abort early if we're already below the threshold.
+ if (Square(n_x) + Square(n_y) < Square(kTrackingAbortThreshold)) {
+ break;
+ }
+ } // Iteration.
+
+ // Copy value back into output.
+ *out_g_x = g_x;
+ *out_g_y = g_y;
+ return true;
+}
+
+
+// Pointwise flow using translational 2-DOF ESM (Efficient Second-order
+// Minimization).
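+// Unlike plain LK, ESM samples gradients from both the source and target
+// patches and averages them as the Jacobian, which typically converges in
+// fewer iterations. An optional brightness offset models uniform illumination
+// change between the frames.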
+bool OpticalFlow::FindFlowAtPoint_ESM(const Image<uint8>& img_I,
+ const Image<uint8>& img_J,
+ const Image<int32>& I_x,
+ const Image<int32>& I_y,
+ const Image<int32>& J_x,
+ const Image<int32>& J_y,
+ const float p_x,
+ const float p_y,
+ float* out_g_x,
+ float* out_g_y) {
+ float g_x = *out_g_x;
+ float g_y = *out_g_y;
+ const float area_inv = 1.0f / static_cast<float>(kFlowArraySize);
+
+ // Get values for frame 1. They remain constant through the inner
+ // iteration loop.
+ uint8 vals_I[kFlowArraySize];
+ uint8 vals_J[kFlowArraySize];
+ int16 src_gradient_x[kFlowArraySize];
+ int16 src_gradient_y[kFlowArraySize];
+
+ // TODO(rspring): try out the IntegerPatchAlign() method once
+ // the code for that is in ../common.
+ const float wsize_float = static_cast<float>(kFlowIntegrationWindowSize);
+ const int src_left_fixed = RealToFixed1616(p_x - wsize_float);
+ const int src_top_fixed = RealToFixed1616(p_y - wsize_float);
+ const int patch_size = 2 * kFlowIntegrationWindowSize + 1;
+
+ // Create the keypoint template patch from a subpixel location.
+ if (!img_I.ExtractPatchAtSubpixelFixed1616(src_left_fixed, src_top_fixed,
+ patch_size, patch_size, vals_I) ||
+ !I_x.ExtractPatchAtSubpixelFixed1616(src_left_fixed, src_top_fixed,
+ patch_size, patch_size,
+ src_gradient_x) ||
+ !I_y.ExtractPatchAtSubpixelFixed1616(src_left_fixed, src_top_fixed,
+ patch_size, patch_size,
+ src_gradient_y)) {
+ return false;
+ }
+
+ int bright_offset = 0;
+ int sum_diff = 0;
+
+  // Top-left sub-pixel position of the patch for the current iteration
+  // (in 16.16 fixed point). This is constant over one iteration.
+ int left_fixed = RealToFixed1616(p_x + g_x - wsize_float);
+ int top_fixed = RealToFixed1616(p_y + g_y - wsize_float);
+
+  // The truncated version gives the top-left-most whole pixel that is used.
+ int left_trunc = left_fixed >> 16;
+ int top_trunc = top_fixed >> 16;
+
+ // Compute an initial brightness offset.
+ if (kDoBrightnessNormalize &&
+ left_trunc >= 0 && top_trunc >= 0 &&
+ (left_trunc + patch_size) < img_J.width_less_one_ &&
+ (top_trunc + patch_size) < img_J.height_less_one_) {
+ int templ_index = 0;
+ const uint8* j_row = img_J[top_trunc] + left_trunc;
+
+ const int j_stride = img_J.stride();
+
+ for (int y = 0; y < patch_size; ++y, j_row += j_stride) {
+ for (int x = 0; x < patch_size; ++x) {
+ sum_diff += static_cast<int>(j_row[x]) - vals_I[templ_index++];
+ }
+ }
+
+ bright_offset = static_cast<int>(static_cast<float>(sum_diff) * area_inv);
+ }
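+
+  // bright_offset now approximates the mean intensity difference between the
+  // target and source patches, modeling a uniform illumination change; it is
+  // refined after each ESM iteration below.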
+
+  // Iterate kNumIterations times or until the patch moves out of the image.
+ for (int iteration = 0; iteration < kNumIterations; ++iteration) {
+ int jtj[3] = { 0, 0, 0 };
+ int jtr[2] = { 0, 0 };
+ sum_diff = 0;
+
+    // Extract the target image patch values. The target gradients are
+    // sampled per pixel in the loop below and averaged with the source
+    // gradients.
+ if (!img_J.ExtractPatchAtSubpixelFixed1616(left_fixed, top_fixed,
+ patch_size, patch_size,
+ vals_J)) {
+ break;
+ }
+
+ const uint8* templ_row = vals_I;
+ const uint8* extract_row = vals_J;
+ const int16* src_dx_row = src_gradient_x;
+ const int16* src_dy_row = src_gradient_y;
+
+ for (int y = 0; y < patch_size; ++y, templ_row += patch_size,
+ src_dx_row += patch_size, src_dy_row += patch_size,
+ extract_row += patch_size) {
+ const int fp_y = top_fixed + (y << 16);
+ for (int x = 0; x < patch_size; ++x) {
+ const int fp_x = left_fixed + (x << 16);
+ int32 target_dx = J_x.GetPixelInterpFixed1616(fp_x, fp_y);
+ int32 target_dy = J_y.GetPixelInterpFixed1616(fp_x, fp_y);
+
+ // Combine the two Jacobians.
+ // Right-shift by one to account for the fact that we add
+ // two Jacobians.
+ int32 dx = (src_dx_row[x] + target_dx) >> 1;
+ int32 dy = (src_dy_row[x] + target_dy) >> 1;
+
+ // The current residual b - h(q) == extracted - (template + offset)
+ int32 diff = static_cast<int32>(extract_row[x]) -
+ static_cast<int32>(templ_row[x]) -
+ bright_offset;
+
+ jtj[0] += dx * dx;
+ jtj[1] += dx * dy;
+ jtj[2] += dy * dy;
+
+ jtr[0] += dx * diff;
+ jtr[1] += dy * diff;
+
+ sum_diff += diff;
+ }
+ }
+
+ const float jtr1_float = static_cast<float>(jtr[0]);
+ const float jtr2_float = static_cast<float>(jtr[1]);
+
+ // Add some baseline stability to the system.
+ jtj[0] += kEsmRegularizer;
+ jtj[2] += kEsmRegularizer;
+
+ const int64 prod1 = static_cast<int64>(jtj[0]) * jtj[2];
+ const int64 prod2 = static_cast<int64>(jtj[1]) * jtj[1];
+
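+    // Solve the regularized 2x2 normal equations JtJ * delta = Jtr using the
+    // adjugate: inverse([a b; b c]) = [c -b; -b a] / (a*c - b*b), where the
+    // determinant is prod1 - prod2 and jtj_1 holds the adjugate entries.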
+ // One ESM step.
+ const float jtj_1[4] = { static_cast<float>(jtj[2]),
+ static_cast<float>(-jtj[1]),
+ static_cast<float>(-jtj[1]),
+ static_cast<float>(jtj[0]) };
+ const double det_inv = 1.0 / static_cast<double>(prod1 - prod2);
+
+ g_x -= det_inv * (jtj_1[0] * jtr1_float + jtj_1[1] * jtr2_float);
+ g_y -= det_inv * (jtj_1[2] * jtr1_float + jtj_1[3] * jtr2_float);
+
+ if (kDoBrightnessNormalize) {
+ bright_offset +=
+ static_cast<int>(area_inv * static_cast<float>(sum_diff) + 0.5f);
+ }
+
+ // Update top left position.
+ left_fixed = RealToFixed1616(p_x + g_x - wsize_float);
+ top_fixed = RealToFixed1616(p_y + g_y - wsize_float);
+
+ left_trunc = left_fixed >> 16;
+ top_trunc = top_fixed >> 16;
+
+    // Abort iterating if the patch has moved outside the image borders.
+ if (left_trunc < 0 || top_trunc < 0 ||
+ (left_trunc + patch_size) >= J_x.width_less_one_ ||
+ (top_trunc + patch_size) >= J_y.height_less_one_) {
+ break;
+ }
+ } // Iteration.
+
+ // Copy value back into output.
+ *out_g_x = g_x;
+ *out_g_y = g_y;
+ return true;
+}
+
+
+bool OpticalFlow::FindFlowAtPointReversible(
+ const int level, const float u_x, const float u_y,
+ const bool reverse_flow,
+ float* flow_x, float* flow_y) const {
+ const ImageData& frame_a = reverse_flow ? *frame2_ : *frame1_;
+ const ImageData& frame_b = reverse_flow ? *frame1_ : *frame2_;
+
+ // Images I (prev) and J (next).
+ const Image<uint8>& img_I = *frame_a.GetPyramidSqrt2Level(level * 2);
+ const Image<uint8>& img_J = *frame_b.GetPyramidSqrt2Level(level * 2);
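+
+  // The pyramid is assumed to use half-octave (sqrt(2)) level spacing, so
+  // index level * 2 selects the image downsampled by a full factor of
+  // 2^level, matching shrink_factor below.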
+
+ // Computed gradients.
+ const Image<int32>& I_x = *frame_a.GetSpatialX(level);
+ const Image<int32>& I_y = *frame_a.GetSpatialY(level);
+ const Image<int32>& J_x = *frame_b.GetSpatialX(level);
+ const Image<int32>& J_y = *frame_b.GetSpatialY(level);
+
+ // Shrink factor from original.
+ const float shrink_factor = (1 << level);
+
+ // Image position vector (p := u^l), scaled for this level.
+ const float scaled_p_x = u_x / shrink_factor;
+ const float scaled_p_y = u_y / shrink_factor;
+
+ float scaled_flow_x = *flow_x / shrink_factor;
+ float scaled_flow_y = *flow_y / shrink_factor;
+
+  // LOGE("FindFlowAtPoint level %d: %5.2f, %5.2f (%5.2f, %5.2f)", level,
+  //      scaled_p_x, scaled_p_y, scaled_flow_x, scaled_flow_y);
+
+ const bool success = kUseEsm ?
+ FindFlowAtPoint_ESM(img_I, img_J, I_x, I_y, J_x, J_y,
+ scaled_p_x, scaled_p_y,
+ &scaled_flow_x, &scaled_flow_y) :
+ FindFlowAtPoint_LK(img_I, img_J, I_x, I_y,
+ scaled_p_x, scaled_p_y,
+ &scaled_flow_x, &scaled_flow_y);
+
+ *flow_x = scaled_flow_x * shrink_factor;
+ *flow_y = scaled_flow_y * shrink_factor;
+
+ return success;
+}
+
+
+bool OpticalFlow::FindFlowAtPointSingleLevel(
+ const int level,
+ const float u_x, const float u_y,
+ const bool filter_by_fb_error,
+ float* flow_x, float* flow_y) const {
+ if (!FindFlowAtPointReversible(level, u_x, u_y, false, flow_x, flow_y)) {
+ return false;
+ }
+
+ if (filter_by_fb_error) {
+ const float new_position_x = u_x + *flow_x;
+ const float new_position_y = u_y + *flow_y;
+
+ float reverse_flow_x = 0.0f;
+ float reverse_flow_y = 0.0f;
+
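+    // If the track is reliable, the backward flow from the new position
+    // should be close to the negative of the forward flow; the magnitude of
+    // their sum (the forward-backward error) is compared against a threshold
+    // proportional to the flow length.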
+ // Now find the backwards flow and confirm it lines up with the original
+ // starting point.
+ if (!FindFlowAtPointReversible(level, new_position_x, new_position_y,
+ true,
+ &reverse_flow_x, &reverse_flow_y)) {
+ LOGE("Backward error!");
+ return false;
+ }
+
+ const float discrepancy_length =
+ sqrtf(Square(*flow_x + reverse_flow_x) +
+ Square(*flow_y + reverse_flow_y));
+
+ const float flow_length = sqrtf(Square(*flow_x) + Square(*flow_y));
+
+ return discrepancy_length <
+ (kMaxForwardBackwardErrorAllowed * flow_length);
+ }
+
+ return true;
+}
+
+
+// An implementation of the Pyramidal Lucas-Kanade Optical Flow algorithm.
+// See http://robots.stanford.edu/cs223b04/algo_tracking.pdf for details.
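+// Tracking proceeds coarse to fine: the flow estimate from each coarser
+// level seeds the next finer level, so large motions can be recovered with a
+// small per-level integration window.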
+bool OpticalFlow::FindFlowAtPointPyramidal(const float u_x, const float u_y,
+ const bool filter_by_fb_error,
+ float* flow_x, float* flow_y) const {
+ const int max_level = MAX(kMinNumPyramidLevelsToUseForAdjustment,
+ kNumPyramidLevels - kNumCacheLevels);
+
+ // For every level in the pyramid, update the coordinates of the best match.
+ for (int l = max_level - 1; l >= 0; --l) {
+ if (!FindFlowAtPointSingleLevel(l, u_x, u_y,
+ filter_by_fb_error, flow_x, flow_y)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+} // namespace tf_tracking