Diffstat (limited to 'tensorflow/examples/android/jni/object_tracking/optical_flow.cc')
-rw-r--r-- | tensorflow/examples/android/jni/object_tracking/optical_flow.cc | 490 |
1 file changed, 490 insertions, 0 deletions
diff --git a/tensorflow/examples/android/jni/object_tracking/optical_flow.cc b/tensorflow/examples/android/jni/object_tracking/optical_flow.cc
new file mode 100644
index 0000000000..fab0a3155d
--- /dev/null
+++ b/tensorflow/examples/android/jni/object_tracking/optical_flow.cc
@@ -0,0 +1,490 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <math.h>
+
+#include "tensorflow/examples/android/jni/object_tracking/geom.h"
+#include "tensorflow/examples/android/jni/object_tracking/image-inl.h"
+#include "tensorflow/examples/android/jni/object_tracking/image.h"
+#include "tensorflow/examples/android/jni/object_tracking/time_log.h"
+#include "tensorflow/examples/android/jni/object_tracking/utils.h"
+
+#include "tensorflow/examples/android/jni/object_tracking/config.h"
+#include "tensorflow/examples/android/jni/object_tracking/flow_cache.h"
+#include "tensorflow/examples/android/jni/object_tracking/frame_pair.h"
+#include "tensorflow/examples/android/jni/object_tracking/image_data.h"
+#include "tensorflow/examples/android/jni/object_tracking/keypoint.h"
+#include "tensorflow/examples/android/jni/object_tracking/keypoint_detector.h"
+#include "tensorflow/examples/android/jni/object_tracking/optical_flow.h"
+
+namespace tf_tracking {
+
+OpticalFlow::OpticalFlow(const OpticalFlowConfig* const config)
+    : config_(config),
+      frame1_(NULL),
+      frame2_(NULL),
+      working_size_(config->image_size) {}
+
+
+void OpticalFlow::NextFrame(const ImageData* const image_data) {
+  // Special case for the first frame: make sure the image ends up in
+  // frame1_ so that keypoint detection can be done on it if desired.
+  frame1_ = (frame1_ == NULL) ? image_data : frame2_;
+  frame2_ = image_data;
+}
+
+
+// Static heart of the optical flow computation.
+// Lucas Kanade algorithm.
+bool OpticalFlow::FindFlowAtPoint_LK(const Image<uint8>& img_I,
+                                     const Image<uint8>& img_J,
+                                     const Image<int32>& I_x,
+                                     const Image<int32>& I_y,
+                                     const float p_x,
+                                     const float p_y,
+                                     float* out_g_x,
+                                     float* out_g_y) {
+  float g_x = *out_g_x;
+  float g_y = *out_g_y;
+  // Get values for frame 1. They remain constant through the inner
+  // iteration loop.
+  float vals_I[kFlowArraySize];
+  float vals_I_x[kFlowArraySize];
+  float vals_I_y[kFlowArraySize];
+
+  const int kPatchSize = 2 * kFlowIntegrationWindowSize + 1;
+  const float kWindowSizeFloat = static_cast<float>(kFlowIntegrationWindowSize);
+
+#if USE_FIXED_POINT_FLOW
+  const int fixed_x_max = RealToFixed1616(img_I.width_less_one_) - 1;
+  const int fixed_y_max = RealToFixed1616(img_I.height_less_one_) - 1;
+#else
+  const float real_x_max = I_x.width_less_one_ - EPSILON;
+  const float real_y_max = I_x.height_less_one_ - EPSILON;
+#endif
+
+  // Get the window around the original point.
+  const float src_left_real = p_x - kWindowSizeFloat;
+  const float src_top_real = p_y - kWindowSizeFloat;
+  float* vals_I_ptr = vals_I;
+  float* vals_I_x_ptr = vals_I_x;
+  float* vals_I_y_ptr = vals_I_y;
+#if USE_FIXED_POINT_FLOW
+  // Source integer coordinates.
+  const int src_left_fixed = RealToFixed1616(src_left_real);
+  const int src_top_fixed = RealToFixed1616(src_top_real);
+
+  for (int y = 0; y < kPatchSize; ++y) {
+    const int fp_y = Clip(src_top_fixed + (y << 16), 0, fixed_y_max);
+
+    for (int x = 0; x < kPatchSize; ++x) {
+      const int fp_x = Clip(src_left_fixed + (x << 16), 0, fixed_x_max);
+
+      *vals_I_ptr++ = img_I.GetPixelInterpFixed1616(fp_x, fp_y);
+      *vals_I_x_ptr++ = I_x.GetPixelInterpFixed1616(fp_x, fp_y);
+      *vals_I_y_ptr++ = I_y.GetPixelInterpFixed1616(fp_x, fp_y);
+    }
+  }
+#else
+  for (int y = 0; y < kPatchSize; ++y) {
+    const float y_pos = Clip(src_top_real + y, 0.0f, real_y_max);
+
+    for (int x = 0; x < kPatchSize; ++x) {
+      const float x_pos = Clip(src_left_real + x, 0.0f, real_x_max);
+
+      *vals_I_ptr++ = img_I.GetPixelInterp(x_pos, y_pos);
+      *vals_I_x_ptr++ = I_x.GetPixelInterp(x_pos, y_pos);
+      *vals_I_y_ptr++ = I_y.GetPixelInterp(x_pos, y_pos);
+    }
+  }
+#endif
+
+  // Compute the spatial gradient matrix about point p.
+  float G[] = { 0, 0, 0, 0 };
+  CalculateG(vals_I_x, vals_I_y, kFlowArraySize, G);
+
+  // Find the inverse of G.
+  float G_inv[4];
+  if (!Invert2x2(G, G_inv)) {
+    return false;
+  }
+
+#if NORMALIZE
+  const float mean_I = ComputeMean(vals_I, kFlowArraySize);
+  const float std_dev_I = ComputeStdDev(vals_I, kFlowArraySize, mean_I);
+#endif
+
+  // Iterate kNumIterations times or until we converge.
+  for (int iteration = 0; iteration < kNumIterations; ++iteration) {
+    // Get values for frame 2.
+    float vals_J[kFlowArraySize];
+
+    // Get the window around the destination point.
+    const float left_real = p_x + g_x - kWindowSizeFloat;
+    const float top_real = p_y + g_y - kWindowSizeFloat;
+    float* vals_J_ptr = vals_J;
+#if USE_FIXED_POINT_FLOW
+    // The top-left sub-pixel is set for the current iteration (in 16:16
+    // fixed). This is constant over one iteration.
+    const int left_fixed = RealToFixed1616(left_real);
+    const int top_fixed = RealToFixed1616(top_real);
+
+    for (int win_y = 0; win_y < kPatchSize; ++win_y) {
+      const int fp_y = Clip(top_fixed + (win_y << 16), 0, fixed_y_max);
+      for (int win_x = 0; win_x < kPatchSize; ++win_x) {
+        const int fp_x = Clip(left_fixed + (win_x << 16), 0, fixed_x_max);
+        *vals_J_ptr++ = img_J.GetPixelInterpFixed1616(fp_x, fp_y);
+      }
+    }
+#else
+    for (int win_y = 0; win_y < kPatchSize; ++win_y) {
+      const float y_pos = Clip(top_real + win_y, 0.0f, real_y_max);
+      for (int win_x = 0; win_x < kPatchSize; ++win_x) {
+        const float x_pos = Clip(left_real + win_x, 0.0f, real_x_max);
+        *vals_J_ptr++ = img_J.GetPixelInterp(x_pos, y_pos);
+      }
+    }
+#endif
+
+#if NORMALIZE
+    const float mean_J = ComputeMean(vals_J, kFlowArraySize);
+    const float std_dev_J = ComputeStdDev(vals_J, kFlowArraySize, mean_J);
+
+    // TODO(andrewharp): Probably better to completely detect and handle the
+    // "corner case" where the patch is fully outside the image diagonally.
+    const float std_dev_ratio = std_dev_J > 0.0f ? std_dev_I / std_dev_J : 1.0f;
+#endif
+
+    // Compute image mismatch vector.
+    float b_x = 0.0f;
+    float b_y = 0.0f;
+
+    vals_I_ptr = vals_I;
+    vals_J_ptr = vals_J;
+    vals_I_x_ptr = vals_I_x;
+    vals_I_y_ptr = vals_I_y;
+
+    for (int win_y = 0; win_y < kPatchSize; ++win_y) {
+      for (int win_x = 0; win_x < kPatchSize; ++win_x) {
+#if NORMALIZE
+        // Normalized Image difference.
+        const float dI =
+            (*vals_I_ptr++ - mean_I) - (*vals_J_ptr++ - mean_J) * std_dev_ratio;
+#else
+        const float dI = *vals_I_ptr++ - *vals_J_ptr++;
+#endif
+        b_x += dI * *vals_I_x_ptr++;
+        b_y += dI * *vals_I_y_ptr++;
+      }
+    }
+
+    // Optical flow... solve n = G^-1 * b
+    const float n_x = (G_inv[0] * b_x) + (G_inv[1] * b_y);
+    const float n_y = (G_inv[2] * b_x) + (G_inv[3] * b_y);
+
+    // Update best guess with residual displacement from this level and
+    // iteration.
+    g_x += n_x;
+    g_y += n_y;
+
+    // LOGV("Iteration %d: delta (%.3f, %.3f)", iteration, n_x, n_y);
+
+    // Abort early if we're already below the threshold.
+    if (Square(n_x) + Square(n_y) < Square(kTrackingAbortThreshold)) {
+      break;
+    }
+  }  // Iteration.
+
+  // Copy value back into output.
+  *out_g_x = g_x;
+  *out_g_y = g_y;
+  return true;
+}
+
+
+// Pointwise flow using translational 2dof ESM.
+bool OpticalFlow::FindFlowAtPoint_ESM(const Image<uint8>& img_I,
+                                      const Image<uint8>& img_J,
+                                      const Image<int32>& I_x,
+                                      const Image<int32>& I_y,
+                                      const Image<int32>& J_x,
+                                      const Image<int32>& J_y,
+                                      const float p_x,
+                                      const float p_y,
+                                      float* out_g_x,
+                                      float* out_g_y) {
+  float g_x = *out_g_x;
+  float g_y = *out_g_y;
+  const float area_inv = 1.0f / static_cast<float>(kFlowArraySize);
+
+  // Get values for frame 1. They remain constant through the inner
+  // iteration loop.
+  uint8 vals_I[kFlowArraySize];
+  uint8 vals_J[kFlowArraySize];
+  int16 src_gradient_x[kFlowArraySize];
+  int16 src_gradient_y[kFlowArraySize];
+
+  // TODO(rspring): try out the IntegerPatchAlign() method once
+  // the code for that is in ../common.
+  const float wsize_float = static_cast<float>(kFlowIntegrationWindowSize);
+  const int src_left_fixed = RealToFixed1616(p_x - wsize_float);
+  const int src_top_fixed = RealToFixed1616(p_y - wsize_float);
+  const int patch_size = 2 * kFlowIntegrationWindowSize + 1;
+
+  // Create the keypoint template patch from a subpixel location.
+  if (!img_I.ExtractPatchAtSubpixelFixed1616(src_left_fixed, src_top_fixed,
+                                             patch_size, patch_size, vals_I) ||
+      !I_x.ExtractPatchAtSubpixelFixed1616(src_left_fixed, src_top_fixed,
+                                           patch_size, patch_size,
+                                           src_gradient_x) ||
+      !I_y.ExtractPatchAtSubpixelFixed1616(src_left_fixed, src_top_fixed,
+                                           patch_size, patch_size,
+                                           src_gradient_y)) {
+    return false;
+  }
+
+  int bright_offset = 0;
+  int sum_diff = 0;
+
+  // The top-left sub-pixel is set for the current iteration (in 16:16 fixed).
+  // This is constant over one iteration.
+  int left_fixed = RealToFixed1616(p_x + g_x - wsize_float);
+  int top_fixed = RealToFixed1616(p_y + g_y - wsize_float);
+
+  // The truncated version gives the most top-left pixel that is used.
+  int left_trunc = left_fixed >> 16;
+  int top_trunc = top_fixed >> 16;
+
+  // Compute an initial brightness offset.
+  if (kDoBrightnessNormalize &&
+      left_trunc >= 0 && top_trunc >= 0 &&
+      (left_trunc + patch_size) < img_J.width_less_one_ &&
+      (top_trunc + patch_size) < img_J.height_less_one_) {
+    int templ_index = 0;
+    const uint8* j_row = img_J[top_trunc] + left_trunc;
+
+    const int j_stride = img_J.stride();
+
+    for (int y = 0; y < patch_size; ++y, j_row += j_stride) {
+      for (int x = 0; x < patch_size; ++x) {
+        sum_diff += static_cast<int>(j_row[x]) - vals_I[templ_index++];
+      }
+    }
+
+    bright_offset = static_cast<int>(static_cast<float>(sum_diff) * area_inv);
+  }
+
+  // Iterate kNumIterations times or until we go out of image.
+  for (int iteration = 0; iteration < kNumIterations; ++iteration) {
+    int jtj[3] = { 0, 0, 0 };
+    int jtr[2] = { 0, 0 };
+    sum_diff = 0;
+
+    // Extract the target image values.
+    // Extract the gradient from the target image patch and accumulate to
+    // the gradient of the source image patch.
+    if (!img_J.ExtractPatchAtSubpixelFixed1616(left_fixed, top_fixed,
+                                               patch_size, patch_size,
+                                               vals_J)) {
+      break;
+    }
+
+    const uint8* templ_row = vals_I;
+    const uint8* extract_row = vals_J;
+    const int16* src_dx_row = src_gradient_x;
+    const int16* src_dy_row = src_gradient_y;
+
+    for (int y = 0; y < patch_size; ++y, templ_row += patch_size,
+         src_dx_row += patch_size, src_dy_row += patch_size,
+         extract_row += patch_size) {
+      const int fp_y = top_fixed + (y << 16);
+      for (int x = 0; x < patch_size; ++x) {
+        const int fp_x = left_fixed + (x << 16);
+        int32 target_dx = J_x.GetPixelInterpFixed1616(fp_x, fp_y);
+        int32 target_dy = J_y.GetPixelInterpFixed1616(fp_x, fp_y);
+
+        // Combine the two Jacobians.
+        // Right-shift by one to account for the fact that we add
+        // two Jacobians.
+        int32 dx = (src_dx_row[x] + target_dx) >> 1;
+        int32 dy = (src_dy_row[x] + target_dy) >> 1;
+
+        // The current residual b - h(q) == extracted - (template + offset)
+        int32 diff = static_cast<int32>(extract_row[x]) -
+                     static_cast<int32>(templ_row[x]) -
+                     bright_offset;
+
+        jtj[0] += dx * dx;
+        jtj[1] += dx * dy;
+        jtj[2] += dy * dy;
+
+        jtr[0] += dx * diff;
+        jtr[1] += dy * diff;
+
+        sum_diff += diff;
+      }
+    }
+
+    const float jtr1_float = static_cast<float>(jtr[0]);
+    const float jtr2_float = static_cast<float>(jtr[1]);
+
+    // Add some baseline stability to the system.
+    jtj[0] += kEsmRegularizer;
+    jtj[2] += kEsmRegularizer;
+
+    const int64 prod1 = static_cast<int64>(jtj[0]) * jtj[2];
+    const int64 prod2 = static_cast<int64>(jtj[1]) * jtj[1];
+
+    // One ESM step.
+    const float jtj_1[4] = { static_cast<float>(jtj[2]),
+                             static_cast<float>(-jtj[1]),
+                             static_cast<float>(-jtj[1]),
+                             static_cast<float>(jtj[0]) };
+    const double det_inv = 1.0 / static_cast<double>(prod1 - prod2);
+
+    g_x -= det_inv * (jtj_1[0] * jtr1_float + jtj_1[1] * jtr2_float);
+    g_y -= det_inv * (jtj_1[2] * jtr1_float + jtj_1[3] * jtr2_float);
+
+    if (kDoBrightnessNormalize) {
+      bright_offset +=
+          static_cast<int>(area_inv * static_cast<float>(sum_diff) + 0.5f);
+    }
+
+    // Update top left position.
+    left_fixed = RealToFixed1616(p_x + g_x - wsize_float);
+    top_fixed = RealToFixed1616(p_y + g_y - wsize_float);
+
+    left_trunc = left_fixed >> 16;
+    top_trunc = top_fixed >> 16;
+
+    // Abort iterations if we go out of borders.
+    if (left_trunc < 0 || top_trunc < 0 ||
+        (left_trunc + patch_size) >= J_x.width_less_one_ ||
+        (top_trunc + patch_size) >= J_y.height_less_one_) {
+      break;
+    }
+  }  // Iteration.
+
+  // Copy value back into output.
+  *out_g_x = g_x;
+  *out_g_y = g_y;
+  return true;
+}
+
+
+bool OpticalFlow::FindFlowAtPointReversible(
+    const int level, const float u_x, const float u_y,
+    const bool reverse_flow,
+    float* flow_x, float* flow_y) const {
+  const ImageData& frame_a = reverse_flow ? *frame2_ : *frame1_;
+  const ImageData& frame_b = reverse_flow ? *frame1_ : *frame2_;
+
+  // Images I (prev) and J (next).
+  const Image<uint8>& img_I = *frame_a.GetPyramidSqrt2Level(level * 2);
+  const Image<uint8>& img_J = *frame_b.GetPyramidSqrt2Level(level * 2);
+
+  // Computed gradients.
+  const Image<int32>& I_x = *frame_a.GetSpatialX(level);
+  const Image<int32>& I_y = *frame_a.GetSpatialY(level);
+  const Image<int32>& J_x = *frame_b.GetSpatialX(level);
+  const Image<int32>& J_y = *frame_b.GetSpatialY(level);
+
+  // Shrink factor from original.
+  const float shrink_factor = (1 << level);
+
+  // Image position vector (p := u^l), scaled for this level.
+  const float scaled_p_x = u_x / shrink_factor;
+  const float scaled_p_y = u_y / shrink_factor;
+
+  float scaled_flow_x = *flow_x / shrink_factor;
+  float scaled_flow_y = *flow_y / shrink_factor;
+
+  // LOGE("FindFlowAtPoint level %d: %5.2f, %5.2f (%5.2f, %5.2f)", level,
+  //      scaled_p_x, scaled_p_y, &scaled_flow_x, &scaled_flow_y);
+
+  const bool success = kUseEsm ?
+      FindFlowAtPoint_ESM(img_I, img_J, I_x, I_y, J_x, J_y,
+                          scaled_p_x, scaled_p_y,
+                          &scaled_flow_x, &scaled_flow_y) :
+      FindFlowAtPoint_LK(img_I, img_J, I_x, I_y,
+                         scaled_p_x, scaled_p_y,
+                         &scaled_flow_x, &scaled_flow_y);
+
+  *flow_x = scaled_flow_x * shrink_factor;
+  *flow_y = scaled_flow_y * shrink_factor;
+
+  return success;
+}
+
+
+bool OpticalFlow::FindFlowAtPointSingleLevel(
+    const int level,
+    const float u_x, const float u_y,
+    const bool filter_by_fb_error,
+    float* flow_x, float* flow_y) const {
+  if (!FindFlowAtPointReversible(level, u_x, u_y, false, flow_x, flow_y)) {
+    return false;
+  }
+
+  if (filter_by_fb_error) {
+    const float new_position_x = u_x + *flow_x;
+    const float new_position_y = u_y + *flow_y;
+
+    float reverse_flow_x = 0.0f;
+    float reverse_flow_y = 0.0f;
+
+    // Now find the backwards flow and confirm it lines up with the original
+    // starting point.
+    if (!FindFlowAtPointReversible(level, new_position_x, new_position_y,
+                                   true,
+                                   &reverse_flow_x, &reverse_flow_y)) {
+      LOGE("Backward error!");
+      return false;
+    }
+
+    const float discrepancy_length =
+        sqrtf(Square(*flow_x + reverse_flow_x) +
+              Square(*flow_y + reverse_flow_y));
+
+    const float flow_length = sqrtf(Square(*flow_x) + Square(*flow_y));
+
+    return discrepancy_length <
+        (kMaxForwardBackwardErrorAllowed * flow_length);
+  }
+
+  return true;
+}
+
+
+// An implementation of the Pyramidal Lucas-Kanade Optical Flow algorithm.
+// See http://robots.stanford.edu/cs223b04/algo_tracking.pdf for details.
+bool OpticalFlow::FindFlowAtPointPyramidal(const float u_x, const float u_y,
+                                           const bool filter_by_fb_error,
+                                           float* flow_x, float* flow_y) const {
+  const int max_level = MAX(kMinNumPyramidLevelsToUseForAdjustment,
+                            kNumPyramidLevels - kNumCacheLevels);
+
+  // For every level in the pyramid, update the coordinates of the best match.
+  for (int l = max_level - 1; l >= 0; --l) {
+    if (!FindFlowAtPointSingleLevel(l, u_x, u_y,
+                                    filter_by_fb_error, flow_x, flow_y)) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+}  // namespace tf_tracking
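
For reference, the inner solve in FindFlowAtPoint_LK is the standard Lucas-Kanade normal-equation step over the integration window W, in the notation of the Bouguet tutorial linked in the code:

    G = \sum_{(x,y) \in W} \begin{pmatrix} I_x^2 & I_x I_y \\ I_x I_y & I_y^2 \end{pmatrix}, \qquad
    b = \sum_{(x,y) \in W} \delta I \begin{pmatrix} I_x \\ I_y \end{pmatrix}, \qquad
    n = G^{-1} b,

with residual \delta I = I(x, y) - J(x + g_x, y + g_y). Each iteration accumulates g \leftarrow g + n and stops early once \lVert n \rVert falls below kTrackingAbortThreshold, exactly as in the loop above.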
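Because CalculateG and Invert2x2 come from utils.h and are not shown in this diff, here is a minimal standalone sketch of the same 2x2 solve on a toy four-pixel patch. The two helpers below are stand-ins written to match the behavior the code above relies on, not the actual utils.h implementations, and the patch values are invented for illustration:

#include <cmath>
#include <cstdio>

// Accumulate the 2x2 spatial gradient matrix
// G = sum over the patch of [Ix*Ix, Ix*Iy; Ix*Iy, Iy*Iy].
static void CalculateG(const float* I_x, const float* I_y, int n, float* G) {
  for (int i = 0; i < n; ++i) {
    G[0] += I_x[i] * I_x[i];
    G[1] += I_x[i] * I_y[i];
    G[3] += I_y[i] * I_y[i];
  }
  G[2] = G[1];  // G is symmetric.
}

// Invert a 2x2 matrix; fails when the determinant is (near) zero, i.e.
// the patch lacks gradient information in some direction.
static bool Invert2x2(const float* A, float* A_inv) {
  const float det = A[0] * A[3] - A[1] * A[2];
  if (std::fabs(det) < 1e-6f) return false;
  const float det_inv = 1.0f / det;
  A_inv[0] = A[3] * det_inv;
  A_inv[1] = -A[1] * det_inv;
  A_inv[2] = -A[2] * det_inv;
  A_inv[3] = A[0] * det_inv;
  return true;
}

int main() {
  // A toy 4-pixel "patch": frame-1 values, gradients, and frame-2 values.
  const float vals_I[4] = {10, 20, 30, 40};
  const float I_x[4] = {5, 5, 5, 5};
  const float I_y[4] = {2, -2, 2, -2};
  const float vals_J[4] = {12, 23, 33, 44};  // Patch shifted slightly.

  float G[4] = {0, 0, 0, 0};
  CalculateG(I_x, I_y, 4, G);

  float G_inv[4];
  if (!Invert2x2(G, G_inv)) return 1;

  // Mismatch vector b = sum(dI * [Ix, Iy]), with dI = I - J as in the file.
  float b_x = 0, b_y = 0;
  for (int i = 0; i < 4; ++i) {
    const float dI = vals_I[i] - vals_J[i];
    b_x += dI * I_x[i];
    b_y += dI * I_y[i];
  }

  // One Newton step: n = G^-1 * b (the file accumulates this into the
  // running flow guess g and iterates).
  const float n_x = G_inv[0] * b_x + G_inv[1] * b_y;
  const float n_y = G_inv[2] * b_x + G_inv[3] * b_y;
  printf("flow update: (%f, %f)\n", n_x, n_y);
  return 0;
}

Invert2x2 failing corresponds to a textureless or edge-only patch (the aperture problem), which is why FindFlowAtPoint_LK returns false rather than iterating on an unsolvable system.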
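The filter_by_fb_error path in FindFlowAtPointSingleLevel is the usual forward-backward consistency test: with forward flow f and reverse flow r tracked back from the matched endpoint, the match is accepted only when

    \lVert f + r \rVert < kMaxForwardBackwardErrorAllowed \cdot \lVert f \rVert,

i.e. when tracking backwards lands close to the original point relative to how far the point moved.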