diff options
Diffstat (limited to 'tensorflow/examples/android/jni/object_tracking/image-inl.h')
-rw-r--r-- | tensorflow/examples/android/jni/object_tracking/image-inl.h | 642 |
1 file changed, 642 insertions, 0 deletions
diff --git a/tensorflow/examples/android/jni/object_tracking/image-inl.h b/tensorflow/examples/android/jni/object_tracking/image-inl.h new file mode 100644 index 0000000000..18123cef01 --- /dev/null +++ b/tensorflow/examples/android/jni/object_tracking/image-inl.h @@ -0,0 +1,642 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_INL_H_ +#define THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_INL_H_ + +#include "tensorflow/core/platform/types.h" + +#include "tensorflow/examples/android/jni/object_tracking/geom.h" +#include "tensorflow/examples/android/jni/object_tracking/image.h" +#include "tensorflow/examples/android/jni/object_tracking/utils.h" + +namespace tf_tracking { + +template <typename T> +Image<T>::Image(const int width, const int height) + : width_less_one_(width - 1), + height_less_one_(height - 1), + data_size_(width * height), + own_data_(true), + width_(width), + height_(height), + stride_(width) { + Allocate(); +} + +template <typename T> +Image<T>::Image(const Size& size) + : width_less_one_(size.width - 1), + height_less_one_(size.height - 1), + data_size_(size.width * size.height), + own_data_(true), + width_(size.width), + height_(size.height), + stride_(size.width) { + Allocate(); +} + +// Constructor that creates an image from 
preallocated data. +// Note: The image takes ownership of the data lifecycle, unless own_data is +// set to false. +template <typename T> +Image<T>::Image(const int width, const int height, T* const image_data, + const bool own_data) : + width_less_one_(width - 1), + height_less_one_(height - 1), + data_size_(width * height), + own_data_(own_data), + width_(width), + height_(height), + stride_(width) { + image_data_ = image_data; + SCHECK(image_data_ != NULL, "Can't create image with NULL data!"); +} + +template <typename T> +Image<T>::~Image() { + if (own_data_) { + delete[] image_data_; + } + image_data_ = NULL; +} + +template<typename T> +template<class DstType> +bool Image<T>::ExtractPatchAtSubpixelFixed1616(const int fp_x, + const int fp_y, + const int patchwidth, + const int patchheight, + DstType* to_data) const { + // Calculate weights. + const int trunc_x = fp_x >> 16; + const int trunc_y = fp_y >> 16; + + if (trunc_x < 0 || trunc_y < 0 || + (trunc_x + patchwidth) >= width_less_one_ || + (trunc_y + patchheight) >= height_less_one_) { + return false; + } + + // Now walk over destination patch and fill from interpolated source image. 
+ for (int y = 0; y < patchheight; ++y, to_data += patchwidth) { + for (int x = 0; x < patchwidth; ++x) { + to_data[x] = + static_cast<DstType>(GetPixelInterpFixed1616(fp_x + (x << 16), + fp_y + (y << 16))); + } + } + + return true; +} + +template <typename T> +Image<T>* Image<T>::Crop( + const int left, const int top, const int right, const int bottom) const { + SCHECK(left >= 0 && left < width_, "out of bounds at %d!", left); + SCHECK(right >= 0 && right < width_, "out of bounds at %d!", right); + SCHECK(top >= 0 && top < height_, "out of bounds at %d!", top); + SCHECK(bottom >= 0 && bottom < height_, "out of bounds at %d!", bottom); + + SCHECK(left <= right, "mismatch!"); + SCHECK(top <= bottom, "mismatch!"); + + const int new_width = right - left + 1; + const int new_height = bottom - top + 1; + + Image<T>* const cropped_image = new Image(new_width, new_height); + + for (int y = 0; y < new_height; ++y) { + memcpy((*cropped_image)[y], ((*this)[y + top] + left), + new_width * sizeof(T)); + } + + return cropped_image; +} + +template <typename T> +inline float Image<T>::GetPixelInterp(const float x, const float y) const { + // Do int conversion one time. + const int floored_x = static_cast<int>(x); + const int floored_y = static_cast<int>(y); + + // Note: it might be the case that the *_[min|max] values are clipped, and + // these (the a b c d vals) aren't (for speed purposes), but that doesn't + // matter. We'll just be blending the pixel with itself in that case anyway. + const float b = x - floored_x; + const float a = 1.0f - b; + + const float d = y - floored_y; + const float c = 1.0f - d; + + SCHECK(ValidInterpPixel(x, y), + "x or y out of bounds! %.2f [0 - %d), %.2f [0 - %d)", + x, width_less_one_, y, height_less_one_); + + const T* const pix_ptr = (*this)[floored_y] + floored_x; + + // Get the pixel values surrounding this point. 
+ const T& p1 = pix_ptr[0]; + const T& p2 = pix_ptr[1]; + const T& p3 = pix_ptr[width_]; + const T& p4 = pix_ptr[width_ + 1]; + + // Simple bilinear interpolation between four reference pixels. + // If x is the value requested: + // a b + // ------- + // c |p1 p2| + // | x | + // d |p3 p4| + // ------- + return c * ((a * p1) + (b * p2)) + + d * ((a * p3) + (b * p4)); +} + + +template <typename T> +inline T Image<T>::GetPixelInterpFixed1616( + const int fp_x_whole, const int fp_y_whole) const { + static const int kFixedPointOne = 0x00010000; + static const int kFixedPointHalf = 0x00008000; + static const int kFixedPointTruncateMask = 0xFFFF0000; + + int trunc_x = fp_x_whole & kFixedPointTruncateMask; + int trunc_y = fp_y_whole & kFixedPointTruncateMask; + const int fp_x = fp_x_whole - trunc_x; + const int fp_y = fp_y_whole - trunc_y; + + // Scale the truncated values back to regular ints. + trunc_x >>= 16; + trunc_y >>= 16; + + const int one_minus_fp_x = kFixedPointOne - fp_x; + const int one_minus_fp_y = kFixedPointOne - fp_y; + + const T* trunc_start = (*this)[trunc_y] + trunc_x; + + const T a = trunc_start[0]; + const T b = trunc_start[1]; + const T c = trunc_start[stride_]; + const T d = trunc_start[stride_ + 1]; + + return ((one_minus_fp_y * static_cast<int64>(one_minus_fp_x * a + fp_x * b) + + fp_y * static_cast<int64>(one_minus_fp_x * c + fp_x * d) + + kFixedPointHalf) >> 32); +} + +template <typename T> +inline bool Image<T>::ValidPixel(const int x, const int y) const { + return InRange(x, ZERO, width_less_one_) && + InRange(y, ZERO, height_less_one_); +} + +template <typename T> +inline BoundingBox Image<T>::GetContainingBox() const { + return BoundingBox( + 0, 0, width_less_one_ - EPSILON, height_less_one_ - EPSILON); +} + +template <typename T> +inline bool Image<T>::Contains(const BoundingBox& bounding_box) const { + // TODO(andrewharp): Come up with a more elegant way of ensuring that bounds + // are ok. 
+ return GetContainingBox().Contains(bounding_box); +} + +template <typename T> +inline bool Image<T>::ValidInterpPixel(const float x, const float y) const { + // Exclusive of max because we can be more efficient if we don't handle + // interpolating on or past the last pixel. + return (x >= ZERO) && (x < width_less_one_) && + (y >= ZERO) && (y < height_less_one_); +} + +template <typename T> +void Image<T>::DownsampleAveraged(const T* const original, const int stride, + const int factor) { +#ifdef __ARM_NEON + if (factor == 4 || factor == 2) { + DownsampleAveragedNeon(original, stride, factor); + return; + } +#endif + + // TODO(andrewharp): delete or enable this for non-uint8 downsamples. + const int pixels_per_block = factor * factor; + + // For every pixel in resulting image. + for (int y = 0; y < height_; ++y) { + const int orig_y = y * factor; + const int y_bound = orig_y + factor; + + // Sum up the original pixels. + for (int x = 0; x < width_; ++x) { + const int orig_x = x * factor; + const int x_bound = orig_x + factor; + + // Making this int32 because type U or T might overflow. + int32 pixel_sum = 0; + + // Grab all the pixels that make up this pixel. + for (int curr_y = orig_y; curr_y < y_bound; ++curr_y) { + const T* p = original + curr_y * stride + orig_x; + + for (int curr_x = orig_x; curr_x < x_bound; ++curr_x) { + pixel_sum += *p++; + } + } + + (*this)[y][x] = pixel_sum / pixels_per_block; + } + } +} + +template <typename T> +void Image<T>::DownsampleInterpolateNearest(const Image<T>& original) { + // Calculating the scaling factors based on target image size. + const float factor_x = static_cast<float>(original.GetWidth()) / + static_cast<float>(width_); + const float factor_y = static_cast<float>(original.GetHeight()) / + static_cast<float>(height_); + + // Calculating initial offset in x-axis. + const float offset_x = 0.5f * (original.GetWidth() - width_) / width_; + + // Calculating initial offset in y-axis. 
+ const float offset_y = 0.5f * (original.GetHeight() - height_) / height_; + + float orig_y = offset_y; + + // For every pixel in resulting image. + for (int y = 0; y < height_; ++y) { + float orig_x = offset_x; + + // Finding nearest pixel on y-axis. + const int nearest_y = static_cast<int>(orig_y + 0.5f); + const T* row_data = original[nearest_y]; + + T* pixel_ptr = (*this)[y]; + + for (int x = 0; x < width_; ++x) { + // Finding nearest pixel on x-axis. + const int nearest_x = static_cast<int>(orig_x + 0.5f); + + *pixel_ptr++ = row_data[nearest_x]; + + orig_x += factor_x; + } + + orig_y += factor_y; + } +} + +template <typename T> +void Image<T>::DownsampleInterpolateLinear(const Image<T>& original) { + // TODO(andrewharp): Turn this into a general compare sizes/bulk + // copy method. + if (original.GetWidth() == GetWidth() && + original.GetHeight() == GetHeight() && + original.stride() == stride()) { + memcpy(image_data_, original.data(), data_size_ * sizeof(T)); + return; + } + + // Calculating the scaling factors based on target image size. + const float factor_x = static_cast<float>(original.GetWidth()) / + static_cast<float>(width_); + const float factor_y = static_cast<float>(original.GetHeight()) / + static_cast<float>(height_); + + // Calculating initial offset in x-axis. + const float offset_x = 0; + const int offset_x_fp = RealToFixed1616(offset_x); + + // Calculating initial offset in y-axis. + const float offset_y = 0; + const int offset_y_fp = RealToFixed1616(offset_y); + + // Get the fixed point scaling factor value. + // Shift by 8 so we can fit everything into a 4 byte int later for speed + // reasons. This means the precision is limited to 1 / 256th of a pixel, + // but this should be good enough. 
+ const int factor_x_fp = RealToFixed1616(factor_x) >> 8; + const int factor_y_fp = RealToFixed1616(factor_y) >> 8; + + int src_y_fp = offset_y_fp >> 8; + + static const int kFixedPointOne8 = 0x00000100; + static const int kFixedPointHalf8 = 0x00000080; + static const int kFixedPointTruncateMask8 = 0xFFFFFF00; + + // For every pixel in resulting image. + for (int y = 0; y < height_; ++y) { + int src_x_fp = offset_x_fp >> 8; + + int trunc_y = src_y_fp & kFixedPointTruncateMask8; + const int fp_y = src_y_fp - trunc_y; + + // Scale the truncated values back to regular ints. + trunc_y >>= 8; + + const int one_minus_fp_y = kFixedPointOne8 - fp_y; + + T* pixel_ptr = (*this)[y]; + + // Make sure not to read from an invalid row. + const int trunc_y_b = MIN(original.height_less_one_, trunc_y + 1); + const T* other_top_ptr = original[trunc_y]; + const T* other_bot_ptr = original[trunc_y_b]; + + int last_trunc_x = -1; + int trunc_x = -1; + + T a = 0; + T b = 0; + T c = 0; + T d = 0; + + for (int x = 0; x < width_; ++x) { + trunc_x = src_x_fp & kFixedPointTruncateMask8; + + const int fp_x = (src_x_fp - trunc_x) >> 8; + + // Scale the truncated values back to regular ints. + trunc_x >>= 8; + + // It's possible we're reading from the same pixels + if (trunc_x != last_trunc_x) { + // Make sure not to read from an invalid column. 
+ const int trunc_x_b = MIN(original.width_less_one_, trunc_x + 1); + a = other_top_ptr[trunc_x]; + b = other_top_ptr[trunc_x_b]; + c = other_bot_ptr[trunc_x]; + d = other_bot_ptr[trunc_x_b]; + last_trunc_x = trunc_x; + } + + const int one_minus_fp_x = kFixedPointOne8 - fp_x; + + const int32 value = + ((one_minus_fp_y * one_minus_fp_x * a + fp_x * b) + + (fp_y * one_minus_fp_x * c + fp_x * d) + + kFixedPointHalf8) >> 16; + + *pixel_ptr++ = value; + + src_x_fp += factor_x_fp; + } + src_y_fp += factor_y_fp; + } +} + +template <typename T> +void Image<T>::DownsampleSmoothed3x3(const Image<T>& original) { + for (int y = 0; y < height_; ++y) { + const int orig_y = Clip(2 * y, ZERO, original.height_less_one_); + const int min_y = Clip(orig_y - 1, ZERO, original.height_less_one_); + const int max_y = Clip(orig_y + 1, ZERO, original.height_less_one_); + + for (int x = 0; x < width_; ++x) { + const int orig_x = Clip(2 * x, ZERO, original.width_less_one_); + const int min_x = Clip(orig_x - 1, ZERO, original.width_less_one_); + const int max_x = Clip(orig_x + 1, ZERO, original.width_less_one_); + + // Center. + int32 pixel_sum = original[orig_y][orig_x] * 4; + + // Sides. + pixel_sum += (original[orig_y][max_x] + + original[orig_y][min_x] + + original[max_y][orig_x] + + original[min_y][orig_x]) * 2; + + // Diagonals. + pixel_sum += (original[min_y][max_x] + + original[min_y][min_x] + + original[max_y][max_x] + + original[max_y][min_x]); + + (*this)[y][x] = pixel_sum >> 4; // 16 + } + } +} + +template <typename T> +void Image<T>::DownsampleSmoothed5x5(const Image<T>& original) { + const int max_x = original.width_less_one_; + const int max_y = original.height_less_one_; + + // The JY Bouget paper on Lucas-Kanade recommends a + // [1/16 1/4 3/8 1/4 1/16]^2 filter. + // This works out to a [1 4 6 4 1]^2 / 256 array, precomputed below. 
+ static const int window_radius = 2; + static const int window_size = window_radius*2 + 1; + static const int window_weights[] = {1, 4, 6, 4, 1, // 16 + + 4, 16, 24, 16, 4, // 64 + + 6, 24, 36, 24, 6, // 96 + + 4, 16, 24, 16, 4, // 64 + + 1, 4, 6, 4, 1}; // 16 = 256 + + // We'll multiply and sum with the the whole numbers first, then divide by + // the total weight to normalize at the last moment. + for (int y = 0; y < height_; ++y) { + for (int x = 0; x < width_; ++x) { + int32 pixel_sum = 0; + + const int* w = window_weights; + const int start_x = Clip((x << 1) - window_radius, ZERO, max_x); + + // Clip the boundaries to the size of the image. + for (int window_y = 0; window_y < window_size; ++window_y) { + const int start_y = + Clip((y << 1) - window_radius + window_y, ZERO, max_y); + + const T* p = original[start_y] + start_x; + + for (int window_x = 0; window_x < window_size; ++window_x) { + pixel_sum += *p++ * *w++; + } + } + + // Conversion to type T will happen here after shifting right 8 bits to + // divide by 256. + (*this)[y][x] = pixel_sum >> 8; + } + } +} + +template <typename T> +template <typename U> +inline T Image<T>::ScharrPixelX(const Image<U>& original, + const int center_x, const int center_y) const { + const int min_x = Clip(center_x - 1, ZERO, original.width_less_one_); + const int max_x = Clip(center_x + 1, ZERO, original.width_less_one_); + const int min_y = Clip(center_y - 1, ZERO, original.height_less_one_); + const int max_y = Clip(center_y + 1, ZERO, original.height_less_one_); + + // Convolution loop unrolled for performance... 
+ return (3 * (original[min_y][max_x] + + original[max_y][max_x] + - original[min_y][min_x] + - original[max_y][min_x]) + + 10 * (original[center_y][max_x] + - original[center_y][min_x])) / 32; +} + +template <typename T> +template <typename U> +inline T Image<T>::ScharrPixelY(const Image<U>& original, + const int center_x, const int center_y) const { + const int min_x = Clip(center_x - 1, 0, original.width_less_one_); + const int max_x = Clip(center_x + 1, 0, original.width_less_one_); + const int min_y = Clip(center_y - 1, 0, original.height_less_one_); + const int max_y = Clip(center_y + 1, 0, original.height_less_one_); + + // Convolution loop unrolled for performance... + return (3 * (original[max_y][min_x] + + original[max_y][max_x] + - original[min_y][min_x] + - original[min_y][max_x]) + + 10 * (original[max_y][center_x] + - original[min_y][center_x])) / 32; +} + +template <typename T> +template <typename U> +inline void Image<T>::ScharrX(const Image<U>& original) { + for (int y = 0; y < height_; ++y) { + for (int x = 0; x < width_; ++x) { + SetPixel(x, y, ScharrPixelX(original, x, y)); + } + } +} + +template <typename T> +template <typename U> +inline void Image<T>::ScharrY(const Image<U>& original) { + for (int y = 0; y < height_; ++y) { + for (int x = 0; x < width_; ++x) { + SetPixel(x, y, ScharrPixelY(original, x, y)); + } + } +} + +template <typename T> +template <typename U> +void Image<T>::DerivativeX(const Image<U>& original) { + for (int y = 0; y < height_; ++y) { + const U* const source_row = original[y]; + T* const dest_row = (*this)[y]; + + // Compute first pixel. Approximated with forward difference. + dest_row[0] = source_row[1] - source_row[0]; + + // All the pixels in between. Central difference method. 
+ const U* source_prev_pixel = source_row; + T* dest_pixel = dest_row + 1; + const U* source_next_pixel = source_row + 2; + for (int x = 1; x < width_less_one_; ++x) { + *dest_pixel++ = HalfDiff(*source_prev_pixel++, *source_next_pixel++); + } + + // Last pixel. Approximated with backward difference. + dest_row[width_less_one_] = + source_row[width_less_one_] - source_row[width_less_one_ - 1]; + } +} + +template <typename T> +template <typename U> +void Image<T>::DerivativeY(const Image<U>& original) { + const int src_stride = original.stride(); + + // Compute 1st row. Approximated with forward difference. + { + const U* const src_row = original[0]; + T* dest_row = (*this)[0]; + for (int x = 0; x < width_; ++x) { + dest_row[x] = src_row[x + src_stride] - src_row[x]; + } + } + + // Compute all rows in between using central difference. + for (int y = 1; y < height_less_one_; ++y) { + T* dest_row = (*this)[y]; + + const U* source_prev_pixel = original[y - 1]; + const U* source_next_pixel = original[y + 1]; + for (int x = 0; x < width_; ++x) { + *dest_row++ = HalfDiff(*source_prev_pixel++, *source_next_pixel++); + } + } + + // Compute last row. Approximated with backward difference. 
+ { + const U* const src_row = original[height_less_one_]; + T* dest_row = (*this)[height_less_one_]; + for (int x = 0; x < width_; ++x) { + dest_row[x] = src_row[x] - src_row[x - src_stride]; + } + } +} + +template <typename T> +template <typename U> +inline T Image<T>::ConvolvePixel3x3(const Image<U>& original, + const int* const filter, + const int center_x, const int center_y, + const int total) const { + int32 sum = 0; + for (int filter_y = 0; filter_y < 3; ++filter_y) { + const int y = Clip(center_y - 1 + filter_y, 0, original.GetHeight()); + for (int filter_x = 0; filter_x < 3; ++filter_x) { + const int x = Clip(center_x - 1 + filter_x, 0, original.GetWidth()); + sum += original[y][x] * filter[filter_y * 3 + filter_x]; + } + } + return sum / total; +} + +template <typename T> +template <typename U> +inline void Image<T>::Convolve3x3(const Image<U>& original, + const int32* const filter) { + int32 sum = 0; + for (int i = 0; i < 9; ++i) { + sum += abs(filter[i]); + } + for (int y = 0; y < height_; ++y) { + for (int x = 0; x < width_; ++x) { + SetPixel(x, y, ConvolvePixel3x3(original, filter, x, y, sum)); + } + } +} + +template <typename T> +inline void Image<T>::FromArray(const T* const pixels, const int stride, + const int factor) { + if (factor == 1 && stride == width_) { + // If not subsampling, memcpy per line should be faster. + memcpy(this->image_data_, pixels, data_size_ * sizeof(T)); + return; + } + + DownsampleAveraged(pixels, stride, factor); +} + +} // namespace tf_tracking + +#endif // THIRD_PARTY_TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_INL_H_ |