/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"

namespace tensorflow {

using shape_inference::DimensionHandle;
using shape_inference::InferenceContext;
using shape_inference::ShapeHandle;

namespace {

// Sets output[0] to shape [batch_dim,height,width,channel_dim], where
// height and width come from the size_tensor.
Status SetOutputToSizedImage(InferenceContext* c, DimensionHandle batch_dim,
                             int size_input_idx, DimensionHandle channel_dim) {
  // Verify shape of size input.
  ShapeHandle size;
  TF_RETURN_IF_ERROR(c->WithRank(c->input(size_input_idx), 1, &size));
  DimensionHandle unused;
  TF_RETURN_IF_ERROR(c->WithValue(c->Dim(size, 0), 2, &unused));

  // Get size values from the size tensor.
  const Tensor* size_tensor = c->input_tensor(size_input_idx);
  DimensionHandle width;
  DimensionHandle height;
  if (size_tensor == nullptr) {
    width = c->UnknownDim();
    height = c->UnknownDim();
  } else {
    // TODO(petewarden) - Remove once we have constant evaluation in C++ only.
    if (size_tensor->dtype() != DT_INT32) {
      return errors::InvalidArgument(
          "Bad size input type for SetOutputToSizedImage: Expected DT_INT32 "
          "but got ",
          DataTypeString(size_tensor->dtype()), " for input #", size_input_idx,
          " in ", c->DebugString());
    }
    auto vec = size_tensor->vec<int32>();
    height = c->MakeDim(vec(0));
    width = c->MakeDim(vec(1));
  }
  c->set_output(0, c->MakeShape({batch_dim, height, width, channel_dim}));
  return Status::OK();
}

Status ResizeShapeFn(InferenceContext* c) {
  ShapeHandle input;
  TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input));
  return SetOutputToSizedImage(c, c->Dim(input, 0), 1 /* size_input_idx */,
                               c->Dim(input, 3));
}

Status DecodeImageShapeFn(InferenceContext* c) {
  ShapeHandle unused;
  TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused));
  DimensionHandle channels_dim;
  int32 channels;
  TF_RETURN_IF_ERROR(c->GetAttr("channels", &channels));
  if (channels == 0) {
    channels_dim = c->UnknownDim();
  } else {
    if (channels < 0) {
      return errors::InvalidArgument("channels must be non-negative, got ",
                                     channels);
    }
    channels_dim = c->MakeDim(channels);
  }
  c->set_output(0,
                c->MakeShape({InferenceContext::kUnknownDim,
                              InferenceContext::kUnknownDim, channels_dim}));
  return Status::OK();
}

Status EncodeImageShapeFn(InferenceContext* c) {
  ShapeHandle unused;
  TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 3, &unused));
  c->set_output(0, c->Scalar());
  return Status::OK();
}

Status ColorspaceShapeFn(InferenceContext* c) {
  ShapeHandle input;
  TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), 1, &input));

  // The last dimension value is always 3.
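  // For example, an input of shape [batch, height, width, 3] produces an
  // output of the same shape, while a known last dimension other than 3
  // fails shape inference.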
  DimensionHandle last_dim;
  TF_RETURN_IF_ERROR(c->WithValue(c->Dim(input, -1), 3, &last_dim));

  ShapeHandle out;
  TF_RETURN_IF_ERROR(c->ReplaceDim(input, -1, last_dim, &out));
  c->set_output(0, out);

  return Status::OK();
}

}  // namespace

// --------------------------------------------------------------------------
REGISTER_OP("ResizeArea")
    .Input("images: T")
    .Input("size: int32")
    .Output("resized_images: float")
    .Attr("T: {uint8, int8, int16, int32, int64, half, float, double}")
    .Attr("align_corners: bool = false")
    .SetShapeFn(ResizeShapeFn)
    .Doc(R"doc(
Resize `images` to `size` using area interpolation.

Input images can be of different types but output images are always float.

images: 4-D with shape `[batch, height, width, channels]`.
size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
  new size for the images.
align_corners: If true, rescale input by (new_height - 1) / (height - 1), which
  exactly aligns the 4 corners of images and resized images. If false, rescale
  by new_height / height. Treat similarly the width dimension.
resized_images: 4-D with shape
  `[batch, new_height, new_width, channels]`.
)doc");

// --------------------------------------------------------------------------
REGISTER_OP("ResizeBicubic")
    .Input("images: T")
    .Input("size: int32")
    .Output("resized_images: float")
    .Attr("T: {uint8, int8, int16, int32, int64, half, float, double}")
    .Attr("align_corners: bool = false")
    .SetShapeFn(ResizeShapeFn)
    .Doc(R"doc(
Resize `images` to `size` using bicubic interpolation.

Input images can be of different types but output images are always float.

images: 4-D with shape `[batch, height, width, channels]`.
size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
  new size for the images.
align_corners: If true, rescale input by (new_height - 1) / (height - 1), which
  exactly aligns the 4 corners of images and resized images. If false, rescale
  by new_height / height. Treat similarly the width dimension.
resized_images: 4-D with shape
  `[batch, new_height, new_width, channels]`.
)doc");

// --------------------------------------------------------------------------
REGISTER_OP("ResizeBilinear")
    .Input("images: T")
    .Input("size: int32")
    .Output("resized_images: float")
    .Attr("T: {uint8, int8, int16, int32, int64, half, float, double}")
    .Attr("align_corners: bool = false")
    .SetShapeFn(ResizeShapeFn)
    .Doc(R"doc(
Resize `images` to `size` using bilinear interpolation.

Input images can be of different types but output images are always float.

images: 4-D with shape `[batch, height, width, channels]`.
size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
  new size for the images.
align_corners: If true, rescale input by (new_height - 1) / (height - 1), which
  exactly aligns the 4 corners of images and resized images. If false, rescale
  by new_height / height. Treat similarly the width dimension.
resized_images: 4-D with shape
  `[batch, new_height, new_width, channels]`.
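
For example, via the Python wrapper (an illustrative sketch; `images` is any
4-D tensor of a supported type):

```python
# Bilinear-resize a batch of images to 299x299; the output is always float32.
resized = tf.image.resize_bilinear(images, size=[299, 299])
```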
)doc"); // -------------------------------------------------------------------------- REGISTER_OP("QuantizedResizeBilinear") .Input("images: T") .Input("size: int32") .Input("min: float") .Input("max: float") .Output("resized_images: T") .Output("out_min: float") .Output("out_max: float") .Attr("T: {quint8, qint32, float}") .Attr("align_corners: bool = false") .SetShapeFn([](InferenceContext* c) { TF_RETURN_IF_ERROR(ResizeShapeFn(c)); ShapeHandle min_shape; TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &min_shape)); ShapeHandle max_shape; TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &max_shape)); c->set_output(1, c->MakeShape({})); c->set_output(2, c->MakeShape({})); return Status::OK(); }) .Doc(R"doc( Resize quantized `images` to `size` using quantized bilinear interpolation. Input images and output images must be quantized types. images: 4-D with shape `[batch, height, width, channels]`. size:= A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The new size for the images. align_corners: If true, rescale input by (new_height - 1) / (height - 1), which exactly aligns the 4 corners of images and resized images. If false, rescale by new_height / height. Treat similarly the width dimension. resized_images: 4-D with shape `[batch, new_height, new_width, channels]`. )doc"); // -------------------------------------------------------------------------- REGISTER_OP("ResizeBilinearGrad") .Input("grads: float") .Input("original_image: T") .Output("output: T") .Attr("T: {float, half, double}") .Attr("align_corners: bool = false") .SetShapeFn([](InferenceContext* c) { c->set_output(0, c->input(1)); return Status::OK(); }) .Doc(R"doc( Computes the gradient of bilinear interpolation. grads: 4-D with shape `[batch, height, width, channels]`. original_image: 4-D with shape `[batch, orig_height, orig_width, channels]`, The image tensor that was resized. align_corners: If true, rescale grads by (orig_height - 1) / (height - 1), which exactly aligns the 4 corners of grads and original_image. If false, rescale by orig_height / height. Treat similarly the width dimension. output: 4-D with shape `[batch, orig_height, orig_width, channels]`. Gradients with respect to the input image. Input image must have been float or double. )doc"); // -------------------------------------------------------------------------- REGISTER_OP("ResizeNearestNeighbor") .Input("images: T") .Input("size: int32") .Output("resized_images: T") .Attr("T: {uint8, int8, int16, int32, int64, half, float, double}") .Attr("align_corners: bool = false") .SetShapeFn(ResizeShapeFn) .Doc(R"doc( Resize `images` to `size` using nearest neighbor interpolation. images: 4-D with shape `[batch, height, width, channels]`. size:= A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The new size for the images. align_corners: If true, rescale input by (new_height - 1) / (height - 1), which exactly aligns the 4 corners of images and resized images. If false, rescale by new_height / height. Treat similarly the width dimension. resized_images: 4-D with shape `[batch, new_height, new_width, channels]`. 
)doc"); // -------------------------------------------------------------------------- REGISTER_OP("ResizeNearestNeighborGrad") .Input("grads: T") .Input("size: int32") .Output("output: T") .Attr("T: {uint8, int8, int32, half, float, double}") .Attr("align_corners: bool = false") .SetShapeFn([](InferenceContext* c) { ShapeHandle input; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input)); ShapeHandle unused; DimensionHandle unused_dim; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused)); TF_RETURN_IF_ERROR(c->WithValue(c->Dim(unused, 0), 2, &unused_dim)); const Tensor* size = c->input_tensor(1); if (size == nullptr) { TF_RETURN_IF_ERROR(c->ReplaceDim(input, 1, c->UnknownDim(), &input)); TF_RETURN_IF_ERROR(c->ReplaceDim(input, 2, c->UnknownDim(), &input)); } else { auto size_vec = size->vec(); TF_RETURN_IF_ERROR( c->ReplaceDim(input, 1, c->MakeDim(size_vec(0)), &input)); TF_RETURN_IF_ERROR( c->ReplaceDim(input, 2, c->MakeDim(size_vec(1)), &input)); } c->set_output(0, input); return Status::OK(); }) .Doc(R"doc( Computes the gradient of nearest neighbor interpolation. grads: 4-D with shape `[batch, height, width, channels]`. size:= A 1-D int32 Tensor of 2 elements: `orig_height, orig_width`. The original input size. align_corners: If true, rescale grads by (orig_height - 1) / (height - 1), which exactly aligns the 4 corners of grads and original_image. If false, rescale by orig_height / height. Treat similarly the width dimension. output: 4-D with shape `[batch, orig_height, orig_width, channels]`. Gradients with respect to the input image. )doc"); // -------------------------------------------------------------------------- REGISTER_OP("RandomCrop") .Input("image: T") .Input("size: int64") .Output("output: T") .Attr("T: {uint8, int8, int16, int32, int64, float, double}") .Attr("seed: int = 0") .Attr("seed2: int = 0") .SetIsStateful() .Deprecated(8, "Random crop is now pure Python") .SetShapeFn([](InferenceContext* c) { ShapeHandle image; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 3, &image)); DimensionHandle channels = c->Dim(image, -1); ShapeHandle unused; TF_RETURN_IF_ERROR(c->Merge(c->input(1), c->Vector(2), &unused)); const Tensor* size = c->input_tensor(1); DimensionHandle h; DimensionHandle w; if (size == nullptr) { h = c->UnknownDim(); w = c->UnknownDim(); } else { auto size_vec = size->vec(); h = c->MakeDim(size_vec(0)); w = c->MakeDim(size_vec(1)); } c->set_output(0, c->MakeShape({h, w, channels})); return Status::OK(); }) .Doc(R"doc( Randomly crop `image`. `size` is a 1-D int64 tensor with 2 elements representing the crop height and width. The values must be non negative. This Op picks a random location in `image` and crops a `height` by `width` rectangle from that location. The random location is picked so the cropped area will fit inside the original image. image: 3-D of shape `[height, width, channels]`. size: 1-D of length 2 containing: `crop_height`, `crop_width`.. seed: If either seed or seed2 are set to be non-zero, the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed. seed2: An second seed to avoid seed collision. output: 3-D of shape `[crop_height, crop_width, channels].` )doc"); // TODO(shlens): Support variable rank in RandomCrop. 
// --------------------------------------------------------------------------
REGISTER_OP("DecodeJpeg")
    .Input("contents: string")
    .Attr("channels: int = 0")
    .Attr("ratio: int = 1")
    .Attr("fancy_upscaling: bool = true")
    .Attr("try_recover_truncated: bool = false")
    .Attr("acceptable_fraction: float = 1.0")
    .Attr("dct_method: string = ''")
    .Output("image: uint8")
    .SetShapeFn(DecodeImageShapeFn)
    .Doc(R"doc(
Decode a JPEG-encoded image to a uint8 tensor.

The attr `channels` indicates the desired number of color channels for the
decoded image.

Accepted values are:

* 0: Use the number of channels in the JPEG-encoded image.
* 1: output a grayscale image.
* 3: output an RGB image.

If needed, the JPEG-encoded image is transformed to match the requested number
of color channels.

The attr `ratio` allows downscaling the image by an integer factor during
decoding.  Allowed values are: 1, 2, 4, and 8.  This is much faster than
downscaling the image later.

This op also supports decoding PNGs and non-animated GIFs since the interface
is the same, though it is cleaner to use `tf.image.decode_image`.

contents: 0-D.  The JPEG-encoded image.
channels: Number of color channels for the decoded image.
ratio: Downscaling ratio.
fancy_upscaling: If true use a slower but nicer upscaling of the
  chroma planes (yuv420/422 only).
try_recover_truncated: If true try to recover an image from truncated input.
acceptable_fraction: The minimum required fraction of lines before a truncated
  input is accepted.
dct_method: string specifying a hint about the algorithm used for
  decompression.  Defaults to "" which maps to a system-specific
  default.  Currently valid values are ["INTEGER_FAST", "INTEGER_ACCURATE"].
  The hint may be ignored (e.g., the internal jpeg library changes to a
  version that does not have that specific option).
image: 3-D with shape `[height, width, channels]`.
)doc");

// --------------------------------------------------------------------------
REGISTER_OP("EncodeJpeg")
    .Input("image: uint8")
    .Attr("format: {'', 'grayscale', 'rgb'} = ''")
    .Attr("quality: int = 95")
    .Attr("progressive: bool = false")
    .Attr("optimize_size: bool = false")
    .Attr("chroma_downsampling: bool = true")
    .Attr("density_unit: {'in', 'cm'} = 'in'")
    .Attr("x_density: int = 300")
    .Attr("y_density: int = 300")
    .Attr("xmp_metadata: string = ''")
    .Output("contents: string")
    .SetShapeFn(EncodeImageShapeFn)
    .Doc(R"doc(
JPEG-encode an image.

`image` is a 3-D uint8 Tensor of shape `[height, width, channels]`.

The attr `format` can be used to override the color format of the encoded
output.  Values can be:

* `''`: Use a default format based on the number of channels in the image.
* `grayscale`: Output a grayscale JPEG image.  The `channels` dimension
  of `image` must be 1.
* `rgb`: Output an RGB JPEG image. The `channels` dimension
  of `image` must be 3.

If `format` is not specified or is the empty string, a default format is
picked as a function of the number of channels in `image`:

* 1: Output a grayscale image.
* 3: Output an RGB image.

image: 3-D with shape `[height, width, channels]`.
format: Per pixel image format.
quality: Quality of the compression from 0 to 100 (higher is better and
  slower).
progressive: If True, create a JPEG that loads progressively (coarse to fine).
optimize_size: If True, spend CPU/RAM to reduce size with no quality change.
chroma_downsampling: See http://en.wikipedia.org/wiki/Chroma_subsampling.
density_unit: Unit used to specify `x_density` and `y_density`:
  pixels per inch (`'in'`) or centimeter (`'cm'`).
x_density: Horizontal pixels per density unit.
y_density: Vertical pixels per density unit.
xmp_metadata: If not empty, embed this XMP metadata in the image header.
contents: 0-D. JPEG-encoded image.
)doc");

// --------------------------------------------------------------------------
REGISTER_OP("AdjustContrast")
    .Input("images: T")
    .Input("contrast_factor: float")
    .Input("min_value: float")
    .Input("max_value: float")
    .Output("output: float")
    .Attr("T: {uint8, int8, int16, int32, int64, float, double}")
    .Deprecated(2, "Use AdjustContrastv2 instead")
    .SetShapeFn([](InferenceContext* c) {
      return shape_inference::UnchangedShapeWithRankAtLeast(c, 3);
    })
    .Doc(R"Doc(
Deprecated. Disallowed in GraphDef version >= 2.
)Doc");

// --------------------------------------------------------------------------
REGISTER_OP("AdjustContrastv2")
    .Input("images: float")
    .Input("contrast_factor: float")
    .Output("output: float")
    .SetShapeFn([](InferenceContext* c) {
      return shape_inference::UnchangedShapeWithRankAtLeast(c, 3);
    })
    .Doc(R"Doc(
Adjust the contrast of one or more images.

`images` is a tensor of at least 3 dimensions.  The last 3 dimensions are
interpreted as `[height, width, channels]`.  The other dimensions only
represent a collection of images, such as `[batch, height, width, channels]`.

Contrast is adjusted independently for each channel of each image.

For each channel, the Op first computes the mean of the image pixels in the
channel and then adjusts each component of each pixel to
`(x - mean) * contrast_factor + mean`.

images: Images to adjust.  At least 3-D.
contrast_factor: A float multiplier for adjusting contrast.
output: The contrast-adjusted image or images.
)Doc");

// --------------------------------------------------------------------------
REGISTER_OP("AdjustHue")
    .Input("images: float")
    .Input("delta: float")
    .Output("output: float")
    .SetShapeFn([](InferenceContext* c) {
      return shape_inference::UnchangedShapeWithRankAtLeast(c, 3);
    })
    .Doc(R"Doc(
Adjust the hue of one or more images.

`images` is a tensor of at least 3 dimensions.  The last dimension is
interpreted as channels, and must be three.

The input image is considered in the RGB colorspace. Conceptually, the RGB
colors are first mapped into HSV. A delta is then applied to all the hue
values, and the result is mapped back to the RGB colorspace.

images: Images to adjust.  At least 3-D.
delta: A float delta to add to the hue.
output: The hue-adjusted image or images.
)Doc");

// --------------------------------------------------------------------------
REGISTER_OP("AdjustSaturation")
    .Input("images: float")
    .Input("scale: float")
    .Output("output: float")
    .SetShapeFn([](InferenceContext* c) {
      return shape_inference::UnchangedShapeWithRankAtLeast(c, 3);
    })
    .Doc(R"Doc(
Adjust the saturation of one or more images.

`images` is a tensor of at least 3 dimensions.  The last dimension is
interpreted as channels, and must be three.

The input image is considered in the RGB colorspace. Conceptually, the RGB
colors are first mapped into HSV. A scale is then applied to all the
saturation values, and the result is mapped back to the RGB colorspace.

images: Images to adjust.  At least 3-D.
scale: A float scale by which to multiply the saturation.
output: The saturation-adjusted image or images.
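
For example, via the Python wrapper (an illustrative sketch; `image` is an RGB
float tensor with values in `[0, 1]`):

```python
# Double the saturation of an image.
saturated = tf.image.adjust_saturation(image, 2.0)
```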
)Doc"); // -------------------------------------------------------------------------- REGISTER_OP("DecodePng") .Input("contents: string") .Attr("channels: int = 0") .Attr("dtype: {uint8, uint16} = DT_UINT8") .Output("image: dtype") .SetShapeFn(DecodeImageShapeFn) .Doc(R"doc( Decode a PNG-encoded image to a uint8 or uint16 tensor. The attr `channels` indicates the desired number of color channels for the decoded image. Accepted values are: * 0: Use the number of channels in the PNG-encoded image. * 1: output a grayscale image. * 3: output an RGB image. * 4: output an RGBA image. If needed, the PNG-encoded image is transformed to match the requested number of color channels. This op also supports decoding JPEGs and non-animated GIFs since the interface is the same, though it is cleaner to use `tf.image.decode_image`. contents: 0-D. The PNG-encoded image. channels: Number of color channels for the decoded image. image: 3-D with shape `[height, width, channels]`. )doc"); // -------------------------------------------------------------------------- REGISTER_OP("EncodePng") .Attr("compression: int = -1") .Attr("T: {uint8, uint16} = DT_UINT8") .Input("image: T") .Output("contents: string") .SetShapeFn(EncodeImageShapeFn) .Doc(R"doc( PNG-encode an image. `image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]` where `channels` is: * 1: for grayscale. * 2: for grayscale + alpha. * 3: for RGB. * 4: for RGBA. The ZLIB compression level, `compression`, can be -1 for the PNG-encoder default or a value from 0 to 9. 9 is the highest compression level, generating the smallest output, but is slower. image: 3-D with shape `[height, width, channels]`. compression: Compression level. contents: 0-D. PNG-encoded image. )doc"); // -------------------------------------------------------------------------- REGISTER_OP("DecodeBmp") .Input("contents: string") .Output("image: uint8") .Attr("channels: int = 0") .SetShapeFn(DecodeImageShapeFn) .Doc(R"doc( Decode the first frame of a BMP-encoded image to a uint8 tensor. The attr `channels` indicates the desired number of color channels for the decoded image. Accepted values are: * 0: Use the number of channels in the BMP-encoded image. * 3: output an RGB image. * 4: output an RGBA image. contents: 0-D. The BMP-encoded image. image: 3-D with shape `[height, width, channels]`. RGB order )doc"); // -------------------------------------------------------------------------- REGISTER_OP("DecodeGif") .Input("contents: string") .Output("image: uint8") .SetShapeFn([](InferenceContext* c) { ShapeHandle unused; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); c->set_output(0, c->MakeShape({InferenceContext::kUnknownDim, InferenceContext::kUnknownDim, InferenceContext::kUnknownDim, 3})); return Status::OK(); }) .Doc(R"doc( Decode the first frame of a GIF-encoded image to a uint8 tensor. GIF with frame or transparency compression are not supported convert animated GIF from compressed to uncompressed by: convert $src.gif -coalesce $dst.gif This op also supports decoding JPEGs and PNGs, though it is cleaner to use `tf.image.decode_image`. contents: 0-D. The GIF-encoded image. image: 4-D with shape `[num_frames, height, width, 3]`. RGB order )doc"); // -------------------------------------------------------------------------- REGISTER_OP("RGBToHSV") .Input("images: T") .Output("output: T") .Attr("T: {float, double} = DT_FLOAT") .SetShapeFn(ColorspaceShapeFn) .Doc(R"doc( Converts one or more images from RGB to HSV. 
Outputs a tensor of the same shape as the `images` tensor, containing the HSV
value of the pixels. The output is only well defined if the values in `images`
are in `[0,1]`.

`output[..., 0]` contains hue, `output[..., 1]` contains saturation, and
`output[..., 2]` contains value. All HSV values are in `[0,1]`. A hue of 0
corresponds to pure red, hue 1/3 is pure green, and 2/3 is pure blue.

images: 1-D or higher rank. RGB data to convert. Last dimension must be
  size 3.
output: `images` converted to HSV.
)doc");

// --------------------------------------------------------------------------
REGISTER_OP("HSVToRGB")
    .Input("images: T")
    .Output("output: T")
    .Attr("T: {float, double} = DT_FLOAT")
    .SetShapeFn(ColorspaceShapeFn)
    .Doc(R"doc(
Convert one or more images from HSV to RGB.

Outputs a tensor of the same shape as the `images` tensor, containing the RGB
value of the pixels. The output is only well defined if the values in `images`
are in `[0,1]`.

See `rgb_to_hsv` for a description of the HSV encoding.

images: 1-D or higher rank. HSV data to convert. Last dimension must be
  size 3.
output: `images` converted to RGB.
)doc");

// --------------------------------------------------------------------------
REGISTER_OP("DrawBoundingBoxes")
    .Input("images: T")
    .Input("boxes: float")
    .Output("output: T")
    .Attr("T: {float, half} = DT_FLOAT")
    .SetShapeFn([](InferenceContext* c) {
      return shape_inference::UnchangedShapeWithRankAtLeast(c, 3);
    })
    .Doc(R"doc(
Draw bounding boxes on a batch of images.

Outputs a copy of `images` but draws on top of the pixels zero or more
bounding boxes specified by the locations in `boxes`. The coordinates of each
bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. The
bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and
height of the underlying image.

For example, if an image is 100 x 200 pixels and the bounding box is
`[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of the
bounding box will be `(10, 40)` to `(50, 180)` (in (y, x) coordinates).

Parts of the bounding box may fall outside the image.

images: 4-D with shape `[batch, height, width, depth]`. A batch of images.
boxes: 3-D with shape `[batch, num_bounding_boxes, 4]` containing bounding
  boxes.
output: 4-D with the same shape as `images`. The batch of input images with
  bounding boxes drawn on the images.
)doc");

// --------------------------------------------------------------------------
REGISTER_OP("SampleDistortedBoundingBox")
    .Input("image_size: T")
    .Input("bounding_boxes: float")
    .Output("begin: T")
    .Output("size: T")
    .Output("bboxes: float")
    .Attr("T: {uint8, int8, int16, int32, int64}")
    .Attr("seed: int = 0")
    .Attr("seed2: int = 0")
    .Attr("min_object_covered: float = 0.1")
    .Attr("aspect_ratio_range: list(float) = [0.75, 1.33]")
    .Attr("area_range: list(float) = [0.05, 1.0]")
    .Attr("max_attempts: int = 100")
    .Attr("use_image_if_no_bounding_boxes: bool = false")
    .SetIsStateful()
    .SetShapeFn([](InferenceContext* c) {
      c->set_output(0, c->Vector(3));
      c->set_output(1, c->Vector(3));
      c->set_output(2, c->MakeShape({1, 1, 4}));
      return Status::OK();
    })
    .Doc(R"doc(
Generate a single randomly distorted bounding box for an image.

Bounding box annotations are often supplied in addition to ground-truth labels
in image recognition or object localization tasks. A common technique for
training such a system is to randomly distort an image while preserving
its content, i.e. *data augmentation*. This Op outputs a randomly distorted
localization of an object, i.e.
bounding box, given an `image_size`, `bounding_boxes` and a series of
constraints.

The output of this Op is a single bounding box that may be used to crop the
original image. The output is returned as 3 tensors: `begin`, `size` and
`bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
image. The latter may be supplied to `tf.image.draw_bounding_boxes` to
visualize what the bounding box looks like.

Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`.
The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
and height of the underlying image.

For example,

```python
# Generate a single distorted bounding box.
begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
    tf.shape(image),
    bounding_boxes=bounding_boxes)

# Draw the bounding box in an image summary.
image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                              bbox_for_draw)
tf.image_summary('images_with_box', image_with_box)

# Employ the bounding box to distort the image.
distorted_image = tf.slice(image, begin, size)
```

Note that if no bounding box information is available, setting
`use_image_if_no_bounding_boxes = true` will assume there is a single implicit
bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
false and no bounding boxes are supplied, an error is raised.

image_size: 1-D, containing `[height, width, channels]`.
bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes
  associated with the image.
begin: 1-D, containing `[offset_height, offset_width, 0]`. Provide as input to
  `tf.slice`.
size: 1-D, containing `[target_height, target_width, -1]`. Provide as input to
  `tf.slice`.
bboxes: 3-D with shape `[1, 1, 4]` containing the distorted bounding box.
  Provide as input to `tf.image.draw_bounding_boxes`.
seed: If either `seed` or `seed2` is set to non-zero, the random number
  generator is seeded by the given `seed`. Otherwise, it is seeded by a random
  seed.
seed2: A second seed to avoid seed collision.
min_object_covered: The cropped area of the image must contain at least this
  fraction of any bounding box supplied. The value of this parameter should be
  non-negative. In the case of 0, the cropped area does not need to overlap
  any of the bounding boxes supplied.
aspect_ratio_range: The cropped area of the image must have an aspect
  ratio = width / height within this range.
area_range: The cropped area of the image must contain a fraction of the
  supplied image within this range.
max_attempts: Number of attempts at generating a cropped region of the image
  with the specified constraints. After `max_attempts` failures, return the
  entire image.
use_image_if_no_bounding_boxes: Controls behavior if no bounding boxes are
  supplied. If true, assume an implicit bounding box covering the whole input.
  If false, raise an error.
)doc"); REGISTER_OP("SampleDistortedBoundingBoxV2") .Input("image_size: T") .Input("bounding_boxes: float") .Input("min_object_covered: float") .Output("begin: T") .Output("size: T") .Output("bboxes: float") .Attr("T: {uint8, int8, int16, int32, int64}") .Attr("seed: int = 0") .Attr("seed2: int = 0") .Attr("aspect_ratio_range: list(float) = [0.75, 1.33]") .Attr("area_range: list(float) = [0.05, 1.0]") .Attr("max_attempts: int = 100") .Attr("use_image_if_no_bounding_boxes: bool = false") .SetIsStateful() .SetShapeFn([](InferenceContext* c) { c->set_output(0, c->Vector(3)); c->set_output(1, c->Vector(3)); c->set_output(2, c->MakeShape({1, 1, 4})); return Status::OK(); }) .Doc(R"doc( Generate a single randomly distorted bounding box for an image. Bounding box annotations are often supplied in addition to ground-truth labels in image recognition or object localization tasks. A common technique for training such a system is to randomly distort an image while preserving its content, i.e. *data augmentation*. This Op outputs a randomly distorted localization of an object, i.e. bounding box, given an `image_size`, `bounding_boxes` and a series of constraints. The output of this Op is a single bounding box that may be used to crop the original image. The output is returned as 3 tensors: `begin`, `size` and `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the image. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize what the bounding box looks like. Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and height of the underlying image. For example, ```python # Generate a single distorted bounding box. begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box( tf.shape(image), bounding_boxes=bounding_boxes) # Draw the bounding box in an image summary. image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), bbox_for_draw) tf.image_summary('images_with_box', image_with_box) # Employ the bounding box to distort the image. distorted_image = tf.slice(image, begin, size) ``` Note that if no bounding box information is available, setting `use_image_if_no_bounding_boxes = true` will assume there is a single implicit bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is false and no bounding boxes are supplied, an error is raised. image_size: 1-D, containing `[height, width, channels]`. bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes associated with the image. min_object_covered: The cropped area of the image must contain at least this fraction of any bounding box supplied. The value of this parameter should be non-negative. In the case of 0, the cropped area does not need to overlap any of the bounding boxes supplied. begin: 1-D, containing `[offset_height, offset_width, 0]`. Provide as input to `tf.slice`. size: 1-D, containing `[target_height, target_width, -1]`. Provide as input to `tf.slice`. bboxes: 3-D with shape `[1, 1, 4]` containing the distorted bounding box. Provide as input to `tf.image.draw_bounding_boxes`. seed: If either `seed` or `seed2` are set to non-zero, the random number generator is seeded by the given `seed`. Otherwise, it is seeded by a random seed. seed2: A second seed to avoid seed collision. aspect_ratio_range: The cropped area of the image must have an aspect ratio = width / height within this range. 
area_range: The cropped area of the image must contain a fraction of the
  supplied image within this range.
max_attempts: Number of attempts at generating a cropped region of the image
  with the specified constraints. After `max_attempts` failures, return the
  entire image.
use_image_if_no_bounding_boxes: Controls behavior if no bounding boxes are
  supplied. If true, assume an implicit bounding box covering the whole input.
  If false, raise an error.
)doc");

// --------------------------------------------------------------------------
// glimpse = extract_glimpse(input, size, offsets) extracts the glimpse
// of size `size` centered at location `offsets` from the input tensor
// `input`.
//
// REQUIRES: input.dims() == 4
//
REGISTER_OP("ExtractGlimpse")
    .Input("input: float")
    .Input("size: int32")
    .Input("offsets: float")
    .Output("glimpse: float")
    .Attr("centered: bool = true")
    .Attr("normalized: bool = true")
    .Attr("uniform_noise: bool = true")
    .SetShapeFn([](InferenceContext* c) {
      ShapeHandle input;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input));
      ShapeHandle offsets;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 2, &offsets));

      DimensionHandle batch_dim;
      TF_RETURN_IF_ERROR(
          c->Merge(c->Dim(input, 0), c->Dim(offsets, 0), &batch_dim));
      DimensionHandle unused;
      TF_RETURN_IF_ERROR(c->WithValue(c->Dim(offsets, 1), 2, &unused));

      return SetOutputToSizedImage(c, batch_dim, 1 /* size_input_idx */,
                                   c->Dim(input, 3));
    })
    .Doc(R"doc(
Extracts a glimpse from the input tensor.

Returns a set of windows called glimpses extracted at location `offsets`
from the input tensor. If a window only partially overlaps the input, the
non-overlapping areas are filled with random noise.

The result is a 4-D tensor of shape `[batch_size, glimpse_height,
glimpse_width, channels]`. The channels and batch dimensions are the same as
those of the input tensor. The height and width of the output windows are
specified in the `size` parameter.

The arguments `normalized` and `centered` control how the windows are built:

* If the coordinates are normalized but not centered, 0.0 and 1.0 correspond
  to the minimum and maximum of each height and width dimension.
* If the coordinates are both normalized and centered, they range from -1.0 to
  1.0. The coordinates (-1.0, -1.0) correspond to the upper left corner, the
  lower right corner is located at (1.0, 1.0) and the center is at (0, 0).
* If the coordinates are not normalized they are interpreted as numbers of
  pixels.

input: A 4-D float tensor of shape `[batch_size, height, width, channels]`.
size: A 1-D tensor of 2 elements containing the size of the glimpses to
  extract. The glimpse height must be specified first, followed by the glimpse
  width.
offsets: A 2-D integer tensor of shape `[batch_size, 2]` containing the y, x
  locations of the center of each window.
glimpse: A tensor representing the glimpses
  `[batch_size, glimpse_height, glimpse_width, channels]`.
centered: indicates if the offset coordinates are centered relative to
  the image, in which case the (0, 0) offset is relative to the center of the
  input images. If false, the (0, 0) offset corresponds to the upper left
  corner of the input images.
normalized: indicates if the offset coordinates are normalized.
uniform_noise: indicates if the noise should be generated using a
  uniform distribution or a Gaussian distribution.
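
For example, via the Python wrapper (an illustrative sketch; `offsets` is a
float tensor of shape `[batch_size, 2]`):

```python
# Extract a centered, normalized 64x64 glimpse from each image in the batch.
glimpses = tf.image.extract_glimpse(images, size=[64, 64], offsets=offsets,
                                    centered=True, normalized=True)
```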
)doc"); // -------------------------------------------------------------------------- REGISTER_OP("CropAndResize") .Input("image: T") .Input("boxes: float") .Input("box_ind: int32") .Input("crop_size: int32") .Output("crops: float") .Attr("T: {uint8, int8, int16, int32, int64, half, float, double}") .Attr("method: {'bilinear'} = 'bilinear'") .Attr("extrapolation_value: float = 0") .SetShapeFn([](InferenceContext* c) { // Get inputs and validate ranks. ShapeHandle input; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input)); ShapeHandle boxes; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 2, &boxes)); ShapeHandle box_ind; TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &box_ind)); // boxes[0] and box_ind[0] are both num_boxes. DimensionHandle num_boxes_dim; TF_RETURN_IF_ERROR( c->Merge(c->Dim(boxes, 0), c->Dim(box_ind, 0), &num_boxes_dim)); // boxes.dim(1) is 4. DimensionHandle unused; TF_RETURN_IF_ERROR(c->WithValue(c->Dim(boxes, 1), 4, &unused)); return SetOutputToSizedImage(c, num_boxes_dim, 3 /* size_input_idx */, c->Dim(input, 3)); }) .Doc(R"doc( Extracts crops from the input image tensor and bilinearly resizes them (possibly with aspect ratio change) to a common output size specified by `crop_size`. This is more general than the `crop_to_bounding_box` op which extracts a fixed size slice from the input image and does not allow resizing or aspect ratio change. Returns a tensor with `crops` from the input `image` at positions defined at the bounding box locations in `boxes`. The cropped boxes are all resized (with bilinear interpolation) to a fixed `size = [crop_height, crop_width]`. The result is a 4-D tensor `[num_boxes, crop_height, crop_width, depth]`. image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`. Both `image_height` and `image_width` need to be positive. boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor specifies the coordinates of a box in the `box_ind[i]` image and is specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the `[0, 1]` interval of normalized image height is mapped to `[0, image_height - 1]` in image height coordinates. We do allow `y1` > `y2`, in which case the sampled crop is an up-down flipped version of the original image. The width dimension is treated similarly. Normalized coordinates outside the `[0, 1]` range are allowed, in which case we use `extrapolation_value` to extrapolate the input image values. box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. The value of `box_ind[i]` specifies the image that the `i`-th box refers to. crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`. All cropped image patches are resized to this size. The aspect ratio of the image content is not preserved. Both `crop_height` and `crop_width` need to be positive. crops: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. method: A string specifying the interpolation method. Only 'bilinear' is supported for now. extrapolation_value: Value used for extrapolation, when applicable. 
)doc"); REGISTER_OP("CropAndResizeGradImage") .Input("grads: float") .Input("boxes: float") .Input("box_ind: int32") .Input("image_size: int32") .Output("output: T") .Attr("T: {float, half, double}") .Attr("method: {'bilinear'} = 'bilinear'") .SetShapeFn([](InferenceContext* c) { ShapeHandle out; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(3, &out)); TF_RETURN_IF_ERROR(c->WithRank(out, 4, &out)); c->set_output(0, out); return Status::OK(); }) .Doc(R"doc( Computes the gradient of the crop_and_resize op wrt the input image tensor. grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor specifies the coordinates of a box in the `box_ind[i]` image and is specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the `[0, 1]` interval of normalized image height is mapped to `[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in which case the sampled crop is an up-down flipped version of the original image. The width dimension is treated similarly. Normalized coordinates outside the `[0, 1]` range are allowed, in which case we use `extrapolation_value` to extrapolate the input image values. box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. The value of `box_ind[i]` specifies the image that the `i`-th box refers to. image_size: A 1-D tensor with value `[batch, image_height, image_width, depth]` containing the original image size. Both `image_height` and `image_width` need to be positive. output: A 4-D tensor of shape `[batch, image_height, image_width, depth]`. method: A string specifying the interpolation method. Only 'bilinear' is supported for now. )doc"); REGISTER_OP("CropAndResizeGradBoxes") .Input("grads: float") .Input("image: T") .Input("boxes: float") .Input("box_ind: int32") .Output("output: float") .Attr("T: {uint8, int8, int16, int32, int64, half, float, double}") .Attr("method: {'bilinear'} = 'bilinear'") .SetShapeFn([](InferenceContext* c) { c->set_output(0, c->input(2)); return Status::OK(); }) .Doc(R"doc( Computes the gradient of the crop_and_resize op wrt the input boxes tensor. grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`. Both `image_height` and `image_width` need to be positive. boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor specifies the coordinates of a box in the `box_ind[i]` image and is specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the `[0, 1]` interval of normalized image height is mapped to `[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in which case the sampled crop is an up-down flipped version of the original image. The width dimension is treated similarly. Normalized coordinates outside the `[0, 1]` range are allowed, in which case we use `extrapolation_value` to extrapolate the input image values. box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. The value of `box_ind[i]` specifies the image that the `i`-th box refers to. output: A 2-D tensor of shape `[num_boxes, 4]`. method: A string specifying the interpolation method. Only 'bilinear' is supported for now. 
)doc"); // -------------------------------------------------------------------------- REGISTER_OP("NonMaxSuppression") .Input("boxes: float") .Input("scores: float") .Input("max_output_size: int32") .Output("selected_indices: int32") .Attr("iou_threshold: float = 0.5") .SetShapeFn([](InferenceContext* c) { c->set_output(0, c->Vector(c->UnknownDim())); return Status::OK(); }) .Doc(R"doc( Greedily selects a subset of bounding boxes in descending order of score, pruning away boxes that have high intersection-over-union (IOU) overlap with previously selected boxes. Bounding boxes are supplied as [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair of box corners and the coordinates can be provided as normalized (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm is agnostic to where the origin is in the coordinate system. Note that this algorithm is invariant to orthogonal transformations and translations of the coordinate system; thus translating or reflections of the coordinate system result in the same boxes being selected by the algorithm. The output of this operation is a set of integers indexing into the input collection of bounding boxes representing the selected boxes. The bounding box coordinates corresponding to the selected indices can then be obtained using the `tf.gather operation`. For example: selected_indices = tf.image.non_max_suppression( boxes, scores, max_output_size, iou_threshold) selected_boxes = tf.gather(boxes, selected_indices) boxes: A 2-D float tensor of shape `[num_boxes, 4]`. scores: A 1-D float tensor of shape `[num_boxes]` representing a single score corresponding to each box (each row of boxes). max_output_size: A scalar integer tensor representing the maximum number of boxes to be selected by non max suppression. iou_threshold: A float representing the threshold for deciding whether boxes overlap too much with respect to IOU. selected_indices: A 1-D integer tensor of shape `[M]` representing the selected indices from the boxes tensor, where `M <= max_output_size`. )doc"); REGISTER_OP("NonMaxSuppressionV2") .Input("boxes: float") .Input("scores: float") .Input("max_output_size: int32") .Input("iou_threshold: float") .Output("selected_indices: int32") .SetShapeFn([](InferenceContext* c) { c->set_output(0, c->Vector(c->UnknownDim())); return Status::OK(); }) .Doc(R"doc( Greedily selects a subset of bounding boxes in descending order of score, pruning away boxes that have high intersection-over-union (IOU) overlap with previously selected boxes. Bounding boxes are supplied as [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair of box corners and the coordinates can be provided as normalized (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm is agnostic to where the origin is in the coordinate system. Note that this algorithm is invariant to orthogonal transformations and translations of the coordinate system; thus translating or reflections of the coordinate system result in the same boxes being selected by the algorithm. The output of this operation is a set of integers indexing into the input collection of bounding boxes representing the selected boxes. The bounding box coordinates corresponding to the selected indices can then be obtained using the `tf.gather operation`. 
For example:

  selected_indices = tf.image.non_max_suppression_v2(
      boxes, scores, max_output_size, iou_threshold)
  selected_boxes = tf.gather(boxes, selected_indices)

boxes: A 2-D float tensor of shape `[num_boxes, 4]`.
scores: A 1-D float tensor of shape `[num_boxes]` representing a single
  score corresponding to each box (each row of boxes).
max_output_size: A scalar integer tensor representing the maximum number of
  boxes to be selected by non max suppression.
iou_threshold: A 0-D float tensor representing the threshold for deciding
  whether boxes overlap too much with respect to IOU.
selected_indices: A 1-D integer tensor of shape `[M]` representing the
  selected indices from the boxes tensor, where `M <= max_output_size`.
)doc");

}  // namespace tensorflow