diff options
Diffstat (limited to 'tensorflow/python/ops/image_ops_impl.py')
-rw-r--r-- | tensorflow/python/ops/image_ops_impl.py | 89 |
1 files changed, 76 insertions, 13 deletions
diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 2c7751f792..9440bab9ee 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -55,8 +55,10 @@ ops.NotDifferentiable('SampleDistortedBoundingBoxV2') ops.NotDifferentiable('ExtractGlimpse') ops.NotDifferentiable('NonMaxSuppression') ops.NotDifferentiable('NonMaxSuppressionV2') +ops.NotDifferentiable('NonMaxSuppressionWithOverlaps') +# pylint: disable=invalid-name def _assert(cond, ex_type, msg): """A polymorphic assert, works with tensors and boolean expressions. @@ -1070,15 +1072,16 @@ def resize_images(images, @tf_export('image.resize_image_with_pad') -def resize_image_with_pad(image, target_height, target_width, +def resize_image_with_pad(image, + target_height, + target_width, method=ResizeMethod.BILINEAR): - """ - Resizes and pads an image to a target width and height. + """Resizes and pads an image to a target width and height. Resizes an image to a target width and height by keeping the aspect ratio the same without distortion. If the target dimensions don't match the image dimensions, the image - is resized and then padded with zeroes to match requested + is resized and then padded with zeroes to match requested dimensions. Args: @@ -1139,10 +1142,10 @@ def resize_image_with_pad(image, target_height, target_width, ratio = max_(f_width / f_target_width, f_height / f_target_height) resized_height_float = f_height / ratio resized_width_float = f_width / ratio - resized_height = math_ops.cast(math_ops.floor(resized_height_float), - dtype=dtypes.int32) - resized_width = math_ops.cast(math_ops.floor(resized_width_float), - dtype=dtypes.int32) + resized_height = math_ops.cast( + math_ops.floor(resized_height_float), dtype=dtypes.int32) + resized_width = math_ops.cast( + math_ops.floor(resized_width_float), dtype=dtypes.int32) padding_height = (f_target_height - resized_height_float) / 2 padding_width = (f_target_width - resized_width_float) / 2 @@ -1154,13 +1157,13 @@ def resize_image_with_pad(image, target_height, target_width, # Resize first, then pad to meet requested dimensions resized = resize_images(image, [resized_height, resized_width], method) - padded = pad_to_bounding_box(resized, p_height, p_width, - target_height, target_width) + padded = pad_to_bounding_box(resized, p_height, p_width, target_height, + target_width) if padded.get_shape().ndims is None: raise ValueError('padded contains no shape.') - _, padded_height, padded_width, _ = _ImageDimensions(padded, rank=4) + _ImageDimensions(padded, rank=4) if not is_batch: padded = array_ops.squeeze(padded, squeeze_dims=[0]) @@ -1750,6 +1753,22 @@ def is_jpeg(contents, name=None): return math_ops.equal(substr, b'\xff\xd8\xff', name=name) +def _is_png(contents, name=None): + r"""Convenience function to check if the 'contents' encodes a PNG image. + + Args: + contents: 0-D `string`. The encoded image bytes. + name: A name for the operation (optional) + + Returns: + A scalar boolean tensor indicating if 'contents' may be a PNG image. + is_png is susceptible to false positives. + """ + with ops.name_scope(name, 'is_png'): + substr = string_ops.substr(contents, 0, 3) + return math_ops.equal(substr, b'\211PN', name=name) + + @tf_export('image.decode_image') def decode_image(contents, channels=None, dtype=dtypes.uint8, name=None): """Convenience function for `decode_bmp`, `decode_gif`, `decode_jpeg`, @@ -1827,8 +1846,8 @@ def decode_image(contents, channels=None, dtype=dtypes.uint8, name=None): def check_png(): """Checks if an image is PNG.""" - is_png = math_ops.equal(substr, b'\211PN', name='is_png') - return control_flow_ops.cond(is_png, _png, check_gif, name='cond_png') + return control_flow_ops.cond( + _is_png(contents), _png, check_gif, name='cond_png') def _jpeg(): """Decodes a jpeg image.""" @@ -2091,6 +2110,50 @@ def non_max_suppression(boxes, iou_threshold, score_threshold) +@tf_export('image.non_max_suppression_overlaps') +def non_max_suppression_with_overlaps(overlaps, + scores, + max_output_size, + overlap_threshold=0.5, + score_threshold=float('-inf'), + name=None): + """Greedily selects a subset of bounding boxes in descending order of score. + + Prunes away boxes that have high overlap with previously selected boxes. + N-by-n overlap values are supplied as square matrix. + The output of this operation is a set of integers indexing into the input + collection of bounding boxes representing the selected boxes. The bounding + box coordinates corresponding to the selected indices can then be obtained + using the `tf.gather operation`. For example: + selected_indices = tf.image.non_max_suppression_overlaps( + overlaps, scores, max_output_size, iou_threshold) + selected_boxes = tf.gather(boxes, selected_indices) + + Args: + overlaps: A 2-D float `Tensor` of shape `[num_boxes, num_boxes]`. + scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single + score corresponding to each box (each row of boxes). + max_output_size: A scalar integer `Tensor` representing the maximum number + of boxes to be selected by non max suppression. + overlap_threshold: A float representing the threshold for deciding whether + boxes overlap too much with respect to the provided overlap values. + score_threshold: A float representing the threshold for deciding when to + remove boxes based on score. + name: A name for the operation (optional). + + Returns: + selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the + selected indices from the overlaps tensor, where `M <= max_output_size`. + """ + with ops.name_scope(name, 'non_max_suppression_overlaps'): + overlap_threshold = ops.convert_to_tensor( + overlap_threshold, name='overlap_threshold') + # pylint: disable=protected-access + return gen_image_ops._non_max_suppression_v3( + overlaps, scores, max_output_size, overlap_threshold, score_threshold) + # pylint: enable=protected-access + + _rgb_to_yiq_kernel = [[0.299, 0.59590059, 0.2115], [0.587, -0.27455667, -0.52273617], [0.114, -0.32134392, 0.31119955]] |