diff options
Diffstat (limited to 'third_party/googleapis/google/cloud/vision/v1/image_annotator.proto')
-rw-r--r-- | third_party/googleapis/google/cloud/vision/v1/image_annotator.proto | 569 |
1 files changed, 569 insertions, 0 deletions
diff --git a/third_party/googleapis/google/cloud/vision/v1/image_annotator.proto b/third_party/googleapis/google/cloud/vision/v1/image_annotator.proto
new file mode 100644
index 0000000000..c17f8aeb6f
--- /dev/null
+++ b/third_party/googleapis/google/cloud/vision/v1/image_annotator.proto
@@ -0,0 +1,569 @@
+// Copyright 2016 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.cloud.vision.v1;
+
+import "google/api/annotations.proto";
+import "google/cloud/vision/v1/geometry.proto";
+import "google/cloud/vision/v1/text_annotation.proto";
+import "google/cloud/vision/v1/web_detection.proto";
+import "google/rpc/status.proto";
+import "google/type/color.proto";
+import "google/type/latlng.proto";
+
+option cc_enable_arenas = true;
+option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1;vision";
+option java_multiple_files = true;
+option java_outer_classname = "ImageAnnotatorProto";
+option java_package = "com.google.cloud.vision.v1";
+
+
+// Service that performs Google Cloud Vision API detection tasks over client
+// images, such as face, landmark, logo, label, and text detection. The
+// ImageAnnotator service returns detected entities from the images.
+service ImageAnnotator {
+  // Run image detection and annotation for a batch of images.
+  rpc BatchAnnotateImages(BatchAnnotateImagesRequest) returns (BatchAnnotateImagesResponse) {
+    option (google.api.http) = { post: "/v1/images:annotate" body: "*" };
+  }
+}
+
+// Users describe the type of Google Cloud Vision API tasks to perform over
+// images by using *Feature*s. Each Feature indicates a type of image
+// detection task to perform. Features encode the Cloud Vision API
+// vertical to operate on and the number of top-scoring results to return.
+message Feature {
+  // Type of image feature.
+  enum Type {
+    // Unspecified feature type.
+    TYPE_UNSPECIFIED = 0;
+
+    // Run face detection.
+    FACE_DETECTION = 1;
+
+    // Run landmark detection.
+    LANDMARK_DETECTION = 2;
+
+    // Run logo detection.
+    LOGO_DETECTION = 3;
+
+    // Run label detection.
+    LABEL_DETECTION = 4;
+
+    // Run OCR.
+    TEXT_DETECTION = 5;
+
+    // Run dense text document OCR. Takes precedence when both
+    // DOCUMENT_TEXT_DETECTION and TEXT_DETECTION are present.
+    DOCUMENT_TEXT_DETECTION = 11;
+
+    // Run computer vision models to compute image safe-search properties.
+    SAFE_SEARCH_DETECTION = 6;
+
+    // Compute a set of image properties, such as the image's dominant colors.
+    IMAGE_PROPERTIES = 7;
+
+    // Run crop hints.
+    CROP_HINTS = 9;
+
+    // Run web detection.
+    WEB_DETECTION = 10;
+  }
+
+  // The feature type.
+  Type type = 1;
+
+  // Maximum number of results of this type.
+  int32 max_results = 2;
+}
+
+// External image source (Google Cloud Storage image location).
+message ImageSource {
+  // NOTE: For new code `image_uri` below is preferred.
+  // Google Cloud Storage image URI, which must be in the following form:
+  // `gs://bucket_name/object_name` (for details, see
+  // [Google Cloud Storage Request
+  // URIs](https://cloud.google.com/storage/docs/reference-uris)).
+  // NOTE: Cloud Storage object versioning is not supported.
+  string gcs_image_uri = 1;
+
+  // Image URI which supports:
+  // 1) Google Cloud Storage image URI, which must be in the following form:
+  // `gs://bucket_name/object_name` (for details, see
+  // [Google Cloud Storage Request
+  // URIs](https://cloud.google.com/storage/docs/reference-uris)).
+  // NOTE: Cloud Storage object versioning is not supported.
+  // 2) Publicly accessible image HTTP/HTTPS URL.
+  // This is preferred over the legacy `gcs_image_uri` above. When both
+  // `gcs_image_uri` and `image_uri` are specified, `image_uri` takes
+  // precedence.
+  string image_uri = 2;
+}
+
+// Client image to perform Google Cloud Vision API tasks over.
+message Image {
+  // Image content, represented as a stream of bytes.
+  // Note: as with all `bytes` fields, protobuffers use a pure binary
+  // representation, whereas JSON representations use base64.
+  bytes content = 1;
+
+  // Google Cloud Storage image location. If both `content` and `source`
+  // are provided for an image, `content` takes precedence and is
+  // used to perform the image annotation request.
+  ImageSource source = 2;
+}
+
+// A face annotation object contains the results of face detection.
+message FaceAnnotation {
+  // A face-specific landmark (for example, a face feature).
+  // Landmark positions may fall outside the bounds of the image
+  // if the face is near one or more edges of the image.
+  // Therefore it is NOT guaranteed that `0 <= x < width` or
+  // `0 <= y < height`.
+  message Landmark {
+    // Face landmark (feature) type.
+    // Left and right are defined from the vantage of the viewer of the image
+    // without considering mirror projections typical of photos. So, `LEFT_EYE`,
+    // typically, is the person's right eye.
+    enum Type {
+      // Unknown face landmark detected. Should not be filled.
+      UNKNOWN_LANDMARK = 0;
+
+      // Left eye.
+      LEFT_EYE = 1;
+
+      // Right eye.
+      RIGHT_EYE = 2;
+
+      // Left of left eyebrow.
+      LEFT_OF_LEFT_EYEBROW = 3;
+
+      // Right of left eyebrow.
+      RIGHT_OF_LEFT_EYEBROW = 4;
+
+      // Left of right eyebrow.
+      LEFT_OF_RIGHT_EYEBROW = 5;
+
+      // Right of right eyebrow.
+      RIGHT_OF_RIGHT_EYEBROW = 6;
+
+      // Midpoint between eyes.
+      MIDPOINT_BETWEEN_EYES = 7;
+
+      // Nose tip.
+      NOSE_TIP = 8;
+
+      // Upper lip.
+      UPPER_LIP = 9;
+
+      // Lower lip.
+      LOWER_LIP = 10;
+
+      // Mouth left.
+      MOUTH_LEFT = 11;
+
+      // Mouth right.
+      MOUTH_RIGHT = 12;
+
+      // Mouth center.
+      MOUTH_CENTER = 13;
+
+      // Nose, bottom right.
+      NOSE_BOTTOM_RIGHT = 14;
+
+      // Nose, bottom left.
+      NOSE_BOTTOM_LEFT = 15;
+
+      // Nose, bottom center.
+      NOSE_BOTTOM_CENTER = 16;
+
+      // Left eye, top boundary.
+      LEFT_EYE_TOP_BOUNDARY = 17;
+
+      // Left eye, right corner.
+      LEFT_EYE_RIGHT_CORNER = 18;
+
+      // Left eye, bottom boundary.
+      LEFT_EYE_BOTTOM_BOUNDARY = 19;
+
+      // Left eye, left corner.
+      LEFT_EYE_LEFT_CORNER = 20;
+
+      // Right eye, top boundary.
+      RIGHT_EYE_TOP_BOUNDARY = 21;
+
+      // Right eye, right corner.
+      RIGHT_EYE_RIGHT_CORNER = 22;
+
+      // Right eye, bottom boundary.
+      RIGHT_EYE_BOTTOM_BOUNDARY = 23;
+
+      // Right eye, left corner.
+      RIGHT_EYE_LEFT_CORNER = 24;
+
+      // Left eyebrow, upper midpoint.
+      LEFT_EYEBROW_UPPER_MIDPOINT = 25;
+
+      // Right eyebrow, upper midpoint.
+      RIGHT_EYEBROW_UPPER_MIDPOINT = 26;
+
+      // Left ear tragion.
+      LEFT_EAR_TRAGION = 27;
+
+      // Right ear tragion.
+      RIGHT_EAR_TRAGION = 28;
+
+      // Left eye pupil.
+      LEFT_EYE_PUPIL = 29;
+
+      // Right eye pupil.
+      RIGHT_EYE_PUPIL = 30;
+
+      // Forehead glabella.
+      FOREHEAD_GLABELLA = 31;
+
+      // Chin gnathion.
+      CHIN_GNATHION = 32;
+
+      // Chin left gonion.
+      CHIN_LEFT_GONION = 33;
+
+      // Chin right gonion.
+      CHIN_RIGHT_GONION = 34;
+    }
+
+    // Face landmark type.
+    Type type = 3;
+
+    // Face landmark position.
+    Position position = 4;
+  }
+
+  // The bounding polygon around the face. The coordinates of the bounding box
+  // are in the original image's scale, as returned in `ImageParams`.
+  // The bounding box is computed to "frame" the face in accordance with human
+  // expectations. It is based on the landmarker results.
+  // Note that one or more x and/or y coordinates may not be generated in the
+  // `BoundingPoly` (the polygon will be unbounded) if only a partial face
+  // appears in the image to be annotated.
+  BoundingPoly bounding_poly = 1;
+
+  // The `fd_bounding_poly` bounding polygon is tighter than the
+  // `boundingPoly`, and encloses only the skin part of the face. Typically, it
+  // is used to eliminate the face from any image analysis that detects the
+  // "amount of skin" visible in an image. It is not based on the
+  // landmarker results, only on the initial face detection, hence
+  // the <code>fd</code> (face detection) prefix.
+  BoundingPoly fd_bounding_poly = 2;
+
+  // Detected face landmarks.
+  repeated Landmark landmarks = 3;
+
+  // Roll angle, which indicates the amount of clockwise/anti-clockwise rotation
+  // of the face relative to the image vertical about the axis perpendicular to
+  // the face. Range [-180,180].
+  float roll_angle = 4;
+
+  // Yaw angle, which indicates the leftward/rightward angle that the face is
+  // pointing relative to the vertical plane perpendicular to the image. Range
+  // [-180,180].
+  float pan_angle = 5;
+
+  // Pitch angle, which indicates the upwards/downwards angle that the face is
+  // pointing relative to the image's horizontal plane. Range [-180,180].
+  float tilt_angle = 6;
+
+  // Detection confidence. Range [0, 1].
+  float detection_confidence = 7;
+
+  // Face landmarking confidence. Range [0, 1].
+  float landmarking_confidence = 8;
+
+  // Joy likelihood.
+  Likelihood joy_likelihood = 9;
+
+  // Sorrow likelihood.
+  Likelihood sorrow_likelihood = 10;
+
+  // Anger likelihood.
+  Likelihood anger_likelihood = 11;
+
+  // Surprise likelihood.
+  Likelihood surprise_likelihood = 12;
+
+  // Under-exposed likelihood.
+  Likelihood under_exposed_likelihood = 13;
+
+  // Blurred likelihood.
+  Likelihood blurred_likelihood = 14;
+
+  // Headwear likelihood.
+  Likelihood headwear_likelihood = 15;
+}
+
+// Detected entity location information.
+message LocationInfo {
+  // lat/long location coordinates.
+  google.type.LatLng lat_lng = 1;
+}
+
+// A `Property` consists of a user-supplied name/value pair.
+message Property {
+  // Name of the property.
+  string name = 1;
+
+  // Value of the property.
+  string value = 2;
+}
+
+// Set of detected entity features.
+message EntityAnnotation {
+  // Opaque entity ID. Some IDs may be available in
+  // [Google Knowledge Graph Search API](https://developers.google.com/knowledge-graph/).
+  string mid = 1;
+
+  // The language code for the locale in which the entity textual
+  // `description` is expressed.
+  string locale = 2;
+
+  // Entity textual description, expressed in its `locale` language.
+  string description = 3;
+
+  // Overall score of the result. Range [0, 1].
+  float score = 4;
+
+  // The accuracy of the entity detection in an image.
+  // For example, for an image in which the "Eiffel Tower" entity is detected,
+  // this field represents the confidence that there is a tower in the query
+  // image. Range [0, 1].
+  float confidence = 5;
+
+  // The relevancy of the ICA (Image Content Annotation) label to the
+  // image. For example, the relevancy of "tower" is likely higher to an image
+  // containing the detected "Eiffel Tower" than to an image containing a
+  // detected distant towering building, even though the confidence that
+  // there is a tower in each image may be the same. Range [0, 1].
+  float topicality = 6;
+
+  // Image region to which this entity belongs. Currently not produced
+  // for `LABEL_DETECTION` features. For `TEXT_DETECTION` (OCR), `boundingPoly`s
+  // are produced for the entire text detected in an image region, followed by
+  // `boundingPoly`s for each word within the detected text.
+  BoundingPoly bounding_poly = 7;
+
+  // The location information for the detected entity. Multiple
+  // `LocationInfo` elements can be present because one location may
+  // indicate the location of the scene in the image, and another location
+  // may indicate the location of the place where the image was taken.
+  // Location information is usually present for landmarks.
+  repeated LocationInfo locations = 8;
+
+  // Some entities may have optional user-supplied `Property` (name/value)
+  // fields, such as a score or string that qualifies the entity.
+  repeated Property properties = 9;
+}
+
+// Set of features pertaining to the image, computed by computer vision
+// methods over safe-search verticals (for example, adult, spoof, medical,
+// violence).
+message SafeSearchAnnotation {
+  // Represents the adult content likelihood for the image.
+  Likelihood adult = 1;
+
+  // Spoof likelihood. The likelihood that a modification
+  // was made to the image's canonical version to make it appear
+  // funny or offensive.
+  Likelihood spoof = 2;
+
+  // Likelihood that this is a medical image.
+  Likelihood medical = 3;
+
+  // Violence likelihood.
+  Likelihood violence = 4;
+}
+
+// Rectangle determined by min and max `LatLng` pairs.
+message LatLongRect {
+  // Min lat/long pair.
+  google.type.LatLng min_lat_lng = 1;
+
+  // Max lat/long pair.
+  google.type.LatLng max_lat_lng = 2;
+}
+
+// Color information consists of RGB channels, score, and the fraction of
+// the image that the color occupies in the image.
+message ColorInfo {
+  // RGB components of the color.
+  google.type.Color color = 1;
+
+  // Image-specific score for this color. Value in range [0, 1].
+  float score = 2;
+
+  // The fraction of pixels the color occupies in the image.
+  // Value in range [0, 1].
+  float pixel_fraction = 3;
+}
+
+// Set of dominant colors and their corresponding scores.
+message DominantColorsAnnotation {
+  // RGB color values with their score and pixel fraction.
+  repeated ColorInfo colors = 1;
+}
+
+// Stores image properties, such as dominant colors.
+message ImageProperties {
+  // If present, dominant colors completed successfully.
+  DominantColorsAnnotation dominant_colors = 1;
+}
+
+// Single crop hint that is used to generate a new crop when serving an image.
+message CropHint {
+  // The bounding polygon for the crop region. The coordinates of the bounding
+  // box are in the original image's scale, as returned in `ImageParams`.
+  BoundingPoly bounding_poly = 1;
+
+  // Confidence of this being a salient region. Range [0, 1].
+  float confidence = 2;
+
+  // Fraction of importance of this salient region with respect to the original
+  // image.
+  float importance_fraction = 3;
+}
+
+// Set of crop hints that are used to generate new crops when serving images.
+message CropHintsAnnotation {
+  repeated CropHint crop_hints = 1;
+}
+
+// Parameters for crop hints annotation request.
+message CropHintsParams {
+  // Aspect ratios in floats, representing the ratio of the width to the height
+  // of the image. For example, if the desired aspect ratio is 4/3, the
+  // corresponding float value should be 1.33333. If not specified, the
+  // best possible crop is returned. The number of provided aspect ratios is
+  // limited to a maximum of 16; any aspect ratios provided after the 16th are
+  // ignored.
+  repeated float aspect_ratios = 1;
+}
+
+// Image context and/or feature-specific parameters.
+message ImageContext {
+  // lat/long rectangle that specifies the location of the image.
+  LatLongRect lat_long_rect = 1;
+
+  // List of languages to use for TEXT_DETECTION. In most cases, an empty value
+  // yields the best results since it enables automatic language detection. For
+  // languages based on the Latin alphabet, setting `language_hints` is not
+  // needed. In rare cases, when the language of the text in the image is known,
+  // setting a hint will help get better results (although it will be a
+  // significant hindrance if the hint is wrong). Text detection returns an
+  // error if one or more of the specified languages is not one of the
+  // [supported languages](/vision/docs/languages).
+  repeated string language_hints = 2;
+
+  // Parameters for crop hints annotation request.
+  CropHintsParams crop_hints_params = 4;
+}
+
+// Request for performing Google Cloud Vision API tasks over a user-provided
+// image, with user-requested features.
+message AnnotateImageRequest {
+  // The image to be processed.
+  Image image = 1;
+
+  // Requested features.
+  repeated Feature features = 2;
+
+  // Additional context that may accompany the image.
+  ImageContext image_context = 3;
+}
+
+// Response to an image annotation request.
+message AnnotateImageResponse {
+  // If present, face detection has completed successfully.
+  repeated FaceAnnotation face_annotations = 1;
+
+  // If present, landmark detection has completed successfully.
+  repeated EntityAnnotation landmark_annotations = 2;
+
+  // If present, logo detection has completed successfully.
+  repeated EntityAnnotation logo_annotations = 3;
+
+  // If present, label detection has completed successfully.
+  repeated EntityAnnotation label_annotations = 4;
+
+  // If present, text (OCR) detection or document (OCR) text detection has
+  // completed successfully.
+  repeated EntityAnnotation text_annotations = 5;
+
+  // If present, text (OCR) detection or document (OCR) text detection has
+  // completed successfully.
+  // This annotation provides the structural hierarchy for the OCR detected
+  // text.
+  TextAnnotation full_text_annotation = 12;
+
+  // If present, safe-search annotation has completed successfully.
+  SafeSearchAnnotation safe_search_annotation = 6;
+
+  // If present, image properties were extracted successfully.
+  ImageProperties image_properties_annotation = 8;
+
+  // If present, crop hints have completed successfully.
+  CropHintsAnnotation crop_hints_annotation = 11;
+
+  // If present, web detection has completed successfully.
+  WebDetection web_detection = 13;
+
+  // If set, represents the error message for the operation.
+  // Note that filled-in image annotations are guaranteed to be
+  // correct, even when `error` is set.
+  google.rpc.Status error = 9;
+}
+
+// Multiple image annotation requests are batched into a single service call.
+message BatchAnnotateImagesRequest {
+  // Individual image annotation requests for this batch.
+  repeated AnnotateImageRequest requests = 1;
+}
+
+// Response to a batch image annotation request.
+message BatchAnnotateImagesResponse {
+  // Individual responses to image annotation requests within the batch.
+  repeated AnnotateImageResponse responses = 1;
+}
+
+// A bucketized representation of likelihood, which is intended to give clients
+// highly stable results across model upgrades.
+enum Likelihood {
+  // Unknown likelihood.
+  UNKNOWN = 0;
+
+  // It is very unlikely that the image belongs to the specified vertical.
+  VERY_UNLIKELY = 1;
+
+  // It is unlikely that the image belongs to the specified vertical.
+  UNLIKELY = 2;
+
+  // It is possible that the image belongs to the specified vertical.
+  POSSIBLE = 3;
+
+  // It is likely that the image belongs to the specified vertical.
+  LIKELY = 4;
+
+  // It is very likely that the image belongs to the specified vertical.
+  VERY_LIKELY = 5;
+}
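For orientation only (not part of the vendored file above): the service exposes a single RPC, BatchAnnotateImages, bound to `POST /v1/images:annotate`. Below is a minimal sketch of calling that binding over REST, with the request body written per the standard proto3 JSON mapping of BatchAnnotateImagesRequest (camelCase field names, enum values as strings). The endpoint host, API key, and bucket/object names are illustrative assumptions, not something this commit defines.

```python
# Sketch: one AnnotateImageRequest with a Cloud Storage image source and two
# Feature entries, posted to the HTTP binding declared in the proto.
import requests

API_KEY = "YOUR_API_KEY"  # placeholder credential (assumed auth scheme)
ENDPOINT = f"https://vision.googleapis.com/v1/images:annotate?key={API_KEY}"

body = {
    "requests": [
        {
            # Image.source.image_uri -> "imageUri" under the proto3 JSON mapping
            "image": {"source": {"imageUri": "gs://bucket_name/object_name"}},
            # Feature.type / Feature.max_results
            "features": [
                {"type": "LABEL_DETECTION", "maxResults": 10},
                {"type": "TEXT_DETECTION"},
            ],
        }
    ]
}

resp = requests.post(ENDPOINT, json=body, timeout=30)
resp.raise_for_status()

# BatchAnnotateImagesResponse.responses is parallel to the request list; each
# element may carry labelAnnotations, textAnnotations, etc., or an `error`
# (google.rpc.Status) if that individual image failed.
for r in resp.json().get("responses", []):
    for label in r.get("labelAnnotations", []):
        print(label.get("description"), label.get("score"))
```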