path: root/third_party/googleapis/google/cloud/vision/v1/image_annotator.proto
Diffstat (limited to 'third_party/googleapis/google/cloud/vision/v1/image_annotator.proto')
-rw-r--r--  third_party/googleapis/google/cloud/vision/v1/image_annotator.proto  569
1 file changed, 569 insertions(+), 0 deletions(-)
diff --git a/third_party/googleapis/google/cloud/vision/v1/image_annotator.proto b/third_party/googleapis/google/cloud/vision/v1/image_annotator.proto
new file mode 100644
index 0000000000..c17f8aeb6f
--- /dev/null
+++ b/third_party/googleapis/google/cloud/vision/v1/image_annotator.proto
@@ -0,0 +1,569 @@
+// Copyright 2016 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.cloud.vision.v1;
+
+import "google/api/annotations.proto";
+import "google/cloud/vision/v1/geometry.proto";
+import "google/cloud/vision/v1/text_annotation.proto";
+import "google/cloud/vision/v1/web_detection.proto";
+import "google/rpc/status.proto";
+import "google/type/color.proto";
+import "google/type/latlng.proto";
+
+option cc_enable_arenas = true;
+option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1;vision";
+option java_multiple_files = true;
+option java_outer_classname = "ImageAnnotatorProto";
+option java_package = "com.google.cloud.vision.v1";
+
+
+// Service that performs Google Cloud Vision API detection tasks over client
+// images, such as face, landmark, logo, label, and text detection. The
+// ImageAnnotator service returns detected entities from the images.
+service ImageAnnotator {
+ // Run image detection and annotation for a batch of images.
+ rpc BatchAnnotateImages(BatchAnnotateImagesRequest) returns (BatchAnnotateImagesResponse) {
+ option (google.api.http) = { post: "/v1/images:annotate" body: "*" };
+ }
+}
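+
+// Illustrative usage (not part of the API definition): with the proto3 JSON
+// mapping, a minimal `BatchAnnotateImagesRequest` body for
+// `POST /v1/images:annotate` could look like the sketch below. The bucket
+// and object names are placeholders.
+//
+//   {
+//     "requests": [{
+//       "image": {"source": {"imageUri": "gs://my-bucket/my-image.jpg"}},
+//       "features": [{"type": "LABEL_DETECTION", "maxResults": 5}]
+//     }]
+//   }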
+
+// Users describe the type of Google Cloud Vision API tasks to perform over
+// images by using *Feature*s. Each Feature indicates a type of image
+// detection task to perform. Features encode the Cloud Vision API
+// vertical to operate on and the number of top-scoring results to return.
+message Feature {
+ // Type of image feature.
+ enum Type {
+ // Unspecified feature type.
+ TYPE_UNSPECIFIED = 0;
+
+ // Run face detection.
+ FACE_DETECTION = 1;
+
+ // Run landmark detection.
+ LANDMARK_DETECTION = 2;
+
+ // Run logo detection.
+ LOGO_DETECTION = 3;
+
+ // Run label detection.
+ LABEL_DETECTION = 4;
+
+ // Run OCR.
+ TEXT_DETECTION = 5;
+
+ // Run dense text document OCR. Takes precedence when both
+ // DOCUMENT_TEXT_DETECTION and TEXT_DETECTION are present.
+ DOCUMENT_TEXT_DETECTION = 11;
+
+ // Run computer vision models to compute image safe-search properties.
+ SAFE_SEARCH_DETECTION = 6;
+
+ // Compute a set of image properties, such as the image's dominant colors.
+ IMAGE_PROPERTIES = 7;
+
+ // Run crop hints.
+ CROP_HINTS = 9;
+
+ // Run web detection.
+ WEB_DETECTION = 10;
+ }
+
+ // The feature type.
+ Type type = 1;
+
+ // Maximum number of results of this type.
+ int32 max_results = 2;
+}
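+
+// Illustrative usage: a `Feature` asking for the ten top-scoring labels,
+// rendered with the proto3 JSON mapping (enum values as strings, field
+// names in lowerCamelCase):
+//
+//   {"type": "LABEL_DETECTION", "maxResults": 10}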
+
+// External image source (Google Cloud Storage image location).
+message ImageSource {
+ // NOTE: For new code, `image_uri` below is preferred.
+ // Google Cloud Storage image URI, which must be in the following form:
+ // `gs://bucket_name/object_name` (for details, see
+ // [Google Cloud Storage Request
+ // URIs](https://cloud.google.com/storage/docs/reference-uris)).
+ // NOTE: Cloud Storage object versioning is not supported.
+ string gcs_image_uri = 1;
+
+ // Image URI which supports:
+ // 1) Google Cloud Storage image URI, which must be in the following form:
+ // `gs://bucket_name/object_name` (for details, see
+ // [Google Cloud Storage Request
+ // URIs](https://cloud.google.com/storage/docs/reference-uris)).
+ // NOTE: Cloud Storage object versioning is not supported.
+ // 2) Publicly accessible image HTTP/HTTPS URL.
+ // This is preferred over the legacy `gcs_image_uri` above. When both
+ // `gcs_image_uri` and `image_uri` are specified, `image_uri` takes
+ // precedence.
+ string image_uri = 2;
+}
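+
+// Illustrative usage: the two accepted `image_uri` forms in JSON (both URIs
+// are placeholders):
+//
+//   {"imageUri": "gs://my-bucket/my-image.jpg"}
+//   {"imageUri": "https://example.com/my-image.jpg"}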
+
+// Client image to perform Google Cloud Vision API tasks over.
+message Image {
+ // Image content, represented as a stream of bytes.
+ // Note: as with all `bytes` fields, protocol buffers use a pure binary
+ // representation, whereas JSON representations use base64.
+ bytes content = 1;
+
+ // Google Cloud Storage image location. If both `content` and `source`
+ // are provided for an image, `content` takes precedence and is
+ // used to perform the image annotation request.
+ ImageSource source = 2;
+}
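+
+// Illustrative usage: in JSON, `content` carries base64 bytes (per the
+// proto3 JSON mapping) while `source` points at an external location;
+// only one is needed, and `content` wins when both are set.
+//
+//   {"content": "/9j/4AAQSkZJRg...(base64 truncated)..."}
+//   {"source": {"imageUri": "gs://my-bucket/my-image.jpg"}}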
+
+// A face annotation object contains the results of face detection.
+message FaceAnnotation {
+ // A face-specific landmark (for example, a face feature).
+ // Landmark positions may fall outside the bounds of the image
+ // if the face is near one or more edges of the image.
+ // Therefore it is NOT guaranteed that `0 <= x < width` or
+ // `0 <= y < height`.
+ message Landmark {
+ // Face landmark (feature) type.
+ // Left and right are defined from the vantage of the viewer of the image
+ // without considering mirror projections typical of photos. So, `LEFT_EYE`,
+ // typically, is the person's right eye.
+ enum Type {
+ // Unknown face landmark detected. Should not be filled.
+ UNKNOWN_LANDMARK = 0;
+
+ // Left eye.
+ LEFT_EYE = 1;
+
+ // Right eye.
+ RIGHT_EYE = 2;
+
+ // Left of left eyebrow.
+ LEFT_OF_LEFT_EYEBROW = 3;
+
+ // Right of left eyebrow.
+ RIGHT_OF_LEFT_EYEBROW = 4;
+
+ // Left of right eyebrow.
+ LEFT_OF_RIGHT_EYEBROW = 5;
+
+ // Right of right eyebrow.
+ RIGHT_OF_RIGHT_EYEBROW = 6;
+
+ // Midpoint between eyes.
+ MIDPOINT_BETWEEN_EYES = 7;
+
+ // Nose tip.
+ NOSE_TIP = 8;
+
+ // Upper lip.
+ UPPER_LIP = 9;
+
+ // Lower lip.
+ LOWER_LIP = 10;
+
+ // Mouth left.
+ MOUTH_LEFT = 11;
+
+ // Mouth right.
+ MOUTH_RIGHT = 12;
+
+ // Mouth center.
+ MOUTH_CENTER = 13;
+
+ // Nose, bottom right.
+ NOSE_BOTTOM_RIGHT = 14;
+
+ // Nose, bottom left.
+ NOSE_BOTTOM_LEFT = 15;
+
+ // Nose, bottom center.
+ NOSE_BOTTOM_CENTER = 16;
+
+ // Left eye, top boundary.
+ LEFT_EYE_TOP_BOUNDARY = 17;
+
+ // Left eye, right corner.
+ LEFT_EYE_RIGHT_CORNER = 18;
+
+ // Left eye, bottom boundary.
+ LEFT_EYE_BOTTOM_BOUNDARY = 19;
+
+ // Left eye, left corner.
+ LEFT_EYE_LEFT_CORNER = 20;
+
+ // Right eye, top boundary.
+ RIGHT_EYE_TOP_BOUNDARY = 21;
+
+ // Right eye, right corner.
+ RIGHT_EYE_RIGHT_CORNER = 22;
+
+ // Right eye, bottom boundary.
+ RIGHT_EYE_BOTTOM_BOUNDARY = 23;
+
+ // Right eye, left corner.
+ RIGHT_EYE_LEFT_CORNER = 24;
+
+ // Left eyebrow, upper midpoint.
+ LEFT_EYEBROW_UPPER_MIDPOINT = 25;
+
+ // Right eyebrow, upper midpoint.
+ RIGHT_EYEBROW_UPPER_MIDPOINT = 26;
+
+ // Left ear tragion.
+ LEFT_EAR_TRAGION = 27;
+
+ // Right ear tragion.
+ RIGHT_EAR_TRAGION = 28;
+
+ // Left eye pupil.
+ LEFT_EYE_PUPIL = 29;
+
+ // Right eye pupil.
+ RIGHT_EYE_PUPIL = 30;
+
+ // Forehead glabella.
+ FOREHEAD_GLABELLA = 31;
+
+ // Chin gnathion.
+ CHIN_GNATHION = 32;
+
+ // Chin left gonion.
+ CHIN_LEFT_GONION = 33;
+
+ // Chin right gonion.
+ CHIN_RIGHT_GONION = 34;
+ }
+
+ // Face landmark type.
+ Type type = 3;
+
+ // Face landmark position.
+ Position position = 4;
+ }
+
+ // The bounding polygon around the face. The coordinates of the bounding box
+ // are in the original image's scale, as returned in `ImageParams`.
+ // The bounding box is computed to "frame" the face in accordance with human
+ // expectations. It is based on the landmarker results.
+ // Note that one or more x and/or y coordinates may not be generated in the
+ // `BoundingPoly` (the polygon will be unbounded) if only a partial face
+ // appears in the image to be annotated.
+ BoundingPoly bounding_poly = 1;
+
+ // The `fd_bounding_poly` bounding polygon is tighter than the
+ // `boundingPoly`, and encloses only the skin part of the face. Typically, it
+ // is used to eliminate the face from any image analysis that detects the
+ // "amount of skin" visible in an image. It is not based on the
+ // landmarker results, only on the initial face detection, hence
+ // the <code>fd</code> (face detection) prefix.
+ BoundingPoly fd_bounding_poly = 2;
+
+ // Detected face landmarks.
+ repeated Landmark landmarks = 3;
+
+ // Roll angle, which indicates the amount of clockwise/anti-clockwise rotation
+ // of the face relative to the image vertical about the axis perpendicular to
+ // the face. Range [-180,180].
+ float roll_angle = 4;
+
+ // Yaw angle, which indicates the leftward/rightward angle that the face is
+ // pointing relative to the vertical plane perpendicular to the image. Range
+ // [-180,180].
+ float pan_angle = 5;
+
+ // Pitch angle, which indicates the upwards/downwards angle that the face is
+ // pointing relative to the image's horizontal plane. Range [-180,180].
+ float tilt_angle = 6;
+
+ // Detection confidence. Range [0, 1].
+ float detection_confidence = 7;
+
+ // Face landmarking confidence. Range [0, 1].
+ float landmarking_confidence = 8;
+
+ // Joy likelihood.
+ Likelihood joy_likelihood = 9;
+
+ // Sorrow likelihood.
+ Likelihood sorrow_likelihood = 10;
+
+ // Anger likelihood.
+ Likelihood anger_likelihood = 11;
+
+ // Surprise likelihood.
+ Likelihood surprise_likelihood = 12;
+
+ // Under-exposed likelihood.
+ Likelihood under_exposed_likelihood = 13;
+
+ // Blurred likelihood.
+ Likelihood blurred_likelihood = 14;
+
+ // Headwear likelihood.
+ Likelihood headwear_likelihood = 15;
+}
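+
+// Illustrative response fragment (all values invented) showing one detected
+// face with a single landmark, rendered with the proto3 JSON mapping:
+//
+//   {
+//     "boundingPoly": {"vertices": [{"x": 10, "y": 14}, {"x": 96, "y": 14},
+//                                   {"x": 96, "y": 114}, {"x": 10, "y": 114}]},
+//     "landmarks": [{"type": "NOSE_TIP",
+//                    "position": {"x": 53.2, "y": 64.7, "z": -0.4}}],
+//     "rollAngle": -2.1, "panAngle": 5.6, "tiltAngle": -1.3,
+//     "detectionConfidence": 0.98, "joyLikelihood": "VERY_LIKELY"
+//   }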
+
+// Detected entity location information.
+message LocationInfo {
+ // lat/long location coordinates.
+ google.type.LatLng lat_lng = 1;
+}
+
+// A `Property` consists of a user-supplied name/value pair.
+message Property {
+ // Name of the property.
+ string name = 1;
+
+ // Value of the property.
+ string value = 2;
+}
+
+// Set of detected entity features.
+message EntityAnnotation {
+ // Opaque entity ID. Some IDs may be available in
+ // [Google Knowledge Graph Search API](https://developers.google.com/knowledge-graph/).
+ string mid = 1;
+
+ // The language code for the locale in which the entity textual
+ // `description` is expressed.
+ string locale = 2;
+
+ // Entity textual description, expressed in its `locale` language.
+ string description = 3;
+
+ // Overall score of the result. Range [0, 1].
+ float score = 4;
+
+ // The accuracy of the entity detection in an image.
+ // For example, for an image in which the "Eiffel Tower" entity is detected,
+ // this field represents the confidence that there is a tower in the query
+ // image. Range [0, 1].
+ float confidence = 5;
+
+ // The relevancy of the ICA (Image Content Annotation) label to the
+ // image. For example, the relevancy of "tower" is likely higher to an image
+ // containing the detected "Eiffel Tower" than to an image containing a
+ // detected distant towering building, even though the confidence that
+ // there is a tower in each image may be the same. Range [0, 1].
+ float topicality = 6;
+
+ // Image region to which this entity belongs. Currently not produced
+ // for `LABEL_DETECTION` features. For `TEXT_DETECTION` (OCR), `boundingPoly`s
+ // are produced for the entire text detected in an image region, followed by
+ // `boundingPoly`s for each word within the detected text.
+ BoundingPoly bounding_poly = 7;
+
+ // The location information for the detected entity. Multiple
+ // `LocationInfo` elements can be present because one location may
+ // indicate the location of the scene in the image, and another location
+ // may indicate the location of the place where the image was taken.
+ // Location information is usually present for landmarks.
+ repeated LocationInfo locations = 8;
+
+ // Some entities may have optional user-supplied `Property` (name/value)
+ // fields, such as a score or string that qualifies the entity.
+ repeated Property properties = 9;
+}
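+
+// Illustrative `LABEL_DETECTION` result (values invented; `mid` is a
+// Knowledge Graph-style identifier), in JSON:
+//
+//   {"mid": "/m/01yrx", "description": "cat", "score": 0.97, "topicality": 0.97}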
+
+// Set of features pertaining to the image, computed by computer vision
+// methods over safe-search verticals (for example, adult, spoof, medical,
+// violence).
+message SafeSearchAnnotation {
+ // Represents the adult content likelihood for the image.
+ Likelihood adult = 1;
+
+ // Spoof likelihood. The likelihood that a modification
+ // was made to the image's canonical version to make it appear
+ // funny or offensive.
+ Likelihood spoof = 2;
+
+ // Likelihood that this is a medical image.
+ Likelihood medical = 3;
+
+ // Violence likelihood.
+ Likelihood violence = 4;
+}
+
+// Rectangle determined by min and max `LatLng` pairs.
+message LatLongRect {
+ // Min lat/long pair.
+ google.type.LatLng min_lat_lng = 1;
+
+ // Max lat/long pair.
+ google.type.LatLng max_lat_lng = 2;
+}
+
+// Color information consists of RGB channels, score, and the fraction of
+// the image that the color occupies in the image.
+message ColorInfo {
+ // RGB components of the color.
+ google.type.Color color = 1;
+
+ // Image-specific score for this color. Value in range [0, 1].
+ float score = 2;
+
+ // The fraction of pixels the color occupies in the image.
+ // Value in range [0, 1].
+ float pixel_fraction = 3;
+}
+
+// Set of dominant colors and their corresponding scores.
+message DominantColorsAnnotation {
+ // RGB color values with their score and pixel fraction.
+ repeated ColorInfo colors = 1;
+}
+
+// Stores image properties, such as dominant colors.
+message ImageProperties {
+ // If present, dominant colors detection has completed successfully.
+ DominantColorsAnnotation dominant_colors = 1;
+}
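+
+// Illustrative `IMAGE_PROPERTIES` result (values invented), in JSON:
+//
+//   {"dominantColors": {"colors": [
+//     {"color": {"red": 210, "green": 110, "blue": 40},
+//      "score": 0.32, "pixelFraction": 0.18}]}}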
+
+// Single crop hint that is used to generate a new crop when serving an image.
+message CropHint {
+ // The bounding polygon for the crop region. The coordinates of the bounding
+ // box are in the original image's scale, as returned in `ImageParams`.
+ BoundingPoly bounding_poly = 1;
+
+ // Confidence of this being a salient region. Range [0, 1].
+ float confidence = 2;
+
+ // Fraction of importance of this salient region with respect to the original
+ // image.
+ float importance_fraction = 3;
+}
+
+// Set of crop hints that are used to generate new crops when serving images.
+message CropHintsAnnotation {
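+ // Crop hint results.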
+ repeated CropHint crop_hints = 1;
+}
+
+// Parameters for crop hints annotation request.
+message CropHintsParams {
+ // Aspect ratios in floats, representing the ratio of the width to the height
+ // of the image. For example, if the desired aspect ratio is 4/3, the
+ // corresponding float value should be 1.33333. If not specified, the
+ // best possible crop is returned. The number of provided aspect ratios is
+ // limited to a maximum of 16; any aspect ratios provided after the 16th are
+ // ignored.
+ repeated float aspect_ratios = 1;
+}
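+
+// Worked example: a 4:3 hint is 4 / 3 = 1.33333 and a 16:9 hint is
+// 16 / 9 = 1.77778, so requesting both crops would carry:
+//
+//   {"aspectRatios": [1.33333, 1.77778]}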
+
+// Image context and/or feature-specific parameters.
+message ImageContext {
+ // lat/long rectangle that specifies the location of the image.
+ LatLongRect lat_long_rect = 1;
+
+ // List of languages to use for TEXT_DETECTION. In most cases, an empty value
+ // yields the best results since it enables automatic language detection. For
+ // languages based on the Latin alphabet, setting `language_hints` is not
+ // needed. In rare cases, when the language of the text in the image is known,
+ // setting a hint will help get better results (although it will be a
+ // significant hindrance if the hint is wrong). Text detection returns an
+ // error if one or more of the specified languages is not one of the
+ // [supported languages](/vision/docs/languages).
+ repeated string language_hints = 2;
+
+ // Parameters for crop hints annotation request.
+ CropHintsParams crop_hints_params = 4;
+}
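+
+// Illustrative `ImageContext` combining an OCR language hint with crop hints
+// parameters (the language code is a placeholder), in JSON:
+//
+//   {"languageHints": ["it"], "cropHintsParams": {"aspectRatios": [1.33333]}}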
+
+// Request for performing Google Cloud Vision API tasks over a user-provided
+// image, with user-requested features.
+message AnnotateImageRequest {
+ // The image to be processed.
+ Image image = 1;
+
+ // Requested features.
+ repeated Feature features = 2;
+
+ // Additional context that may accompany the image.
+ ImageContext image_context = 3;
+}
+
+// Response to an image annotation request.
+message AnnotateImageResponse {
+ // If present, face detection has completed successfully.
+ repeated FaceAnnotation face_annotations = 1;
+
+ // If present, landmark detection has completed successfully.
+ repeated EntityAnnotation landmark_annotations = 2;
+
+ // If present, logo detection has completed successfully.
+ repeated EntityAnnotation logo_annotations = 3;
+
+ // If present, label detection has completed successfully.
+ repeated EntityAnnotation label_annotations = 4;
+
+ // If present, text (OCR) detection or document (OCR) text detection has
+ // completed successfully.
+ repeated EntityAnnotation text_annotations = 5;
+
+ // If present, text (OCR) detection or document (OCR) text detection has
+ // completed successfully.
+ // This annotation provides the structural hierarchy for the OCR detected
+ // text.
+ TextAnnotation full_text_annotation = 12;
+
+ // If present, safe-search annotation has completed successfully.
+ SafeSearchAnnotation safe_search_annotation = 6;
+
+ // If present, image properties were extracted successfully.
+ ImageProperties image_properties_annotation = 8;
+
+ // If present, crop hints have completed successfully.
+ CropHintsAnnotation crop_hints_annotation = 11;
+
+ // If present, web detection has completed successfully.
+ WebDetection web_detection = 13;
+
+ // If set, represents the error message for the operation.
+ // Note that filled-in image annotations are guaranteed to be
+ // correct, even when `error` is set.
+ google.rpc.Status error = 9;
+}
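+
+// Illustrative partial result (values invented): per the note on `error`
+// above, a response may carry both successful annotations and an error for
+// whatever failed.
+//
+//   {
+//     "labelAnnotations": [{"description": "building", "score": 0.91}],
+//     "error": {"code": 3, "message": "..."}
+//   }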
+
+// Multiple image annotation requests are batched into a single service call.
+message BatchAnnotateImagesRequest {
+ // Individual image annotation requests for this batch.
+ repeated AnnotateImageRequest requests = 1;
+}
+
+// Response to a batch image annotation request.
+message BatchAnnotateImagesResponse {
+ // Individual responses to image annotation requests within the batch.
+ repeated AnnotateImageResponse responses = 1;
+}
+
+// A bucketized representation of likelihood, which is intended to give clients
+// highly stable results across model upgrades.
+enum Likelihood {
+ // Unknown likelihood.
+ UNKNOWN = 0;
+
+ // It is very unlikely that the image belongs to the specified vertical.
+ VERY_UNLIKELY = 1;
+
+ // It is unlikely that the image belongs to the specified vertical.
+ UNLIKELY = 2;
+
+ // It is possible that the image belongs to the specified vertical.
+ POSSIBLE = 3;
+
+ // It is likely that the image belongs to the specified vertical.
+ LIKELY = 4;
+
+ // It is very likely that the image belongs to the specified vertical.
+ VERY_LIKELY = 5;
+}