diff options
Diffstat (limited to 'third_party/googleapis/google/genomics/v1/annotations.proto')
-rw-r--r-- | third_party/googleapis/google/genomics/v1/annotations.proto | 672 |
1 files changed, 672 insertions, 0 deletions
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.genomics.v1;

import "google/api/annotations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/wrappers.proto";
import "google/rpc/status.proto";

option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "AnnotationsProto";
option java_package = "com.google.genomics.v1";

// This service provides storage and positional retrieval of genomic
// reference annotations, including variant annotations.
service AnnotationServiceV1 {
  // Creates a new annotation set. Caller must have WRITE permission for the
  // associated dataset.
  //
  // The following fields are required:
  //
  // * [datasetId][google.genomics.v1.AnnotationSet.dataset_id]
  // * [referenceSetId][google.genomics.v1.AnnotationSet.reference_set_id]
  //
  // All other fields may be optionally specified, unless documented as being
  // server-generated (for example, the `id` field).
  rpc CreateAnnotationSet(CreateAnnotationSetRequest) returns (AnnotationSet) {
    option (google.api.http) = { post: "/v1/annotationsets" body: "annotation_set" };
  }

  // Gets an annotation set. Caller must have READ permission for
  // the associated dataset.
  rpc GetAnnotationSet(GetAnnotationSetRequest) returns (AnnotationSet) {
    option (google.api.http) = { get: "/v1/annotationsets/{annotation_set_id}" };
  }

  // Updates an annotation set. The update must respect all mutability
  // restrictions and other invariants described on the annotation set resource.
  // Caller must have WRITE permission for the associated dataset.
  rpc UpdateAnnotationSet(UpdateAnnotationSetRequest) returns (AnnotationSet) {
    option (google.api.http) = { put: "/v1/annotationsets/{annotation_set_id}" body: "annotation_set" };
  }

  // Deletes an annotation set. Caller must have WRITE permission
  // for the associated annotation set.
  rpc DeleteAnnotationSet(DeleteAnnotationSetRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = { delete: "/v1/annotationsets/{annotation_set_id}" };
  }

  // Searches for annotation sets that match the given criteria. Annotation sets
  // are returned in an unspecified order. This order is consistent, such that
  // two queries for the same content (regardless of page size) yield annotation
  // sets in the same order across their respective streams of paginated
  // responses. Caller must have READ permission for the queried datasets.
  rpc SearchAnnotationSets(SearchAnnotationSetsRequest) returns (SearchAnnotationSetsResponse) {
    option (google.api.http) = { post: "/v1/annotationsets/search" body: "*" };
  }

  // Creates a new annotation. Caller must have WRITE permission
  // for the associated annotation set.
  //
  // The following fields are required:
  //
  // * [annotationSetId][google.genomics.v1.Annotation.annotation_set_id]
  // * [referenceName][google.genomics.v1.Annotation.reference_name] or
  //   [referenceId][google.genomics.v1.Annotation.reference_id]
  //
  // ### Transcripts
  //
  // For annotations of type TRANSCRIPT, the following fields of
  // [transcript][google.genomics.v1.Annotation.transcript] must be provided:
  //
  // * [exons.start][google.genomics.v1.Transcript.Exon.start]
  // * [exons.end][google.genomics.v1.Transcript.Exon.end]
  //
  // All other fields may be optionally specified, unless documented as being
  // server-generated (for example, the `id` field). The annotated
  // range must be no longer than 100Mbp (mega base pairs). See the
  // [Annotation resource][google.genomics.v1.Annotation]
  // for additional restrictions on each field.
  rpc CreateAnnotation(CreateAnnotationRequest) returns (Annotation) {
    option (google.api.http) = { post: "/v1/annotations" body: "annotation" };
  }

  // Creates one or more new annotations atomically. All annotations must
  // belong to the same annotation set. Caller must have WRITE
  // permission for this annotation set. For optimal performance, batch
  // positionally adjacent annotations together.
  //
  // If the request has a systemic issue, such as an attempt to write to
  // an inaccessible annotation set, the entire RPC will fail accordingly. For
  // lesser data issues, when possible an error will be isolated to the
  // corresponding batch entry in the response; the remaining well formed
  // annotations will be created normally.
  //
  // For details on the requirements for each individual annotation resource,
  // see
  // [CreateAnnotation][google.genomics.v1.AnnotationServiceV1.CreateAnnotation].
  rpc BatchCreateAnnotations(BatchCreateAnnotationsRequest) returns (BatchCreateAnnotationsResponse) {
    option (google.api.http) = { post: "/v1/annotations:batchCreate" body: "*" };
  }

  // Gets an annotation. Caller must have READ permission
  // for the associated annotation set.
  rpc GetAnnotation(GetAnnotationRequest) returns (Annotation) {
    option (google.api.http) = { get: "/v1/annotations/{annotation_id}" };
  }

  // Updates an annotation. Caller must have
  // WRITE permission for the associated dataset.
  rpc UpdateAnnotation(UpdateAnnotationRequest) returns (Annotation) {
    option (google.api.http) = { put: "/v1/annotations/{annotation_id}" body: "annotation" };
  }

  // Deletes an annotation. Caller must have WRITE permission for
  // the associated annotation set.
  rpc DeleteAnnotation(DeleteAnnotationRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = { delete: "/v1/annotations/{annotation_id}" };
  }

  // Searches for annotations that match the given criteria. Results are
  // ordered by genomic coordinate (by reference sequence, then position).
  // Annotations with equivalent genomic coordinates are returned in an
  // unspecified order. This order is consistent, such that two queries for the
  // same content (regardless of page size) yield annotations in the same order
  // across their respective streams of paginated responses. Caller must have
  // READ permission for the queried annotation sets.
  rpc SearchAnnotations(SearchAnnotationsRequest) returns (SearchAnnotationsResponse) {
    option (google.api.http) = { post: "/v1/annotations/search" body: "*" };
  }
}

// An annotation set is a logical grouping of annotations that share consistent
// type information and provenance. Examples of annotation sets include 'all
// genes from refseq', and 'all variant annotations from ClinVar'.
message AnnotationSet {
  // The server-generated annotation set ID, unique across all annotation sets.
  string id = 1;

  // The dataset to which this annotation set belongs.
  string dataset_id = 2;

  // The ID of the reference set that defines the coordinate space for this
  // set's annotations.
  string reference_set_id = 3;

  // The display name for this annotation set.
  string name = 4;

  // The source URI describing the file from which this annotation set was
  // generated, if any.
  string source_uri = 5;

  // The type of annotations contained within this set.
  AnnotationType type = 6;

  // A map of additional annotation set information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 17;
}

// An annotation describes a region of reference genome. The value of an
// annotation may be one of several canonical types, supplemented by arbitrary
// info tags. An annotation is not inherently associated with a specific
// sample or individual (though a client could choose to use annotations in
// this way). Example canonical annotation types are `GENE` and
// `VARIANT`.
message Annotation {
  // The server-generated annotation ID, unique across all annotations.
  string id = 1;

  // The annotation set to which this annotation belongs.
  string annotation_set_id = 2;

  // The display name of this annotation.
  string name = 3;

  // The ID of the Google Genomics reference associated with this range.
  string reference_id = 4;

  // The display name corresponding to the reference specified by
  // `referenceId`, for example `chr1`, `1`, or `chrX`.
  string reference_name = 5;

  // The start position of the range on the reference, 0-based inclusive.
  int64 start = 6;

  // The end position of the range on the reference, 0-based exclusive.
  int64 end = 7;

  // Whether this range refers to the reverse strand, as opposed to the forward
  // strand. Note that regardless of this field, the start/end position of the
  // range always refer to the forward strand.
  bool reverse_strand = 8;

  // The data type for this annotation. Must match the containing annotation
  // set's type.
  AnnotationType type = 9;

  // The typed value of this annotation. At most one member is set, and only
  // for annotations whose `type` is `VARIANT` or `TRANSCRIPT`.
  oneof value {
    // A variant annotation, which describes the effect of a variant on the
    // genome, the coding sequence, and/or higher level consequences at the
    // organism level e.g. pathogenicity. This field is only set for annotations
    // of type `VARIANT`.
    VariantAnnotation variant = 10;

    // A transcript value represents the assertion that a particular region of
    // the reference genome may be transcribed as RNA. An alternative splicing
    // pattern would be represented as a separate transcript object. This field
    // is only set for annotations of type `TRANSCRIPT`.
    Transcript transcript = 11;
  }

  // A map of additional annotation information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 12;
}

// A variant annotation describes the effect of a variant on the genome, the
// coding sequence, and/or higher level consequences at the organism level,
// e.g. pathogenicity.
message VariantAnnotation {
  // A condition associated with a variant. A condition describes the way a
  // variant influences human health.
  message ClinicalCondition {
    // A set of names for the condition.
    repeated string names = 1;

    // The set of external IDs for this condition.
    repeated ExternalId external_ids = 2;

    // The MedGen concept id associated with this condition.
    // Search for these IDs at http://www.ncbi.nlm.nih.gov/medgen/
    string concept_id = 3;

    // The OMIM id for this condition.
    // Search for these IDs at http://omim.org/
    string omim_id = 4;
  }

  // The type of a variant. Adapted from ClinVar's list of variant types.
  enum Type {
    TYPE_UNSPECIFIED = 0;

    // `TYPE_OTHER` should be used when no other Type will suffice.
    // Further explanation of the variant type may be included in the
    // [info][google.genomics.v1.Annotation.info] field.
    TYPE_OTHER = 1;

    // `INSERTION` indicates an insertion.
    INSERTION = 2;

    // `DELETION` indicates a deletion.
    DELETION = 3;

    // `SUBSTITUTION` indicates a block substitution of
    // two or more nucleotides.
    SUBSTITUTION = 4;

    // `SNP` indicates a single nucleotide polymorphism.
    SNP = 5;

    // `STRUCTURAL` indicates a large structural variant,
    // including chromosomal fusions, inversions, etc.
    STRUCTURAL = 6;

    // `CNV` indicates a variation in copy number.
    CNV = 7;
  }

  // The effect of a variant on the coding sequence.
  enum Effect {
    EFFECT_UNSPECIFIED = 0;

    // `EFFECT_OTHER` should be used when no other Effect
    // will suffice.
    EFFECT_OTHER = 1;

    // `FRAMESHIFT` indicates a mutation in which the insertion or
    // deletion of nucleotides resulted in a frameshift change.
    FRAMESHIFT = 2;

    // `FRAME_PRESERVING_INDEL` indicates a mutation in which a
    // multiple of three nucleotides has been inserted or deleted, resulting
    // in no change to the reading frame of the coding sequence.
    FRAME_PRESERVING_INDEL = 3;

    // `SYNONYMOUS_SNP` indicates a single nucleotide polymorphism
    // mutation that results in no amino acid change.
    SYNONYMOUS_SNP = 4;

    // `NONSYNONYMOUS_SNP` indicates a single nucleotide
    // polymorphism mutation that results in an amino acid change.
    NONSYNONYMOUS_SNP = 5;

    // `STOP_GAIN` indicates a mutation that leads to the creation
    // of a stop codon at the variant site. Frameshift mutations creating
    // downstream stop codons do not count as `STOP_GAIN`.
    STOP_GAIN = 6;

    // `STOP_LOSS` indicates a mutation that eliminates a
    // stop codon at the variant site.
    STOP_LOSS = 7;

    // `SPLICE_SITE_DISRUPTION` indicates that this variant is
    // found in a splice site for the associated transcript, and alters the
    // normal splicing pattern.
    SPLICE_SITE_DISRUPTION = 8;
  }

  // The clinical significance of a variant. Adapted from the ClinVar
  // controlled vocabulary for clinical significance described at:
  // http://www.ncbi.nlm.nih.gov/clinvar/docs/clinsig/
  enum ClinicalSignificance {
    CLINICAL_SIGNIFICANCE_UNSPECIFIED = 0;

    // `CLINICAL_SIGNIFICANCE_OTHER` should be used when no other clinical
    // significance value will suffice.
    CLINICAL_SIGNIFICANCE_OTHER = 1;

    UNCERTAIN = 2;

    BENIGN = 3;

    LIKELY_BENIGN = 4;

    LIKELY_PATHOGENIC = 5;

    PATHOGENIC = 6;

    DRUG_RESPONSE = 7;

    HISTOCOMPATIBILITY = 8;

    CONFERS_SENSITIVITY = 9;

    RISK_FACTOR = 10;

    ASSOCIATION = 11;

    PROTECTIVE = 12;

    // `MULTIPLE_REPORTED` should be used when multiple clinical
    // significances are reported for a variant. The original clinical
    // significance values may be provided in the `info` field.
    MULTIPLE_REPORTED = 13;
  }

  // Type has been adapted from ClinVar's list of variant types.
  Type type = 1;

  // Effect of the variant on the coding sequence.
  Effect effect = 2;

  // The alternate allele for this variant. If multiple alternate alleles
  // exist at this location, create a separate variant for each one, as they
  // may represent distinct conditions.
  string alternate_bases = 3;

  // Google annotation ID of the gene affected by this variant. This should
  // be provided when the variant is created.
  string gene_id = 4;

  // Google annotation IDs of the transcripts affected by this variant. These
  // should be provided when the variant is created.
  repeated string transcript_ids = 5;

  // The set of conditions associated with this variant.
  // A condition describes the way a variant influences human health.
  repeated ClinicalCondition conditions = 6;

  // Describes the clinical significance of a variant.
  // It is adapted from the ClinVar controlled vocabulary for clinical
  // significance described at:
  // http://www.ncbi.nlm.nih.gov/clinvar/docs/clinsig/
  ClinicalSignificance clinical_significance = 7;
}

// A transcript represents the assertion that a particular region of the
// reference genome may be transcribed as RNA.
message Transcript {
  // A single exon of a transcript: its range on the reference sequence and,
  // optionally, its reading frame.
  message Exon {
    // The start position of the exon on this annotation's reference sequence,
    // 0-based inclusive. Note that this is relative to the reference start, and
    // *not* the containing annotation start.
    int64 start = 1;

    // The end position of the exon on this annotation's reference sequence,
    // 0-based exclusive. Note that this is relative to the reference start, and
    // *not* the containing annotation start.
    int64 end = 2;

    // The frame of this exon. Contains a value of 0, 1, or 2, which indicates
    // the offset of the first coding base of the exon within the reading frame
    // of the coding DNA sequence, if any. This field is dependent on the
    // strandedness of this annotation (see
    // [Annotation.reverse_strand][google.genomics.v1.Annotation.reverse_strand]).
    // For forward stranded annotations, this offset is relative to the
    // [exon.start][google.genomics.v1.Transcript.Exon.start]. For reverse
    // strand annotations, this offset is relative to the
    // [exon.end][google.genomics.v1.Transcript.Exon.end] `- 1`.
    //
    // Unset if this exon does not intersect the coding sequence. Upon creation
    // of a transcript, the frame must be populated for all or none of the
    // coding exons.
    google.protobuf.Int32Value frame = 3;
  }

  // The range of the coding sequence of a transcript on the reference
  // sequence.
  message CodingSequence {
    // The start of the coding sequence on this annotation's reference sequence,
    // 0-based inclusive. Note that this position is relative to the reference
    // start, and *not* the containing annotation start.
    int64 start = 1;

    // The end of the coding sequence on this annotation's reference sequence,
    // 0-based exclusive. Note that this position is relative to the reference
    // start, and *not* the containing annotation start.
    int64 end = 2;
  }

  // The annotation ID of the gene from which this transcript is transcribed.
  string gene_id = 1;

  // The <a href="http://en.wikipedia.org/wiki/Exon">exons</a> that compose
  // this transcript. This field should be unset for genomes where transcript
  // splicing does not occur, for example prokaryotes.
  //
  // Introns are regions of the transcript that are not included in the
  // spliced RNA product. Though not explicitly modeled here, intron ranges can
  // be deduced; all regions of this transcript that are not exons are introns.
  //
  // Exonic sequences do not necessarily code for a translational product
  // (amino acids). Only the regions of exons bounded by the
  // [codingSequence][google.genomics.v1.Transcript.coding_sequence] correspond
  // to coding DNA sequence.
  //
  // Exons are ordered by start position and may not overlap.
  repeated Exon exons = 2;

  // The range of the coding sequence for this transcript, if any. To determine
  // the exact ranges of coding sequence, intersect this range with those of the
  // [exons][google.genomics.v1.Transcript.exons], if any. If there are any
  // [exons][google.genomics.v1.Transcript.exons], the
  // [codingSequence][google.genomics.v1.Transcript.coding_sequence] must start
  // and end within them.
  //
  // Note that in some cases, the reference genome will not exactly match the
  // observed mRNA transcript e.g. due to variance in the source genome from
  // reference. In these cases,
  // [exon.frame][google.genomics.v1.Transcript.Exon.frame] will not necessarily
  // match the expected reference reading frame and coding exon reference bases
  // cannot necessarily be concatenated to produce the original transcript mRNA.
  CodingSequence coding_sequence = 3;
}

// An identifier assigned to a piece of data by an external source.
message ExternalId {
  // The name of the source of this data.
  string source_name = 1;

  // The id used by the source of this data.
  string id = 2;
}

// Request message for
// [CreateAnnotationSet][google.genomics.v1.AnnotationServiceV1.CreateAnnotationSet].
message CreateAnnotationSetRequest {
  // The annotation set to create.
  AnnotationSet annotation_set = 1;
}

// Request message for
// [GetAnnotationSet][google.genomics.v1.AnnotationServiceV1.GetAnnotationSet].
message GetAnnotationSetRequest {
  // The ID of the annotation set to be retrieved.
  string annotation_set_id = 1;
}

// Request message for
// [UpdateAnnotationSet][google.genomics.v1.AnnotationServiceV1.UpdateAnnotationSet].
message UpdateAnnotationSetRequest {
  // The ID of the annotation set to be updated.
  string annotation_set_id = 1;

  // The new annotation set.
  AnnotationSet annotation_set = 2;

  // An optional mask specifying which fields to update. Mutable fields are
  // [name][google.genomics.v1.AnnotationSet.name],
  // [source_uri][google.genomics.v1.AnnotationSet.source_uri], and
  // [info][google.genomics.v1.AnnotationSet.info]. If unspecified, all
  // mutable fields will be updated.
  google.protobuf.FieldMask update_mask = 3;
}

// Request message for
// [DeleteAnnotationSet][google.genomics.v1.AnnotationServiceV1.DeleteAnnotationSet].
message DeleteAnnotationSetRequest {
  // The ID of the annotation set to be deleted.
  string annotation_set_id = 1;
}

// Request message for
// [SearchAnnotationSets][google.genomics.v1.AnnotationServiceV1.SearchAnnotationSets].
message SearchAnnotationSetsRequest {
  // Required. The dataset IDs to search within. Caller must have `READ` access
  // to these datasets.
  repeated string dataset_ids = 1;

  // If specified, only annotation sets associated with the given reference set
  // are returned.
  string reference_set_id = 2;

  // Only return annotation sets for which a substring of the name matches this
  // string (case insensitive).
  string name = 3;

  // If specified, only annotation sets that have any of these types are
  // returned.
  repeated AnnotationType types = 4;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 5;

  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 128. The maximum value is 1024.
  int32 page_size = 6;
}

// Response message for
// [SearchAnnotationSets][google.genomics.v1.AnnotationServiceV1.SearchAnnotationSets].
message SearchAnnotationSetsResponse {
  // The matching annotation sets.
  repeated AnnotationSet annotation_sets = 1;

  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
}

// Request message for
// [CreateAnnotation][google.genomics.v1.AnnotationServiceV1.CreateAnnotation].
message CreateAnnotationRequest {
  // The annotation to be created.
  Annotation annotation = 1;
}

// Request message for
// [BatchCreateAnnotations][google.genomics.v1.AnnotationServiceV1.BatchCreateAnnotations].
message BatchCreateAnnotationsRequest {
  // The annotations to be created. At most 4096 can be specified in a single
  // request.
  repeated Annotation annotations = 1;

  // A unique request ID which enables the server to detect duplicated requests.
  // If provided, duplicated requests will result in the same response; if not
  // provided, duplicated requests may result in duplicated data. For a given
  // annotation set, callers should not reuse `request_id`s when writing
  // different batches of annotations - behavior in this case is undefined.
  // A common approach is to use a UUID. For batch jobs where worker crashes are
  // a possibility, consider using some unique variant of a worker or run ID.
  string request_id = 2;
}

// Response message for
// [BatchCreateAnnotations][google.genomics.v1.AnnotationServiceV1.BatchCreateAnnotations].
message BatchCreateAnnotationsResponse {
  // The outcome of creating one annotation from the batch.
  message Entry {
    // The creation status.
    google.rpc.Status status = 1;

    // The created annotation, if creation was successful.
    Annotation annotation = 2;
  }

  // The resulting per-annotation entries, ordered consistently with the
  // original request.
  repeated Entry entries = 1;
}

// Request message for
// [GetAnnotation][google.genomics.v1.AnnotationServiceV1.GetAnnotation].
message GetAnnotationRequest {
  // The ID of the annotation to be retrieved.
  string annotation_id = 1;
}

// Request message for
// [UpdateAnnotation][google.genomics.v1.AnnotationServiceV1.UpdateAnnotation].
message UpdateAnnotationRequest {
  // The ID of the annotation to be updated.
  string annotation_id = 1;

  // The new annotation.
  Annotation annotation = 2;

  // An optional mask specifying which fields to update. Mutable fields are
  // [name][google.genomics.v1.Annotation.name],
  // [variant][google.genomics.v1.Annotation.variant],
  // [transcript][google.genomics.v1.Annotation.transcript], and
  // [info][google.genomics.v1.Annotation.info]. If unspecified, all mutable
  // fields will be updated.
  google.protobuf.FieldMask update_mask = 3;
}

// Request message for
// [DeleteAnnotation][google.genomics.v1.AnnotationServiceV1.DeleteAnnotation].
message DeleteAnnotationRequest {
  // The ID of the annotation to be deleted.
  string annotation_id = 1;
}

// Request message for
// [SearchAnnotations][google.genomics.v1.AnnotationServiceV1.SearchAnnotations].
message SearchAnnotationsRequest {
  // Required. The annotation sets to search within. The caller must have
  // `READ` access to these annotation sets.
  // All queried annotation sets must have the same type.
  repeated string annotation_set_ids = 1;

  // Required. `reference_id` or `reference_name` must be set.
  oneof reference {
    // The ID of the reference to query.
    string reference_id = 2;

    // The name of the reference to query, within the reference set associated
    // with this query.
    string reference_name = 3;
  }

  // The start position of the range on the reference, 0-based inclusive. If
  // specified,
  // [referenceId][google.genomics.v1.SearchAnnotationsRequest.reference_id] or
  // [referenceName][google.genomics.v1.SearchAnnotationsRequest.reference_name]
  // must be specified. Defaults to 0.
  int64 start = 4;

  // The end position of the range on the reference, 0-based exclusive. If
  // specified,
  // [referenceId][google.genomics.v1.SearchAnnotationsRequest.reference_id] or
  // [referenceName][google.genomics.v1.SearchAnnotationsRequest.reference_name]
  // must be specified. Defaults to the length of the reference.
  int64 end = 5;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 6;

  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 256. The maximum value is 2048.
  int32 page_size = 7;
}

// Response message for
// [SearchAnnotations][google.genomics.v1.AnnotationServiceV1.SearchAnnotations].
message SearchAnnotationsResponse {
  // The matching annotations.
  repeated Annotation annotations = 1;

  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
}

// When an [Annotation][google.genomics.v1.Annotation] or
// [AnnotationSet][google.genomics.v1.AnnotationSet] is created, if `type` is
// not specified it will be set to `GENERIC`.
enum AnnotationType {
  ANNOTATION_TYPE_UNSPECIFIED = 0;

  // A `GENERIC` annotation type should be used when no other annotation
  // type will suffice. This represents an untyped annotation of the reference
  // genome.
  GENERIC = 1;

  // A `VARIANT` annotation type.
  VARIANT = 2;

  // A `GENE` annotation type represents the existence of a gene at the
  // associated reference coordinates. The start coordinate is typically the
  // gene's transcription start site and the end is typically the end of the
  // gene's last exon.
  GENE = 3;

  // A `TRANSCRIPT` annotation type represents the assertion that a
  // particular region of the reference genome may be transcribed as RNA.
  TRANSCRIPT = 4;
}