diff options
Diffstat (limited to 'third_party/googleapis/google/genomics/v1/reads.proto')
-rw-r--r-- | third_party/googleapis/google/genomics/v1/reads.proto | 468 |
1 files changed, 468 insertions, 0 deletions
diff --git a/third_party/googleapis/google/genomics/v1/reads.proto b/third_party/googleapis/google/genomics/v1/reads.proto new file mode 100644 index 0000000000..f574707e39 --- /dev/null +++ b/third_party/googleapis/google/genomics/v1/reads.proto @@ -0,0 +1,468 @@ +// Copyright 2016 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package google.genomics.v1; + +import "google/api/annotations.proto"; +import "google/genomics/v1/range.proto"; +import "google/genomics/v1/readalignment.proto"; +import "google/genomics/v1/readgroupset.proto"; +import "google/longrunning/operations.proto"; +import "google/protobuf/empty.proto"; +import "google/protobuf/field_mask.proto"; + +option cc_enable_arenas = true; +option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics"; +option java_multiple_files = true; +option java_outer_classname = "ReadsProto"; +option java_package = "com.google.genomics.v1"; + + +service StreamingReadService { + // Returns a stream of all the reads matching the search request, ordered + // by reference name, position, and ID. + rpc StreamReads(StreamReadsRequest) returns (stream StreamReadsResponse) { + option (google.api.http) = { post: "/v1/reads:stream" body: "*" }; + } +} + +// The Readstore. A data store for DNA sequencing Reads. +service ReadServiceV1 { + // Creates read group sets by asynchronously importing the provided + // information. + // + // For the definitions of read group sets and other genomics resources, see + // [Fundamentals of Google + // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) + // + // The caller must have WRITE permissions to the dataset. + // + // ## Notes on [BAM](https://samtools.github.io/hts-specs/SAMv1.pdf) import + // + // - Tags will be converted to strings - tag types are not preserved + // - Comments (`@CO`) in the input file header will not be preserved + // - Original header order of references (`@SQ`) will not be preserved + // - Any reverse stranded unmapped reads will be reverse complemented, and + // their qualities (also the "BQ" and "OQ" tags, if any) will be reversed + // - Unmapped reads will be stripped of positional information (reference name + // and position) + rpc ImportReadGroupSets(ImportReadGroupSetsRequest) returns (google.longrunning.Operation) { + option (google.api.http) = { post: "/v1/readgroupsets:import" body: "*" }; + } + + // Exports a read group set to a BAM file in Google Cloud Storage. + // + // For the definitions of read group sets and other genomics resources, see + // [Fundamentals of Google + // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) + // + // Note that currently there may be some differences between exported BAM + // files and the original BAM file at the time of import. See + // [ImportReadGroupSets][google.genomics.v1.ReadServiceV1.ImportReadGroupSets] + // for caveats. + rpc ExportReadGroupSet(ExportReadGroupSetRequest) returns (google.longrunning.Operation) { + option (google.api.http) = { post: "/v1/readgroupsets/{read_group_set_id}:export" body: "*" }; + } + + // Searches for read group sets matching the criteria. + // + // For the definitions of read group sets and other genomics resources, see + // [Fundamentals of Google + // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) + // + // Implements + // [GlobalAllianceApi.searchReadGroupSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L135). + rpc SearchReadGroupSets(SearchReadGroupSetsRequest) returns (SearchReadGroupSetsResponse) { + option (google.api.http) = { post: "/v1/readgroupsets/search" body: "*" }; + } + + // Updates a read group set. + // + // For the definitions of read group sets and other genomics resources, see + // [Fundamentals of Google + // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) + // + // This method supports patch semantics. + rpc UpdateReadGroupSet(UpdateReadGroupSetRequest) returns (ReadGroupSet) { + option (google.api.http) = { patch: "/v1/readgroupsets/{read_group_set_id}" body: "read_group_set" }; + } + + // Deletes a read group set. + // + // For the definitions of read group sets and other genomics resources, see + // [Fundamentals of Google + // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) + rpc DeleteReadGroupSet(DeleteReadGroupSetRequest) returns (google.protobuf.Empty) { + option (google.api.http) = { delete: "/v1/readgroupsets/{read_group_set_id}" }; + } + + // Gets a read group set by ID. + // + // For the definitions of read group sets and other genomics resources, see + // [Fundamentals of Google + // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) + rpc GetReadGroupSet(GetReadGroupSetRequest) returns (ReadGroupSet) { + option (google.api.http) = { get: "/v1/readgroupsets/{read_group_set_id}" }; + } + + // Lists fixed width coverage buckets for a read group set, each of which + // correspond to a range of a reference sequence. Each bucket summarizes + // coverage information across its corresponding genomic range. + // + // For the definitions of read group sets and other genomics resources, see + // [Fundamentals of Google + // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) + // + // Coverage is defined as the number of reads which are aligned to a given + // base in the reference sequence. Coverage buckets are available at several + // precomputed bucket widths, enabling retrieval of various coverage 'zoom + // levels'. The caller must have READ permissions for the target read group + // set. + rpc ListCoverageBuckets(ListCoverageBucketsRequest) returns (ListCoverageBucketsResponse) { + option (google.api.http) = { get: "/v1/readgroupsets/{read_group_set_id}/coveragebuckets" }; + } + + // Gets a list of reads for one or more read group sets. + // + // For the definitions of read group sets and other genomics resources, see + // [Fundamentals of Google + // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics) + // + // Reads search operates over a genomic coordinate space of reference sequence + // & position defined over the reference sequences to which the requested + // read group sets are aligned. + // + // If a target positional range is specified, search returns all reads whose + // alignment to the reference genome overlap the range. A query which + // specifies only read group set IDs yields all reads in those read group + // sets, including unmapped reads. + // + // All reads returned (including reads on subsequent pages) are ordered by + // genomic coordinate (by reference sequence, then position). Reads with + // equivalent genomic coordinates are returned in an unspecified order. This + // order is consistent, such that two queries for the same content (regardless + // of page size) yield reads in the same order across their respective streams + // of paginated responses. + // + // Implements + // [GlobalAllianceApi.searchReads](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L85). + rpc SearchReads(SearchReadsRequest) returns (SearchReadsResponse) { + option (google.api.http) = { post: "/v1/reads/search" body: "*" }; + } +} + +// The read group set search request. +message SearchReadGroupSetsRequest { + // Restricts this query to read group sets within the given datasets. At least + // one ID must be provided. + repeated string dataset_ids = 1; + + // Only return read group sets for which a substring of the name matches this + // string. + string name = 3; + + // The continuation token, which is used to page through large result sets. + // To get the next page of results, set this parameter to the value of + // `nextPageToken` from the previous response. + string page_token = 2; + + // The maximum number of results to return in a single page. If unspecified, + // defaults to 256. The maximum value is 1024. + int32 page_size = 4; +} + +// The read group set search response. +message SearchReadGroupSetsResponse { + // The list of matching read group sets. + repeated ReadGroupSet read_group_sets = 1; + + // The continuation token, which is used to page through large result sets. + // Provide this value in a subsequent request to return the next page of + // results. This field will be empty if there aren't any additional results. + string next_page_token = 2; +} + +// The read group set import request. +message ImportReadGroupSetsRequest { + enum PartitionStrategy { + PARTITION_STRATEGY_UNSPECIFIED = 0; + + // In most cases, this strategy yields one read group set per file. This is + // the default behavior. + // + // Allocate one read group set per file per sample. For BAM files, read + // groups are considered to share a sample if they have identical sample + // names. Furthermore, all reads for each file which do not belong to a read + // group, if any, will be grouped into a single read group set per-file. + PER_FILE_PER_SAMPLE = 1; + + // Includes all read groups in all imported files into a single read group + // set. Requires that the headers for all imported files are equivalent. All + // reads which do not belong to a read group, if any, will be grouped into a + // separate read group set. + MERGE_ALL = 2; + } + + // Required. The ID of the dataset these read group sets will belong to. The + // caller must have WRITE permissions to this dataset. + string dataset_id = 1; + + // The reference set to which the imported read group sets are aligned to, if + // any. The reference names of this reference set must be a superset of those + // found in the imported file headers. If no reference set id is provided, a + // best effort is made to associate with a matching reference set. + string reference_set_id = 4; + + // A list of URIs pointing at [BAM + // files](https://samtools.github.io/hts-specs/SAMv1.pdf) + // in Google Cloud Storage. + // Those URIs can include wildcards (*), but do not add or remove + // matching files before import has completed. + // + // Note that Google Cloud Storage object listing is only eventually + // consistent: files added may be not be immediately visible to + // everyone. Thus, if using a wildcard it is preferable not to start + // the import immediately after the files are created. + repeated string source_uris = 2; + + // The partition strategy describes how read groups are partitioned into read + // group sets. + PartitionStrategy partition_strategy = 5; +} + +// The read group set import response. +message ImportReadGroupSetsResponse { + // IDs of the read group sets that were created. + repeated string read_group_set_ids = 1; +} + +// The read group set export request. +message ExportReadGroupSetRequest { + // Required. The Google Cloud project ID that owns this + // export. The caller must have WRITE access to this project. + string project_id = 1; + + // Required. A Google Cloud Storage URI for the exported BAM file. + // The currently authenticated user must have write access to the new file. + // An error will be returned if the URI already contains data. + string export_uri = 2; + + // Required. The ID of the read group set to export. The caller must have + // READ access to this read group set. + string read_group_set_id = 3; + + // The reference names to export. If this is not specified, all reference + // sequences, including unmapped reads, are exported. + // Use `*` to export only unmapped reads. + repeated string reference_names = 4; +} + +message UpdateReadGroupSetRequest { + // The ID of the read group set to be updated. The caller must have WRITE + // permissions to the dataset associated with this read group set. + string read_group_set_id = 1; + + // The new read group set data. See `updateMask` for details on mutability of + // fields. + ReadGroupSet read_group_set = 2; + + // An optional mask specifying which fields to update. Supported fields: + // + // * [name][google.genomics.v1.ReadGroupSet.name]. + // * [referenceSetId][google.genomics.v1.ReadGroupSet.reference_set_id]. + // + // Leaving `updateMask` unset is equivalent to specifying all mutable + // fields. + google.protobuf.FieldMask update_mask = 3; +} + +message DeleteReadGroupSetRequest { + // The ID of the read group set to be deleted. The caller must have WRITE + // permissions to the dataset associated with this read group set. + string read_group_set_id = 1; +} + +message GetReadGroupSetRequest { + // The ID of the read group set. + string read_group_set_id = 1; +} + +message ListCoverageBucketsRequest { + // Required. The ID of the read group set over which coverage is requested. + string read_group_set_id = 1; + + // The name of the reference to query, within the reference set associated + // with this query. Optional. + string reference_name = 3; + + // The start position of the range on the reference, 0-based inclusive. If + // specified, `referenceName` must also be specified. Defaults to 0. + int64 start = 4; + + // The end position of the range on the reference, 0-based exclusive. If + // specified, `referenceName` must also be specified. If unset or 0, defaults + // to the length of the reference. + int64 end = 5; + + // The desired width of each reported coverage bucket in base pairs. This + // will be rounded down to the nearest precomputed bucket width; the value + // of which is returned as `bucketWidth` in the response. Defaults + // to infinity (each bucket spans an entire reference sequence) or the length + // of the target range, if specified. The smallest precomputed + // `bucketWidth` is currently 2048 base pairs; this is subject to + // change. + int64 target_bucket_width = 6; + + // The continuation token, which is used to page through large result sets. + // To get the next page of results, set this parameter to the value of + // `nextPageToken` from the previous response. + string page_token = 7; + + // The maximum number of results to return in a single page. If unspecified, + // defaults to 1024. The maximum value is 2048. + int32 page_size = 8; +} + +// A bucket over which read coverage has been precomputed. A bucket corresponds +// to a specific range of the reference sequence. +message CoverageBucket { + // The genomic coordinate range spanned by this bucket. + Range range = 1; + + // The average number of reads which are aligned to each individual + // reference base in this bucket. + float mean_coverage = 2; +} + +message ListCoverageBucketsResponse { + // The length of each coverage bucket in base pairs. Note that buckets at the + // end of a reference sequence may be shorter. This value is omitted if the + // bucket width is infinity (the default behaviour, with no range or + // `targetBucketWidth`). + int64 bucket_width = 1; + + // The coverage buckets. The list of buckets is sparse; a bucket with 0 + // overlapping reads is not returned. A bucket never crosses more than one + // reference sequence. Each bucket has width `bucketWidth`, unless + // its end is the end of the reference sequence. + repeated CoverageBucket coverage_buckets = 2; + + // The continuation token, which is used to page through large result sets. + // Provide this value in a subsequent request to return the next page of + // results. This field will be empty if there aren't any additional results. + string next_page_token = 3; +} + +// The read search request. +message SearchReadsRequest { + // The IDs of the read groups sets within which to search for reads. All + // specified read group sets must be aligned against a common set of reference + // sequences; this defines the genomic coordinates for the query. Must specify + // one of `readGroupSetIds` or `readGroupIds`. + repeated string read_group_set_ids = 1; + + // The IDs of the read groups within which to search for reads. All specified + // read groups must belong to the same read group sets. Must specify one of + // `readGroupSetIds` or `readGroupIds`. + repeated string read_group_ids = 5; + + // The reference sequence name, for example `chr1`, `1`, or `chrX`. If set to + // `*`, only unmapped reads are returned. If unspecified, all reads (mapped + // and unmapped) are returned. + string reference_name = 7; + + // The start position of the range on the reference, 0-based inclusive. If + // specified, `referenceName` must also be specified. + int64 start = 8; + + // The end position of the range on the reference, 0-based exclusive. If + // specified, `referenceName` must also be specified. + int64 end = 9; + + // The continuation token, which is used to page through large result sets. + // To get the next page of results, set this parameter to the value of + // `nextPageToken` from the previous response. + string page_token = 3; + + // The maximum number of results to return in a single page. If unspecified, + // defaults to 256. The maximum value is 2048. + int32 page_size = 4; +} + +// The read search response. +message SearchReadsResponse { + // The list of matching alignments sorted by mapped genomic coordinate, + // if any, ascending in position within the same reference. Unmapped reads, + // which have no position, are returned contiguously and are sorted in + // ascending lexicographic order by fragment name. + repeated Read alignments = 1; + + // The continuation token, which is used to page through large result sets. + // Provide this value in a subsequent request to return the next page of + // results. This field will be empty if there aren't any additional results. + string next_page_token = 2; +} + +// The stream reads request. +message StreamReadsRequest { + // The Google Cloud project ID which will be billed + // for this access. The caller must have WRITE access to this project. + // Required. + string project_id = 1; + + // The ID of the read group set from which to stream reads. + string read_group_set_id = 2; + + // The reference sequence name, for example `chr1`, + // `1`, or `chrX`. If set to *, only unmapped reads are + // returned. + string reference_name = 3; + + // The start position of the range on the reference, 0-based inclusive. If + // specified, `referenceName` must also be specified. + int64 start = 4; + + // The end position of the range on the reference, 0-based exclusive. If + // specified, `referenceName` must also be specified. + int64 end = 5; + + // Restricts results to a shard containing approximately `1/totalShards` + // of the normal response payload for this query. Results from a sharded + // request are disjoint from those returned by all queries which differ only + // in their shard parameter. A shard may yield 0 results; this is especially + // likely for large values of `totalShards`. + // + // Valid values are `[0, totalShards)`. + int32 shard = 6; + + // Specifying `totalShards` causes a disjoint subset of the normal response + // payload to be returned for each query with a unique `shard` parameter + // specified. A best effort is made to yield equally sized shards. Sharding + // can be used to distribute processing amongst workers, where each worker is + // assigned a unique `shard` number and all workers specify the same + // `totalShards` number. The union of reads returned for all sharded queries + // `[0, totalShards)` is equal to those returned by a single unsharded query. + // + // Queries for different values of `totalShards` with common divisors will + // share shard boundaries. For example, streaming `shard` 2 of 5 + // `totalShards` yields the same results as streaming `shard`s 4 and 5 of 10 + // `totalShards`. This property can be leveraged for adaptive retries. + int32 total_shards = 7; +} + +message StreamReadsResponse { + repeated Read alignments = 1; +} |