third_party/googleapis/google/genomics/v1/reads.proto


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468

// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.genomics.v1;

import "google/api/annotations.proto";
import "google/genomics/v1/range.proto";
import "google/genomics/v1/readalignment.proto";
import "google/genomics/v1/readgroupset.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";

option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "ReadsProto";
option java_package = "com.google.genomics.v1";


// Streaming access to reads. Exposes a single server-streaming RPC.
service StreamingReadService {
  // Streams back every read that matches the search request. Results are
  // ordered by reference name, then position, then ID.
  rpc StreamReads(StreamReadsRequest) returns (stream StreamReadsResponse) {
    option (google.api.http) = {
      post: "/v1/reads:stream"
      body: "*"
    };
  }
}

// The Readstore: a data store for DNA sequencing reads.
service ReadServiceV1 {
  // Asynchronously imports the provided information to create read group
  // sets.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // The caller must have WRITE permissions to the dataset.
  //
  // ## Notes on [BAM](https://samtools.github.io/hts-specs/SAMv1.pdf) import
  //
  // - Tags are converted to strings; tag types are not preserved
  // - Comments (`@CO`) in the input file header are not preserved
  // - The original header order of references (`@SQ`) is not preserved
  // - Any reverse stranded unmapped reads are reverse complemented, and
  // their qualities (also the "BQ" and "OQ" tags, if any) are reversed
  // - Unmapped reads are stripped of positional information (reference name
  // and position)
  rpc ImportReadGroupSets(ImportReadGroupSetsRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/readgroupsets:import"
      body: "*"
    };
  }

  // Writes a read group set out as a BAM file in Google Cloud Storage.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Currently an exported BAM file may differ in some respects from the BAM
  // file that was originally imported. The caveats are listed under
  // [ImportReadGroupSets][google.genomics.v1.ReadServiceV1.ImportReadGroupSets].
  rpc ExportReadGroupSet(ExportReadGroupSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/readgroupsets/{read_group_set_id}:export"
      body: "*"
    };
  }

  // Finds the read group sets that match the given criteria.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.searchReadGroupSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L135).
  rpc SearchReadGroupSets(SearchReadGroupSetsRequest)
      returns (SearchReadGroupSetsResponse) {
    option (google.api.http) = {
      post: "/v1/readgroupsets/search"
      body: "*"
    };
  }

  // Modifies an existing read group set.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Patch semantics are supported by this method.
  rpc UpdateReadGroupSet(UpdateReadGroupSetRequest) returns (ReadGroupSet) {
    option (google.api.http) = {
      patch: "/v1/readgroupsets/{read_group_set_id}"
      body: "read_group_set"
    };
  }

  // Removes a read group set.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc DeleteReadGroupSet(DeleteReadGroupSetRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/readgroupsets/{read_group_set_id}"
    };
  }

  // Fetches a single read group set, looked up by its ID.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc GetReadGroupSet(GetReadGroupSetRequest) returns (ReadGroupSet) {
    option (google.api.http) = {
      get: "/v1/readgroupsets/{read_group_set_id}"
    };
  }

  // Lists fixed width coverage buckets for a read group set. Each bucket
  // corresponds to a range of a reference sequence and summarizes coverage
  // information across that genomic range.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Coverage is defined as the number of reads which are aligned to a given
  // base in the reference sequence. Coverage buckets are available at several
  // precomputed bucket widths, enabling retrieval of various coverage 'zoom
  // levels'. The caller must have READ permissions for the target read group
  // set.
  rpc ListCoverageBuckets(ListCoverageBucketsRequest)
      returns (ListCoverageBucketsResponse) {
    option (google.api.http) = {
      get: "/v1/readgroupsets/{read_group_set_id}/coveragebuckets"
    };
  }

  // Returns a list of reads for one or more read group sets.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Reads search operates over a genomic coordinate space of reference sequence
  // & position defined over the reference sequences to which the requested
  // read group sets are aligned.
  //
  // If a target positional range is specified, search returns all reads whose
  // alignment to the reference genome overlaps the range. A query which
  // specifies only read group set IDs yields all reads in those read group
  // sets, including unmapped reads.
  //
  // All reads returned (including reads on subsequent pages) are ordered by
  // genomic coordinate (by reference sequence, then position). Reads with
  // equivalent genomic coordinates are returned in an unspecified order. This
  // order is consistent, such that two queries for the same content (regardless
  // of page size) yield reads in the same order across their respective streams
  // of paginated responses.
  //
  // Implements
  // [GlobalAllianceApi.searchReads](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L85).
  rpc SearchReads(SearchReadsRequest) returns (SearchReadsResponse) {
    option (google.api.http) = {
      post: "/v1/reads/search"
      body: "*"
    };
  }
}

// The read group set search request. Request message for
// [SearchReadGroupSets][google.genomics.v1.ReadServiceV1.SearchReadGroupSets].
message SearchReadGroupSetsRequest {
  // Restricts this query to read group sets within the given datasets. At least
  // one ID must be provided.
  repeated string dataset_ids = 1;

  // Only return read group sets for which a substring of the name matches this
  // string.
  string name = 3;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 2;

  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 256. The maximum value is 1024.
  int32 page_size = 4;
}

// The read group set search response. Response message for
// [SearchReadGroupSets][google.genomics.v1.ReadServiceV1.SearchReadGroupSets].
message SearchReadGroupSetsResponse {
  // The list of matching read group sets.
  repeated ReadGroupSet read_group_sets = 1;

  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
}

// The read group set import request. Request message for
// [ImportReadGroupSets][google.genomics.v1.ReadServiceV1.ImportReadGroupSets].
message ImportReadGroupSetsRequest {
  // Describes how imported read groups are partitioned into read group sets.
  enum PartitionStrategy {
    // Default value; no partition strategy was specified.
    PARTITION_STRATEGY_UNSPECIFIED = 0;

    // In most cases, this strategy yields one read group set per file. This is
    // the default behavior.
    //
    // Allocate one read group set per file per sample. For BAM files, read
    // groups are considered to share a sample if they have identical sample
    // names. Furthermore, all reads for each file which do not belong to a read
    // group, if any, will be grouped into a single read group set per-file.
    PER_FILE_PER_SAMPLE = 1;

    // Includes all read groups in all imported files into a single read group
    // set. Requires that the headers for all imported files are equivalent. All
    // reads which do not belong to a read group, if any, will be grouped into a
    // separate read group set.
    MERGE_ALL = 2;
  }

  // Required. The ID of the dataset these read group sets will belong to. The
  // caller must have WRITE permissions to this dataset.
  string dataset_id = 1;

  // The reference set to which the imported read group sets are aligned, if
  // any. The reference names of this reference set must be a superset of those
  // found in the imported file headers. If no reference set id is provided, a
  // best effort is made to associate with a matching reference set.
  string reference_set_id = 4;

  // A list of URIs pointing at [BAM
  // files](https://samtools.github.io/hts-specs/SAMv1.pdf)
  // in Google Cloud Storage.
  // Those URIs can include wildcards (*), but do not add or remove
  // matching files before import has completed.
  //
  // Note that Google Cloud Storage object listing is only eventually
  // consistent: files added may not be immediately visible to
  // everyone. Thus, if using a wildcard it is preferable not to start
  // the import immediately after the files are created.
  repeated string source_uris = 2;

  // The partition strategy describes how read groups are partitioned into read
  // group sets.
  PartitionStrategy partition_strategy = 5;
}

// The read group set import response.
//
// NOTE(review): the ImportReadGroupSets RPC returns a
// google.longrunning.Operation rather than this message directly; presumably
// this message is delivered as that operation's result -- confirm.
message ImportReadGroupSetsResponse {
  // IDs of the read group sets that were created.
  repeated string read_group_set_ids = 1;
}

// The read group set export request. Request message for
// [ExportReadGroupSet][google.genomics.v1.ReadServiceV1.ExportReadGroupSet].
message ExportReadGroupSetRequest {
  // Required. The Google Cloud project ID that owns this
  // export. The caller must have WRITE access to this project.
  string project_id = 1;

  // Required. A Google Cloud Storage URI for the exported BAM file.
  // The currently authenticated user must have write access to the new file.
  // An error will be returned if the URI already contains data.
  string export_uri = 2;

  // Required. The ID of the read group set to export. The caller must have
  // READ access to this read group set.
  string read_group_set_id = 3;

  // The reference names to export. If this is not specified, all reference
  // sequences, including unmapped reads, are exported.
  // Use `*` to export only unmapped reads.
  repeated string reference_names = 4;
}

// The read group set update request. Request message for
// [UpdateReadGroupSet][google.genomics.v1.ReadServiceV1.UpdateReadGroupSet].
message UpdateReadGroupSetRequest {
  // The ID of the read group set to be updated. The caller must have WRITE
  // permissions to the dataset associated with this read group set.
  string read_group_set_id = 1;

  // The new read group set data. See `updateMask` for details on mutability of
  // fields.
  ReadGroupSet read_group_set = 2;

  // An optional mask specifying which fields to update. Supported fields:
  //
  // * [name][google.genomics.v1.ReadGroupSet.name].
  // * [referenceSetId][google.genomics.v1.ReadGroupSet.reference_set_id].
  //
  // Leaving `updateMask` unset is equivalent to specifying all mutable
  // fields.
  google.protobuf.FieldMask update_mask = 3;
}

// The read group set delete request. Request message for
// [DeleteReadGroupSet][google.genomics.v1.ReadServiceV1.DeleteReadGroupSet].
message DeleteReadGroupSetRequest {
  // The ID of the read group set to be deleted. The caller must have WRITE
  // permissions to the dataset associated with this read group set.
  string read_group_set_id = 1;
}

// The read group set get request. Request message for
// [GetReadGroupSet][google.genomics.v1.ReadServiceV1.GetReadGroupSet].
message GetReadGroupSetRequest {
  // The ID of the read group set.
  string read_group_set_id = 1;
}

// The coverage bucket list request. Request message for
// [ListCoverageBuckets][google.genomics.v1.ReadServiceV1.ListCoverageBuckets].
message ListCoverageBucketsRequest {
  // Required. The ID of the read group set over which coverage is requested.
  string read_group_set_id = 1;

  // The name of the reference to query, within the reference set associated
  // with this query. Optional.
  string reference_name = 3;

  // The start position of the range on the reference, 0-based inclusive. If
  // specified, `referenceName` must also be specified. Defaults to 0.
  int64 start = 4;

  // The end position of the range on the reference, 0-based exclusive. If
  // specified, `referenceName` must also be specified. If unset or 0, defaults
  // to the length of the reference.
  int64 end = 5;

  // The desired width of each reported coverage bucket in base pairs. This
  // will be rounded down to the nearest precomputed bucket width; the value
  // of which is returned as `bucketWidth` in the response. Defaults
  // to infinity (each bucket spans an entire reference sequence) or the length
  // of the target range, if specified. The smallest precomputed
  // `bucketWidth` is currently 2048 base pairs; this is subject to
  // change.
  int64 target_bucket_width = 6;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 7;

  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 1024. The maximum value is 2048.
  int32 page_size = 8;
}

// A bucket over which read coverage has been precomputed. A bucket corresponds
// to a specific range of the reference sequence. Buckets are returned in
// `ListCoverageBucketsResponse.coverage_buckets`.
message CoverageBucket {
  // The genomic coordinate range spanned by this bucket.
  Range range = 1;

  // The average number of reads which are aligned to each individual
  // reference base in this bucket.
  float mean_coverage = 2;
}

// The coverage bucket list response. Response message for
// [ListCoverageBuckets][google.genomics.v1.ReadServiceV1.ListCoverageBuckets].
message ListCoverageBucketsResponse {
  // The length of each coverage bucket in base pairs. Note that buckets at the
  // end of a reference sequence may be shorter. This value is omitted if the
  // bucket width is infinity (the default behaviour, with no range or
  // `targetBucketWidth`).
  int64 bucket_width = 1;

  // The coverage buckets. The list of buckets is sparse; a bucket with 0
  // overlapping reads is not returned. A bucket never crosses more than one
  // reference sequence. Each bucket has width `bucketWidth`, unless
  // its end is the end of the reference sequence.
  repeated CoverageBucket coverage_buckets = 2;

  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 3;
}

// The read search request. Request message for
// [SearchReads][google.genomics.v1.ReadServiceV1.SearchReads].
message SearchReadsRequest {
  // The IDs of the read group sets within which to search for reads. All
  // specified read group sets must be aligned against a common set of reference
  // sequences; this defines the genomic coordinates for the query. Must specify
  // one of `readGroupSetIds` or `readGroupIds`.
  repeated string read_group_set_ids = 1;

  // The IDs of the read groups within which to search for reads. All specified
  // read groups must belong to the same read group sets. Must specify one of
  // `readGroupSetIds` or `readGroupIds`.
  repeated string read_group_ids = 5;

  // The reference sequence name, for example `chr1`, `1`, or `chrX`. If set to
  // `*`, only unmapped reads are returned. If unspecified, all reads (mapped
  // and unmapped) are returned.
  string reference_name = 7;

  // The start position of the range on the reference, 0-based inclusive. If
  // specified, `referenceName` must also be specified.
  int64 start = 8;

  // The end position of the range on the reference, 0-based exclusive. If
  // specified, `referenceName` must also be specified.
  int64 end = 9;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 3;

  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 256. The maximum value is 2048.
  int32 page_size = 4;
}

// The read search response. Response message for
// [SearchReads][google.genomics.v1.ReadServiceV1.SearchReads].
message SearchReadsResponse {
  // The list of matching alignments sorted by mapped genomic coordinate,
  // if any, ascending in position within the same reference. Unmapped reads,
  // which have no position, are returned contiguously and are sorted in
  // ascending lexicographic order by fragment name.
  repeated Read alignments = 1;

  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
}

// The stream reads request. Request message for
// [StreamReads][google.genomics.v1.StreamingReadService.StreamReads].
message StreamReadsRequest {
  // The Google Cloud project ID which will be billed
  // for this access. The caller must have WRITE access to this project.
  // Required.
  string project_id = 1;

  // The ID of the read group set from which to stream reads.
  string read_group_set_id = 2;

  // The reference sequence name, for example `chr1`,
  // `1`, or `chrX`. If set to `*`, only unmapped reads are
  // returned.
  string reference_name = 3;

  // The start position of the range on the reference, 0-based inclusive. If
  // specified, `referenceName` must also be specified.
  int64 start = 4;

  // The end position of the range on the reference, 0-based exclusive. If
  // specified, `referenceName` must also be specified.
  int64 end = 5;

  // Restricts results to a shard containing approximately `1/totalShards`
  // of the normal response payload for this query. Results from a sharded
  // request are disjoint from those returned by all queries which differ only
  // in their shard parameter. A shard may yield 0 results; this is especially
  // likely for large values of `totalShards`.
  //
  // Valid values are `[0, totalShards)`.
  int32 shard = 6;

  // Specifying `totalShards` causes a disjoint subset of the normal response
  // payload to be returned for each query with a unique `shard` parameter
  // specified. A best effort is made to yield equally sized shards. Sharding
  // can be used to distribute processing amongst workers, where each worker is
  // assigned a unique `shard` number and all workers specify the same
  // `totalShards` number. The union of reads returned for all sharded queries
  // `[0, totalShards)` is equal to those returned by a single unsharded query.
  //
  // Queries for different values of `totalShards` with common divisors will
  // share shard boundaries. For example, streaming `shard` 2 of 5
  // `totalShards` yields the same results as streaming `shard`s 4 and 5 of 10
  // `totalShards`. This property can be leveraged for adaptive retries.
  int32 total_shards = 7;
}

// The stream reads response. One message of the response stream returned by
// [StreamReads][google.genomics.v1.StreamingReadService.StreamReads].
message StreamReadsResponse {
  // The reads carried by this message of the stream.
  repeated Read alignments = 1;
}