// Source: third_party/googleapis/google/genomics/v1alpha2/pipelines.proto

// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This file is written in proto3 syntax.
syntax = "proto3";

// Every message and service below is declared in this package.
package google.genomics.v1alpha2;

// Provides the `google.api.http` option used on the service RPCs.
import "google/api/annotations.proto";
// Provides `google.longrunning.Operation`, returned by RunPipeline.
import "google/longrunning/operations.proto";
// Provides `google.protobuf.Duration`.
import "google/protobuf/duration.proto";
// Provides `google.protobuf.Empty`.
import "google/protobuf/empty.proto";
// Provides `google.protobuf.Timestamp`.
import "google/protobuf/timestamp.proto";
// Provides `google.rpc.Code`.
import "google/rpc/code.proto";

// Code generation options. These are part of the published contract of this
// file; changing any of them moves or renames generated code.
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1alpha2;genomics";
option java_multiple_files = true;
option java_outer_classname = "PipelinesProto";
// NOTE(review): the Java package ends in `v1a` while the proto package is
// `v1alpha2`. Presumably historical; confirm before touching it.
option java_package = "com.google.genomics.v1a";


// Service for creating, managing, and running genomics pipelines.
service PipelinesV1Alpha2 {
  // Registers a pipeline so that it can be run later. The request carries a
  // Pipeline with every field except `pipelineId` populated; the response is
  // that same pipeline with `pipelineId` filled in. The returned id can then
  // be used to run the pipeline.
  //
  // Caller must have WRITE permission to the project.
  rpc CreatePipeline(CreatePipelineRequest) returns (Pipeline) {
    option (google.api.http) = {
      post: "/v1alpha2/pipelines"
      body: "pipeline"
    };
  }

  // Runs a pipeline. When the request specifies `pipelineId`, a saved
  // pipeline is run. When it specifies `ephemeralPipeline`, that pipeline is
  // run once and no copy of it is saved.
  //
  // The caller must have READ permission to the project where the pipeline
  // is stored and WRITE permission to the project where the pipeline will be
  // run, as VMs will be created and storage will be used.
  rpc RunPipeline(RunPipelineRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1alpha2/pipelines:run"
      body: "*"
    };
  }

  // Looks up a single pipeline by its ID.
  //
  // Caller must have READ permission to the project.
  rpc GetPipeline(GetPipelineRequest) returns (Pipeline) {
    option (google.api.http) = {
      get: "/v1alpha2/pipelines/{pipeline_id}"
    };
  }

  // Lists pipelines.
  //
  // Caller must have READ permission to the project.
  rpc ListPipelines(ListPipelinesRequest) returns (ListPipelinesResponse) {
    option (google.api.http) = {
      get: "/v1alpha2/pipelines"
    };
  }

  // Deletes the pipeline with the given ID.
  //
  // Caller must have WRITE permission to the project.
  rpc DeletePipeline(DeletePipelineRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1alpha2/pipelines/{pipeline_id}"
    };
  }

  // Gets controller configuration information. Intended only for VMs created
  // by the Pipelines Service; end users should not call it.
  rpc GetControllerConfig(GetControllerConfigRequest)
      returns (ControllerConfig) {
    option (google.api.http) = {
      get: "/v1alpha2/pipelines:getControllerConfig"
    };
  }

  // Sets the status of a given operation. Any new timestamps (as determined
  // by description) are appended to TimestampEvents. Intended only for VMs
  // created by the Pipelines Service; end users should not call it.
  rpc SetOperationStatus(SetOperationStatusRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      put: "/v1alpha2/pipelines:setOperationStatus"
      body: "*"
    };
  }
}

// Describes a Compute Engine resource that is being managed by a running
// [pipeline][google.genomics.v1alpha2.Pipeline].
message ComputeEngine {
  // The name of the instance on which the operation is running.
  string instance_name = 1;

  // The availability zone in which the instance resides.
  string zone = 2;

  // The machine type of the instance.
  string machine_type = 3;

  // The names of the disks that were created for this pipeline.
  repeated string disk_names = 4;
}

// Runtime metadata that will be populated in the
// [runtimeMetadata][google.genomics.v1.OperationMetadata.runtime_metadata]
// field of the Operation associated with a RunPipeline execution.
message RuntimeMetadata {
  // Execution information specific to Google Compute Engine, such as the
  // instance, zone, machine type, and disks in use.
  ComputeEngine compute_engine = 1;
}

// The pipeline object. Represents a transformation from a set of input
// parameters to a set of output parameters. The transformation is defined
// as a docker image and command to run within that image. Each pipeline
// is run on a Google Compute Engine VM. A pipeline can be created with the
// `create` method and then later run with the `run` method, or a pipeline can
// be defined and run all at once with the `run` method.
//
// NOTE(review): field number 4 is not used by this message. Confirm whether
// it belonged to a removed field and should be declared `reserved`.
message Pipeline {
  // Required. The project in which to create the pipeline. The caller must have
  // WRITE access.
  string project_id = 1;

  // Required. A user specified pipeline name that does not have to be unique.
  // This name can be used for filtering Pipelines in ListPipelines.
  string name = 2;

  // User-specified description.
  string description = 3;

  // Input parameters of the pipeline.
  repeated PipelineParameter input_parameters = 8;

  // Output parameters of the pipeline.
  repeated PipelineParameter output_parameters = 9;

  // Required. The executor indicates in which environment the pipeline runs.
  // `docker` is the only executor defined in this file.
  oneof executor {
    // Specifies the docker run information.
    DockerExecutor docker = 5;
  }

  // Required. Specifies resource requirements for the pipeline run.
  // Required fields:
  //
  // *
  // [minimumCpuCores][google.genomics.v1alpha2.PipelineResources.minimum_cpu_cores]
  //
  // *
  // [minimumRamGb][google.genomics.v1alpha2.PipelineResources.minimum_ram_gb]
  PipelineResources resources = 6;

  // Unique pipeline id that is generated by the service when CreatePipeline
  // is called. Cannot be specified in the Pipeline used in the
  // CreatePipelineRequest, and will be populated in the response to
  // CreatePipeline and all subsequent Get and List calls. Indicates that the
  // service has registered this pipeline.
  string pipeline_id = 7;
}

// The request to create a pipeline. The pipeline field here should not have
// `pipelineId` populated, as that will be populated by the server.
message CreatePipelineRequest {
  // The pipeline to create. Should not have `pipelineId` populated. This
  // field is the HTTP request body of the CreatePipeline RPC.
  Pipeline pipeline = 1;
}

// The pipeline run arguments.
message RunPipelineArgs {
  // Required. The project in which to run the pipeline. The caller must have
  // WRITER access to all Google Cloud services and resources (e.g. Google
  // Compute Engine) that will be used.
  string project_id = 1;

  // Pipeline input arguments; keys are defined in the pipeline documentation.
  // All input parameters that do not have default values must be specified.
  // If parameters with defaults are specified here, the defaults will be
  // overridden.
  map<string, string> inputs = 2;

  // Pipeline output arguments; keys are defined in the pipeline
  // documentation. All output parameters without default values
  // must be specified. If parameters with defaults are specified
  // here, the defaults will be overridden.
  map<string, string> outputs = 3;

  // The Google Cloud Service Account that will be used to access data and
  // services. By default, the compute service account associated with
  // `projectId` is used.
  ServiceAccount service_account = 4;

  // Deprecated: use `labels` instead. Client-specified pipeline operation
  // identifier.
  //
  // Marked with the `deprecated` field option so that generated code carries
  // the deprecation too; the wire format is unaffected.
  string client_id = 5 [deprecated = true];

  // Specifies resource requirements/overrides for the pipeline run.
  PipelineResources resources = 6;

  // Required. Logging options. Used by the service to communicate results
  // to the user.
  LoggingOptions logging = 7;

  // How long to keep the VM up after a failure (for example docker command
  // failed, copying input or output files failed, etc). While the VM is up, one
  // can ssh into the VM to debug. Default is 0; maximum allowed value is 1 day.
  google.protobuf.Duration keep_vm_alive_on_failure_duration = 8;

  // Labels to apply to this pipeline run. Labels will also be applied to
  // compute resources (VM, disks) created by this pipeline run. When listing
  // operations, operations can be [filtered by labels]
  // [google.longrunning.ListOperationsRequest.filter].
  // Label keys may not be empty; label values may be empty. Non-empty labels
  // must be 1-63 characters long, and comply with [RFC1035]
  // (https://www.ietf.org/rfc/rfc1035.txt).
  // Specifically, the name must be 1-63 characters long and match the regular
  // expression `[a-z]([-a-z0-9]*[a-z0-9])?` which means the first
  // character must be a lowercase letter, and all following characters must be
  // a dash, lowercase letter, or digit, except the last character, which cannot
  // be a dash.
  map<string, string> labels = 9;
}

// The request to run a pipeline. If `pipelineId` is specified, it
// refers to a saved pipeline created with CreatePipeline and set as
// the `pipelineId` of the returned Pipeline object. If
// `ephemeralPipeline` is specified, that pipeline is run once
// with the given args and not saved. It is an error to specify both
// `pipelineId` and `ephemeralPipeline`. `pipelineArgs`
// must be specified.
message RunPipelineRequest {
  // Selects the pipeline to run. Because the two choices are members of a
  // `oneof`, at most one of them can be set in a message.
  oneof pipeline {
    // The already created pipeline to run.
    string pipeline_id = 1;

    // A new pipeline object to run once and then delete.
    Pipeline ephemeral_pipeline = 2;
  }

  // Required. The arguments to use when running this pipeline.
  RunPipelineArgs pipeline_args = 3;
}

// A request to get a saved pipeline by id.
message GetPipelineRequest {
  // The id of the pipeline to retrieve. Caller must have READ access to the
  // project in which this pipeline is defined.
  string pipeline_id = 1;
}

// A request to list pipelines in a given project. Pipelines can be
// filtered by name using `namePrefix`: all pipelines with names that
// begin with `namePrefix` will be returned. Uses standard pagination:
// `pageSize` indicates how many pipelines to return, and
// `pageToken` comes from a previous ListPipelinesResponse to
// indicate offset.
message ListPipelinesRequest {
  // Required. The name of the project to search for pipelines. Caller
  // must have READ access to this project.
  string project_id = 1;

  // Pipelines with names that match this prefix should be
  // returned. If unspecified, all pipelines in the project, up to
  // `pageSize`, will be returned.
  string name_prefix = 2;

  // Number of pipelines to return at once. Defaults to 256, and max
  // is 2048.
  int32 page_size = 3;

  // Token to use to indicate where to start getting results; taken from
  // `nextPageToken` of a previous ListPipelinesResponse.
  // If unspecified, returns the first page of results.
  string page_token = 4;
}

// The response of ListPipelines. Contains at most `pageSize`
// pipelines. If it contains `pageSize` pipelines, and more pipelines
// exist, then `nextPageToken` will be populated and should be
// used as the `pageToken` argument to a subsequent ListPipelines
// request.
message ListPipelinesResponse {
  // The matched pipelines.
  repeated Pipeline pipelines = 1;

  // The token to pass as `pageToken` in a subsequent ListPipelines request
  // to get the next page of results.
  string next_page_token = 2;
}

// The request to delete a saved pipeline by ID.
message DeletePipelineRequest {
  // The id of the pipeline to delete. Caller must have WRITE access to the
  // project in which this pipeline is defined.
  string pipeline_id = 1;
}

// Request to get controller configuration. Should only be used
// by VMs created by the Pipelines Service and not by end users.
message GetControllerConfigRequest {
  // The operation to retrieve controller configuration for.
  string operation_id = 1;

  // NOTE(review): undocumented in this file. Presumably a token the service
  // uses to validate that the caller is the VM it created; confirm.
  uint64 validation_token = 2;
}

// Stores the information that the controller will fetch from the
// server in order to run. Should only be used by VMs created by the
// Pipelines Service and not by end users.
//
// NOTE(review): the fields of this message are undocumented in this file.
// The per-field comments below are inferred from field names and types only
// and must be confirmed against the service implementation.
message ControllerConfig {
  // Wraps a list of strings so that it can be used as a map value; map
  // values cannot themselves be `repeated`.
  message RepeatedString {
    // The wrapped strings.
    repeated string values = 1;
  }

  // Presumably the docker image to run; confirm.
  string image = 1;

  // Presumably the command to run inside the image; confirm.
  string cmd = 2;

  // Presumably the Google Cloud Storage path that logs are written to;
  // confirm.
  string gcs_log_path = 3;

  // Presumably the Compute Engine machine type of the VM; confirm.
  string machine_type = 4;

  // String-to-string map; presumably variables passed to the pipeline.
  // Key and value semantics are not shown here; confirm.
  map<string, string> vars = 5;

  // String-to-string map; presumably describes the disks in use.
  // Key and value semantics are not shown here; confirm.
  map<string, string> disks = 6;

  // Map from string to a list of strings; presumably Google Cloud Storage
  // locations to copy from. Key semantics are not shown here; confirm.
  map<string, RepeatedString> gcs_sources = 7;

  // Map from string to a list of strings; presumably Google Cloud Storage
  // locations to copy to. Key semantics are not shown here; confirm.
  map<string, RepeatedString> gcs_sinks = 8;
}

// Describes a single major event in job execution together with the time at
// which it occurred.
message TimestampEvent {
  // String indicating the type of event.
  string description = 1;

  // The time this event occurred.
  google.protobuf.Timestamp timestamp = 2;
}

// Request to set operation status. Should only be used by VMs
// created by the Pipelines Service and not by end users.
//
// NOTE(review): the fields of this message are undocumented in this file.
// The comments below are inferred from field names, types, and the
// SetOperationStatus RPC description; confirm against the service.
message SetOperationStatusRequest {
  // The operation whose status is being set.
  string operation_id = 1;

  // Events to record for the operation. The SetOperationStatus RPC states
  // that any new timestamps (as determined by description) are appended.
  repeated TimestampEvent timestamp_events = 2;

  // Presumably the status code describing the outcome of the operation;
  // confirm.
  google.rpc.Code error_code = 3;

  // Presumably a human-readable message accompanying `error_code`; confirm.
  string error_message = 4;

  // Presumably a token the service uses to validate that the caller is the
  // VM it created; confirm.
  uint64 validation_token = 5;
}

// A Google Cloud Service Account.
message ServiceAccount {
  // Email address of the service account. Defaults to `default`,
  // which uses the compute service account associated with the project.
  string email = 1;

  // List of scopes to be enabled for this service account on the VM.
  // The following scopes are automatically included and do not need to be
  // listed here:
  //
  // * https://www.googleapis.com/auth/compute
  // * https://www.googleapis.com/auth/devstorage.full_control
  // * https://www.googleapis.com/auth/genomics
  // * https://www.googleapis.com/auth/logging.write
  // * https://www.googleapis.com/auth/monitoring.write
  repeated string scopes = 2;
}

// The logging options for the pipeline run.
message LoggingOptions {
  // The location in Google Cloud Storage to which the pipeline logs
  // will be copied. Can be specified in one of two ways:
  //
  // * As a fully qualified directory path, in which case logs will be output
  //   with a unique identifier as the filename in that directory.
  // * As a fully specified path, which must end in `.log`, in which case
  //   that path will be used, and the user must ensure that logs are not
  //   overwritten.
  //
  // Stdout and stderr logs from the run are also
  // generated and output as `-stdout.log` and `-stderr.log`.
  string gcs_path = 1;
}

// The system resources for the pipeline run.
message PipelineResources {
  // A Google Compute Engine disk resource specification.
  //
  // NOTE(review): field numbers 5 and 7 are not used by this message. Confirm
  // whether they belonged to removed fields and should be declared
  // `reserved`.
  message Disk {
    // The types of disks that may be attached to VMs.
    enum Type {
      // Default disk type. Use one of the other options below.
      TYPE_UNSPECIFIED = 0;

      // Specifies a Google Compute Engine persistent hard disk. See
      // https://cloud.google.com/compute/docs/disks/#pdspecs for details.
      PERSISTENT_HDD = 1;

      // Specifies a Google Compute Engine persistent solid-state disk. See
      // https://cloud.google.com/compute/docs/disks/#pdspecs for details.
      PERSISTENT_SSD = 2;

      // Specifies a Google Compute Engine local SSD.
      // See https://cloud.google.com/compute/docs/disks/local-ssd for details.
      LOCAL_SSD = 3;
    }

    // Required. The name of the disk that can be used in the pipeline
    // parameters. Must be 1 - 63 characters.
    // The name "boot" is reserved for system use.
    string name = 1;

    // Required. The type of the disk to create.
    Type type = 2;

    // The size of the disk. Defaults to 500 (GB).
    // This field is not applicable for local SSD.
    int32 size_gb = 3;

    // The full or partial URL of the persistent disk to attach. See
    // https://cloud.google.com/compute/docs/reference/latest/instances#resource
    // and
    // https://cloud.google.com/compute/docs/disks/persistent-disks#snapshots
    // for more details.
    string source = 4;

    // Deprecated. Disks created by the Pipelines API will be deleted at the end
    // of the pipeline run, regardless of what this field is set to.
    //
    // Marked with the `deprecated` field option so that generated code
    // carries the deprecation too; the wire format is unaffected.
    bool auto_delete = 6 [deprecated = true];

    // Required at create time and cannot be overridden at run time.
    // Specifies the path in the docker container where files on
    // this disk should be located. For example, if `mountPoint`
    // is `/mnt/disk`, and the parameter has `localPath`
    // `inputs/file.txt`, the docker container can access the data at
    // `/mnt/disk/inputs/file.txt`.
    string mount_point = 8;
  }

  // The minimum number of cores to use. Defaults to 1.
  int32 minimum_cpu_cores = 1;

  // Whether to use preemptible VMs. Defaults to `false`. In order to use this,
  // must be true for both create time and run time. Cannot be true at run time
  // if false at create time.
  bool preemptible = 2;

  // The minimum amount of RAM to use. Defaults to 3.75 (GB).
  double minimum_ram_gb = 3;

  // Disks to attach.
  repeated Disk disks = 4;

  // List of Google Compute Engine availability zones to which resource
  // creation will be restricted. If empty, any zone may be chosen.
  repeated string zones = 5;

  // The size of the boot disk. Defaults to 10 (GB).
  int32 boot_disk_size_gb = 6;

  // Whether to assign an external IP to the instance. This is an experimental
  // feature that may go away. Defaults to false.
  // NOTE(review): the field name and the private-IP discussion below suggest
  // that setting this to `true` means *no* external IP is assigned; confirm
  // and reword the sentence above accordingly.
  // Corresponds to `--no_address` flag for [gcloud compute instances create]
  // (https://cloud.google.com/sdk/gcloud/reference/compute/instances/create).
  // In order to use this, must be true for both create time and run time.
  // Cannot be true at run time if false at create time. If you need to ssh into
  // a private IP VM for debugging, you can ssh to a public VM and then ssh into
  // the private VM's Internal IP. If noAddress is set, this pipeline run may
  // only load docker images from Google Container Registry and not Docker Hub.
  // ** Note: To use this option, your project must be in Google Access for
  // Private IPs Early Access Program.**
  bool no_address = 7;
}

// Parameters facilitate setting and delivering data into the
// pipeline's execution environment. They are defined at create time,
// with optional defaults, and can be overridden at run time.
//
// If `localCopy` is unset, then the parameter specifies a string that
// is passed as-is into the pipeline, as the value of the environment
// variable with the given name. A default value can be optionally
// specified at create time. The default can be overridden at run time
// using the inputs map. If no default is given, a value must be
// supplied at runtime.
//
// If `localCopy` is defined, then the parameter specifies a data
// source or sink, both in Google Cloud Storage and on the Docker container
// where the pipeline computation is run. The [service account associated with
// the Pipeline][google.genomics.v1alpha2.RunPipelineArgs.service_account] (by
// default the project's Compute Engine service account) must have access to the
// Google Cloud Storage paths.
//
// At run time, the Google Cloud Storage paths can be overridden if a default
// was provided at create time, or must be set otherwise. The pipeline runner
// should add a key/value pair to either the inputs or outputs map. The
// indicated data copies will be carried out before/after pipeline execution,
// just as if the corresponding arguments were provided to `gsutil cp`.
//
// For example: Given the following `PipelineParameter`, specified
// in the `inputParameters` list:
//
// ```
// {name: "input_file", localCopy: {path: "file.txt", disk: "pd1"}}
// ```
//
// where `disk` is defined in the `PipelineResources` object as:
//
// ```
// {name: "pd1", mountPoint: "/mnt/disk/"}
// ```
//
// We create a disk named `pd1`, mount it on the host VM, and map
// `/mnt/pd1` to `/mnt/disk` in the docker container. At
// runtime, an entry for `input_file` would be required in the inputs
// map, such as:
//
// ```
//   inputs["input_file"] = "gs://my-bucket/bar.txt"
// ```
//
// This would generate the following gsutil call:
//
// ```
//   gsutil cp gs://my-bucket/bar.txt /mnt/pd1/file.txt
// ```
//
// The file `/mnt/pd1/file.txt` maps to `/mnt/disk/file.txt` in the
// Docker container. Acceptable paths are:
//
// <table>
//   <thead>
//     <tr><th>Google Cloud Storage path</th><th>Local path</th></tr>
//   </thead>
//   <tbody>
//     <tr><td>file</td><td>file</td></tr>
//     <tr><td>glob</td><td>directory</td></tr>
//   </tbody>
// </table>
//
// For outputs, the direction of the copy is reversed:
//
// ```
//   gsutil cp /mnt/disk/file.txt gs://my-bucket/bar.txt
// ```
//
// Acceptable paths are:
//
// <table>
//   <thead>
//     <tr><th>Local path</th><th>Google Cloud Storage path</th></tr>
//   </thead>
//   <tbody>
//     <tr><td>file</td><td>file</td></tr>
//     <tr>
//       <td>file</td>
//       <td>directory - directory must already exist</td>
//     </tr>
//     <tr>
//       <td>glob</td>
//       <td>directory - directory will be created if it doesn't exist</td></tr>
//   </tbody>
// </table>
//
// One restriction, due to Docker limitations, is that for outputs that are
// found on the boot disk, the local path cannot be a glob and must be a file.
//
// NOTE(review): field numbers 3 and 4 are not used by this message. Confirm
// whether they belonged to removed fields and should be declared `reserved`.
message PipelineParameter {
  // LocalCopy defines how a remote file should be copied to and from the VM.
  message LocalCopy {
    // Required. The path within the user's docker container where
    // this input should be localized to and from, relative to the specified
    // disk's mount point. For example: `file.txt`.
    string path = 1;

    // Required. The name of the disk where this parameter is
    // located. Can be the name of one of the disks specified in the
    // Resources field, or "boot", which represents the Docker
    // instance's boot disk and has a mount point of `/`.
    string disk = 2;
  }

  // Required. Name of the parameter - the pipeline runner uses this string
  // as the key to the input and output maps in RunPipeline.
  string name = 1;

  // Human-readable description.
  string description = 2;

  // The default value for this parameter. Can be overridden at runtime.
  // If `localCopy` is present, then this must be a Google Cloud Storage path
  // beginning with `gs://`.
  string default_value = 5;

  // If present, this parameter is marked for copying to and from the VM.
  // `LocalCopy` indicates where on the VM the file should be. The value
  // given to this parameter (either at runtime or using `defaultValue`)
  // must be the remote path where the file should be.
  LocalCopy local_copy = 6;
}

// The Docker executor specification.
message DockerExecutor {
  // Required. Image name from either Docker Hub or Google Container Registry.
  // Users that run pipelines must have READ access to the image.
  string image_name = 1;

  // Required. The command or newline delimited script to run. The command
  // string will be executed within a bash shell.
  //
  // If the command exits with a non-zero exit code, output parameter
  // de-localization will be skipped and the pipeline operation's
  // [`error`][google.longrunning.Operation.error] field will be populated.
  //
  // Maximum command string length is 16384.
  string cmd = 2;
}