author     2017-05-30 15:49:37 +0200
committer  2017-05-30 15:59:48 +0200
commit     84a8e95910f069dd03a19b0fc634f95bb0beac95
tree       d3a5cd8e259799e36834b247194c2d1aa3163e16 /third_party/googleapis/google/cloud/dataproc
parent     c7696b47a4a12b1e56e41246770cbd44ad1c9c3e
Introduce third_party/googleapis
Add `https://github.com/googleapis/googleapis` as a third_party
dependency at commit `001f6702ac4cd72194a5120ff978fcfa740783d6`.
These protos are required for the upcoming open-sourcing of the BES
(Build Event Service) protocol code.
Additionally, add (java_)proto_library() rules for the protobufs
required by the BES protocol.
Change-Id: Ie78a9941a62f2085a58ad859c91161885e6f390d
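For illustration, the `(java_)proto_library()` rules the commit message describes might look roughly like the sketch below for the three dataproc protos in this diff. This is a hypothetical sketch only: the target names and dependency labels (e.g. `google_cloud_dataproc_v1_proto`, `@com_google_protobuf//:field_mask_proto`) are assumptions, not the actual contents of the BUILD files added by this change.

    # Hypothetical BUILD sketch; actual target and dependency names may differ.
    proto_library(
        name = "google_cloud_dataproc_v1_proto",
        srcs = [
            "google/cloud/dataproc/v1/clusters.proto",
            "google/cloud/dataproc/v1/jobs.proto",
            "google/cloud/dataproc/v1/operations.proto",
        ],
        deps = [
            # Assumed labels for the imported googleapis and well-known-type protos.
            ":google_api_annotations_proto",
            ":google_longrunning_operations_proto",
            "@com_google_protobuf//:duration_proto",
            "@com_google_protobuf//:empty_proto",
            "@com_google_protobuf//:field_mask_proto",
            "@com_google_protobuf//:timestamp_proto",
        ],
    )

    java_proto_library(
        name = "google_cloud_dataproc_v1_java_proto",
        deps = [":google_cloud_dataproc_v1_proto"],
    )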
Diffstat (limited to 'third_party/googleapis/google/cloud/dataproc')
 third_party/googleapis/google/cloud/dataproc/v1/clusters.proto   | 444
 third_party/googleapis/google/cloud/dataproc/v1/jobs.proto       | 573
 third_party/googleapis/google/cloud/dataproc/v1/operations.proto |  79
3 files changed, 1096 insertions(+), 0 deletions(-)
diff --git a/third_party/googleapis/google/cloud/dataproc/v1/clusters.proto b/third_party/googleapis/google/cloud/dataproc/v1/clusters.proto new file mode 100644 index 0000000000..fc7f45eadf --- /dev/null +++ b/third_party/googleapis/google/cloud/dataproc/v1/clusters.proto @@ -0,0 +1,444 @@ +// Copyright 2016 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package google.cloud.dataproc.v1; + +import "google/api/annotations.proto"; +import "google/cloud/dataproc/v1/operations.proto"; +import "google/longrunning/operations.proto"; +import "google/protobuf/duration.proto"; +import "google/protobuf/field_mask.proto"; +import "google/protobuf/timestamp.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc"; +option java_multiple_files = true; +option java_outer_classname = "ClustersProto"; +option java_package = "com.google.cloud.dataproc.v1"; + + +// The ClusterControllerService provides methods to manage clusters +// of Google Compute Engine instances. +service ClusterController { + // Creates a cluster in a project. + rpc CreateCluster(CreateClusterRequest) returns (google.longrunning.Operation) { + option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/clusters" body: "cluster" }; + } + + // Updates a cluster in a project. + rpc UpdateCluster(UpdateClusterRequest) returns (google.longrunning.Operation) { + option (google.api.http) = { patch: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}" body: "cluster" }; + } + + // Deletes a cluster in a project. + rpc DeleteCluster(DeleteClusterRequest) returns (google.longrunning.Operation) { + option (google.api.http) = { delete: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}" }; + } + + // Gets the resource representation for a cluster in a project. + rpc GetCluster(GetClusterRequest) returns (Cluster) { + option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}" }; + } + + // Lists all regions/{region}/clusters in a project. + rpc ListClusters(ListClustersRequest) returns (ListClustersResponse) { + option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/clusters" }; + } + + // Gets cluster diagnostic information. + // After the operation completes, the Operation.response field + // contains `DiagnoseClusterOutputLocation`. + rpc DiagnoseCluster(DiagnoseClusterRequest) returns (google.longrunning.Operation) { + option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:diagnose" body: "*" }; + } +} + +// Describes the identifying information, config, and status of +// a cluster of Google Compute Engine instances. +message Cluster { + // [Required] The Google Cloud Platform project ID that the cluster belongs to. + string project_id = 1; + + // [Required] The cluster name. Cluster names within a project must be + // unique. Names of deleted clusters can be reused. 
+ string cluster_name = 2; + + // [Required] The cluster config. Note that Cloud Dataproc may set + // default values, and values may change when clusters are updated. + ClusterConfig config = 3; + + // [Output-only] Cluster status. + ClusterStatus status = 4; + + // [Output-only] The previous cluster status. + repeated ClusterStatus status_history = 7; + + // [Output-only] A cluster UUID (Unique Universal Identifier). Cloud Dataproc + // generates this value when it creates the cluster. + string cluster_uuid = 6; +} + +// The cluster config. +message ClusterConfig { + // [Optional] A Google Cloud Storage staging bucket used for sharing generated + // SSH keys and config. If you do not specify a staging bucket, Cloud + // Dataproc will determine an appropriate Cloud Storage location (US, + // ASIA, or EU) for your cluster's staging bucket according to the Google + // Compute Engine zone where your cluster is deployed, and then it will create + // and manage this project-level, per-location bucket for you. + string config_bucket = 1; + + // [Required] The shared Google Compute Engine config settings for + // all instances in a cluster. + GceClusterConfig gce_cluster_config = 8; + + // [Optional] The Google Compute Engine config settings for + // the master instance in a cluster. + InstanceGroupConfig master_config = 9; + + // [Optional] The Google Compute Engine config settings for + // worker instances in a cluster. + InstanceGroupConfig worker_config = 10; + + // [Optional] The Google Compute Engine config settings for + // additional worker instances in a cluster. + InstanceGroupConfig secondary_worker_config = 12; + + // [Optional] The config settings for software inside the cluster. + SoftwareConfig software_config = 13; + + // [Optional] Commands to execute on each node after config is + // completed. By default, executables are run on master and all worker nodes. + // You can test a node's <code>role</code> metadata to run an executable on + // a master or worker node, as shown below using `curl` (you can also use `wget`): + // + // ROLE=$(curl -H Metadata-Flavor:Google http://metadata/computeMetadata/v1/instance/attributes/dataproc-role) + // if [[ "${ROLE}" == 'Master' ]]; then + // ... master specific actions ... + // else + // ... worker specific actions ... + // fi + repeated NodeInitializationAction initialization_actions = 11; +} + +// Common config settings for resources of Google Compute Engine cluster +// instances, applicable to all instances in the cluster. +message GceClusterConfig { + // [Required] The zone where the Google Compute Engine cluster will be located. + // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]`. + string zone_uri = 1; + + // [Optional] The Google Compute Engine network to be used for machine + // communications. Cannot be specified with subnetwork_uri. If neither + // `network_uri` nor `subnetwork_uri` is specified, the "default" network of + // the project is used, if it exists. Cannot be a "Custom Subnet Network" (see + // [Using Subnetworks](/compute/docs/subnetworks) for more information). + // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default`. + string network_uri = 2; + + // [Optional] The Google Compute Engine subnetwork to be used for machine + // communications. Cannot be specified with network_uri. + // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/us-east1/sub0`. 
+ string subnetwork_uri = 6; + + // [Optional] If true, all instances in the cluster will only have internal IP + // addresses. By default, clusters are not restricted to internal IP addresses, + // and will have ephemeral external IP addresses assigned to each instance. + // This `internal_ip_only` restriction can only be enabled for subnetwork + // enabled networks, and all off-cluster dependencies must be configured to be + // accessible without external IP addresses. + bool internal_ip_only = 7; + + // [Optional] The URIs of service account scopes to be included in Google + // Compute Engine instances. The following base set of scopes is always + // included: + // + // * https://www.googleapis.com/auth/cloud.useraccounts.readonly + // * https://www.googleapis.com/auth/devstorage.read_write + // * https://www.googleapis.com/auth/logging.write + // + // If no scopes are specified, the following defaults are also provided: + // + // * https://www.googleapis.com/auth/bigquery + // * https://www.googleapis.com/auth/bigtable.admin.table + // * https://www.googleapis.com/auth/bigtable.data + // * https://www.googleapis.com/auth/devstorage.full_control + repeated string service_account_scopes = 3; + + // The Google Compute Engine tags to add to all instances (see + // [Labeling instances](/compute/docs/label-or-tag-resources#labeling_instances)). + repeated string tags = 4; + + // The Google Compute Engine metadata entries to add to all instances (see + // [Project and instance metadata](https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)). + map<string, string> metadata = 5; +} + +// [Optional] The config settings for Google Compute Engine resources in +// an instance group, such as a master or worker group. +message InstanceGroupConfig { + // [Required] The number of VM instances in the instance group. + // For master instance groups, must be set to 1. + int32 num_instances = 1; + + // [Optional] The list of instance names. Cloud Dataproc derives the names from + // `cluster_name`, `num_instances`, and the instance group if not set by user + // (recommended practice is to let Cloud Dataproc derive the name). + repeated string instance_names = 2; + + // [Output-only] The Google Compute Engine image resource used for cluster + // instances. Inferred from `SoftwareConfig.image_version`. + string image_uri = 3; + + // [Required] The Google Compute Engine machine type used for cluster instances. + // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`. + string machine_type_uri = 4; + + // [Optional] Disk option config settings. + DiskConfig disk_config = 5; + + // [Optional] Specifies that this instance group contains preemptible instances. + bool is_preemptible = 6; + + // [Output-only] The config for Google Compute Engine Instance Group + // Manager that manages this group. + // This is only used for preemptible instance groups. + ManagedGroupConfig managed_group_config = 7; +} + +// Specifies the resources used to actively manage an instance group. +message ManagedGroupConfig { + // [Output-only] The name of the Instance Template used for the Managed + // Instance Group. + string instance_template_name = 1; + + // [Output-only] The name of the Instance Group Manager for this group. + string instance_group_manager_name = 2; +} + +// Specifies the config of disk options for a group of VM instances. +message DiskConfig { + // [Optional] Size in GB of the boot disk (default is 500GB). 
+ int32 boot_disk_size_gb = 1; + + // [Optional] Number of attached SSDs, from 0 to 4 (default is 0). + // If SSDs are not attached, the boot disk is used to store runtime logs and + // [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. + // If one or more SSDs are attached, this runtime bulk + // data is spread across them, and the boot disk contains only basic + // config and installed binaries. + int32 num_local_ssds = 2; +} + +// Specifies an executable to run on a fully configured node and a +// timeout period for executable completion. +message NodeInitializationAction { + // [Required] Google Cloud Storage URI of executable file. + string executable_file = 1; + + // [Optional] Amount of time executable has to complete. Default is + // 10 minutes. Cluster creation fails with an explanatory error message (the + // name of the executable that caused the error and the exceeded timeout + // period) if the executable is not completed at end of the timeout period. + google.protobuf.Duration execution_timeout = 2; +} + +// The status of a cluster and its instances. +message ClusterStatus { + // The cluster state. + enum State { + // The cluster state is unknown. + UNKNOWN = 0; + + // The cluster is being created and set up. It is not ready for use. + CREATING = 1; + + // The cluster is currently running and healthy. It is ready for use. + RUNNING = 2; + + // The cluster encountered an error. It is not ready for use. + ERROR = 3; + + // The cluster is being deleted. It cannot be used. + DELETING = 4; + + // The cluster is being updated. It continues to accept and process jobs. + UPDATING = 5; + } + + // [Output-only] The cluster's state. + State state = 1; + + // [Output-only] Optional details of cluster's state. + string detail = 2; + + // [Output-only] Time when this state was entered. + google.protobuf.Timestamp state_start_time = 3; +} + +// Specifies the selection and config of software inside the cluster. +message SoftwareConfig { + // [Optional] The version of software inside the cluster. It must match the + // regular expression `[0-9]+\.[0-9]+`. If unspecified, it defaults to the + // latest version (see [Cloud Dataproc Versioning](/dataproc/versioning)). + string image_version = 1; + + // [Optional] The properties to set on daemon config files. + // + // Property keys are specified in `prefix:property` format, such as + // `core:fs.defaultFS`. The following are supported prefixes + // and their mappings: + // + // * core: `core-site.xml` + // * hdfs: `hdfs-site.xml` + // * mapred: `mapred-site.xml` + // * yarn: `yarn-site.xml` + // * hive: `hive-site.xml` + // * pig: `pig.properties` + // * spark: `spark-defaults.conf` + map<string, string> properties = 2; +} + +// A request to create a cluster. +message CreateClusterRequest { + // [Required] The ID of the Google Cloud Platform project that the cluster + // belongs to. + string project_id = 1; + + // [Required] The Cloud Dataproc region in which to handle the request. + string region = 3; + + // [Required] The cluster to create. + Cluster cluster = 2; +} + +// A request to update a cluster. +message UpdateClusterRequest { + // [Required] The ID of the Google Cloud Platform project the + // cluster belongs to. + string project_id = 1; + + // [Required] The Cloud Dataproc region in which to handle the request. + string region = 5; + + // [Required] The cluster name. + string cluster_name = 2; + + // [Required] The changes to the cluster. 
+ Cluster cluster = 3; + + // [Required] Specifies the path, relative to <code>Cluster</code>, of + // the field to update. For example, to change the number of workers + // in a cluster to 5, the <code>update_mask</code> parameter would be + // specified as <code>config.worker_config.num_instances</code>, + // and the `PATCH` request body would specify the new value, as follows: + // + // { + // "config":{ + // "workerConfig":{ + // "numInstances":"5" + // } + // } + // } + // Similarly, to change the number of preemptible workers in a cluster to 5, the + // <code>update_mask</code> parameter would be <code>config.secondary_worker_config.num_instances</code>, + // and the `PATCH` request body would be set as follows: + // + // { + // "config":{ + // "secondaryWorkerConfig":{ + // "numInstances":"5" + // } + // } + // } + // <strong>Note:</strong> Currently, <code>config.worker_config.num_instances</code> + // and <code>config.secondary_worker_config.num_instances</code> are the only + // fields that can be updated. + google.protobuf.FieldMask update_mask = 4; +} + +// A request to delete a cluster. +message DeleteClusterRequest { + // [Required] The ID of the Google Cloud Platform project that the cluster + // belongs to. + string project_id = 1; + + // [Required] The Cloud Dataproc region in which to handle the request. + string region = 3; + + // [Required] The cluster name. + string cluster_name = 2; +} + +// Request to get the resource representation for a cluster in a project. +message GetClusterRequest { + // [Required] The ID of the Google Cloud Platform project that the cluster + // belongs to. + string project_id = 1; + + // [Required] The Cloud Dataproc region in which to handle the request. + string region = 3; + + // [Required] The cluster name. + string cluster_name = 2; +} + +// A request to list the clusters in a project. +message ListClustersRequest { + // [Required] The ID of the Google Cloud Platform project that the cluster + // belongs to. + string project_id = 1; + + // [Required] The Cloud Dataproc region in which to handle the request. + string region = 4; + + // [Optional] The standard List page size. + int32 page_size = 2; + + // [Optional] The standard List page token. + string page_token = 3; +} + +// The list of all clusters in a project. +message ListClustersResponse { + // [Output-only] The clusters in the project. + repeated Cluster clusters = 1; + + // [Output-only] This token is included in the response if there are more + // results to fetch. To fetch additional results, provide this value as the + // `page_token` in a subsequent <code>ListClustersRequest</code>. + string next_page_token = 2; +} + +// A request to collect cluster diagnostic information. +message DiagnoseClusterRequest { + // [Required] The ID of the Google Cloud Platform project that the cluster + // belongs to. + string project_id = 1; + + // [Required] The Cloud Dataproc region in which to handle the request. + string region = 3; + + // [Required] The cluster name. + string cluster_name = 2; +} + +// The location of diagnostic output. +message DiagnoseClusterResults { + // [Output-only] The Google Cloud Storage URI of the diagnostic output. + // The output report is a plain text file with a summary of collected + // diagnostics. 
+ string output_uri = 1; +} diff --git a/third_party/googleapis/google/cloud/dataproc/v1/jobs.proto b/third_party/googleapis/google/cloud/dataproc/v1/jobs.proto new file mode 100644 index 0000000000..854ce9b972 --- /dev/null +++ b/third_party/googleapis/google/cloud/dataproc/v1/jobs.proto @@ -0,0 +1,573 @@ +// Copyright 2016 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package google.cloud.dataproc.v1; + +import "google/api/annotations.proto"; +import "google/protobuf/empty.proto"; +import "google/protobuf/timestamp.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc"; +option java_multiple_files = true; +option java_outer_classname = "JobsProto"; +option java_package = "com.google.cloud.dataproc.v1"; + + +// The JobController provides methods to manage jobs. +service JobController { + // Submits a job to a cluster. + rpc SubmitJob(SubmitJobRequest) returns (Job) { + option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/jobs:submit" body: "*" }; + } + + // Gets the resource representation for a job in a project. + rpc GetJob(GetJobRequest) returns (Job) { + option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}" }; + } + + // Lists regions/{region}/jobs in a project. + rpc ListJobs(ListJobsRequest) returns (ListJobsResponse) { + option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/jobs" }; + } + + // Starts a job cancellation request. To access the job resource + // after cancellation, call + // [regions/{region}/jobs.list](/dataproc/reference/rest/v1/projects.regions.jobs/list) or + // [regions/{region}/jobs.get](/dataproc/reference/rest/v1/projects.regions.jobs/get). + rpc CancelJob(CancelJobRequest) returns (Job) { + option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}:cancel" body: "*" }; + } + + // Deletes the job from the project. If the job is active, the delete fails, + // and the response returns `FAILED_PRECONDITION`. + rpc DeleteJob(DeleteJobRequest) returns (google.protobuf.Empty) { + option (google.api.http) = { delete: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}" }; + } +} + +// The runtime logging config of the job. +message LoggingConfig { + // The Log4j level for job execution. When running an + // [Apache Hive](http://hive.apache.org/) job, Cloud + // Dataproc configures the Hive client to an equivalent verbosity level. + enum Level { + // Level is unspecified. Use default level for log4j. + LEVEL_UNSPECIFIED = 0; + + // Use ALL level for log4j. + ALL = 1; + + // Use TRACE level for log4j. + TRACE = 2; + + // Use DEBUG level for log4j. + DEBUG = 3; + + // Use INFO level for log4j. + INFO = 4; + + // Use WARN level for log4j. + WARN = 5; + + // Use ERROR level for log4j. + ERROR = 6; + + // Use FATAL level for log4j. + FATAL = 7; + + // Turn off log4j. + OFF = 8; + } + + // The per-package log levels for the driver. 
This may include + // "root" package name to configure rootLogger. + // Examples: + // 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' + map<string, Level> driver_log_levels = 2; +} + +// A Cloud Dataproc job for running +// [Apache Hadoop MapReduce](https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html) +// jobs on [Apache Hadoop YARN](https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html). +message HadoopJob { + // [Required] Indicates the location of the driver's main class. Specify + // either the jar file that contains the main class or the main class name. + // To specify both, add the jar file to `jar_file_uris`, and then specify + // the main class name in this property. + oneof driver { + // The HCFS URI of the jar file containing the main class. + // Examples: + // 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' + // 'hdfs:/tmp/test-samples/custom-wordcount.jar' + // 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar' + string main_jar_file_uri = 1; + + // The name of the driver's main class. The jar file containing the class + // must be in the default CLASSPATH or specified in `jar_file_uris`. + string main_class = 2; + } + + // [Optional] The arguments to pass to the driver. Do not + // include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as job + // properties, since a collision may occur that causes an incorrect job + // submission. + repeated string args = 3; + + // [Optional] Jar file URIs to add to the CLASSPATHs of the + // Hadoop driver and tasks. + repeated string jar_file_uris = 4; + + // [Optional] HCFS (Hadoop Compatible Filesystem) URIs of files to be copied + // to the working directory of Hadoop drivers and distributed tasks. Useful + // for naively parallel tasks. + repeated string file_uris = 5; + + // [Optional] HCFS URIs of archives to be extracted in the working directory of + // Hadoop drivers and tasks. Supported file types: + // .jar, .tar, .tar.gz, .tgz, or .zip. + repeated string archive_uris = 6; + + // [Optional] A mapping of property names to values, used to configure Hadoop. + // Properties that conflict with values set by the Cloud Dataproc API may be + // overwritten. Can include properties set in /etc/hadoop/conf/*-site and + // classes in user code. + map<string, string> properties = 7; + + // [Optional] The runtime log config for job execution. + LoggingConfig logging_config = 8; +} + +// A Cloud Dataproc job for running [Apache Spark](http://spark.apache.org/) +// applications on YARN. +message SparkJob { + // [Required] The specification of the main method to call to drive the job. + // Specify either the jar file that contains the main class or the main class + // name. To pass both a main jar and a main class in that jar, add the jar to + // `CommonJob.jar_file_uris`, and then specify the main class name in `main_class`. + oneof driver { + // The HCFS URI of the jar file that contains the main class. + string main_jar_file_uri = 1; + + // The name of the driver's main class. The jar file that contains the class + // must be in the default CLASSPATH or specified in `jar_file_uris`. + string main_class = 2; + } + + // [Optional] The arguments to pass to the driver. Do not include arguments, + // such as `--conf`, that can be set as job properties, since a collision may + // occur that causes an incorrect job submission. 
+ repeated string args = 3; + + // [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the + // Spark driver and tasks. + repeated string jar_file_uris = 4; + + // [Optional] HCFS URIs of files to be copied to the working directory of + // Spark drivers and distributed tasks. Useful for naively parallel tasks. + repeated string file_uris = 5; + + // [Optional] HCFS URIs of archives to be extracted in the working directory + // of Spark drivers and tasks. Supported file types: + // .jar, .tar, .tar.gz, .tgz, and .zip. + repeated string archive_uris = 6; + + // [Optional] A mapping of property names to values, used to configure Spark. + // Properties that conflict with values set by the Cloud Dataproc API may be + // overwritten. Can include properties set in + // /etc/spark/conf/spark-defaults.conf and classes in user code. + map<string, string> properties = 7; + + // [Optional] The runtime log config for job execution. + LoggingConfig logging_config = 8; +} + +// A Cloud Dataproc job for running +// [Apache PySpark](https://spark.apache.org/docs/0.9.0/python-programming-guide.html) +// applications on YARN. +message PySparkJob { + // [Required] The HCFS URI of the main Python file to use as the driver. Must + // be a .py file. + string main_python_file_uri = 1; + + // [Optional] The arguments to pass to the driver. Do not include arguments, + // such as `--conf`, that can be set as job properties, since a collision may + // occur that causes an incorrect job submission. + repeated string args = 2; + + // [Optional] HCFS file URIs of Python files to pass to the PySpark + // framework. Supported file types: .py, .egg, and .zip. + repeated string python_file_uris = 3; + + // [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the + // Python driver and tasks. + repeated string jar_file_uris = 4; + + // [Optional] HCFS URIs of files to be copied to the working directory of + // Python drivers and distributed tasks. Useful for naively parallel tasks. + repeated string file_uris = 5; + + // [Optional] HCFS URIs of archives to be extracted in the working directory of + // .jar, .tar, .tar.gz, .tgz, and .zip. + repeated string archive_uris = 6; + + // [Optional] A mapping of property names to values, used to configure PySpark. + // Properties that conflict with values set by the Cloud Dataproc API may be + // overwritten. Can include properties set in + // /etc/spark/conf/spark-defaults.conf and classes in user code. + map<string, string> properties = 7; + + // [Optional] The runtime log config for job execution. + LoggingConfig logging_config = 8; +} + +// A list of queries to run on a cluster. +message QueryList { + // [Required] The queries to execute. You do not need to terminate a query + // with a semicolon. Multiple queries can be specified in one string + // by separating each with a semicolon. Here is an example of an Cloud + // Dataproc API snippet that uses a QueryList to specify a HiveJob: + // + // "hiveJob": { + // "queryList": { + // "queries": [ + // "query1", + // "query2", + // "query3;query4", + // ] + // } + // } + repeated string queries = 1; +} + +// A Cloud Dataproc job for running [Apache Hive](https://hive.apache.org/) +// queries on YARN. +message HiveJob { + // [Required] The sequence of Hive queries to execute, specified as either + // an HCFS file URI or a list of queries. + oneof queries { + // The HCFS URI of the script that contains Hive queries. + string query_file_uri = 1; + + // A list of queries. 
+ QueryList query_list = 2; + } + + // [Optional] Whether to continue executing queries if a query fails. + // The default value is `false`. Setting to `true` can be useful when executing + // independent parallel queries. + bool continue_on_failure = 3; + + // [Optional] Mapping of query variable names to values (equivalent to the + // Hive command: `SET name="value";`). + map<string, string> script_variables = 4; + + // [Optional] A mapping of property names and values, used to configure Hive. + // Properties that conflict with values set by the Cloud Dataproc API may be + // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, + // /etc/hive/conf/hive-site.xml, and classes in user code. + map<string, string> properties = 5; + + // [Optional] HCFS URIs of jar files to add to the CLASSPATH of the + // Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes + // and UDFs. + repeated string jar_file_uris = 6; +} + +// A Cloud Dataproc job for running [Apache Spark SQL](http://spark.apache.org/sql/) +// queries. +message SparkSqlJob { + // [Required] The sequence of Spark SQL queries to execute, specified as + // either an HCFS file URI or as a list of queries. + oneof queries { + // The HCFS URI of the script that contains SQL queries. + string query_file_uri = 1; + + // A list of queries. + QueryList query_list = 2; + } + + // [Optional] Mapping of query variable names to values (equivalent to the + // Spark SQL command: SET `name="value";`). + map<string, string> script_variables = 3; + + // [Optional] A mapping of property names to values, used to configure + // Spark SQL's SparkConf. Properties that conflict with values set by the + // Cloud Dataproc API may be overwritten. + map<string, string> properties = 4; + + // [Optional] HCFS URIs of jar files to be added to the Spark CLASSPATH. + repeated string jar_file_uris = 56; + + // [Optional] The runtime log config for job execution. + LoggingConfig logging_config = 6; +} + +// A Cloud Dataproc job for running [Apache Pig](https://pig.apache.org/) +// queries on YARN. +message PigJob { + // [Required] The sequence of Pig queries to execute, specified as an HCFS + // file URI or a list of queries. + oneof queries { + // The HCFS URI of the script that contains the Pig queries. + string query_file_uri = 1; + + // A list of queries. + QueryList query_list = 2; + } + + // [Optional] Whether to continue executing queries if a query fails. + // The default value is `false`. Setting to `true` can be useful when executing + // independent parallel queries. + bool continue_on_failure = 3; + + // [Optional] Mapping of query variable names to values (equivalent to the Pig + // command: `name=[value]`). + map<string, string> script_variables = 4; + + // [Optional] A mapping of property names to values, used to configure Pig. + // Properties that conflict with values set by the Cloud Dataproc API may be + // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, + // /etc/pig/conf/pig.properties, and classes in user code. + map<string, string> properties = 5; + + // [Optional] HCFS URIs of jar files to add to the CLASSPATH of + // the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs. + repeated string jar_file_uris = 6; + + // [Optional] The runtime log config for job execution. + LoggingConfig logging_config = 7; +} + +// Cloud Dataproc job config. +message JobPlacement { + // [Required] The name of the cluster where the job will be submitted. 
+ string cluster_name = 1; + + // [Output-only] A cluster UUID generated by the Cloud Dataproc service when + // the job is submitted. + string cluster_uuid = 2; +} + +// Cloud Dataproc job status. +message JobStatus { + // The job state. + enum State { + // The job state is unknown. + STATE_UNSPECIFIED = 0; + + // The job is pending; it has been submitted, but is not yet running. + PENDING = 1; + + // Job has been received by the service and completed initial setup; + // it will soon be submitted to the cluster. + SETUP_DONE = 8; + + // The job is running on the cluster. + RUNNING = 2; + + // A CancelJob request has been received, but is pending. + CANCEL_PENDING = 3; + + // Transient in-flight resources have been canceled, and the request to + // cancel the running job has been issued to the cluster. + CANCEL_STARTED = 7; + + // The job cancellation was successful. + CANCELLED = 4; + + // The job has completed successfully. + DONE = 5; + + // The job has completed, but encountered an error. + ERROR = 6; + } + + // [Output-only] A state message specifying the overall job state. + State state = 1; + + // [Output-only] Optional job state details, such as an error + // description if the state is <code>ERROR</code>. + string details = 2; + + // [Output-only] The time when this state was entered. + google.protobuf.Timestamp state_start_time = 6; +} + +// Encapsulates the full scoping used to reference a job. +message JobReference { + // [Required] The ID of the Google Cloud Platform project that the job + // belongs to. + string project_id = 1; + + // [Optional] The job ID, which must be unique within the project. The job ID + // is generated by the server upon job submission or provided by the user as a + // means to perform retries without creating duplicate jobs. The ID must + // contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or + // hyphens (-). The maximum length is 512 characters. + string job_id = 2; +} + +// A Cloud Dataproc job resource. +message Job { + // [Optional] The fully qualified reference to the job, which can be used to + // obtain the equivalent REST path of the job resource. If this property + // is not specified when a job is created, the server generates a + // <code>job_id</code>. + JobReference reference = 1; + + // [Required] Job information, including how, when, and where to + // run the job. + JobPlacement placement = 2; + + // [Required] The application/framework-specific portion of the job. + oneof type_job { + // Job is a Hadoop job. + HadoopJob hadoop_job = 3; + + // Job is a Spark job. + SparkJob spark_job = 4; + + // Job is a Pyspark job. + PySparkJob pyspark_job = 5; + + // Job is a Hive job. + HiveJob hive_job = 6; + + // Job is a Pig job. + PigJob pig_job = 7; + + // Job is a SparkSql job. + SparkSqlJob spark_sql_job = 12; + } + + // [Output-only] The job status. Additional application-specific + // status information may be contained in the <code>type_job</code> + // and <code>yarn_applications</code> fields. + JobStatus status = 8; + + // [Output-only] The previous job status. + repeated JobStatus status_history = 13; + + // [Output-only] A URI pointing to the location of the stdout of the job's + // driver program. + string driver_output_resource_uri = 17; + + // [Output-only] If present, the location of miscellaneous control files + // which may be used as part of job setup and handling. If not present, + // control files may be placed in the same location as `driver_output_uri`. 
+ string driver_control_files_uri = 15; +} + +// A request to submit a job. +message SubmitJobRequest { + // [Required] The ID of the Google Cloud Platform project that the job + // belongs to. + string project_id = 1; + + // [Required] The Cloud Dataproc region in which to handle the request. + string region = 3; + + // [Required] The job resource. + Job job = 2; +} + +// A request to get the resource representation for a job in a project. +message GetJobRequest { + // [Required] The ID of the Google Cloud Platform project that the job + // belongs to. + string project_id = 1; + + // [Required] The Cloud Dataproc region in which to handle the request. + string region = 3; + + // [Required] The job ID. + string job_id = 2; +} + +// A request to list jobs in a project. +message ListJobsRequest { + // A matcher that specifies categories of job states. + enum JobStateMatcher { + // Match all jobs, regardless of state. + ALL = 0; + + // Only match jobs in non-terminal states: PENDING, RUNNING, or + // CANCEL_PENDING. + ACTIVE = 1; + + // Only match jobs in terminal states: CANCELLED, DONE, or ERROR. + NON_ACTIVE = 2; + } + + // [Required] The ID of the Google Cloud Platform project that the job + // belongs to. + string project_id = 1; + + // [Required] The Cloud Dataproc region in which to handle the request. + string region = 6; + + // [Optional] The number of results to return in each response. + int32 page_size = 2; + + // [Optional] The page token, returned by a previous call, to request the + // next page of results. + string page_token = 3; + + // [Optional] If set, the returned jobs list includes only jobs that were + // submitted to the named cluster. + string cluster_name = 4; + + // [Optional] Specifies enumerated categories of jobs to list + // (default = match ALL jobs). + JobStateMatcher job_state_matcher = 5; +} + +// A list of jobs in a project. +message ListJobsResponse { + // [Output-only] Jobs list. + repeated Job jobs = 1; + + // [Optional] This token is included in the response if there are more results + // to fetch. To fetch additional results, provide this value as the + // `page_token` in a subsequent <code>ListJobsRequest</code>. + string next_page_token = 2; +} + +// A request to cancel a job. +message CancelJobRequest { + // [Required] The ID of the Google Cloud Platform project that the job + // belongs to. + string project_id = 1; + + // [Required] The Cloud Dataproc region in which to handle the request. + string region = 3; + + // [Required] The job ID. + string job_id = 2; +} + +// A request to delete a job. +message DeleteJobRequest { + // [Required] The ID of the Google Cloud Platform project that the job + // belongs to. + string project_id = 1; + + // [Required] The Cloud Dataproc region in which to handle the request. + string region = 3; + + // [Required] The job ID. + string job_id = 2; +} diff --git a/third_party/googleapis/google/cloud/dataproc/v1/operations.proto b/third_party/googleapis/google/cloud/dataproc/v1/operations.proto new file mode 100644 index 0000000000..61227ed2a8 --- /dev/null +++ b/third_party/googleapis/google/cloud/dataproc/v1/operations.proto @@ -0,0 +1,79 @@ +// Copyright 2016 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package google.cloud.dataproc.v1; + +import "google/api/annotations.proto"; +import "google/longrunning/operations.proto"; +import "google/protobuf/empty.proto"; +import "google/protobuf/timestamp.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc"; +option java_multiple_files = true; +option java_outer_classname = "OperationsProto"; +option java_package = "com.google.cloud.dataproc.v1"; + + +// The status of the operation. +message ClusterOperationStatus { + // The operation state. + enum State { + // Unused. + UNKNOWN = 0; + + // The operation has been created. + PENDING = 1; + + // The operation is running. + RUNNING = 2; + + // The operation is done; either cancelled or completed. + DONE = 3; + } + + // [Output-only] A message containing the operation state. + State state = 1; + + // [Output-only] A message containing the detailed operation state. + string inner_state = 2; + + // [Output-only]A message containing any operation metadata details. + string details = 3; + + // [Output-only] The time this state was entered. + google.protobuf.Timestamp state_start_time = 4; +} + +// Metadata describing the operation. +message ClusterOperationMetadata { + // [Output-only] Name of the cluster for the operation. + string cluster_name = 7; + + // [Output-only] Cluster UUID for the operation. + string cluster_uuid = 8; + + // [Output-only] Current operation status. + ClusterOperationStatus status = 9; + + // [Output-only] The previous operation status. + repeated ClusterOperationStatus status_history = 10; + + // [Output-only] The operation type. + string operation_type = 11; + + // [Output-only] Short description of operation. + string description = 12; +} |
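To make the `update_mask` semantics documented on `UpdateClusterRequest` concrete, here is a minimal, hypothetical Java sketch that builds the request from the documentation's example (changing a cluster's worker count to 5). It assumes the classes generated from these protos (`java_package` `com.google.cloud.dataproc.v1`, `java_multiple_files = true`) are on the classpath, for example via the `java_proto_library()` rules the commit message mentions; the project ID, region, and cluster name are placeholders.

    import com.google.cloud.dataproc.v1.Cluster;
    import com.google.cloud.dataproc.v1.ClusterConfig;
    import com.google.cloud.dataproc.v1.InstanceGroupConfig;
    import com.google.cloud.dataproc.v1.UpdateClusterRequest;
    import com.google.protobuf.FieldMask;

    public final class UpdateWorkerCountSketch {
      public static void main(String[] args) {
        // The Cluster message carries only the new value for the field being updated.
        Cluster clusterPatch = Cluster.newBuilder()
            .setConfig(ClusterConfig.newBuilder()
                .setWorkerConfig(InstanceGroupConfig.newBuilder()
                    .setNumInstances(5)))
            .build();

        // update_mask selects config.worker_config.num_instances, matching the
        // JSON PATCH example in the UpdateClusterRequest documentation.
        UpdateClusterRequest request = UpdateClusterRequest.newBuilder()
            .setProjectId("my-project")      // placeholder
            .setRegion("global")             // placeholder
            .setClusterName("my-cluster")    // placeholder
            .setCluster(clusterPatch)
            .setUpdateMask(FieldMask.newBuilder()
                .addPaths("config.worker_config.num_instances"))
            .build();

        // Sending this via ClusterController.UpdateCluster returns a
        // google.longrunning.Operation that is polled until the resize completes.
        System.out.println(request);
      }
    }

Because only the masked path is applied, the `cluster` patch message can stay this sparse; other fields left at their defaults should be ignored by the update, per standard FieldMask semantics.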