Skip to content

Commit

Permalink
feat: add support for temp_bucket, endpoint_config in clusters; add p…
Browse files Browse the repository at this point in the history
…reemptibility for instance group configs (#60)

* feat: Additional fields for the `ClusterConfig` and `InstanceGroupConfig` messages.

This change includes the following updates:
1. There is a new `temp_bucket` field for clusters.
2. There is a new `endpoint_config` field for clusters.
3. There is a new `preemptibility` field for instance group configs.
4. There are various updates to the doc comments.

PiperOrigin-RevId: 323829608

Source-Author: Google APIs <noreply@google.com>
Source-Date: Wed Jul 29 11:26:43 2020 -0700
Source-Repo: googleapis/googleapis
Source-Sha: d8a3dfb82f5cae3f1bcdcec7c5726581532da7d5
Source-Link: googleapis/googleapis@d8a3dfb
  • Loading branch information
yoshi-automation authored Jul 31, 2020
1 parent d5b7666 commit df1f599
Show file tree
Hide file tree
Showing 11 changed files with 597 additions and 203 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class Component(enum.IntEnum):
Cluster components that can be activated.
Attributes:
COMPONENT_UNSPECIFIED (int): Unspecified component.
COMPONENT_UNSPECIFIED (int): Unspecified component. Specifying this will cause Cluster creation to fail.
ANACONDA (int): The Anaconda python distribution.
HIVE_WEBHCAT (int): The Hive Web HCatalog (the REST service for accessing HCatalog).
JUPYTER (int): The Jupyter Notebook.
Expand Down Expand Up @@ -103,6 +103,30 @@ class Substate(enum.IntEnum):
STALE_STATUS = 2


class InstanceGroupConfig(object):
    class Preemptibility(enum.IntEnum):
        """
        Controls the use of
        [preemptible instances](https://cloud.google.com/compute/docs/instances/preemptible)
        within the group.

        Attributes:
          PREEMPTIBILITY_UNSPECIFIED (int): Preemptibility is unspecified; the
            system will choose the appropriate setting for each instance group.
          NON_PREEMPTIBLE (int): Instances are non-preemptible.
            This option is allowed for all instance groups and is the only
            valid value for Master and Worker instance groups.
          PREEMPTIBLE (int): Instances are preemptible.
            This option is allowed only for secondary worker groups.
        """

        # Integer values match the numbers assigned to the same-named values
        # of the `Preemptibility` enum in the proto definition.
        PREEMPTIBILITY_UNSPECIFIED = 0
        NON_PREEMPTIBLE = 1
        PREEMPTIBLE = 2


class JobStatus(object):
class State(enum.IntEnum):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,20 +164,26 @@ message BasicYarnAutoscalingConfig {
// Bounds: [0s, 1d].
google.protobuf.Duration graceful_decommission_timeout = 5 [(google.api.field_behavior) = REQUIRED];

// Required. Fraction of average pending memory in the last cooldown period
// Required. Fraction of average YARN pending memory in the last cooldown period
// for which to add workers. A scale-up factor of 1.0 will result in scaling
// up so that there is no pending memory remaining after the update (more
// aggressive scaling). A scale-up factor closer to 0 will result in a smaller
// magnitude of scaling up (less aggressive scaling).
// See [How autoscaling
// works](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works)
// for more information.
//
// Bounds: [0.0, 1.0].
double scale_up_factor = 1 [(google.api.field_behavior) = REQUIRED];

// Required. Fraction of average pending memory in the last cooldown period
// Required. Fraction of average YARN pending memory in the last cooldown period
// for which to remove workers. A scale-down factor of 1 will result in
// scaling down so that there is no available memory remaining after the
// update (more aggressive scaling). A scale-down factor of 0 disables
// removing workers, which can be beneficial for autoscaling a single job.
// See [How autoscaling
// works](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works)
// for more information.
//
// Bounds: [0.0, 1.0].
double scale_down_factor = 2 [(google.api.field_behavior) = REQUIRED];
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,17 @@ message ClusterConfig {
// bucket](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
string config_bucket = 1 [(google.api.field_behavior) = OPTIONAL];

// Optional. A Cloud Storage bucket used to store ephemeral cluster and job data,
// such as Spark and MapReduce history files.
// If you do not specify a temp bucket,
// Dataproc will determine a Cloud Storage location (US,
// ASIA, or EU) for your cluster's temp bucket according to the
// Compute Engine zone where your cluster is deployed, and then create
// and manage this project-level, per-location bucket. The default bucket has
// a TTL of 90 days, but you can use any TTL (or none) if you specify a
// bucket.
string temp_bucket = 2 [(google.api.field_behavior) = OPTIONAL];

// Optional. The shared Compute Engine config settings for
// all instances in a cluster.
GceClusterConfig gce_cluster_config = 8 [(google.api.field_behavior) = OPTIONAL];
Expand Down Expand Up @@ -216,6 +227,20 @@ message ClusterConfig {

// Optional. Lifecycle setting for the cluster.
LifecycleConfig lifecycle_config = 17 [(google.api.field_behavior) = OPTIONAL];

// Optional. Port/endpoint configuration for this cluster.
EndpointConfig endpoint_config = 19 [(google.api.field_behavior) = OPTIONAL];
}

// Endpoint config for this cluster.
message EndpointConfig {
  // Output only. Maps port descriptions to URLs. Populated only when
  // `enable_http_port_access` is true.
  map<string, string> http_ports = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. When true, enables HTTP access to specific ports on the
  // cluster from external sources. Defaults to false.
  bool enable_http_port_access = 2
      [(google.api.field_behavior) = OPTIONAL];
}

// Autoscaling Policy config associated with the cluster.
Expand Down Expand Up @@ -288,7 +313,7 @@ message GceClusterConfig {
bool internal_ip_only = 7 [(google.api.field_behavior) = OPTIONAL];

// Optional. The [Dataproc service
// account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_cloud_dataproc)
// account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_dataproc)
// (also see [VM Data Plane
// identity](https://cloud.google.com/dataproc/docs/concepts/iam/dataproc-principals#vm_service_account_data_plane_identity))
// used by Dataproc cluster VM instances to access Google Cloud Platform
Expand Down Expand Up @@ -332,6 +357,27 @@ message GceClusterConfig {
// The config settings for Compute Engine resources in
// an instance group, such as a master or worker group.
message InstanceGroupConfig {
// Controls the use of
// [preemptible instances](https://cloud.google.com/compute/docs/instances/preemptible)
// within the group.
enum Preemptibility {
  // No preemptibility was specified; the system will choose the
  // appropriate setting for each instance group.
  PREEMPTIBILITY_UNSPECIFIED = 0;

  // Instances are non-preemptible.
  //
  // Allowed for all instance groups, and the only valid value for
  // Master and Worker instance groups.
  NON_PREEMPTIBLE = 1;

  // Instances are preemptible.
  //
  // Allowed only for secondary worker groups.
  PREEMPTIBLE = 2;
}

// Optional. The number of VM instances in the instance group.
// For master instance groups, must be set to 1.
int32 num_instances = 1 [(google.api.field_behavior) = OPTIONAL];
Expand Down Expand Up @@ -382,6 +428,15 @@ message InstanceGroupConfig {
// instances.
bool is_preemptible = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

// Optional. Specifies the preemptibility of the instance group.
//
// The default value for master and worker groups is
// `NON_PREEMPTIBLE`. This default cannot be changed.
//
// The default value for secondary instances is
// `PREEMPTIBLE`.
Preemptibility preemptibility = 10 [(google.api.field_behavior) = OPTIONAL];

// Output only. The config for Compute Engine Instance Group
// Manager that manages this group.
// This is only used for preemptible instance groups.
Expand Down Expand Up @@ -608,7 +663,7 @@ message KerberosConfig {
message SoftwareConfig {
// Optional. The version of software inside the cluster. It must be one of the
// supported [Dataproc
// Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_cloud_dataproc_versions),
// Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_dataproc_versions),
// such as "1.2" (including a subminor version, such as "1.2.29"), or the
// ["preview"
// version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions).
Expand Down
Loading

0 comments on commit df1f599

Please sign in to comment.