// proto/google/events/cloud/dataflow/v1beta3/data.proto

// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.events.cloud.dataflow.v1beta3;

import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Events.Protobuf.Cloud.Dataflow.V1Beta3";
option php_namespace = "Google\\Events\\Cloud\\Dataflow\\V1beta3";
option ruby_package = "Google::Events::Cloud::Dataflow::V1beta3";

// Describes the environment in which a Dataflow Job runs.
//
// Used as the type of `Job.environment`.
message Environment {
  // Field number 9 is not assigned to any field of this message. It is
  // reserved so that it can never be given a new meaning here.
  // NOTE(review): presumably a field stripped from the source Dataflow
  // Environment proto -- confirm against the upstream definition.
  reserved 9;

  // The prefix of the resources the system should use for temporary
  // storage.  The system will append the suffix "/temp-{JOBNAME}" to
  // this resource prefix, where {JOBNAME} is the value of the
  // job_name field.  The resulting bucket and object prefix is used
  // as the prefix of the resources used to store temporary data
  // needed during the job execution.  NOTE: This will override the
  // value in taskrunner_settings.
  // The supported resource type is:
  //
  // Google Cloud Storage:
  //
  //   storage.googleapis.com/{bucket}/{object}
  //   bucket.storage.googleapis.com/{object}
  string temp_storage_prefix = 1;

  // The type of cluster manager API to use.  If unknown or
  // unspecified, the service will attempt to choose a reasonable
  // default.  This should be in the form of the API service name,
  // e.g. "compute.googleapis.com".
  string cluster_manager_api_service = 2;

  // The list of experiments to enable. This field should be used for SDK
  // related experiments and not for service related experiments. The proper
  // field for service related experiments is service_options.
  repeated string experiments = 3;

  // The list of service options to enable. This field should be used for
  // service related experiments only. These experiments, when graduating to GA,
  // should be replaced by dedicated fields or become default (i.e. always on).
  repeated string service_options = 16;

  // If set, contains the Cloud KMS key identifier used to encrypt data
  // at rest, AKA a Customer Managed Encryption Key (CMEK).
  //
  // Format:
  //   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
  string service_kms_key_name = 12;

  // The worker pools. At least one "harness" worker pool must be
  // specified in order for the job to have workers.
  repeated WorkerPool worker_pools = 4;

  // A description of the process that generated the request.
  google.protobuf.Struct user_agent = 5;

  // A structure describing which components and their versions of the service
  // are required in order to run the job.
  google.protobuf.Struct version = 6;

  // The dataset for the current project where various workflow
  // related tables are stored.
  //
  // The supported resource type is:
  //
  // Google BigQuery:
  //   bigquery.googleapis.com/{dataset}
  string dataset = 7;

  // The Cloud Dataflow SDK pipeline options specified by the user. These
  // options are passed through the service and are used to recreate the
  // SDK pipeline options on the worker in a language agnostic and platform
  // independent way.
  google.protobuf.Struct sdk_pipeline_options = 8;

  // Identity to run virtual machines as. Defaults to the default account.
  string service_account_email = 10;

  // Which Flexible Resource Scheduling mode to run in.
  FlexResourceSchedulingGoal flex_resource_scheduling_goal = 11;

  // The Compute Engine region
  // (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
  // which worker processing should occur, e.g. "us-west1". Mutually exclusive
  // with worker_zone. If neither worker_region nor worker_zone is specified,
  // default to the control plane's region.
  string worker_region = 13;

  // The Compute Engine zone
  // (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
  // which worker processing should occur, e.g. "us-west1-a". Mutually exclusive
  // with worker_region. If neither worker_region nor worker_zone is specified,
  // a zone in the control plane's region is chosen based on available capacity.
  string worker_zone = 14;

  // Output only. The shuffle mode used for the job.
  ShuffleMode shuffle_mode = 15;

  // Any debugging options to be supplied to the job.
  DebugOptions debug_options = 17;
}

// The packages that must be installed in order for a worker to run the
// steps of the Cloud Dataflow job that will be assigned to its worker
// pool.
//
// This is the mechanism by which the Cloud Dataflow SDK causes code to
// be loaded onto the workers. For example, the Cloud Dataflow Java SDK
// might use this to install jars containing the user's code and all of the
// various dependencies (libraries, data files, etc.) required in order
// for that code to run.
//
// Used as the element type of `WorkerPool.packages`.
message Package {
  // The name of the package.
  string name = 1;

  // The resource to read the package from. The supported resource type is:
  //
  // Google Cloud Storage:
  //
  //   storage.googleapis.com/{bucket}
  //   bucket.storage.googleapis.com/
  string location = 2;
}

// Settings for WorkerPool autoscaling.
//
// Used as the type of `WorkerPool.autoscaling_settings`.
message AutoscalingSettings {
  // The algorithm to use for autoscaling.
  AutoscalingAlgorithm algorithm = 1;

  // The maximum number of workers to cap scaling at.
  int32 max_num_workers = 2;
}

// Defines an SDK harness container for executing Dataflow pipelines.
//
// Used as the element type of `WorkerPool.sdk_harness_container_images`.
message SdkHarnessContainerImage {
  // A Docker container image that resides in Google Container Registry.
  string container_image = 1;

  // If true, recommends the Dataflow service to use only one core per SDK
  // container instance with this image. If false (or unset) recommends using
  // more than one core per SDK container instance with this image for
  // efficiency. Note that Dataflow service may choose to override this property
  // if needed.
  bool use_single_core_per_container = 2;

  // Environment ID for the Beam runner API proto Environment that corresponds
  // to the current SDK Harness.
  string environment_id = 3;

  // The set of capabilities enumerated in the above Environment proto. See also
  // [beam_runner_api.proto](https://github.com/apache/beam/blob/master/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto)
  repeated string capabilities = 4;
}

// Describes one particular pool of Cloud Dataflow workers to be
// instantiated by the Cloud Dataflow service in order to perform the
// computations required by a job.  Note that a workflow job may use
// multiple pools, in order to match the various computational
// requirements of the various stages of the job.
//
// Used as the element type of `Environment.worker_pools`.
message WorkerPool {
  // Field numbers 10, 12 and 15 are not assigned to any field of this
  // message. They are reserved so that they can never be given a new meaning
  // here.
  // NOTE(review): presumably fields stripped from the source Dataflow
  // WorkerPool proto -- confirm against the upstream definition.
  reserved 10, 12, 15;

  // The kind of the worker pool; currently only `harness` and `shuffle`
  // are supported.
  string kind = 1;

  // Number of Google Compute Engine workers in this pool needed to
  // execute the job.  If zero or unspecified, the service will
  // attempt to choose a reasonable default.
  int32 num_workers = 2;

  // Packages to be installed on workers.
  repeated Package packages = 3;

  // The default package set to install.  This allows the service to
  // select a default set of packages which are useful to worker
  // harnesses written in a particular language.
  DefaultPackageSet default_package_set = 4;

  // Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
  // service will attempt to choose a reasonable default.
  string machine_type = 5;

  // Sets the policy for determining when to tear down the worker pool.
  // Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
  // `TEARDOWN_NEVER`.
  // `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
  // the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
  // if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
  // down.
  //
  // If the workers are not torn down by the service, they will
  // continue to run and use Google Compute Engine VM resources in the
  // user's project until they are explicitly terminated by the user.
  // Because of this, Google recommends using the `TEARDOWN_ALWAYS`
  // policy except for small, manually supervised test jobs.
  //
  // If unknown or unspecified, the service will attempt to choose a reasonable
  // default.
  TeardownPolicy teardown_policy = 6;

  // Size of root disk for VMs, in GB.  If zero or unspecified, the service will
  // attempt to choose a reasonable default.
  int32 disk_size_gb = 7;

  // Type of root disk for VMs.  If empty or unspecified, the service will
  // attempt to choose a reasonable default.
  string disk_type = 16;

  // Fully qualified source image for disks.
  string disk_source_image = 8;

  // Zone to run the worker pools in.  If empty or unspecified, the service
  // will attempt to choose a reasonable default.
  string zone = 9;

  // The action to take on host maintenance, as defined by the Google
  // Compute Engine API.
  string on_host_maintenance = 11;

  // Metadata to set on the Google Compute Engine VMs.
  map<string, string> metadata = 13;

  // Settings for autoscaling of this WorkerPool.
  AutoscalingSettings autoscaling_settings = 14;

  // Network to which VMs will be assigned.  If empty or unspecified,
  // the service will use the network "default".
  string network = 17;

  // Subnetwork to which VMs will be assigned, if desired.  Expected to be of
  // the form "regions/REGION/subnetworks/SUBNETWORK".
  string subnetwork = 19;

  // Required. Docker container image that executes the Cloud Dataflow worker
  // harness, residing in Google Container Registry.
  //
  // Deprecated for the Fn API path. Use sdk_harness_container_images instead.
  string worker_harness_container_image = 18;

  // The number of threads per worker harness. If empty or unspecified, the
  // service will choose a number of threads (according to the number of cores
  // on the selected machine type for batch, or 1 by convention for streaming).
  int32 num_threads_per_worker = 20;

  // Configuration for VM IPs.
  WorkerIPAddressConfiguration ip_configuration = 21;

  // Set of SDK harness containers needed to execute this pipeline. This will
  // only be set in the Fn API path. For non-cross-language pipelines this
  // should have only one entry. Cross-language pipelines will have two or more
  // entries.
  repeated SdkHarnessContainerImage sdk_harness_container_images = 22;
}

// Describes any options that have an effect on the debugging of pipelines.
//
// Used as the type of `Environment.debug_options`.
message DebugOptions {
  // When true, enables the logging of the literal hot key to the user's Cloud
  // Logging.
  bool enable_hot_key_logging = 1;
}

// Specifies the processing model used by a
// [google.dataflow.v1beta3.Job], which determines the way the Job is
// managed by the Cloud Dataflow service (how workers are scheduled, how
// inputs are sharded, etc).
//
// Used as the type of `Job.type`.
enum JobType {
  // The type of the job is unspecified, or unknown. This is the zero value,
  // and therefore the proto3 default when the field is not set.
  JOB_TYPE_UNKNOWN = 0;

  // A batch job with a well-defined end point: data is read, data is
  // processed, data is written, and the job is done.
  JOB_TYPE_BATCH = 1;

  // A continuously streaming job with no end: data is read,
  // processed, and written continuously.
  JOB_TYPE_STREAMING = 2;
}

// Specifies the resource to optimize for in Flexible Resource Scheduling.
//
// Used as the type of `Environment.flex_resource_scheduling_goal`.
enum FlexResourceSchedulingGoal {
  // Run in the default mode. This is the zero value, and therefore the
  // proto3 default when the field is not set.
  FLEXRS_UNSPECIFIED = 0;

  // Optimize for lower execution time.
  FLEXRS_SPEED_OPTIMIZED = 1;

  // Optimize for lower cost.
  FLEXRS_COST_OPTIMIZED = 2;
}

// Specifies what happens to a resource when a Cloud Dataflow
// [google.dataflow.v1beta3.Job][google.dataflow.v1beta3.Job] has completed.
//
// Used as the type of `WorkerPool.teardown_policy`.
enum TeardownPolicy {
  // The teardown policy isn't specified, or is unknown.
  TEARDOWN_POLICY_UNKNOWN = 0;

  // Always tear down the resource.
  TEARDOWN_ALWAYS = 1;

  // Tear down the resource on success. This is useful for debugging
  // failures.
  TEARDOWN_ON_SUCCESS = 2;

  // Never tear down the resource. This is useful for debugging and
  // development.
  TEARDOWN_NEVER = 3;
}

// The default set of packages to be staged on a pool of workers.
//
// Used as the type of `WorkerPool.default_package_set`.
enum DefaultPackageSet {
  // The default set of packages to stage is unknown, or unspecified.
  DEFAULT_PACKAGE_SET_UNKNOWN = 0;

  // Indicates that no packages should be staged at the worker unless
  // explicitly specified by the job.
  DEFAULT_PACKAGE_SET_NONE = 1;

  // Stage packages typically useful to workers written in Java.
  DEFAULT_PACKAGE_SET_JAVA = 2;

  // Stage packages typically useful to workers written in Python.
  DEFAULT_PACKAGE_SET_PYTHON = 3;
}

// Specifies the algorithm used to determine the number of worker
// processes to run at any given point in time, based on the amount of
// data left to process, the number of workers, and how quickly
// existing workers are processing data.
//
// Used as the type of `AutoscalingSettings.algorithm`.
enum AutoscalingAlgorithm {
  // The algorithm is unknown, or unspecified.
  AUTOSCALING_ALGORITHM_UNKNOWN = 0;

  // Disable autoscaling.
  AUTOSCALING_ALGORITHM_NONE = 1;

  // Increase worker count over time to reduce job execution time.
  AUTOSCALING_ALGORITHM_BASIC = 2;
}

// Specifies how IP addresses should be allocated to the worker machines.
//
// Used as the type of `WorkerPool.ip_configuration`.
enum WorkerIPAddressConfiguration {
  // The configuration is unknown, or unspecified.
  WORKER_IP_UNSPECIFIED = 0;

  // Workers should have public IP addresses.
  WORKER_IP_PUBLIC = 1;

  // Workers should have private IP addresses.
  WORKER_IP_PRIVATE = 2;
}

// Specifies the shuffle mode used by a
// [google.dataflow.v1beta3.Job], which determines the approach data is shuffled
// during processing. More details in:
// https://cloud.google.com/dataflow/docs/guides/deploying-a-pipeline#dataflow-shuffle
//
// Used as the type of `Environment.shuffle_mode`.
//
// NOTE(review): `VM_BASED` and `SERVICE_BASED` carry no `SHUFFLE_MODE_`
// prefix even though enum values share the package scope. Renaming them would
// break generated code and JSON names, so they are kept as is.
enum ShuffleMode {
  // Shuffle mode information is not available.
  SHUFFLE_MODE_UNSPECIFIED = 0;

  // Shuffle is done on the worker VMs.
  VM_BASED = 1;

  // Shuffle is done on the service side.
  SERVICE_BASED = 2;
}

// Defines a job to be run by the Cloud Dataflow service. Do not enter
// confidential information when you supply string values using the API.
// Fields stripped from source Job proto:
// - steps
// - pipeline_description
// - transform_name_mapping
message Job {
  // The three field numbers not assigned in this message, and the names of
  // the fields stripped from the source Job proto. Reserved so that neither
  // can be reused with a different meaning here.
  // NOTE(review): the numbers are presumably those of the stripped fields --
  // confirm against the source proto.
  reserved 6, 13, 19;
  reserved "steps", "pipeline_description", "transform_name_mapping";

  // The unique ID of this job.
  //
  // This field is set by the Cloud Dataflow service when the Job is
  // created, and is immutable for the life of the job.
  string id = 1;

  // The ID of the Cloud Platform project that the job belongs to.
  string project_id = 2;

  // The user-specified Cloud Dataflow job name.
  //
  // Only one Job with a given name can exist in a project within one region at
  // any given time. Jobs in different regions can have the same name.
  // If a caller attempts to create a Job with the same
  // name as an already-existing Job, the attempt returns the
  // existing Job.
  //
  // The name must match the regular expression
  // `[a-z]([-a-z0-9]{0,1022}[a-z0-9])?`
  string name = 3;

  // The type of Cloud Dataflow job.
  JobType type = 4;

  // The environment for the job.
  Environment environment = 5;

  // The Cloud Storage location where the steps are stored.
  string steps_location = 24;

  // The current state of the job.
  //
  // Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
  // specified.
  //
  // A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
  // terminal state. After a job has reached a terminal state, no
  // further state updates may be made.
  //
  // This field may be mutated by the Cloud Dataflow service;
  // callers cannot mutate it.
  JobState current_state = 7;

  // The timestamp associated with the current state.
  google.protobuf.Timestamp current_state_time = 8;

  // The job's requested state.
  //
  // `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
  // `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
  // also be used to directly set a job's requested state to
  // `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
  // job if it has not already reached a terminal state.
  JobState requested_state = 9;

  // Deprecated. Marked with the `deprecated` option so that generated code
  // flags remaining uses of this field.
  JobExecutionInfo execution_info = 10 [deprecated = true];

  // The timestamp when the job was initially created. Immutable and set by the
  // Cloud Dataflow service.
  google.protobuf.Timestamp create_time = 11;

  // If this job is an update of an existing job, this field is the job ID
  // of the job it replaced.
  //
  // When sending a `CreateJobRequest`, you can update a job by specifying it
  // here. The job named here is stopped, and its intermediate state is
  // transferred to this job.
  string replace_job_id = 12;

  // The client's unique identifier of the job, re-used across retried attempts.
  // If this field is set, the service will ensure its uniqueness.
  // The request to create a job will fail if the service has knowledge of a
  // previously submitted job with the same client's ID and job name.
  // The caller may use this field to ensure idempotence of job
  // creation across retried attempts to create a job.
  // By default, the field is empty and, in that case, the service ignores it.
  string client_request_id = 14;

  // If another job is an update of this job (and thus, this job is in
  // `JOB_STATE_UPDATED`), this field contains the ID of that job.
  string replaced_by_job_id = 15;

  // A set of files the system should be aware of that are used
  // for temporary storage. These temporary files will be
  // removed on job completion.
  // No duplicates are allowed.
  // No file patterns are supported.
  //
  // The supported files are:
  //
  // Google Cloud Storage:
  //
  //    storage.googleapis.com/{bucket}/{object}
  //    bucket.storage.googleapis.com/{object}
  repeated string temp_files = 16;

  // User-defined labels for this job.
  //
  // The labels map can contain no more than 64 entries.  Entries of the labels
  // map are UTF8 strings that comply with the following restrictions:
  //
  // * Keys must conform to regexp:  [\p{Ll}\p{Lo}][\p{Ll}\p{Lo}\p{N}_-]{0,62}
  // * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
  // * Both keys and values are additionally constrained to be <= 128 bytes in
  // size.
  map<string, string> labels = 17;

  // The [regional endpoint]
  // (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
  // contains this job.
  string location = 18;

  // This field may be mutated by the Cloud Dataflow service;
  // callers cannot mutate it.
  repeated ExecutionStageState stage_states = 20;

  // This field is populated by the Dataflow service to support filtering jobs
  // by the metadata values provided here. Populated for ListJobs and all GetJob
  // views SUMMARY and higher.
  JobMetadata job_metadata = 21;

  // The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
  // Flexible resource scheduling jobs are started with some delay after job
  // creation, so start_time is unset before start and is updated when the
  // job is started by the Cloud Dataflow service. For other jobs, start_time
  // always equals create_time and is immutable and set by the Cloud Dataflow
  // service.
  google.protobuf.Timestamp start_time = 22;

  // If this is specified, the job's initial state is populated from the given
  // snapshot.
  string created_from_snapshot_id = 23;

  // Reserved for future use. This field is set only in responses from the
  // server; it is ignored if it is set in any requests.
  bool satisfies_pzs = 25;
}

// Metadata for a Datastore connector used by the job.
//
// Used as the element type of `JobMetadata.datastore_details`.
message DatastoreIODetails {
  // Namespace used in the connection.
  string namespace = 1;

  // ProjectId accessed in the connection.
  string project_id = 2;
}

// Metadata for a Pub/Sub connector used by the job.
//
// Used as the element type of `JobMetadata.pubsub_details`.
message PubSubIODetails {
  // Topic accessed in the connection.
  string topic = 1;

  // Subscription used in the connection.
  string subscription = 2;
}

// Metadata for a File connector used by the job.
//
// Used as the element type of `JobMetadata.file_details`.
message FileIODetails {
  // File pattern used to access files by the connector.
  string file_pattern = 1;
}

// Metadata for a Cloud Bigtable connector used by the job.
//
// Used as the element type of `JobMetadata.big_table_details`.
message BigTableIODetails {
  // ProjectId accessed in the connection.
  string project_id = 1;

  // InstanceId accessed in the connection.
  string instance_id = 2;

  // TableId accessed in the connection.
  string table_id = 3;
}

// Metadata for a BigQuery connector used by the job.
//
// Used as the element type of `JobMetadata.bigquery_details`.
message BigQueryIODetails {
  // Table accessed in the connection.
  string table = 1;

  // Dataset accessed in the connection.
  string dataset = 2;

  // Project accessed in the connection.
  string project_id = 3;

  // Query used to access data in the connection.
  string query = 4;
}

// Metadata for a Spanner connector used by the job.
//
// Used as the element type of `JobMetadata.spanner_details`.
message SpannerIODetails {
  // ProjectId accessed in the connection.
  string project_id = 1;

  // InstanceId accessed in the connection.
  string instance_id = 2;

  // DatabaseId accessed in the connection.
  string database_id = 3;
}

// The version of the SDK used to run the job.
//
// Used as the type of `JobMetadata.sdk_version`.
message SdkVersion {
  // The support status of the SDK used to run the job.
  //
  // The value names carry no type prefix; because the enum is nested, they
  // are scoped to `SdkVersion` rather than to the package.
  enum SdkSupportStatus {
    // Cloud Dataflow is unaware of this version. This is the zero value, and
    // therefore the proto3 default when the field is not set.
    UNKNOWN = 0;

    // This is a known version of an SDK, and is supported.
    SUPPORTED = 1;

    // A newer version of the SDK family exists, and an update is recommended.
    STALE = 2;

    // This version of the SDK is deprecated and will eventually be
    // unsupported.
    DEPRECATED = 3;

    // Support for this SDK version has ended and it should no longer be used.
    UNSUPPORTED = 4;
  }

  // The version of the SDK used to run the job.
  string version = 1;

  // A readable string describing the version of the SDK.
  string version_display_name = 2;

  // The support status for this SDK version.
  SdkSupportStatus sdk_support_status = 3;
}

// Metadata available primarily for filtering jobs. Will be included in the
// ListJobs response and Job SUMMARY view.
//
// Used as the type of `Job.job_metadata`.
message JobMetadata {
  // The SDK version used to run the job.
  SdkVersion sdk_version = 1;

  // Identification of a Spanner source used in the Dataflow job.
  repeated SpannerIODetails spanner_details = 2;

  // Identification of a BigQuery source used in the Dataflow job.
  repeated BigQueryIODetails bigquery_details = 3;

  // Identification of a Cloud Bigtable source used in the Dataflow job.
  repeated BigTableIODetails big_table_details = 4;

  // Identification of a Pub/Sub source used in the Dataflow job.
  repeated PubSubIODetails pubsub_details = 5;

  // Identification of a File source used in the Dataflow job.
  repeated FileIODetails file_details = 6;

  // Identification of a Datastore source used in the Dataflow job.
  repeated DatastoreIODetails datastore_details = 7;
}

// A message describing the state of a particular execution stage.
//
// Used as the element type of `Job.stage_states`.
message ExecutionStageState {
  // The name of the execution stage.
  string execution_stage_name = 1;

  // Execution stage states allow the same set of values as JobState.
  JobState execution_stage_state = 2;

  // The time at which the stage transitioned to this state.
  google.protobuf.Timestamp current_state_time = 3;
}

// Additional information about how a Cloud Dataflow job will be executed that
// isn't contained in the submitted job.
//
// Used as the type of `Job.execution_info`, which is documented as
// deprecated.
message JobExecutionInfo {
  // A mapping from each stage to the information about that stage.
  // NOTE(review): the string key presumably holds the stage name -- confirm.
  map<string, JobExecutionStageInfo> stages = 1;
}

// Contains information about how a particular
// [google.dataflow.v1beta3.Step][google.dataflow.v1beta3.Step] will be
// executed.
//
// Used as the value type of the `JobExecutionInfo.stages` map.
message JobExecutionStageInfo {
  // The steps associated with the execution stage.
  // Note that stages may have several steps, and that a given step
  // might be run by more than one stage.
  repeated string step_name = 1;
}

// Describes the overall state of a
// [google.dataflow.v1beta3.Job][google.dataflow.v1beta3.Job].
//
// Used as the type of `Job.current_state`, `Job.requested_state` and
// `ExecutionStageState.execution_stage_state`.
enum JobState {
  // The job's run state isn't specified. This is the zero value, and
  // therefore the proto3 default when the field is not set.
  JOB_STATE_UNKNOWN = 0;

  // `JOB_STATE_STOPPED` indicates that the job has not
  // yet started to run.
  JOB_STATE_STOPPED = 1;

  // `JOB_STATE_RUNNING` indicates that the job is currently running.
  JOB_STATE_RUNNING = 2;

  // `JOB_STATE_DONE` indicates that the job has successfully completed.
  // This is a terminal job state.  This state may be set by the Cloud Dataflow
  // service, as a transition from `JOB_STATE_RUNNING`. It may also be set via a
  // Cloud Dataflow `UpdateJob` call, if the job has not yet reached a terminal
  // state.
  JOB_STATE_DONE = 3;

  // `JOB_STATE_FAILED` indicates that the job has failed.  This is a
  // terminal job state.  This state may only be set by the Cloud Dataflow
  // service, and only as a transition from `JOB_STATE_RUNNING`.
  JOB_STATE_FAILED = 4;

  // `JOB_STATE_CANCELLED` indicates that the job has been explicitly
  // cancelled. This is a terminal job state. This state may only be
  // set via a Cloud Dataflow `UpdateJob` call, and only if the job has not
  // yet reached another terminal state.
  JOB_STATE_CANCELLED = 5;

  // `JOB_STATE_UPDATED` indicates that the job was successfully updated,
  // meaning that this job was stopped and another job was started, inheriting
  // state from this one. This is a terminal job state. This state may only be
  // set by the Cloud Dataflow service, and only as a transition from
  // `JOB_STATE_RUNNING`.
  JOB_STATE_UPDATED = 6;

  // `JOB_STATE_DRAINING` indicates that the job is in the process of draining.
  // A draining job has stopped pulling from its input sources and is processing
  // any data that remains in-flight. This state may be set via a Cloud Dataflow
  // `UpdateJob` call, but only as a transition from `JOB_STATE_RUNNING`. Jobs
  // that are draining may only transition to `JOB_STATE_DRAINED`,
  // `JOB_STATE_CANCELLED`, or `JOB_STATE_FAILED`.
  JOB_STATE_DRAINING = 7;

  // `JOB_STATE_DRAINED` indicates that the job has been drained.
  // A drained job terminated by stopping pulling from its input sources and
  // processing any data that remained in-flight when draining was requested.
  // This state is a terminal state, may only be set by the Cloud Dataflow
  // service, and only as a transition from `JOB_STATE_DRAINING`.
  JOB_STATE_DRAINED = 8;

  // `JOB_STATE_PENDING` indicates that the job has been created but is not yet
  // running.  Jobs that are pending may only transition to `JOB_STATE_RUNNING`,
  // or `JOB_STATE_FAILED`.
  JOB_STATE_PENDING = 9;

  // `JOB_STATE_CANCELLING` indicates that the job has been explicitly cancelled
  // and is in the process of stopping.  Jobs that are cancelling may only
  // transition to `JOB_STATE_CANCELLED` or `JOB_STATE_FAILED`.
  JOB_STATE_CANCELLING = 10;

  // `JOB_STATE_QUEUED` indicates that the job has been created but is being
  // delayed until launch. Jobs that are queued may only transition to
  // `JOB_STATE_PENDING` or `JOB_STATE_CANCELLED`.
  JOB_STATE_QUEUED = 11;

  // `JOB_STATE_RESOURCE_CLEANING_UP` indicates that the batch job's associated
  // resources are currently being cleaned up after a successful run.
  // Currently, this is an opt-in feature; please reach out to the Cloud
  // support team if you are interested.
  JOB_STATE_RESOURCE_CLEANING_UP = 12;
}

// The data within all Job events.
//
// Wraps a single `Job` message as the event payload.
message JobEventData {
  // The Job event payload.
  Job payload = 1;
}