Class BigQueryWriteConfiguration

java.lang.Object
org.apache.beam.sdk.io.gcp.bigquery.providers.BigQueryWriteConfiguration

@DefaultSchema(AutoValueSchema.class) public abstract class BigQueryWriteConfiguration extends Object
Configuration for writing to BigQuery with SchemaTransforms. Used by BigQueryStorageWriteApiSchemaTransformProvider and BigQueryFileLoadsSchemaTransformProvider.
  • Field Details

  • Constructor Details

    • BigQueryWriteConfiguration

      public BigQueryWriteConfiguration()
  • Method Details

    • validate

      public void validate()
    • builder

      public static BigQueryWriteConfiguration.Builder builder()
      Instantiates a BigQueryWriteConfiguration.Builder instance.
    • getTable

      @SchemaFieldDescription("The BigQuery table to write to. Format: [${PROJECT}:]${DATASET}.${TABLE}") public abstract String getTable()
    • getCreateDisposition

      @SchemaFieldDescription("Optional field that specifies whether the job is allowed to create new tables. The following values are supported: CREATE_IF_NEEDED (the job may create the table), CREATE_NEVER (the job must fail if the table does not exist already).") public abstract @Nullable String getCreateDisposition()
    • getWriteDisposition

      @SchemaFieldDescription("Specifies the action that occurs if the destination table already exists. The following values are supported: WRITE_TRUNCATE (overwrites the table data), WRITE_APPEND (appends the data to the table), WRITE_EMPTY (job must fail if the table is not empty).") public abstract @Nullable String getWriteDisposition()
    • getTriggeringFrequencySeconds

      @SchemaFieldDescription("Determines how often to \'commit\' progress into BigQuery. Default is every 5 seconds.") public abstract @Nullable Long getTriggeringFrequencySeconds()
    • getUseAtLeastOnceSemantics

      @SchemaFieldDescription("This option enables lower latency for insertions to BigQuery but may occasionally duplicate data elements.") public abstract @Nullable Boolean getUseAtLeastOnceSemantics()
    • getAutoSharding

      @SchemaFieldDescription("This option enables using a dynamically determined number of Storage Write API streams to write to BigQuery. Only applicable to unbounded data.") public abstract @Nullable Boolean getAutoSharding()
    • getNumStreams

      @SchemaFieldDescription("Specifies the number of write streams that the Storage API sink will use. This parameter is only applicable when writing unbounded data.") public abstract @Nullable Integer getNumStreams()
    • getKmsKey

      @SchemaFieldDescription("Use this Cloud KMS key to encrypt your data") public abstract @Nullable String getKmsKey()
    • getErrorHandling

      @SchemaFieldDescription("This option specifies whether and where to output unwritable rows.") public abstract @Nullable BigQueryWriteConfiguration.ErrorHandling getErrorHandling()
    • getUseCdcWrites

      @SchemaFieldDescription("This option enables the use of BigQuery CDC functionality. The expected PCollection should contain Beam Rows with a schema wrapping the record to be inserted and adding the CDC info similar to: {row_mutation_info: {mutation_type:\"...\", change_sequence_number:\"...\"}, record: {...}}") public abstract @Nullable Boolean getUseCdcWrites()
    • getPrimaryKey

      @SchemaFieldDescription("If CREATE_IF_NEEDED disposition is set, BigQuery table(s) will be created with these columns as the primary key. Required when CDC writes are enabled with CREATE_IF_NEEDED.") public abstract @Nullable List<String> getPrimaryKey()
    • getKeep

      @SchemaFieldDescription("A list of field names to keep in the input record. All other fields are dropped before writing. Is mutually exclusive with \'drop\' and \'only\'.") public abstract @Nullable List<String> getKeep()
    • getDrop

      @SchemaFieldDescription("A list of field names to drop from the input record before writing. Is mutually exclusive with \'keep\' and \'only\'.") public abstract @Nullable List<String> getDrop()
    • getOnly

      @SchemaFieldDescription("The name of a single record field that should be written. Is mutually exclusive with \'keep\' and \'drop\'.") public abstract @Nullable String getOnly()
    • getClusteringFields

      @SchemaFieldDescription("A list of columns to cluster the BigQuery table by.") public abstract @Nullable List<String> getClusteringFields()