Package org.apache.beam.sdk.io.iceberg
Class IcebergWriteSchemaTransformProvider.Configuration
java.lang.Object
org.apache.beam.sdk.io.iceberg.IcebergWriteSchemaTransformProvider.Configuration
- Enclosing class:
IcebergWriteSchemaTransformProvider
@DefaultSchema(AutoValueSchema.class)
public abstract static class IcebergWriteSchemaTransformProvider.Configuration
extends Object
-
Constructor Details
-
Configuration
public Configuration()
-
-
Method Details
-
builder
-
getTable
@SchemaFieldDescription("A fully-qualified table identifier. You may also provide a template to write to multiple dynamic destinations, for example: `dataset.my_{col1}_{col2.nested}_table`.") public abstract String getTable() -
getCatalogName
@SchemaFieldDescription("Name of the catalog containing the table.") public abstract @Nullable String getCatalogName() -
getCatalogProperties
@SchemaFieldDescription("Properties used to set up the Iceberg catalog.") public abstract @Nullable Map<String,String> getCatalogProperties() -
getConfigProperties
@SchemaFieldDescription("Properties passed to the Hadoop Configuration.") public abstract @Nullable Map<String,String> getConfigProperties() -
getTriggeringFrequencySeconds
@SchemaFieldDescription("For a streaming pipeline, sets the frequency, in seconds, at which snapshots are produced.") public abstract @Nullable Integer getTriggeringFrequencySeconds() -
getDirectWriteByteLimit
@SchemaFieldDescription("For a streaming pipeline, sets the limit, in bytes, for lifting bundles into the direct write path.") public abstract @Nullable Integer getDirectWriteByteLimit() -
getKeep
@SchemaFieldDescription("A list of field names to keep in the input record. All other fields are dropped before writing. Mutually exclusive with 'drop' and 'only'.") public abstract @Nullable List<String> getKeep() -
getDrop
@SchemaFieldDescription("A list of field names to drop from the input record before writing. Mutually exclusive with 'keep' and 'only'.") public abstract @Nullable List<String> getDrop() -
getOnly
@SchemaFieldDescription("The name of a single record field that should be written. Mutually exclusive with 'keep' and 'drop'.") public abstract @Nullable String getOnly() -
getPartitionFields
@SchemaFieldDescription("Fields used to create a partition spec that is applied when tables are created. For a field 'foo', the available partition transforms are:\n\n- `foo`\n- `truncate(foo, N)`\n- `bucket(foo, N)`\n- `hour(foo)`\n- `day(foo)`\n- `month(foo)`\n- `year(foo)`\n- `void(foo)`\n\nFor more information on partition transforms, please visit https://iceberg.apache.org/spec/#partition-transforms.") public abstract @Nullable List<String> getPartitionFields() -
getTableProperties
@SchemaFieldDescription("Iceberg table properties to be set on the table when it is created.\nFor more information on table properties, please visit https://iceberg.apache.org/docs/latest/configuration/#table-properties.") public abstract @Nullable Map<String,String> getTableProperties() -
getSortFields
@SchemaFieldDescription("Fields used to set the table's sort order, applied when the table is created. Each entry has the form `<term> [asc|desc] [nulls first|nulls last]`, where `<term>` is a field name or one of the partition transforms (e.g. `bucket(col, 4)`, `day(ts)`). Direction defaults to ascending; null order defaults to nulls-first for ascending and nulls-last for descending. Note: this sets the table's declared sort order as metadata; it does not cause Beam to physically sort records before writing.\nFor more information on sort orders, please visit https://iceberg.apache.org/spec/#sort-orders.") public abstract @Nullable List<String> getSortFields() -
getDistributionMode
@SchemaFieldDescription("Defines distribution of write data. Supported distributions:\n- none: don't shuffle rows (default)\n- hash: shuffle rows by partition key before writing data") public abstract @Nullable String getDistributionMode() -
getAutosharding
@SchemaFieldDescription("Enables dynamic sharding to automatically adjust the number of parallel writers based on data volume. It handles data skew by further sub-dividing partitions into multiple shards to prevent bottlenecks during high-throughput writes. Only available with 'hash' distribution mode.") public abstract @Nullable Boolean getAutosharding() -
getIcebergCatalog
-