# Source code for apache_beam.transforms.xlang.io

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# NOTE: This file contains autogenerated external transform(s)
# and should not be edited by hand.
# Refer to gen_xlang_wrappers.py for more info.

"""Cross-language transforms in this module can be imported from the
:py:mod:`apache_beam.io` package."""

# pylint:disable=line-too-long

from apache_beam.transforms.external import BeamJarExpansionService
from apache_beam.transforms.external_transform_provider import ExternalTransform


class GenerateSequence(ExternalTransform):
  """Emit a PCollection of Beam Rows holding one INT64 field named "value".

  Counting begins at the given "start" value and continues either up to the
  given "end" or until 2^63 - 1. Leaving "end" unspecified produces an
  unbounded PCollection, and unbounded sequences can specify a "rate" for
  output elements.

  In all cases the numbers are generated in parallel, so there is no inherent
  ordering between the generated values.
  """

  identifier = "beam:schematransform:org.apache.beam:generate_sequence:v1"

  def __init__(self, start, end=None, rate=None, expansion_service=None):
    """Store the sequence configuration and delegate to the base transform.

    :param start: (int64)
      The minimum number to generate (inclusive).
    :param end: (int64)
      The maximum number to generate (exclusive). Will be an unbounded
      sequence if left unspecified.
    :param rate: (Row(elements=<class 'int64'>, seconds=typing.Optional[int64]))
      Specifies the rate to generate a given number of elements per a given
      number of seconds. Applicable only to unbounded sequences.
    :param expansion_service:
      Forwarded unchanged to the base class initializer.
    """
    # Assigned before the base initializer runs, as in the original ordering.
    # NOTE(review): presumably the base class falls back to this attribute
    # when ``expansion_service`` is None -- confirm against ExternalTransform.
    jar_service = BeamJarExpansionService(
        "sdks:java:io:expansion-service:shadowJar")
    self.default_expansion_service = jar_service

    # Keys are listed in the same order the keyword arguments were originally
    # passed, with ``expansion_service`` last.
    transform_config = {"start": start, "end": end, "rate": rate}
    super().__init__(**transform_config, expansion_service=expansion_service)
class TfrecordRead(ExternalTransform):
  """Wrapper for the transform registered under the identifier
  ``beam:schematransform:org.apache.beam:tfrecord_read:v1``.

  Unless an expansion service is supplied, the instance carries a default
  ``BeamJarExpansionService`` built for the
  ``sdks:java:io:expansion-service:shadowJar`` target.
  """

  identifier = "beam:schematransform:org.apache.beam:tfrecord_read:v1"

  def __init__(
      self,
      compression,
      file_pattern,
      validate,
      error_handling=None,
      expansion_service=None):
    """Store the read configuration and delegate to the base transform.

    :param compression: (str)
      Decompression type to use when reading input files.
    :param file_pattern: (str)
      Filename or file pattern used to find input files.
    :param validate: (boolean)
      Validate file pattern.
    :param error_handling: (Row(output=<class 'str'>))
      This option specifies whether and where to output unwritable rows.
    :param expansion_service:
      Forwarded unchanged to the base class initializer.
    """
    # Assigned before the base initializer runs, as in the original ordering.
    # NOTE(review): presumably the base class falls back to this attribute
    # when ``expansion_service`` is None -- confirm against ExternalTransform.
    jar_service = BeamJarExpansionService(
        "sdks:java:io:expansion-service:shadowJar")
    self.default_expansion_service = jar_service

    # Keys are listed in the same order the keyword arguments were originally
    # passed, with ``expansion_service`` last.
    transform_config = {
        "compression": compression,
        "file_pattern": file_pattern,
        "validate": validate,
        "error_handling": error_handling,
    }
    super().__init__(**transform_config, expansion_service=expansion_service)
class TfrecordWrite(ExternalTransform):
  """Wrapper for the transform registered under the identifier
  ``beam:schematransform:org.apache.beam:tfrecord_write:v1``.

  Unless an expansion service is supplied, the instance carries a default
  ``BeamJarExpansionService`` built for the
  ``sdks:java:io:expansion-service:shadowJar`` target.
  """

  identifier = "beam:schematransform:org.apache.beam:tfrecord_write:v1"

  def __init__(
      self,
      compression,
      num_shards,
      output_prefix,
      error_handling=None,
      filename_suffix=None,
      no_spilling=None,
      shard_template=None,
      expansion_service=None):
    """Store the write configuration and delegate to the base transform.

    :param compression: (str)
      Option to indicate the output sink's compression type. Default is NONE.
    :param num_shards: (int32)
      The number of shards to use, or 0 for automatic.
    :param output_prefix: (str)
      The directory to which files will be written.
    :param error_handling: (Row(output=<class 'str'>))
      This option specifies whether and where to output unwritable rows.
    :param filename_suffix: (str)
      The suffix of each file written, combined with prefix and shardTemplate.
    :param no_spilling: (boolean)
      Whether to skip the spilling of data caused by having
      maxNumWritersPerBundle.
    :param shard_template: (str)
      The shard template of each file written, combined with prefix and
      suffix.
    :param expansion_service:
      Forwarded unchanged to the base class initializer.
    """
    # Assigned before the base initializer runs, as in the original ordering.
    # NOTE(review): presumably the base class falls back to this attribute
    # when ``expansion_service`` is None -- confirm against ExternalTransform.
    jar_service = BeamJarExpansionService(
        "sdks:java:io:expansion-service:shadowJar")
    self.default_expansion_service = jar_service

    # Keys are listed in the same order the keyword arguments were originally
    # passed, with ``expansion_service`` last.
    transform_config = {
        "compression": compression,
        "num_shards": num_shards,
        "output_prefix": output_prefix,
        "error_handling": error_handling,
        "filename_suffix": filename_suffix,
        "no_spilling": no_spilling,
        "shard_template": shard_template,
    }
    super().__init__(**transform_config, expansion_service=expansion_service)