# Source code for apache_beam.transforms.xlang.io

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# NOTE: This file contains autogenerated external transform(s)
# and should not be edited by hand.
# Refer to gen_xlang_wrappers.py for more info.

"""Cross-language transforms in this module can be imported from the
:py:mod:`apache_beam.io` package."""

# pylint:disable=line-too-long

from apache_beam.transforms.external import BeamJarExpansionService
from apache_beam.transforms.external_transform_provider import ExternalTransform



[docs]
class GenerateSequence(ExternalTransform):
  """
  Produces a PCollection of Beam Rows; every Row holds one INT64 field named
  "value". Counting begins at the supplied "start" and continues either up to
  the supplied "end" or until 2^63 - 1.
  Leaving "end" unspecified yields an unbounded PCollection.
  For unbounded sequences a "rate" may be given for the output elements.
  The numbers are always generated in parallel, so no inherent ordering
  exists between the generated values.
  """
  identifier = "beam:schematransform:org.apache.beam:generate_sequence:v1"

  def __init__(self, start, end=None, rate=None, expansion_service=None):
    """
    :param start: (int64)
      Smallest number to generate (inclusive).
    :param end: (int64)
      Largest number to generate (exclusive). When left unspecified the
      sequence is unbounded.
    :param rate: (Row(elements=<class 'int64'>, seconds=typing.Optional[int64]))
      Rate at which to generate a given number of elements per a given
      number of seconds. Only applicable to unbounded sequences.
    """
    # Same statement order as before: install the default service on the
    # instance first, then delegate to the ExternalTransform initializer.
    jar_service = BeamJarExpansionService(
        "sdks:java:io:expansion-service:shadowJar")
    self.default_expansion_service = jar_service

    # Keyword order mirrors the original call, with expansion_service last.
    init_kwargs = {
        "start": start,
        "end": end,
        "rate": rate,
        "expansion_service": expansion_service,
    }
    super().__init__(**init_kwargs)



[docs]
class TfrecordRead(ExternalTransform):
  """
  Cross-language wrapper for the schema transform registered under
  ``beam:schematransform:org.apache.beam:tfrecord_read:v1``.
  """

  identifier = "beam:schematransform:org.apache.beam:tfrecord_read:v1"

  def __init__(
      self,
      compression,
      file_pattern,
      validate,
      error_handling=None,
      expansion_service=None):
    """
    :param compression: (str)
      Decompression type applied when reading the input files.
    :param file_pattern: (str)
      Filename or file pattern used to locate the input files.
    :param validate: (boolean)
      Validate file pattern.
    :param error_handling: (Row(output=<class 'str'>))
      Specifies whether and where to output unwritable rows.
      NOTE(review): wording comes from the generated description and says
      "unwritable" although this is a read transform -- confirm upstream.
    """
    # Same statement order as before: install the default service on the
    # instance first, then delegate to the ExternalTransform initializer.
    jar_service = BeamJarExpansionService(
        "sdks:java:io:expansion-service:shadowJar")
    self.default_expansion_service = jar_service

    # Keyword order mirrors the original call, with expansion_service last.
    init_kwargs = {
        "compression": compression,
        "file_pattern": file_pattern,
        "validate": validate,
        "error_handling": error_handling,
        "expansion_service": expansion_service,
    }
    super().__init__(**init_kwargs)




[docs]
class TfrecordWrite(ExternalTransform):
  """
  Cross-language wrapper for the schema transform registered under
  ``beam:schematransform:org.apache.beam:tfrecord_write:v1``.
  """

  identifier = "beam:schematransform:org.apache.beam:tfrecord_write:v1"

  def __init__(
      self,
      compression,
      num_shards,
      output_prefix,
      error_handling=None,
      filename_suffix=None,
      max_num_writers_per_bundle=None,
      no_spilling=None,
      shard_template=None,
      expansion_service=None):
    """
    :param compression: (str)
      Compression type of the output sink. Default is NONE.
    :param num_shards: (int32)
      Number of shards to use; 0 means automatic.
    :param output_prefix: (str)
      Directory to which the files will be written.
    :param error_handling: (Row(output=<class 'str'>))
      Specifies whether and where to output unwritable rows.
    :param filename_suffix: (str)
      Suffix of each written file, combined with prefix and shardTemplate.
    :param max_num_writers_per_bundle: (int32)
      Maximum number of writers created in a bundle before spilling to
      shuffle.
    :param no_spilling: (boolean)
      Whether to skip the spilling of data caused by having
      maxNumWritersPerBundle.
    :param shard_template: (str)
      Shard template of each written file, combined with prefix and suffix.
    """
    # Same statement order as before: install the default service on the
    # instance first, then delegate to the ExternalTransform initializer.
    jar_service = BeamJarExpansionService(
        "sdks:java:io:expansion-service:shadowJar")
    self.default_expansion_service = jar_service

    # Keyword order mirrors the original call, with expansion_service last.
    init_kwargs = {
        "compression": compression,
        "num_shards": num_shards,
        "output_prefix": output_prefix,
        "error_handling": error_handling,
        "filename_suffix": filename_suffix,
        "max_num_writers_per_bundle": max_num_writers_per_bundle,
        "no_spilling": no_spilling,
        "shard_template": shard_template,
        "expansion_service": expansion_service,
    }
    super().__init__(**init_kwargs)