Source code for apache_beam.transforms.fully_qualified_named_transform

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# pytype: skip-file

import contextlib
import fnmatch
import importlib

from apache_beam import coders
from apache_beam.portability.api import external_transforms_pb2
from apache_beam.pvalue import Row
from apache_beam.transforms import ptransform
from apache_beam.typehints.native_type_compatibility import convert_to_typing_type
from apache_beam.typehints.schemas import named_fields_to_schema
from apache_beam.typehints.trivial_inference import instance_to_type

PYTHON_FULLY_QUALIFIED_NAMED_TRANSFORM_URN = (
    'beam:transforms:python:fully_qualified_named')


[docs]@ptransform.PTransform.register_urn( PYTHON_FULLY_QUALIFIED_NAMED_TRANSFORM_URN, external_transforms_pb2.ExternalConfigurationPayload) class FullyQualifiedNamedTransform(ptransform.PTransform): _FILTER_GLOB = None
[docs] @classmethod @contextlib.contextmanager def with_filter(cls, filter): old_filter, cls._FILTER_GLOB = cls._FILTER_GLOB, filter yield cls._FILTER_GLOB = old_filter
def __init__(self, constructor, args, kwargs): self._constructor = constructor self._args = args self._kwargs = kwargs
[docs] def expand(self, pinput): if self._constructor in ('__callable__', '__constructor__'): self._check_allowed(self._constructor) if self._args: source, *args = tuple(self._args) kwargs = self._kwargs else: args = self._args kwargs = dict(self._kwargs) source = kwargs.pop('source') if self._constructor == '__constructor__': transform = source(*args, **kwargs) else: transform = ptransform._PTransformFnPTransform(source, *args, **kwargs) else: transform = self._resolve(self._constructor)(*self._args, **self._kwargs) return pinput | transform
@classmethod def _check_allowed(cls, fully_qualified_name): if not cls._FILTER_GLOB or not fnmatch.fnmatchcase(fully_qualified_name, cls._FILTER_GLOB): raise ValueError( f'Fully qualifed name "{fully_qualified_name}" ' f'not allowed by filter {cls._FILTER_GLOB}.') @classmethod def _resolve(cls, fully_qualified_name): cls._check_allowed(fully_qualified_name) o = None path = '' for segment in fully_qualified_name.split('.'): path = '.'.join([path, segment]) if path else segment if o is not None and hasattr(o, segment): o = getattr(o, segment) else: o = importlib.import_module(path) return o
[docs] def to_runner_api_parameter(self, unused_context): _args_schema = named_fields_to_schema([ (f'arg{ix}', convert_to_typing_type(instance_to_type(value))) for (ix, value) in enumerate(self._args) ]) _kwargs_schema = named_fields_to_schema([ (key, convert_to_typing_type(instance_to_type(value))) for (key, value) in self._kwargs.items() ]) payload_schema = named_fields_to_schema({ 'constructor': str, 'args': _args_schema, 'kwargs': _kwargs_schema, }) return ( PYTHON_FULLY_QUALIFIED_NAMED_TRANSFORM_URN, external_transforms_pb2.ExternalConfigurationPayload( schema=payload_schema, payload=coders.RowCoder(payload_schema).encode( Row( constructor=self._constructor, args=Row( **{ f'arg{ix}': arg for (ix, arg) in enumerate(self._args) }), kwargs=Row(**self._kwargs)), )))
[docs] @staticmethod def from_runner_api_parameter(unused_ptransform, payload, unused_context): row = coders.RowCoder(payload.schema).decode(payload.payload) maybe_as_dict = lambda x: x._asdict() if x else {} return FullyQualifiedNamedTransform( row.constructor, tuple(getattr(row, 'args', ())), maybe_as_dict(getattr(row, 'kwargs', None)))