Class DoFnOperator<PreInputT,InputT,OutputT>

java.lang.Object
org.apache.flink.streaming.api.operators.AbstractStreamOperator<WindowedValue<OutputT>>
org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator<PreInputT,InputT,OutputT>
Type Parameters:
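PreInputT - the type of the elements received by the operator, before preProcess converts them to InputT
InputT - the input type of the DoFn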
OutputT - the output type of the DoFn
All Implemented Interfaces:
Serializable, org.apache.flink.api.common.state.CheckpointListener, org.apache.flink.streaming.api.operators.Input<WindowedValue<PreInputT>>, org.apache.flink.streaming.api.operators.KeyContext, org.apache.flink.streaming.api.operators.KeyContextHandler, org.apache.flink.streaming.api.operators.OneInputStreamOperator<WindowedValue<PreInputT>,WindowedValue<OutputT>>, org.apache.flink.streaming.api.operators.SetupableStreamOperator<WindowedValue<OutputT>>, org.apache.flink.streaming.api.operators.StreamOperator<WindowedValue<OutputT>>, org.apache.flink.streaming.api.operators.StreamOperatorStateHandler.CheckpointedStreamOperator, org.apache.flink.streaming.api.operators.Triggerable<FlinkKey,org.apache.beam.runners.core.TimerInternals.TimerData>, org.apache.flink.streaming.api.operators.TwoInputStreamOperator<WindowedValue<PreInputT>,RawUnionValue,WindowedValue<OutputT>>
Direct Known Subclasses:
ExecutableStageDoFnOperator, PartialReduceBundleOperator, SplittableDoFnOperator, WindowDoFnOperator

public class DoFnOperator<PreInputT,InputT,OutputT> extends org.apache.flink.streaming.api.operators.AbstractStreamOperator<WindowedValue<OutputT>> implements org.apache.flink.streaming.api.operators.OneInputStreamOperator<WindowedValue<PreInputT>,WindowedValue<OutputT>>, org.apache.flink.streaming.api.operators.TwoInputStreamOperator<WindowedValue<PreInputT>,RawUnionValue,WindowedValue<OutputT>>, org.apache.flink.streaming.api.operators.Triggerable<FlinkKey,org.apache.beam.runners.core.TimerInternals.TimerData>
Flink operator for executing DoFns.
See Also:
  • Serialized Form
  • Field Details

    • doFn

      protected DoFn<InputT,OutputT> doFn
    • serializedOptions

      protected final org.apache.beam.runners.core.construction.SerializablePipelineOptions serializedOptions
    • mainOutputTag

      protected final TupleTag<OutputT> mainOutputTag
    • additionalOutputTags

      protected final List<TupleTag<?>> additionalOutputTags
    • sideInputs

      protected final Collection<PCollectionView<?>> sideInputs
    • sideInputTagMapping

      protected final Map<Integer,PCollectionView<?>> sideInputTagMapping
    • windowingStrategy

      protected final WindowingStrategy<?,?> windowingStrategy
    • outputManagerFactory

      protected final org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator.OutputManagerFactory<OutputT> outputManagerFactory
    • doFnRunner

      protected transient org.apache.beam.runners.core.DoFnRunner<InputT,OutputT> doFnRunner
    • pushbackDoFnRunner

      protected transient org.apache.beam.runners.core.PushbackSideInputDoFnRunner<InputT,OutputT> pushbackDoFnRunner
    • bufferingDoFnRunner

      protected transient BufferingDoFnRunner<InputT,OutputT> bufferingDoFnRunner
    • sideInputHandler

      protected transient org.apache.beam.runners.core.SideInputHandler sideInputHandler
    • sideInputReader

      protected transient org.apache.beam.runners.core.SideInputReader sideInputReader
    • outputManager

      protected transient DoFnOperator.BufferedOutputManager<OutputT> outputManager
    • keyedStateInternals

      protected transient FlinkStateInternals<?> keyedStateInternals
    • timerInternals

      protected transient DoFnOperator<PreInputT,InputT,OutputT>.FlinkTimerInternals timerInternals
    • stepName

      protected final String stepName
    • timerService

      protected transient org.apache.flink.streaming.api.operators.InternalTimerService<org.apache.beam.runners.core.TimerInternals.TimerData> timerService
  • Constructor Details

  • Method Details

    • getDoFn

      protected DoFn<InputT,OutputT> getDoFn()
    • preProcess

      protected Iterable<WindowedValue<InputT>> preProcess(WindowedValue<PreInputT> input)
    • createWrappingDoFnRunner

      protected org.apache.beam.runners.core.DoFnRunner<InputT,OutputT> createWrappingDoFnRunner(org.apache.beam.runners.core.DoFnRunner<InputT,OutputT> wrappedRunner, org.apache.beam.runners.core.StepContext stepContext)
    • setup

      public void setup(org.apache.flink.streaming.runtime.tasks.StreamTask<?,?> containingTask, org.apache.flink.streaming.api.graph.StreamConfig config, org.apache.flink.streaming.api.operators.Output<org.apache.flink.streaming.runtime.streamrecord.StreamRecord<WindowedValue<OutputT>>> output)
      Specified by:
      setup in interface org.apache.flink.streaming.api.operators.SetupableStreamOperator<WindowedValue<OutputT>>
      Overrides:
      setup in class org.apache.flink.streaming.api.operators.AbstractStreamOperator<WindowedValue<OutputT>>
    • shoudBundleElements

      protected boolean shoudBundleElements()
    • initializeState

      public void initializeState(org.apache.flink.runtime.state.StateInitializationContext context) throws Exception
      Specified by:
      initializeState in interface org.apache.flink.streaming.api.operators.StreamOperatorStateHandler.CheckpointedStreamOperator
      Overrides:
      initializeState in class org.apache.flink.streaming.api.operators.AbstractStreamOperator<WindowedValue<OutputT>>
      Throws:
      Exception
    • getLockToAcquireForStateAccessDuringBundles

      protected Lock getLockToAcquireForStateAccessDuringBundles()
      Subclasses may provide a lock to ensure that the state backend is not accessed concurrently during bundle execution.
    • open

      public void open() throws Exception
      Specified by:
      open in interface org.apache.flink.streaming.api.operators.StreamOperator<WindowedValue<OutputT>>
      Overrides:
      open in class org.apache.flink.streaming.api.operators.AbstractStreamOperator<WindowedValue<OutputT>>
      Throws:
      Exception
    • finish

      public void finish() throws Exception
      Specified by:
      finish in interface org.apache.flink.streaming.api.operators.StreamOperator<WindowedValue<OutputT>>
      Overrides:
      finish in class org.apache.flink.streaming.api.operators.AbstractStreamOperator<WindowedValue<OutputT>>
      Throws:
      Exception
    • close

      public void close() throws Exception
      Specified by:
      close in interface org.apache.flink.streaming.api.operators.StreamOperator<WindowedValue<OutputT>>
      Overrides:
      close in class org.apache.flink.streaming.api.operators.AbstractStreamOperator<WindowedValue<OutputT>>
      Throws:
      Exception
    • numProcessingTimeTimers

      protected int numProcessingTimeTimers()
    • getEffectiveInputWatermark

      public long getEffectiveInputWatermark()
    • getCurrentOutputWatermark

      public long getCurrentOutputWatermark()
    • setPreBundleCallback

      protected final void setPreBundleCallback(Runnable callback)
    • setBundleFinishedCallback

      protected final void setBundleFinishedCallback(Runnable callback)
    • processElement

      public final void processElement(org.apache.flink.streaming.runtime.streamrecord.StreamRecord<WindowedValue<PreInputT>> streamRecord)
      Specified by:
      processElement in interface org.apache.flink.streaming.api.operators.Input<WindowedValue<PreInputT>>
    • processElement1

      public final void processElement1(org.apache.flink.streaming.runtime.streamrecord.StreamRecord<WindowedValue<PreInputT>> streamRecord) throws Exception
      Specified by:
      processElement1 in interface org.apache.flink.streaming.api.operators.TwoInputStreamOperator<WindowedValue<PreInputT>,RawUnionValue,WindowedValue<OutputT>>
      Throws:
      Exception
    • addSideInputValue

      protected void addSideInputValue(org.apache.flink.streaming.runtime.streamrecord.StreamRecord<RawUnionValue> streamRecord)
      Add the side input value. Here we are assuming that views have already been materialized and are sent over the wire as Iterable. Subclasses may elect to perform materialization in state and receive side input incrementally instead.
      Parameters:
      streamRecord - the stream record carrying the side input value as a RawUnionValue
    • processElement2

      public final void processElement2(org.apache.flink.streaming.runtime.streamrecord.StreamRecord<RawUnionValue> streamRecord) throws Exception
      Specified by:
      processElement2 in interface org.apache.flink.streaming.api.operators.TwoInputStreamOperator<WindowedValue<PreInputT>,RawUnionValue,WindowedValue<OutputT>>
      Throws:
      Exception
    • processWatermark

      public final void processWatermark(org.apache.flink.streaming.api.watermark.Watermark mark) throws Exception
      Specified by:
      processWatermark in interface org.apache.flink.streaming.api.operators.Input<WindowedValue<PreInputT>>
      Overrides:
      processWatermark in class org.apache.flink.streaming.api.operators.AbstractStreamOperator<WindowedValue<OutputT>>
      Throws:
      Exception
    • processWatermark1

      public final void processWatermark1(org.apache.flink.streaming.api.watermark.Watermark mark) throws Exception
      Specified by:
      processWatermark1 in interface org.apache.flink.streaming.api.operators.TwoInputStreamOperator<WindowedValue<PreInputT>,RawUnionValue,WindowedValue<OutputT>>
      Overrides:
      processWatermark1 in class org.apache.flink.streaming.api.operators.AbstractStreamOperator<WindowedValue<OutputT>>
      Throws:
      Exception
    • applyInputWatermarkHold

      public long applyInputWatermarkHold(long inputWatermark)
      Hook for applying a hold to the input watermark. By default, the input watermark is passed through unchanged.
    • applyOutputWatermarkHold

      public long applyOutputWatermarkHold(long currentOutputWatermark, long potentialOutputWatermark)
      Hook for applying a hold to the output watermark before it is sent out. Used to hold back the output watermark for delayed (asynchronous or buffered) processing.
      Parameters:
      currentOutputWatermark - the current output watermark
      potentialOutputWatermark - the potential new output watermark, which can be adjusted if needed. The input watermark hold has already been applied.
      Returns:
      The new output watermark which will be emitted.
    • processWatermark2

      public final void processWatermark2(org.apache.flink.streaming.api.watermark.Watermark mark) throws Exception
      Specified by:
      processWatermark2 in interface org.apache.flink.streaming.api.operators.TwoInputStreamOperator<WindowedValue<PreInputT>,RawUnionValue,WindowedValue<OutputT>>
      Overrides:
      processWatermark2 in class org.apache.flink.streaming.api.operators.AbstractStreamOperator<WindowedValue<OutputT>>
      Throws:
      Exception
    • scheduleForCurrentProcessingTime

      protected void scheduleForCurrentProcessingTime(org.apache.flink.api.common.operators.ProcessingTimeService.ProcessingTimeCallback callback)
    • invokeFinishBundle

      protected final void invokeFinishBundle()
    • prepareSnapshotPreBarrier

      public void prepareSnapshotPreBarrier(long checkpointId)
      Specified by:
      prepareSnapshotPreBarrier in interface org.apache.flink.streaming.api.operators.StreamOperator<WindowedValue<OutputT>>
      Overrides:
      prepareSnapshotPreBarrier in class org.apache.flink.streaming.api.operators.AbstractStreamOperator<WindowedValue<OutputT>>
    • snapshotState

      public void snapshotState(org.apache.flink.runtime.state.StateSnapshotContext context) throws Exception
      Specified by:
      snapshotState in interface org.apache.flink.streaming.api.operators.StreamOperatorStateHandler.CheckpointedStreamOperator
      Overrides:
      snapshotState in class org.apache.flink.streaming.api.operators.AbstractStreamOperator<WindowedValue<OutputT>>
      Throws:
      Exception
    • getBundleFinalizer

      public DoFn.BundleFinalizer getBundleFinalizer()
    • notifyCheckpointComplete

      public void notifyCheckpointComplete(long checkpointId) throws Exception
      Specified by:
      notifyCheckpointComplete in interface org.apache.flink.api.common.state.CheckpointListener
      Overrides:
      notifyCheckpointComplete in class org.apache.flink.streaming.api.operators.AbstractStreamOperator<WindowedValue<OutputT>>
      Throws:
      Exception
    • onEventTime

      public void onEventTime(org.apache.flink.streaming.api.operators.InternalTimer<FlinkKey,org.apache.beam.runners.core.TimerInternals.TimerData> timer)
      Specified by:
      onEventTime in interface org.apache.flink.streaming.api.operators.Triggerable<FlinkKey,org.apache.beam.runners.core.TimerInternals.TimerData>
    • onProcessingTime

      public void onProcessingTime(org.apache.flink.streaming.api.operators.InternalTimer<FlinkKey,org.apache.beam.runners.core.TimerInternals.TimerData> timer)
      Specified by:
      onProcessingTime in interface org.apache.flink.streaming.api.operators.Triggerable<FlinkKey,org.apache.beam.runners.core.TimerInternals.TimerData>
    • fireTimerInternal

      protected void fireTimerInternal(FlinkKey key, org.apache.beam.runners.core.TimerInternals.TimerData timerData)
    • fireTimer

      protected void fireTimer(org.apache.beam.runners.core.TimerInternals.TimerData timerData)