#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""A profiler context manager based on cProfile.Profile objects.
For internal use only; no backwards-compatibility guarantees.
"""
# pytype: skip-file
from __future__ import absolute_import
import cProfile # pylint: disable=bad-python3-import
import io
import logging
import os
import pstats
import random
import tempfile
import time
import warnings
from builtins import object
from threading import Timer
from typing import Callable
from typing import Optional
from apache_beam.io import filesystems
_LOGGER = logging.getLogger(__name__)
class Profile(object):
  """cProfile wrapper context for saving and logging profiler results.

  Used as a context manager: profiling is enabled on entry and disabled on
  exit. On exit the collected data is optionally dumped to a file under
  ``profile_location`` and/or logged as formatted statistics.
  """

  # Sort key handed to pstats.Stats.sort_stats() when logging results.
  SORTBY = 'cumulative'

  def __init__(
      self,
      profile_id,
      profile_location=None,
      log_results=False,
      file_copy_fn=None,
      time_prefix='%Y-%m-%d_%H_%M_%S-'):
    """Creates a Profile object.

    Args:
      profile_id: Identifier of this profile; converted to ``str``. It is used
        in log messages and, verbatim, as the suffix of the dump file name.
      profile_location: Directory under which the profile dump is stored. If
        falsy, no dump is written.
      log_results: If true, the formatted statistics are logged on exit and
        kept in ``self.stats``.
      file_copy_fn: Callable ``(src, dest)`` that copies the local temporary
        dump file to its destination. Defaults to ``default_file_copy_fn``.
      time_prefix: ``time.strftime`` format string whose expansion is
        prepended to ``profile_id`` to build the dump file name.
    """
    self.stats = None
    self.profile_id = str(profile_id)
    self.profile_location = profile_location
    self.log_results = log_results
    self.file_copy_fn = file_copy_fn or self.default_file_copy_fn
    self.time_prefix = time_prefix
    # Set to the dump destination once a dump has been copied in __exit__.
    self.profile_output = None

  def __enter__(self):
    _LOGGER.info('Start profiling: %s', self.profile_id)
    self.profile = cProfile.Profile()
    self.profile.enable()
    return self

  def __exit__(self, *args):
    self.profile.disable()
    _LOGGER.info('Stop profiling: %s', self.profile_id)

    if self.profile_location:
      dump_location = self._dump_location()
      # cProfile can only dump to a local path, so write to a temporary file
      # first and let file_copy_fn move the data to its final destination.
      fd, filename = tempfile.mkstemp()
      try:
        os.close(fd)
        self.profile.dump_stats(filename)
        _LOGGER.info('Copying profiler data to: [%s]', dump_location)
        self.file_copy_fn(filename, dump_location)
      finally:
        os.remove(filename)
      self.profile_output = dump_location

    if self.log_results:
      try:
        import StringIO  # Python 2
        s = StringIO.StringIO()
      except ImportError:
        s = io.StringIO()
      self.stats = pstats.Stats(
          self.profile, stream=s).sort_stats(Profile.SORTBY)
      self.stats.print_stats()
      _LOGGER.info('Profiler data: [%s]', s.getvalue())

  def _dump_location(self):
    """Returns the destination path for the profile dump."""
    # Only the prefix is a strftime format. The profile id is appended
    # verbatim so that a '%' inside it is never expanded as a time directive.
    file_name = time.strftime(self.time_prefix) + self.profile_id
    return os.path.join(self.profile_location, file_name)

  @staticmethod
  def default_file_copy_fn(src, dest):
    """Copies the local file ``src`` to ``dest`` through FileSystems.

    The data is first written to ``dest + '.tmp'`` and then renamed to
    ``dest``, so ``dest`` is only created after the write has completed.
    """
    dest_handle = filesystems.FileSystems.create(dest + '.tmp')
    try:
      with open(src, 'rb') as src_handle:
        dest_handle.write(src_handle.read())
    finally:
      dest_handle.close()
    filesystems.FileSystems.rename([dest + '.tmp'], [dest])

  @staticmethod
  def factory_from_options(options):
    # type: (...) -> Optional[Callable[..., Profile]]
    """Returns a profiler factory configured from ``options``, or None.

    Reads ``options.profile_cpu``, ``options.profile_sample_rate`` and
    ``options.profile_location``. When ``profile_cpu`` is falsy, None is
    returned. Otherwise the returned callable takes a profile id plus keyword
    arguments for ``Profile`` and returns a new ``Profile`` with probability
    ``profile_sample_rate``, or None when the call is not sampled.
    """
    if not options.profile_cpu:
      return None

    def create_profiler(profile_id, **kwargs):
      if random.random() < options.profile_sample_rate:
        return Profile(profile_id, options.profile_location, **kwargs)
      return None

    return create_profiler
class MemoryReporter(object):
  """A memory reporter that reports the memory usage and heap profile.

  Usage::

    mr = MemoryReporter(interval_second=30.0)
    mr.start()
    while ...
      <do something>
      # this will report continuously with 30 seconds between reports.
    mr.stop()

  NOTE: A reporter with start() should always stop(), or the parent process can
  never finish.

  Or simply the following which does start() and stop()::

    with MemoryReporter(interval_second=100):
      while ...
        <do some thing>

  Also it could report on demand without continuous reporting::

    mr = MemoryReporter()  # default interval 60s but not started.
    <do something>
    mr.report_once()
  """
  def __init__(self, interval_second=60.0):
    """Creates a reporter; reporting is a no-op when guppy is unavailable.

    Args:
      interval_second: Delay in seconds between two periodic reports once
        start() has been called.
    """
    # Always initialise the full state so the instance is consistent even
    # when guppy cannot be imported.
    self._interval_second = interval_second
    self._timer = None
    self._enabled = False
    # guppy might not be installed.
    # Python 2.7: https://pypi.org/project/guppy/0.1.10
    # Python 3.x: https://pypi.org/project/guppy3/3.0.9
    # The reporter can be set up only when guppy is installed (and guppy cannot
    # be added to the required packages in setup.py, since it's not available
    # in all platforms).
    try:
      from guppy import hpy  # pylint: disable=import-error
    except ImportError:
      warnings.warn('guppy is not installed; MemoryReporter not available.')
      hpy = None
    self._hpy = hpy

  def __enter__(self):
    self.start()
    return self

  def __exit__(self, *args):
    self.stop()

  def start(self):
    """Starts periodic reporting; no-op if already started or guppy is missing.
    """
    if self._enabled or not self._hpy:
      return
    self._enabled = True

    def report_with_interval():
      if not self._enabled:
        return
      self.report_once()
      # stop() may have been called while the report was being produced; do
      # not schedule another timer in that case.
      if not self._enabled:
        return
      next_timer = Timer(self._interval_second, report_with_interval)
      self._timer = next_timer
      next_timer.start()

    first_timer = Timer(self._interval_second, report_with_interval)
    self._timer = first_timer
    first_timer.start()

  def stop(self):
    """Stops periodic reporting; no-op if the reporter is not running."""
    if not self._enabled:
      return
    # Clear the flag before cancelling so that a callback that is already
    # running sees the reporter as stopped and does not re-arm the timer.
    self._enabled = False
    timer = self._timer
    if timer is not None:
      timer.cancel()

  def report_once(self):
    """Logs one heap profile; no-op when guppy is not available."""
    if not self._hpy:
      return
    report_start_time = time.time()
    heap_profile = self._hpy().heap()
    _LOGGER.info(
        '*** MemoryReport Heap:\n %s\n MemoryReport took %.1f seconds',
        heap_profile,
        time.time() - report_start_time)