import os
import sys
import time
import datetime
import math
# ANSI escape sequence (ESC [ 2 K, "erase in line") that is written in front of every progress
# line so that the previously printed line is cleared before the new text is shown.
_ERASE_LINE = '\x1b[2K'
class ProgressBar:
    """
    The progress bar can be used to print progress for a specific task where either a specified
    number of work items or an iterable has to be processed. The progress bar also measures the
    execution time and provides an estimated remaining time for the operation. A common use case
    for the progress bar are for-loops where one work item is completed per iteration.

    Examples
    --------
    .. code-block:: python
        :caption: Iterate over a range of integers

        for i in ProgressBar(4): # equivalently initialized as ProgressBar(range(4))
            time.sleep(2.5)

    .. code-block:: python
        :caption: Iterate over an iterable of known length

        l = [1, 5, 7]
        for i in ProgressBar(l):
            time.sleep(2.5)

    .. code-block:: python
        :caption: Iterate over an iterable of unknown size

        it = (x + 1 for x in range(3))
        for i in ProgressBar(it):
            time.sleep(1.5)

    .. code-block:: python
        :caption: Visualize some complex manual progress

        with ProgressBar() as p:
            time.sleep(3)
            p.step()
            for _ in range(10):
                time.sleep(0.1)
                p.step()
            for _ in range(5):
                time.sleep(0.4)
                p.step()
    """

    # Number of characters between the brackets of the rendered bar.
    _BAR_WIDTH = 30

    ########################################################################################
    ### INITIALIZATION
    ########################################################################################
    def __init__(self, iterable=None, denom=None, file=None, verbose=True):
        """
        Initializes a new progress bar with the given number of work items.

        Parameters
        ----------
        iterable: int or iterable, default: None
            Either the number of work items to be processed or an iterable whose values are
            returned when iterating over this progress bar. If no value is given, this progress
            bar can not be used within for-loops.
        denom: int, default: None
            If the first parameter is an integer, this value may also be given. In that case, the
            first parameter acts as the numerator and the second parameter as the denominator. The
            rounded up division of these two values is used as the number of work items.
        file: str, default: None
            If given, defines the file where the progress bar should write to instead of the
            command line. Intermediate directories are created automatically.
        verbose: bool, default: True
            Whether to actually log anything. This is useful in cases where logging should be
            turned off dynamically without introducing additional control flow.

        Raises
        ------
        ValueError
            If ``denom`` is given although the first parameter is not an integer, or if the first
            parameter is neither an iterable nor an integer.
        """
        # Assigned before any validation so that __del__ never sees a half-initialized object.
        self.stream = sys.stdout
        self._owns_stream = False

        if not (denom is None or (isinstance(denom, int) and isinstance(iterable, int))):
            raise ValueError("If second parameter is given, first parameter must be integer.")

        self.verbose = verbose

        if iterable is None:
            self.iterable = None
        elif hasattr(iterable, '__iter__'):
            self.iterable = iterable
        elif isinstance(iterable, int) and denom is not None:
            self.iterable = range(math.ceil(iterable / denom))
        elif isinstance(iterable, int):
            self.iterable = range(iterable)
        else:
            raise ValueError(
                f"First parameter must be iterable or integer but found {type(iterable)}."
            )

        self.haslength = hasattr(self.iterable, '__len__')
        if self.haslength:
            self.iteration_max = len(self.iterable)

        self._iteration_count = 0
        self._start_time = None
        self._latest_print_length = None

        # The file is opened last so that invalid arguments do not leave an open handle behind.
        if file is not None:
            # Only the *parent* directory must exist; creating a directory at the path of the
            # file itself would make the subsequent open() fail.
            directory = os.path.dirname(file)
            if directory:
                os.makedirs(directory, exist_ok=True)
            self.stream = open(file, 'a+')
            self._owns_stream = True

    ########################################################################################
    ### INSTANCE METHODS
    ########################################################################################
    def start(self):
        """
        Starts to record the progress of the operation. Time measuring is initiated and the
        beginning of the operation is indicated on the command line.

        Note
        ----
        This method should usually not be called explicitly. It is implicitly called at the
        beginning of a :code:`with` statement.
        """
        self._iteration_count = 0
        self._start_time = time.time()
        self._latest_print_length = 0
        self._print_progress(compute_eta=False)

    def step(self):
        """
        Tells the progress bar that one work item has been processed. The command line output is
        updated as well as the estimated finishing time of the operation.

        If used from within a with statement, this method must be called explicitly, otherwise, it
        should not be called.
        """
        self._iteration_count += 1
        self._print_progress()

    def finish(self, kv=None):
        """
        Stops the progress bar and prints the total duration of the operation. If metrics are given,
        these will be printed along with the elapsed time and the number of iterations per second.
        Metrics may be provided in the form :code:`<name>__<format specifier>` to e.g. format
        floating point numbers with a fixed number of decimal points. The format specifier may
        either be a bare specifier (:code:`loss__.3f`) or a full format template
        (:code:`loss__{:.3f}`).

        If key value pairs are given, this method must be called explicitly, otherwise, it is
        implicitly called at the end of a with statement or for loop.

        Note
        ----
        If this method is called multiple times with no calls to :meth:`start` in between, all but
        the first call are no-ops.

        Parameters
        ----------
        kv: dict, default: None
            The metrics to print as key-value pairs. Usually, they provide more information about
            the operation whose progress has been tracked.
        """
        if self._start_time is None:
            return
        if kv is None:
            kv = {}
        self._print_done(kv)
        self._start_time = None
        self._latest_print_length = None

    ########################################################################################
    ### SPECIAL METHODS
    ########################################################################################
    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.finish()
        # Never swallow exceptions raised inside the with-block.
        return False

    def __iter__(self):
        if self.iterable is None:
            raise ValueError("ProgressBar must be given an iterable if used within a for-loop.")
        self.start()
        for item in self.iterable:
            yield item
            # Reached only once the consumer asks for the next item, i.e. after the loop body
            # for `item` has completed.
            self.step()
        self.finish()

    ########################################################################################
    ### PRIVATE METHODS
    ########################################################################################
    def _print_progress(self, compute_eta=True):
        """Renders the current progress line (bar or counter, ETA, elapsed time and rate)."""
        if not self.verbose:
            return

        elapsed = time.time() - self._start_time
        elapsed_time = datetime.timedelta(0, int(elapsed))
        # The clock may not have advanced yet (coarse timer resolution right after start()).
        it_per_sec = self._iteration_count / elapsed if elapsed > 0 else 0.0

        if self.haslength:
            width = self._BAR_WIDTH
            if self.iteration_max > 0:
                perc = self._iteration_count / self.iteration_max
            else:
                # Nothing to process: an empty workload is complete by definition.
                perc = 1.0

            # Clamp the fill so that stepping more often than announced cannot overflow the bar.
            filled = min(int(round(perc * width)), width)
            if filled == 0:
                pbar = ""
            elif filled < width:
                pbar = "=" * (filled - 1) + ">"
            else:
                pbar = "=" * width
            whitespace = " " * (width - filled)
            progress = f"[{pbar}{whitespace}] ({perc:02.1%})"

            if compute_eta and perc > 0:
                remaining = max((1 - perc) / perc * elapsed, 0)
                eta = datetime.timedelta(0, int(remaining))
            else:
                eta = "n/a"
        else:
            progress = f"[{self._iteration_count:,} iterations]"
            eta = "n/a"

        text = " {} ETA {} [Elapsed {} | {:,.2f} it/s]".format(
            progress, eta, elapsed_time, it_per_sec
        )
        # The carriage return keeps the cursor on the same line so the next update overwrites it.
        print(f"{_ERASE_LINE}{self._pad_whitespace(text)}", end='\r', file=self.stream)
        self.stream.flush()

    def _print_done(self, metrics):
        """Prints the final summary line including the given metrics sorted by their key."""
        if not self.verbose:
            return

        elapsed = time.time() - self._start_time
        elapsed_time = datetime.timedelta(0, int(elapsed))
        it_per_sec = self._iteration_count / elapsed if elapsed > 0 else 0.0

        m_strings = [
            self._format_metric(key, value)
            for key, value in sorted(metrics.items(), key=lambda item: item[0])
        ]

        text = " [Elapsed {} | {:,.2f} it/s] {}".format(
            elapsed_time, it_per_sec, ", ".join(m_strings)
        )
        print(f"{_ERASE_LINE}{self._pad_whitespace(text)}", file=self.stream)
        self._latest_print_length = 0
        self.stream.flush()

    @staticmethod
    def _format_metric(key, value):
        """
        Formats a single metric as ``<name>: <value>``.

        ``key`` is either a plain name (the value is printed with five decimals) or has the form
        ``<name>__<format>`` where ``<format>`` is a bare format specifier such as ``.3f`` or a
        full template such as ``{:.3f}``. Values that can not be rendered with the requested
        format are printed with a fallback instead of raising, so that reporting metrics never
        aborts the tracked operation.
        """
        parts = key.split('__')
        name = parts[0]
        if len(parts) > 1:
            spec = parts[1]
            try:
                if '{' in spec:
                    return f"{name}: {spec.format(value)}"
                return f"{name}: {format(value, spec)}"
            except (ValueError, TypeError, IndexError, KeyError):
                # The suffix is not a usable format (e.g. the key merely contains '__'), so
                # treat the whole key as the metric's name and use the default formatting.
                name = key
        try:
            rendered = '{:.5f}'.format(value)
        except (ValueError, TypeError):
            # Non-numeric metric (e.g. a string): print it as is.
            rendered = str(value)
        return f"{name}: {rendered}"

    def _pad_whitespace(self, text):
        """Right-pads ``text`` with spaces so that it fully covers the longest line printed."""
        diff = self._latest_print_length - len(text)
        self._latest_print_length = max(len(text), self._latest_print_length)
        if diff > 0:
            return text + " " * diff
        return text

    def __del__(self):
        # Only close streams opened by this instance. Comparing against the *current* sys.stdout
        # is unsafe because stdout may have been redirected since construction, and the
        # attribute may be missing entirely if __init__ raised early.
        if getattr(self, '_owns_stream', False):
            self.stream.close()
def ensure_valid_directories(file):
    """
    Creates intermediate directories to ensure that files can be saved to their specified directory.

    Parameters
    ----------
    file: str
        The path to the file whose directory structure to ensure. If the path has no directory
        component (i.e. it is a bare file name), nothing needs to be created.
    """
    # os.path.dirname honours the platform's path separators, unlike splitting on '/'.
    directory = os.path.dirname(file)
    # An empty directory means "current working directory"; os.makedirs("") would raise.
    if directory:
        os.makedirs(directory, exist_ok=True)