Source code for evalml.automl.progress
"""Progress abstraction holding stopping criteria and progress information."""
import logging
import time
from evalml.utils.logger import get_logger


class Progress:
    """Progress object holding stopping criteria and progress information.

    Args:
        max_time (int): Maximum time to search for pipelines.
        max_iterations (int): Maximum number of iterations to search.
        max_batches (int): The maximum number of batches of pipelines to search. Parameters max_time and
            max_iterations take precedence over max_batches when stopping the search.
        patience (int): Number of iterations without improvement to stop search early.
        tolerance (float): Minimum percentage difference to qualify as score improvement for early stopping.
        automl_algorithm (str): The automl algorithm to use. Used to calculate the number of iterations if max_batches is selected as the stopping criterion.
        objective (str, ObjectiveBase): The objective used in search.
        verbose (boolean): Whether or not to log out stopping information.
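
    Example:
        A minimal, illustrative sketch of constructing and starting a tracker
        (the values below are placeholders, not recommendations):

        >>> progress = Progress(max_time=300, max_iterations=10, verbose=False)
        >>> progress.start_timing()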
"""
    def __init__(
        self,
        max_time=None,
        max_batches=None,
        max_iterations=None,
        patience=None,
        tolerance=None,
        automl_algorithm=None,
        objective=None,
        verbose=False,
    ):
        self.max_time = max_time
        self.current_time = None
        self.start_time = None
        self.max_batches = max_batches
        self.current_batch = 0
        self.max_iterations = max_iterations
        self.current_iterations = 0
        self.patience = patience
        self.tolerance = tolerance
        self.automl_algorithm = automl_algorithm
        self.objective = objective
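        # Early-stopping bookkeeping: best score seen so far, consecutive results
        # without a significant improvement, and the id of the last result checked.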
        self._best_score = None
        self._without_improvement = 0
        self._last_id = 0
        if verbose:
            self.logger = get_logger(f"{__name__}.verbose")
        else:
            self.logger = logging.getLogger(__name__)

    def start_timing(self):
        """Sets start time to current time."""
        self.start_time = time.time()

    def elapsed(self):
        """Return time elapsed using the start time and current time."""
        return self.current_time - self.start_time

    def should_continue(self, results, interrupted=False, mid_batch=False):
        """Given AutoML results, return whether or not the search should continue.

        Args:
            results (dict): AutoMLSearch results.
            interrupted (bool): Whether AutoMLSearch was given a keyboard interrupt. Defaults to False.
            mid_batch (bool): Whether this method was called while in the middle of a batch. Defaults to False.

        Returns:
            bool: True if search should continue, False otherwise.
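
        Example:
            A minimal sketch using a hand-built results dict and a stub algorithm object;
            the names and values below are illustrative, not produced by AutoMLSearch:

            >>> algo = type("StubAlgorithm", (), {"batch_number": 1})()
            >>> progress = Progress(max_iterations=2, automl_algorithm=algo)
            >>> progress.start_timing()
            >>> results = {
            ...     "search_order": [0],
            ...     "pipeline_results": {0: {"mean_cv_score": 0.8}},
            ... }
            >>> progress.should_continue(results)
            True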
"""
        if interrupted:
            return False
        # update and check max_time, max_iterations, and max_batches
        self.current_time = time.time()
        self.current_iterations = len(results["pipeline_results"])
        self.current_batch = self.automl_algorithm.batch_number
        if self.max_time and self.elapsed() >= self.max_time:
            return False
        elif self.max_iterations and self.current_iterations >= self.max_iterations:
            return False
        elif (
            self.max_batches
            and self.current_batch >= self.max_batches
            and not mid_batch
        ):
            return False

        # check for early stopping
        if self.patience is not None and self.tolerance is not None:
            last_id = results["search_order"][-1]
            curr_score = results["pipeline_results"][last_id]["mean_cv_score"]
            if self._best_score is None:
                self._best_score = curr_score
                return True
            elif last_id > self._last_id:
                self._last_id = last_id
                score_improved = (
                    curr_score > self._best_score
                    if self.objective.greater_is_better
                    else curr_score < self._best_score
                )
                significant_change = (
                    abs((curr_score - self._best_score) / self._best_score)
                    > self.tolerance
                )
                if score_improved and significant_change:
                    self._best_score = curr_score
                    self._without_improvement = 0
                else:
                    self._without_improvement += 1
                if self._without_improvement >= self.patience:
                    self.logger.info(
                        "\n\n{} iterations without improvement. Stopping search early...".format(
                            self.patience,
                        ),
                    )
                    return False
        return True

    def return_progress(self):
        """Return information about the current and end state of each stopping criterion, in order of priority.

        Returns:
            List[Dict[str, Any]]: List of dictionaries, one per configured stopping criterion.
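
        Example:
            Shape of the returned list (values are illustrative):

            [
                {"stopping_criteria": "max_time", "current_state": 12.3, "end_state": 300, "unit": "seconds"},
                {"stopping_criteria": "max_iterations", "current_state": 4, "end_state": 10, "unit": "iterations"},
            ]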
"""
        progress = []
        if self.max_time:
            progress.append(
                {
                    "stopping_criteria": "max_time",
                    "current_state": self.elapsed(),
                    "end_state": self.max_time,
                    "unit": "seconds",
                },
            )
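        # When only max_batches is set, the iteration budget is derived by summing the
        # algorithm's pipelines-per-batch over every planned batch.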
        if self.max_iterations or self.max_batches:
            max_iterations = (
                self.max_iterations
                if self.max_iterations
                else sum(
                    [
                        self.automl_algorithm.num_pipelines_per_batch(n)
                        for n in range(self.max_batches)
                    ],
                )
            )
            progress.append(
                {
                    "stopping_criteria": "max_iterations",
                    "current_state": self.current_iterations,
                    "end_state": max_iterations,
                    "unit": "iterations",
                },
            )
        if self.max_batches:
            progress.append(
                {
                    "stopping_criteria": "max_batches",
                    "current_state": self.current_batch,
                    "end_state": self.max_batches,
                    "unit": "batches",
                },
            )
        return progress