Source code for evalml.automl.engine.sequential_engine
"""A Future-like api for jobs created by the SequentialEngine, an Engine that sequentially computes the submitted jobs."""fromevalml.automl.engine.engine_baseimport(EngineBase,EngineComputation,evaluate_pipeline,score_pipeline,train_pipeline,)fromevalml.objectives.utilsimportget_objective
class SequentialComputation(EngineComputation):
    """Future-like handle for a job created by the SequentialEngine.

    AutoMLSearch must be able to treat sequential and concurrent computations
    the same way. To achieve that, the work is not executed when the job is
    submitted; it is deferred until ``get_result`` is called. Because
    ``done`` always reports True, AutoMLSearch asks for results in the order
    the jobs were submitted, so the computations run one after another.

    Args:
        work (callable): Computation that should be done by the engine.
        **kwargs: Keyword arguments handed to ``work`` when it is executed.
    """

    def __init__(self, work, **kwargs):
        # Nothing is computed here: only the callable and its arguments are
        # recorded so that get_result can run them later.
        self.meta_data = {}
        self.kwargs = kwargs
        self.work = work

    def done(self):
        """Report whether the computation is done.

        Returns:
            bool: Always returns True.
        """
        return True

    def get_result(self):
        """Run the stored work and return its result.

        The work is executed synchronously inside this call, so the call
        blocks until the computation is finished.

        Raises:
            Exception: If computation fails. Any exception raised by the
                work propagates to the caller unchanged.

        Returns:
            Computation results.
        """
        job, job_arguments = self.work, self.kwargs
        return job(**job_arguments)

    def cancel(self):
        """Cancel the current computation.

        This is a no-op: it does not change any state on the computation.
        """
        return None
class SequentialEngine(EngineBase):
    """The default engine for the AutoML search.

    Trains and scores pipelines locally and sequentially. Every ``submit_*``
    method returns a SequentialComputation that wraps the work to be done;
    the work itself runs when ``get_result`` is called on that object.
    """

    def submit_evaluation_job(
        self,
        automl_config,
        pipeline,
        X,
        y,
        X_holdout=None,
        y_holdout=None,
    ):
        """Submit a job to evaluate a pipeline.

        Args:
            automl_config: Structure containing data passed from AutoMLSearch instance.
            pipeline (pipeline.PipelineBase): Pipeline to evaluate.
            X (pd.DataFrame): Input data for modeling.
            y (pd.Series): Target data for modeling.
            X_holdout: Holdout input data for holdout scoring. Defaults to None.
            y_holdout (pd.Series): Holdout target data for holdout scoring.
                Defaults to None.

        Returns:
            SequentialComputation: Deferred computation wrapping ``evaluate_pipeline``.
        """
        # The job logger is created at submission time and travels with the job.
        job_logger = self.setup_job_log()
        evaluation_kwargs = dict(
            pipeline=pipeline,
            automl_config=automl_config,
            X=X,
            y=y,
            X_holdout=X_holdout,
            y_holdout=y_holdout,
            logger=job_logger,
        )
        return SequentialComputation(work=evaluate_pipeline, **evaluation_kwargs)

    def submit_training_job(self, automl_config, pipeline, X, y):
        """Submit a job to train a pipeline.

        Args:
            automl_config: Structure containing data passed from AutoMLSearch instance.
            pipeline (pipeline.PipelineBase): Pipeline to train.
            X (pd.DataFrame): Input data for modeling.
            y (pd.Series): Target data for modeling.

        Returns:
            SequentialComputation: Deferred computation wrapping ``train_pipeline``.
        """
        training_kwargs = dict(
            pipeline=pipeline,
            X=X,
            y=y,
            automl_config=automl_config,
            schema=False,
        )
        return SequentialComputation(work=train_pipeline, **training_kwargs)

    def submit_scoring_job(
        self,
        automl_config,
        pipeline,
        X,
        y,
        objectives,
        X_train=None,
        y_train=None,
    ):
        """Submit a job to score a pipeline.

        Args:
            automl_config: Structure containing data passed from AutoMLSearch instance.
                Not forwarded to the scoring work.
            pipeline (pipeline.PipelineBase): Pipeline to score.
            X (pd.DataFrame): Input data for modeling.
            y (pd.Series): Target data for modeling.
            objectives (list[ObjectiveBase]): List of objectives to score on.
            X_train (pd.DataFrame): Training features. Used for feature engineering in time series.
            y_train (pd.Series): Training target. Used for feature engineering in time series.

        Returns:
            SequentialComputation: Deferred computation wrapping ``score_pipeline``,
            with the pipeline name stored under ``meta_data["pipeline_name"]``.
        """
        # Resolve every objective to an instance at submission time, before
        # the job is created.
        objective_instances = [
            get_objective(objective, return_instance=True) for objective in objectives
        ]
        # The schemas are read from the ``ww`` accessor here, at submission
        # time, and passed along explicitly with the data.
        scoring_job = SequentialComputation(
            work=score_pipeline,
            pipeline=pipeline,
            X=X,
            y=y,
            objectives=objective_instances,
            X_schema=X.ww.schema,
            y_schema=y.ww.schema,
            X_train=X_train,
            y_train=y_train,
        )
        scoring_job.meta_data["pipeline_name"] = pipeline.name
        return scoring_job