Source code for evalml.pipelines.utils

import cloudpickle

from .classification import (
    CatBoostClassificationPipeline,
    LogisticRegressionPipeline,
    RFClassificationPipeline,
    XGBoostPipeline
)
from .regression import (
    CatBoostRegressionPipeline,
    LinearRegressionPipeline,
    RFRegressionPipeline
)

from evalml.model_types import handle_model_types
from evalml.problem_types import handle_problem_types

# Every pipeline class known to this module. get_pipelines and
# list_model_types filter this list by problem type, and get_pipelines
# returns its matches in the order they are listed here.
ALL_PIPELINES = [
    RFClassificationPipeline,
    XGBoostPipeline,
    LogisticRegressionPipeline,
    LinearRegressionPipeline,
    RFRegressionPipeline,
    CatBoostClassificationPipeline,
    CatBoostRegressionPipeline,
]


def get_pipelines(problem_type, model_types=None):
    """Return the pipeline classes available for a problem type.

    Arguments:
        problem_type (ProblemTypes or str): the problem type the pipelines
            must support.
        model_types (list[ModelTypes or str]): model types to match. If
            None, every pipeline for the problem type is returned. An empty
            list matches nothing and yields an empty result.

    Returns:
        list of pipeline classes from ALL_PIPELINES, in registry order.

    Raises:
        TypeError: if model_types is neither None nor a list.
        RuntimeError: if a requested model type is not among the model
            types reported by list_model_types for the problem type.
    """
    if not (model_types is None or isinstance(model_types, list)):
        raise TypeError("model_types parameter is not a list.")

    # Normalize the requested model types before they are compared against
    # the pipelines' own model_type attributes.
    if model_types:
        model_types = [handle_model_types(requested) for requested in model_types]

    problem_type = handle_problem_types(problem_type)
    candidates = [
        pipeline for pipeline in ALL_PIPELINES
        if problem_type in pipeline.problem_types
    ]
    if model_types is None:
        return candidates

    # Reject the first requested model type that no pipeline for this
    # problem type provides.
    supported = list_model_types(problem_type)
    for requested in model_types:
        if requested not in supported:
            raise RuntimeError("Unrecognized model type for problem type %s: %s" % (problem_type, requested))

    return [
        pipeline for pipeline in candidates
        if pipeline.model_type in model_types
    ]
def list_model_types(problem_type):
    """List the distinct model types available for a problem type.

    Arguments:
        problem_type (ProblemTypes or str): binary, multiclass, or regression

    Returns:
        list of the unique model_type values of the pipelines in
        ALL_PIPELINES that support the problem type. The list is built from
        a set, so its order is not significant.
    """
    problem_type = handle_problem_types(problem_type)
    # A set drops the duplicates that arise when several pipelines share a
    # model type.
    distinct_types = {
        pipeline.model_type
        for pipeline in ALL_PIPELINES
        if problem_type in pipeline.problem_types
    }
    return list(distinct_types)
def save_pipeline(pipeline, file_path):
    """Serialize a pipeline to a file with cloudpickle.

    Args:
        pipeline: the object to serialize.
        file_path (str): location to save the file. The file is opened in
            binary write mode, so any existing content is replaced.

    Returns:
        None
    """
    with open(file_path, 'wb') as out_file:
        cloudpickle.dump(pipeline, out_file)
def load_pipeline(file_path):
    """Deserialize a pipeline from a file with cloudpickle.

    Unpickling can execute arbitrary code, so only load files that come
    from a trusted source.

    Args:
        file_path (str): location of the file to load.

    Returns:
        The object stored in the file.
    """
    with open(file_path, 'rb') as in_file:
        loaded = cloudpickle.load(in_file)
    return loaded