# Source code for evalml.pipelines.components.component_base
"""Base class for all components."""importcopyfromabcimportABC,abstractmethodimportcloudpicklefromevalml.exceptionsimportMethodPropertyNotFoundErrorfromevalml.pipelines.components.component_base_metaimportComponentBaseMetafromevalml.utilsimport(_downcast_nullable_X,_downcast_nullable_y,classproperty,infer_feature_types,log_subtitle,safe_repr,)fromevalml.utils.loggerimportget_logger
class ComponentBase(ABC, metaclass=ComponentBaseMeta):
    """Base class for all components.

    Args:
        parameters (dict): Dictionary of parameters for the component. Defaults to None.
        component_obj (obj): Third-party objects useful in component implementation. Defaults to None.
        random_seed (int): Seed for the random number generator. Defaults to 0.
    """

    # Per-class cache filled lazily by ``default_parameters``.
    _default_parameters = None
    _can_be_used_for_fast_partial_dependence = True
    # Referring to the pandas nullable dtypes; not just woodwork logical types
    _integer_nullable_incompatibilities = []
    _boolean_nullable_incompatibilities = []

    def __init__(self, parameters=None, component_obj=None, random_seed=0, **kwargs):
        """Base class for all components.

        Args:
            parameters (dict): Dictionary of parameters for the component. Defaults to None.
            component_obj (obj): Third-party objects useful in component implementation. Defaults to None.
            random_seed (int): Seed for the random number generator. Defaults to 0.
            kwargs (Any): Any keyword arguments to pass into the component.
        """
        self.random_seed = random_seed
        self._component_obj = component_obj
        # A falsy ``parameters`` (None or an empty dict) is replaced by a new dict.
        self._parameters = parameters or {}
        self._is_fitted = False

    @property
    @classmethod
    @abstractmethod
    def name(cls):
        """Returns string name of this component."""

    @property
    @classmethod
    @abstractmethod
    def modifies_features(cls):
        """Returns whether this component modifies (subsets or transforms) the features variable during transform.

        For Estimator objects, this attribute determines if the return
        value from `predict` or `predict_proba` should be used as features or targets.
        """

    @property
    @classmethod
    @abstractmethod
    def modifies_target(cls):
        """Returns whether this component modifies (subsets or transforms) the target variable during transform.

        For Estimator objects, this attribute determines if the return
        value from `predict` or `predict_proba` should be used as features or targets.
        """

    @property
    @classmethod
    @abstractmethod
    def training_only(cls):
        """Returns whether or not this component should be evaluated during training-time only, or during both training and prediction time."""

    @classproperty
    def needs_fitting(self):
        """Returns boolean determining if component needs fitting before calling predict, predict_proba, transform, or feature_importances.

        This can be overridden to False for components that do not need to be fit
        or whose fit methods do nothing.

        Returns:
            True.
        """
        return True

    @property
    def parameters(self):
        """Returns the parameters which were used to initialize the component.

        The returned dictionary is a shallow copy of the stored parameters.
        """
        return copy.copy(self._parameters)

    @classproperty
    def default_parameters(cls):
        """Returns the default parameters for this component.

        Our convention is that Component.default_parameters == Component().parameters.

        The value is computed on first access by instantiating the class with no
        arguments, then cached on that class.

        Returns:
            dict: Default parameters for this component.
        """
        # Look only at this class's own namespace. A plain attribute lookup would
        # also find a value cached on a parent class and hand the parent's
        # defaults to a subclass that was queried later.
        if vars(cls).get("_default_parameters") is None:
            cls._default_parameters = cls().parameters
        return cls._default_parameters

    @classproperty
    def _supported_by_list_API(cls):
        """Returns the negation of ``modifies_target``."""
        return not cls.modifies_target

    def _handle_partial_dependence_fast_mode(
        self,
        pipeline_parameters,
        X=None,
        target=None,
    ):
        """Determines whether or not a component can be used with partial dependence's fast mode.

        Args:
            pipeline_parameters (dict): Pipeline parameters that will be used to create the pipelines
                used in partial dependence fast mode.
            X (pd.DataFrame, optional): Holdout data being used for partial dependence calculations.
            target (str, optional): The target whose values we are trying to predict.

        Returns:
            dict: ``pipeline_parameters``, returned unchanged.

        Raises:
            TypeError: If the component is flagged as unusable for fast mode.
        """
        if self._can_be_used_for_fast_partial_dependence:
            return pipeline_parameters
        raise TypeError(
            f"Component {self.name} cannot run partial dependence fast mode.",
        )

    def clone(self):
        """Constructs a new component with the same parameters and random state.

        Returns:
            A new instance of this component with identical parameters and random state.
        """
        return self.__class__(**self.parameters, random_seed=self.random_seed)

    def fit(self, X, y=None):
        """Fits component to data.

        Args:
            X (pd.DataFrame): The input training data of shape [n_samples, n_features]
            y (pd.Series, optional): The target training data of length [n_samples]

        Returns:
            self

        Raises:
            MethodPropertyNotFoundError: If component does not have a fit method or a component_obj that implements fit.
        """
        X = infer_feature_types(X)
        if y is not None:
            y = infer_feature_types(y)
        # Guard only the attribute lookup. An AttributeError raised inside the
        # wrapped object's own fit is a genuine failure and must not be reported
        # as "no fit method".
        try:
            fit_method = self._component_obj.fit
        except AttributeError as err:
            raise MethodPropertyNotFoundError(
                "Component requires a fit method or a component_obj that implements fit",
            ) from err
        fit_method(X, y)
        return self

    def describe(self, print_name=False, return_dict=False):
        """Describe a component and its parameters.

        Args:
            print_name(bool, optional): whether to print name of component
            return_dict(bool, optional): whether to return description as dictionary in the format {"name": name, "parameters": parameters}

        Returns:
            None or dict: Returns dictionary if return_dict is True, else None.
        """
        logger = get_logger(f"{__name__}.describe")
        if print_name:
            log_subtitle(logger, self.name)
        # ``self.parameters`` copies on every access, so take one snapshot.
        parameters = self.parameters
        for parameter, value in parameters.items():
            logger.info(f"\t * {parameter} : {value}")
        if return_dict:
            return {"name": self.name, "parameters": parameters}
        return None

    def save(self, file_path, pickle_protocol=cloudpickle.DEFAULT_PROTOCOL):
        """Saves component at file path.

        Args:
            file_path (str): Location to save file.
            pickle_protocol (int): The pickle data stream format.
        """
        with open(file_path, "wb") as f:
            cloudpickle.dump(self, f, protocol=pickle_protocol)

    @staticmethod
    def load(file_path):
        """Loads component at file path.

        Args:
            file_path (str): Location to load file.

        Returns:
            ComponentBase object
        """
        # SECURITY: unpickling can execute arbitrary code. Only load files that
        # come from a trusted source.
        with open(file_path, "rb") as f:
            return cloudpickle.load(f)

    def __eq__(self, other):
        """Check for equality.

        Two components are equal when ``other`` is an instance of this
        component's class and both share the same random seed, parameters and
        fitted flag.
        """
        # NOTE: defining __eq__ without __hash__ leaves instances unhashable.
        if not isinstance(other, self.__class__):
            return False
        random_seed_eq = self.random_seed == other.random_seed
        if not random_seed_eq:
            return False
        attributes_to_check = ["_parameters", "_is_fitted"]
        for attribute in attributes_to_check:
            if getattr(self, attribute) != getattr(other, attribute):
                return False
        return True

    def __str__(self):
        """String representation of a component."""
        return self.name

    def __repr__(self):
        """String representation of a component."""
        parameters_repr = ", ".join(
            f"{key}={safe_repr(value)}" for key, value in self.parameters.items()
        )
        return f"{type(self).__name__}({parameters_repr})"

    def update_parameters(self, update_dict, reset_fit=True):
        """Updates the parameter dictionary of the component.

        Args:
            update_dict (dict): A dict of parameters to update.
            reset_fit (bool, optional): If True, will set `_is_fitted` to False.
        """
        self._parameters.update(update_dict)
        if reset_fit:
            self._is_fitted = False

    def _handle_nullable_types(self, X=None, y=None):
        """Transforms X and y to remove any incompatible nullable types according to a component's needs.

        Args:
            X (pd.DataFrame, optional): Input data to a component of shape [n_samples, n_features].
                May contain nullable types.
            y (pd.Series, optional): The target of length [n_samples]. May contain nullable types.

        Returns:
            X, y with any incompatible nullable types downcasted to compatible equivalents.
        """
        # The class-level incompatibility lists name which inputs ("X" / "y")
        # need handling for each nullable kind.
        X_bool_incompatible = "X" in self._boolean_nullable_incompatibilities
        X_int_incompatible = "X" in self._integer_nullable_incompatibilities
        if X is not None and (X_bool_incompatible or X_int_incompatible):
            X = _downcast_nullable_X(
                X,
                handle_boolean_nullable=X_bool_incompatible,
                handle_integer_nullable=X_int_incompatible,
            )

        y_bool_incompatible = "y" in self._boolean_nullable_incompatibilities
        y_int_incompatible = "y" in self._integer_nullable_incompatibilities
        if y is not None and (y_bool_incompatible or y_int_incompatible):
            y = _downcast_nullable_y(
                y,
                handle_boolean_nullable=y_bool_incompatible,
                handle_integer_nullable=y_int_incompatible,
            )

        return X, y