class BaselineClassifier(Estimator):
    """Classifier that predicts using the specified strategy.

    This is useful as a simple baseline classifier to compare with other classifiers.
    The input features are never used to compute a prediction; only statistics of
    the training target (its classes, their relative frequencies and its mode) are.

    Args:
        strategy (str): Method used to predict. Valid options are "mode", "random" and "random_weighted". Defaults to "mode".
        random_seed (int): Seed for the random number generator. Defaults to 0.
    """

    name = "Baseline Classifier"
    hyperparameter_ranges = {}
    """{}"""
    model_family = ModelFamily.BASELINE
    """ModelFamily.BASELINE"""
    supported_problem_types = [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]
    """[ProblemTypes.BINARY, ProblemTypes.MULTICLASS]"""

    def __init__(self, strategy="mode", random_seed=0, **kwargs):
        """Validate the strategy and initialise the (not yet fitted) state.

        Args:
            strategy (str): One of "mode", "random" or "random_weighted".
            random_seed (int): Seed for the random number generator.
            **kwargs: Extra entries merged into the component parameters.

        Raises:
            ValueError: If ``strategy`` is not one of the supported options.
        """
        if strategy not in ["mode", "random", "random_weighted"]:
            raise ValueError(
                "'strategy' parameter must equal either 'mode', 'random', or 'random_weighted'",
            )
        parameters = {"strategy": strategy}
        parameters.update(kwargs)

        # State learned in fit(); everything stays None until then.
        self._classes = None  # unique target values, as a list
        self._percentage_freq = None  # relative frequency of each entry of _classes
        self._num_features = None  # number of columns of the training X
        self._num_unique = None  # number of unique target values
        self._mode = None  # most frequent target value ("mode" strategy only)
        super().__init__(
            parameters=parameters,
            component_obj=None,
            random_seed=random_seed,
        )

    def fit(self, X, y=None):
        """Fits baseline classifier component to data.

        Args:
            X (pd.DataFrame): The input training data of shape [n_samples, n_features].
            y (pd.Series): The target training data of length [n_samples].

        Returns:
            self

        Raises:
            ValueError: If y is None.
        """
        if y is None:
            raise ValueError("Cannot fit Baseline classifier if y is None")
        X = infer_feature_types(X)
        y = infer_feature_types(y)

        # np.unique returns the unique values together with their counts, so
        # _classes[i] and _percentage_freq[i] always describe the same class.
        vals, counts = np.unique(y, return_counts=True)
        self._classes = list(vals)
        self._percentage_freq = counts.astype(float) / len(y)
        self._num_unique = len(self._classes)
        self._num_features = X.shape[1]

        # The mode is only needed (and only computed) for the "mode" strategy.
        if self.parameters["strategy"] == "mode":
            self._mode = y.mode()[0]
        return self

    def predict(self, X):
        """Make predictions using the baseline classification strategy.

        With "mode" every row gets the mode stored by ``fit``. With "random" a
        class is drawn uniformly for every row, and with "random_weighted" the
        draw is weighted by the class frequencies stored by ``fit``.

        Args:
            X (pd.DataFrame): Data of shape [n_samples, n_features].

        Returns:
            pd.Series: Predicted values.
        """
        X = infer_feature_types(X)
        strategy = self.parameters["strategy"]
        if strategy == "mode":
            predictions = pd.Series([self._mode] * len(X))
        elif strategy == "random":
            predictions = get_random_state(self.random_seed).choice(
                self._classes,
                len(X),
            )
        else:
            predictions = get_random_state(self.random_seed).choice(
                self._classes,
                len(X),
                p=self._percentage_freq,
            )
        return infer_feature_types(predictions)

    def predict_proba(self, X):
        """Make prediction probabilities using the baseline classification strategy.

        Every row receives the same probability vector: a one-hot vector on the
        mode for "mode", a uniform vector for "random", and the training class
        frequencies for "random_weighted".

        Args:
            X (pd.DataFrame): Data of shape [n_samples, n_features].

        Returns:
            pd.DataFrame: Predicted probability values, one column per class.
        """
        X = infer_feature_types(X)
        strategy = self.parameters["strategy"]
        if strategy == "mode":
            mode_index = self._classes.index(self._mode)
            class_probs = [
                1.0 if i == mode_index else 0.0 for i in range(self._num_unique)
            ]
        elif strategy == "random":
            class_probs = [1.0 / self._num_unique for _ in range(self._num_unique)]
        else:
            class_probs = [self._percentage_freq[i] for i in range(self._num_unique)]

        # Tile the single probability row instead of multiplying a nested list:
        # the result is always 2-D with shape (n_rows, n_classes), including
        # when X has zero rows (list multiplication collapses to a 1-D array).
        proba_arr = np.tile(np.array(class_probs, dtype=float), (len(X), 1))
        predictions = pd.DataFrame(proba_arr, columns=self._classes)
        return infer_feature_types(predictions)

    @property
    def feature_importance(self):
        """Returns importance associated with each feature.

        Since baseline classifiers do not use input features to calculate predictions, returns an array of zeroes.

        Returns:
            pd.Series: An array of zeroes, one entry per feature seen in ``fit``.
        """
        return pd.Series(np.zeros(self._num_features))

    @property
    def classes_(self):
        """Returns class labels. Will return None before fitting.

        Returns:
            list[str] or list(float) : Class names
        """
        return self._classes