Source code for evalml.pipelines.components.transformers.column_selectors
"""Initializes a transformer that selects specified columns in input data."""fromabcimportabstractmethodfromevalml.pipelines.components.transformersimportTransformerfromevalml.utilsimportinfer_feature_types
class ColumnSelector(Transformer):
    """Base transformer for components that operate on columns chosen by name.

    Subclasses implement ``_modify_columns`` to decide what is done with the
    named columns.

    Args:
        columns (list(string)): List of column names, used to determine which columns to select.
        random_seed (int): Seed for the random number generator. Defaults to 0.
    """

    def __init__(self, columns=None, random_seed=0, **kwargs):
        # Falsy values (e.g. None or an empty list) bypass the type check.
        if columns and not isinstance(columns, list):
            received = type(columns)
            raise ValueError(
                f"Parameter columns must be a list. Received {received}.",
            )

        super().__init__(
            parameters={"columns": columns, **kwargs},
            component_obj=None,
            random_seed=random_seed,
        )

    def _check_input_for_columns(self, X):
        """Raise ValueError if any configured column name is absent from X."""
        requested = self.parameters.get("columns") or []
        available = X.columns
        absent = set(requested) - set(available)
        if absent:
            raise ValueError(f"Columns of type {absent} not found in input data.")

    @abstractmethod
    def _modify_columns(self, cols, X, y=None):
        """Apply the subclass-specific operation to X for the requested columns."""

    def fit(self, X, y=None):
        """Fit the transformer by verifying that the configured columns exist in X.

        Args:
            X (pd.DataFrame): Data to check.
            y (pd.Series, ignored): Targets.

        Returns:
            self
        """
        self._check_input_for_columns(infer_feature_types(X))
        return self

    def transform(self, X, y=None):
        """Transform data using the fitted column selector component.

        Args:
            X (pd.DataFrame): The input training data of shape [n_samples, n_features].
            y (pd.Series, optional): The target training data of length [n_samples].

        Returns:
            pd.DataFrame: Transformed data.
        """
        typed_X = infer_feature_types(X)
        self._check_input_for_columns(typed_X)
        requested = self.parameters.get("columns") or []
        return infer_feature_types(self._modify_columns(requested, typed_X, y))
class DropColumns(ColumnSelector):
    """Drops specified columns in input data.

    Requested columns that are not present in the data are ignored.

    Args:
        columns (list(string)): List of column names, used to determine which columns to drop.
        random_seed (int): Seed for the random number generator. Defaults to 0.
    """

    name = "Drop Columns Transformer"
    hyperparameter_ranges = {}
    """{}"""
    needs_fitting = False

    def _check_input_for_columns(self, X):
        """Skip the presence check; absent columns are filtered out before dropping."""

    def _modify_columns(self, cols, X, y=None):
        """Return X without the requested columns that are actually present."""
        present = set(cols).intersection(X.columns)
        to_drop = list(present)
        return X.ww.drop(to_drop)

    def transform(self, X, y=None):
        """Transform data X by dropping the configured columns.

        Args:
            X (pd.DataFrame): Data to transform.
            y (pd.Series, optional): Targets.

        Returns:
            pd.DataFrame: Transformed X.
        """
        return super().transform(X, y)
class SelectColumns(ColumnSelector):
    """Selects specified columns in input data.

    Args:
        columns (list(string)): List of column names, used to determine which columns to select. If columns are not present, they will not be selected.
        random_seed (int): Seed for the random number generator. Defaults to 0.
    """

    name = "Select Columns Transformer"
    hyperparameter_ranges = {}
    """{}"""
    needs_fitting = False

    def _check_input_for_columns(self, X):
        # Missing columns are tolerated: they are simply not selected.
        pass

    def _modify_columns(self, cols, X, y=None):
        """Select the requested columns that exist in X.

        The parent class declares this method abstract, so it must be
        implemented here for the transformer to be usable.

        Args:
            cols (list): Requested column names.
            X (pd.DataFrame): Woodwork-initialized data to select from.
            y (pd.Series, ignored): Targets.

        Returns:
            pd.DataFrame: The columns of X named in ``cols`` that are present,
            in the order they were requested.
        """
        available = set(X.columns)
        # dict.fromkeys removes duplicate requests while keeping request order.
        selected = [col for col in dict.fromkeys(cols) if col in available]
        return X.ww[selected]

    def fit(self, X, y=None):
        """No-op fit: nothing is learned and no column check is performed.

        Args:
            X (pd.DataFrame): Data (unused).
            y (pd.Series, optional): Targets.

        Returns:
            self
        """
        return self
class SelectByType(Transformer):
    """Selects columns by specified Woodwork logical type or semantic tag in input data.

    Args:
        column_types (string, ww.LogicalType, list(string), list(ww.LogicalType)): List of Woodwork types or tags, used to determine which columns to select or exclude.
        exclude (bool): If true, exclude the column_types instead of including them. Defaults to False.
        random_seed (int): Seed for the random number generator. Defaults to 0.
    """

    name = "Select Columns By Type Transformer"
    hyperparameter_ranges = {}
    """{}"""
    needs_fitting = False

    def __init__(self, column_types=None, exclude=False, random_seed=0, **kwargs):
        super().__init__(
            parameters={"column_types": column_types, "exclude": exclude, **kwargs},
            component_obj=None,
            random_seed=random_seed,
        )

    def _modify_columns(self, cols, X, y=None):
        """Include or exclude the given types/tags, depending on the ``exclude`` parameter."""
        mode = "exclude" if self.parameters.get("exclude") else "include"
        return X.ww.select(**{mode: cols})

    def fit(self, X, y=None):
        """Run type inference on X; no state is learned.

        Args:
            X (pd.DataFrame): Data to check.
            y (pd.Series, ignored): Targets.

        Returns:
            self
        """
        infer_feature_types(X)
        return self

    def transform(self, X, y=None):
        """Transform data X by selecting columns of the configured types.

        Args:
            X (pd.DataFrame): Data to transform.
            y (pd.Series, optional): Targets.

        Returns:
            pd.DataFrame: Transformed X.
        """
        typed_X = infer_feature_types(X)
        type_spec = self.parameters.get("column_types") or []
        return infer_feature_types(self._modify_columns(type_spec, typed_X, y))