Source code for evalml.data_checks.data_check_action_option
"""Recommended action returned by a DataCheck."""fromenumimportEnumfromevalml.data_checks.data_check_actionimportDataCheckActionfromevalml.data_checks.data_check_action_codeimportDataCheckActionCodefromevalml.utilsimportclassproperty
class DataCheckActionOption:
    """A recommended action option returned by a DataCheck.

    It contains an action code that indicates what the action should be, a data check name that indicates what data check was used to generate the action, and
    parameters and metadata which can be used to further refine the action.

    Args:
        action_code (DataCheckActionCode): Action code associated with the action option.
        data_check_name (str): Name of the data check that produced this option.
        parameters (dict): Parameters associated with the action option. Defaults to None.
        metadata (dict, optional): Additional useful information associated with the action option. Defaults to None.

    Examples:
        >>> parameters = {
        ...     "global_parameter_name": {
        ...         "parameter_type": "global",
        ...         "type": "float",
        ...         "default_value": 0.0,
        ...     },
        ...     "column_parameter_name": {
        ...         "parameter_type": "column",
        ...         "columns": {
        ...             "a": {
        ...                 "impute_strategy": {
        ...                     "categories": ["mean", "most_frequent"],
        ...                     "type": "category",
        ...                     "default_value": "mean",
        ...                 },
        ...                 "constant_fill_value": {"type": "float", "default_value": 0},
        ...             },
        ...         },
        ...     },
        ... }
        >>> data_check_action = DataCheckActionOption(DataCheckActionCode.DROP_COL, None, metadata={}, parameters=parameters)
    """

    def __init__(self, action_code, data_check_name, parameters=None, metadata=None):
        self.action_code = action_code
        self.data_check_name = data_check_name
        self.parameters = parameters or {}
        # Metadata always carries the `columns` and `rows` keys; caller-supplied
        # entries are merged on top of these defaults.
        self.metadata = {"columns": None, "rows": None}
        if metadata is not None:
            self.metadata.update(metadata)
        self._validate_parameters()

    def __eq__(self, other):
        """Check for equality.

        Two DataCheckActionOption objs are considered equivalent if all of their attributes are equivalent.

        Args:
            other: An object to compare equality with.

        Returns:
            bool: True if the other object is considered an equivalent data check action, False otherwise.
            ``NotImplemented`` is returned when ``other`` is not a DataCheckActionOption, so that
            comparisons against unrelated objects evaluate to False instead of raising AttributeError.
        """
        if not isinstance(other, DataCheckActionOption):
            return NotImplemented
        attributes_to_check = (
            "action_code",
            "data_check_name",
            "parameters",
            "metadata",
        )
        for attribute in attributes_to_check:
            if getattr(self, attribute) != getattr(other, attribute):
                return False
        return True

    def to_dict(self):
        """Return a dictionary form of the data check action option.

        Each parameter's ``parameter_type`` is normalized to its string value. The
        per-parameter dictionaries are rebuilt rather than edited in place, so
        ``self.parameters`` is left unmodified.

        Returns:
            dict: Dictionary with the keys ``code``, ``data_check_name``, ``metadata`` and ``parameters``.
        """
        parameters_dict = {
            name: {
                **info,
                # Re-assigning an existing key keeps its position in the dict.
                "parameter_type": DCAOParameterType.handle_dcao_parameter_type(
                    info["parameter_type"],
                ).value,
            }
            for name, info in self.parameters.items()
        }
        return {
            "code": self.action_code.name,
            "data_check_name": self.data_check_name,
            "metadata": self.metadata,
            "parameters": parameters_dict,
        }

    @staticmethod
    def convert_dict_to_option(action_dict):
        """Convert a dictionary into a DataCheckActionOption.

        Args:
            action_dict: Dictionary to convert into an action option. Should have keys "code", "data_check_name", and "metadata".

        Raises:
            ValueError: If the input dictionary is missing the key `code` or `metadata`, or if
                the `metadata` dictionary contains neither `columns` nor `rows`.

        Returns:
            DataCheckActionOption object from the input dictionary.
        """
        if "code" not in action_dict or "metadata" not in action_dict:
            raise ValueError(
                "The input dictionary should have the keys `code` and `metadata`.",
            )
        metadata = action_dict["metadata"]
        if "columns" not in metadata and "rows" not in metadata:
            raise ValueError(
                "The metadata dictionary should have the keys `columns` or `rows`. Set to None if not using.",
            )

        return DataCheckActionOption(
            action_code=DataCheckActionCode._all_values[action_dict["code"]],
            metadata=metadata,
            # Both of these keys are optional in the input dictionary.
            data_check_name=action_dict.get("data_check_name"),
            parameters=action_dict.get("parameters"),
        )

    def _validate_parameters(self):
        """Validate parameters associated with the action option.

        Raises:
            ValueError: If a parameter has no ``parameter_type`` key or an unknown
                parameter type, if a global parameter has no ``type`` key, if a column
                parameter has no ``columns`` dictionary, or if any per-column parameter
                lacks a ``type`` or ``default_value`` key.
        """
        for parameter_value in self.parameters.values():
            if "parameter_type" not in parameter_value:
                raise ValueError("Each parameter must have a parameter_type key.")

            try:
                parameter_type = DCAOParameterType.handle_dcao_parameter_type(
                    parameter_value["parameter_type"],
                )
            except KeyError as ke:
                raise ValueError(
                    "Each parameter must have a parameter_type key with a value of `global` or `column`. "
                    + str(ke),
                ) from ke

            if parameter_type == DCAOParameterType.GLOBAL:
                # NOTE(review): `default_value` is not checked for global parameters
                # here, although `get_action_from_defaults` reads it.
                if "type" not in parameter_value:
                    raise ValueError("Each global parameter must have a type key.")
            elif parameter_type == DCAOParameterType.COLUMN:
                if "columns" not in parameter_value:
                    raise ValueError(
                        "Each `column` parameter type must also have a `columns` key indicating which columns the parameter should address.",
                    )
                columns = parameter_value["columns"]
                if not isinstance(columns, dict):
                    raise ValueError(
                        "`columns` must be a dictionary, where each key is the name of a column and the associated value is a dictionary of parameters for that column.",
                    )
                for column_parameters in columns.values():
                    for column_parameter_values in column_parameters.values():
                        if "type" not in column_parameter_values:
                            raise ValueError(
                                "Each column parameter must have a type key.",
                            )
                        if "default_value" not in column_parameter_values:
                            raise ValueError(
                                "Each column parameter must have a default_value key.",
                            )

    def get_action_from_defaults(self):
        """Returns an action based on the defaults parameters.

        The default values are collected under a ``parameters`` key in a copy of this
        option's metadata; ``self.metadata`` itself is not modified.

        Returns:
            DataCheckAction: An action built from the default parameter values of the option.
        """
        actions_parameters = {}
        for parameter, parameter_info in self.parameters.items():
            parameter_type = DCAOParameterType.handle_dcao_parameter_type(
                parameter_info["parameter_type"],
            )
            if parameter_type == DCAOParameterType.GLOBAL:
                actions_parameters[parameter] = parameter_info["default_value"]
            elif parameter_type == DCAOParameterType.COLUMN:
                # Maps column name -> parameter name -> default value.
                actions_parameters[parameter] = {
                    column_name: {
                        column_parameter: column_parameter_info["default_value"]
                        for column_parameter, column_parameter_info in column_parameters.items()
                    }
                    for column_name, column_parameters in parameter_info["columns"].items()
                }

        # Build a new dict so the option's own metadata does not gain a `parameters` key.
        metadata = {**self.metadata, "parameters": actions_parameters}
        return DataCheckAction(
            self.action_code,
            self.data_check_name,
            metadata=metadata,
        )
class DCAOParameterType(Enum):
    """Enum for data check action option parameter type."""

    GLOBAL = "global"
    """Global parameter type. Parameters that apply to the entire data set."""
    COLUMN = "column"
    """Column parameter type. Parameters that apply to a specific column in the data set."""

    def __str__(self):
        """String representation of the DCAOParameterType enum."""
        # Each member's value is already its lowercase string form.
        return self.value

    @classproperty
    def _all_values(cls):
        # Lookup table from the upper-cased member value to the member itself.
        lookup = {}
        for member in cls.all_parameter_types:
            lookup[member.value.upper()] = member
        return lookup

    @classproperty
    def all_parameter_types(cls):
        """Get a list of all defined parameter types.

        Returns:
            list(DCAOParameterType): List of all defined parameter types.
        """
        return [*cls]

    @staticmethod
    def handle_dcao_parameter_type(dcao_parameter_type):
        """Handles the data check action option parameter type by either returning the DCAOParameterType enum or converting from a str.

        Args:
            dcao_parameter_type (str or DCAOParameterType): Data check action option parameter type that needs to be handled.

        Returns:
            DCAOParameterType enum

        Raises:
            KeyError: If input is a string that does not name a DCAOParameterType value (matched case-insensitively).
            ValueError: If input is not a string or DCAOParameterType object.
        """
        # Enum members pass straight through.
        if isinstance(dcao_parameter_type, DCAOParameterType):
            return dcao_parameter_type
        if not isinstance(dcao_parameter_type, str):
            raise ValueError(
                "`handle_dcao_parameter_type` was not passed a str or DCAOParameterType object",
            )
        try:
            return DCAOParameterType._all_values[dcao_parameter_type.upper()]
        except KeyError:
            raise KeyError(
                "Parameter type '{}' does not exist".format(dcao_parameter_type),
            )
class DCAOParameterAllowedValuesType(Enum):
    """Enum describing how a data check action option parameter's allowed values are expressed."""

    CATEGORICAL = "categorical"
    """Categorical allowed values type: the parameter has a set of allowed values."""

    NUMERICAL = "numerical"
    """Numerical allowed values type: the parameter has a range of allowed values."""