Source code for evalml.data_checks.data_check_action_option

"""Recommended action returned by a DataCheck."""
from enum import Enum

from evalml.data_checks.data_check_action import DataCheckAction
from evalml.data_checks.data_check_action_code import DataCheckActionCode
from evalml.utils import classproperty


class DataCheckActionOption:
    """A recommended action option returned by a DataCheck.

    It contains an action code that indicates what the action should be, a data check name that indicates what data check was used to generate the action, and
    parameters and metadata which can be used to further refine the action.

    Args:
        action_code (DataCheckActionCode): Action code associated with the action option.
        data_check_name (str): Name of the data check that produced this option.
        parameters (dict): Parameters associated with the action option. Defaults to None.
        metadata (dict, optional): Additional useful information associated with the action option. Defaults to None.

    Examples:
        >>> parameters = {
        ...     "global_parameter_name": {
        ...         "parameter_type": "global",
        ...         "type": "float",
        ...         "default_value": 0.0,
        ...     },
        ...     "column_parameter_name": {
        ...         "parameter_type": "column",
        ...         "columns": {
        ...             "a": {
        ...                 "impute_strategy": {
        ...                     "categories": ["mean", "most_frequent"],
        ...                     "type": "category",
        ...                     "default_value": "mean",
        ...                 },
        ...                 "constant_fill_value": {"type": "float", "default_value": 0},
        ...             },
        ...         },
        ...     },
        ... }
        >>> data_check_action = DataCheckActionOption(DataCheckActionCode.DROP_COL, None, metadata={}, parameters=parameters)
    """

    def __init__(self, action_code, data_check_name, parameters=None, metadata=None):
        self.action_code = action_code
        self.data_check_name = data_check_name
        self.parameters = parameters or {}
        # Metadata always carries `columns` and `rows`; caller-supplied values
        # are layered on top of these defaults.
        self.metadata = {"columns": None, "rows": None}
        if metadata is not None:
            self.metadata.update(metadata)
        self._validate_parameters()

    def __eq__(self, other):
        """Check for equality.

        Two DataCheckActionOption objs are considered equivalent if all of their attributes are equivalent.

        Args:
            other: An object to compare equality with.

        Returns:
            bool: True if the other object is considered an equivalent data check action option, False otherwise.
            Objects that lack any of the compared attributes are never equal.
        """
        attributes_to_check = [
            "action_code",
            "data_check_name",
            "parameters",
            "metadata",
        ]
        # A private sentinel lets objects without these attributes (None, ints,
        # unrelated classes, ...) compare unequal instead of raising
        # AttributeError.
        missing = object()
        for attribute in attributes_to_check:
            other_value = getattr(other, attribute, missing)
            if other_value is missing or getattr(self, attribute) != other_value:
                return False
        return True

    def to_dict(self):
        """Return a dictionary form of the data check action option.

        Returns:
            dict: Dictionary with the keys `code`, `data_check_name`, `metadata` and `parameters`.
            Each parameter's `parameter_type` is written as the string value of its DCAOParameterType.
        """
        action_option_dict = {
            "code": self.action_code.name,
            "data_check_name": self.data_check_name,
            "metadata": self.metadata,
        }
        parameters_dict = {}
        for parameter_name, parameter_info in self.parameters.items():
            # Copy each parameter entry before normalizing `parameter_type`, so
            # serializing never rewrites the option's own `parameters`.
            serialized = dict(parameter_info)
            serialized[
                "parameter_type"
            ] = DCAOParameterType.handle_dcao_parameter_type(
                parameter_info["parameter_type"],
            ).value
            parameters_dict[parameter_name] = serialized
        action_option_dict["parameters"] = parameters_dict
        return action_option_dict

    @staticmethod
    def convert_dict_to_option(action_dict):
        """Convert a dictionary into a DataCheckActionOption.

        Args:
            action_dict: Dictionary to convert into an action option. Should have keys "code", "data_check_name", and "metadata".

        Raises:
            ValueError: If the input dictionary is missing the key `code` or `metadata`, or if the
                `metadata` dictionary contains neither `columns` nor `rows`.

        Returns:
            DataCheckActionOption object from the input dictionary.
        """
        if "code" not in action_dict or "metadata" not in action_dict:
            raise ValueError(
                "The input dictionary should have the keys `code` and `metadata`.",
            )
        metadata = action_dict["metadata"]
        if "columns" not in metadata and "rows" not in metadata:
            raise ValueError(
                "The metadata dictionary should have the keys `columns` or `rows`. Set to None if not using.",
            )

        # `data_check_name` and `parameters` are optional and fall back to None.
        return DataCheckActionOption(
            action_code=DataCheckActionCode._all_values[action_dict["code"]],
            metadata=metadata,
            data_check_name=action_dict.get("data_check_name"),
            parameters=action_dict.get("parameters"),
        )

    def _validate_parameters(self):
        """Validate parameters associated with the action option.

        Raises:
            ValueError: If a parameter has no `parameter_type` key or an unknown parameter type, if a
                global parameter has no `type` key, if a column parameter has no `columns` dictionary,
                or if a column-level parameter is missing its `type` or `default_value` key.
        """
        for parameter_value in self.parameters.values():
            if "parameter_type" not in parameter_value:
                raise ValueError("Each parameter must have a parameter_type key.")

            try:
                parameter_type = DCAOParameterType.handle_dcao_parameter_type(
                    parameter_value["parameter_type"],
                )
            except KeyError as ke:
                raise ValueError(
                    "Each parameter must have a parameter_type key with a value of `global` or `column`. "
                    + str(ke),
                ) from ke

            if parameter_type == DCAOParameterType.GLOBAL:
                if "type" not in parameter_value:
                    raise ValueError("Each global parameter must have a type key.")
            elif parameter_type == DCAOParameterType.COLUMN:
                if "columns" not in parameter_value:
                    raise ValueError(
                        "Each `column` parameter type must also have a `columns` key indicating which columns the parameter should address.",
                    )
                columns = parameter_value["columns"]
                if not isinstance(columns, dict):
                    raise ValueError(
                        "`columns` must be a dictionary, where each key is the name of a column and the associated value is a dictionary of parameters for that column.",
                    )
                for column_parameters in columns.values():
                    for column_parameter_values in column_parameters.values():
                        if "type" not in column_parameter_values:
                            raise ValueError(
                                "Each column parameter must have a type key.",
                            )
                        if "default_value" not in column_parameter_values:
                            raise ValueError(
                                "Each column parameter must have a default_value key.",
                            )

    def get_action_from_defaults(self):
        """Return an action built from the default value of every parameter.

        Global parameters contribute their `default_value` directly; column parameters contribute,
        for each column, a dictionary mapping each column-level parameter name to its `default_value`.

        Returns:
            DataCheckAction: An action with this option's action code and data check name, whose
            metadata is this option's metadata plus a `parameters` entry holding the defaults.
        """
        actions_parameters = {}
        for parameter, parameter_info in self.parameters.items():
            parameter_type = DCAOParameterType.handle_dcao_parameter_type(
                parameter_info["parameter_type"],
            )
            if parameter_type == DCAOParameterType.GLOBAL:
                actions_parameters[parameter] = parameter_info["default_value"]
            elif parameter_type == DCAOParameterType.COLUMN:
                column_defaults = {}
                for column_name, column_parameters in parameter_info[
                    "columns"
                ].items():
                    column_defaults[column_name] = {
                        name: spec["default_value"]
                        for name, spec in column_parameters.items()
                    }
                actions_parameters[parameter] = column_defaults

        # Work on a copy: adding `parameters` to `self.metadata` itself would
        # permanently change this option (and its equality / dict form).
        metadata = dict(self.metadata)
        metadata["parameters"] = actions_parameters
        return DataCheckAction(
            self.action_code,
            self.data_check_name,
            metadata=metadata,
        )
class DCAOParameterType(Enum):
    """Enum of the parameter types a data check action option can declare."""

    GLOBAL = "global"
    """Global parameter type. Parameters that apply to the entire data set."""
    COLUMN = "column"
    """Column parameter type. Parameters that apply to a specific column in the data set."""

    def __str__(self):
        """Return the string representation of this DCAOParameterType member."""
        # Explicit member -> string table, built at call time so it is not
        # picked up as an enum member itself.
        member_strings = {
            DCAOParameterType.GLOBAL: "global",
            DCAOParameterType.COLUMN: "column",
        }
        return member_strings[self]

    @classproperty
    def _all_values(cls):
        # Lookup table keyed by the upper-cased member value.
        lookup = {}
        for parameter_type in cls.all_parameter_types:
            lookup[parameter_type.value.upper()] = parameter_type
        return lookup

    @classproperty
    def all_parameter_types(cls):
        """Get a list of all defined parameter types.

        Returns:
            list(DCAOParameterType): List of all defined parameter types.
        """
        return [*cls]

    @staticmethod
    def handle_dcao_parameter_type(dcao_parameter_type):
        """Resolve the input to a DCAOParameterType member.

        A DCAOParameterType is returned unchanged; a string is upper-cased and looked up
        among the defined parameter types.

        Args:
            dcao_parameter_type (str or DCAOParameterType): Data check action option parameter type that needs to be handled.

        Returns:
            DCAOParameterType enum

        Raises:
            KeyError: If input is a string that does not name a DCAOParameterType.
            ValueError: If input is not a string or DCAOParameterType object.
        """
        if isinstance(dcao_parameter_type, DCAOParameterType):
            return dcao_parameter_type
        if not isinstance(dcao_parameter_type, str):
            raise ValueError(
                "`handle_dcao_parameter_type` was not passed a str or DCAOParameterType object",
            )

        lookup_key = dcao_parameter_type.upper()
        try:
            return DCAOParameterType._all_values[lookup_key]
        except KeyError:
            raise KeyError(
                "Parameter type '{}' does not exist".format(dcao_parameter_type),
            )
class DCAOParameterAllowedValuesType(Enum):
    """Enum describing how a data check action option parameter's allowed values are specified."""

    CATEGORICAL = "categorical"
    """Categorical allowed values type. Parameters that have a set of allowed values."""

    NUMERICAL = "numerical"
    """Numerical allowed values type. Parameters that have a range of allowed values."""