# Source code for evalml.data_checks.data_check_message_code
"""Enum for data check message code."""
from enum import Enum
class DataCheckMessageCode(Enum):
    """Enum for data check message code.

    Each member maps a symbolic name to the string value used as the
    message code. The bare string literal following each member is its
    attribute docstring.
    """

    COLS_WITH_NULL = "cols_with_null"
    """Message code for columns with null values."""

    HIGHLY_NULL_COLS = "highly_null_cols"
    """Message code for highly null columns."""

    HIGHLY_NULL_ROWS = "highly_null_rows"
    """Message code for highly null rows."""

    HAS_ID_COLUMN = "has_id_column"
    """Message code for data that has ID columns."""

    HAS_ID_FIRST_COLUMN = "has_id_first_column"
    """Message code for data that has an ID column as the first column."""

    TARGET_INCOMPATIBLE_OBJECTIVE = "target_incompatible_objective"
    """Message code for target data that has incompatible values for the specified objective."""

    TARGET_IS_NONE = "target_is_none"
    """Message code for when target is None."""

    TARGET_IS_EMPTY_OR_FULLY_NULL = "target_is_empty_or_fully_null"
    """Message code for target data that is empty or has all null values."""

    TARGET_HAS_NULL = "target_has_null"
    """Message code for target data that has null values."""

    TARGET_UNSUPPORTED_TYPE = "target_unsupported_type"
    """Message code for target data that is of an unsupported type."""

    TARGET_UNSUPPORTED_TYPE_REGRESSION = "target_unsupported_type_regression"
    """Message code for target data that is incompatible with regression."""

    TARGET_UNSUPPORTED_PROBLEM_TYPE = "target_unsupported_problem_type"
    """Message code for target data that is being checked against an unsupported problem type."""

    TARGET_BINARY_NOT_TWO_UNIQUE_VALUES = "target_binary_not_two_unique_values"
    """Message code for target data for a binary classification problem that does not have two unique values."""

    TARGET_MULTICLASS_NOT_TWO_EXAMPLES_PER_CLASS = (
        "target_multiclass_not_two_examples_per_class"
    )
    """Message code for target data for a multiclass classification problem that does not have two examples per class."""

    TARGET_MULTICLASS_NOT_ENOUGH_CLASSES = "target_multiclass_not_enough_classes"
    """Message code for target data for a multiclass classification problem that does not have more than two unique classes."""

    # NOTE: the value carries a "_warning" suffix that the member name does
    # not; it is kept unchanged because callers may compare against it.
    TARGET_MULTICLASS_HIGH_UNIQUE_CLASS = "target_multiclass_high_unique_class_warning"
    """Message code for target data for a multiclass classification problem that has an abnormally large number of unique classes relative to the number of target values."""

    TARGET_LOGNORMAL_DISTRIBUTION = "target_lognormal_distribution"
    """Message code for target data with a lognormal distribution."""

    HIGH_VARIANCE = "high_variance"
    """Message code for when high variance is detected for cross-validation."""

    TARGET_LEAKAGE = "target_leakage"
    """Message code for when target leakage is detected."""

    HAS_OUTLIERS = "has_outliers"
    """Message code for when outliers are detected."""

    CLASS_IMBALANCE_BELOW_THRESHOLD = "class_imbalance_below_threshold"
    """Message code for when balance in classes is less than the threshold."""

    CLASS_IMBALANCE_SEVERE = "class_imbalance_severe"
    """Message code for when balance in classes is less than the threshold and minimum class is less than minimum number of accepted samples."""

    CLASS_IMBALANCE_BELOW_FOLDS = "class_imbalance_below_folds"
    """Message code for when the number of values for each target is below 2 * number of CV folds."""

    NO_VARIANCE = "no_variance"
    """Message code for when data has no variance (1 unique value)."""

    NO_VARIANCE_ZERO_UNIQUE = "no_variance_zero_unique"
    """Message code for when data has no variance (0 unique values)."""

    NO_VARIANCE_WITH_NULL = "no_variance_with_null"
    """Message code for when data has one unique value and NaN values."""

    IS_MULTICOLLINEAR = "is_multicollinear"
    """Message code for when data is potentially multicollinear."""

    NOT_UNIQUE_ENOUGH = "not_unique_enough"
    """Message code for when data does not possess enough unique values."""

    TOO_UNIQUE = "too_unique"
    """Message code for when data possesses too many unique values."""

    # NOTE: unlike every other value in this enum, this one contains a space
    # rather than an underscore; it is kept unchanged because callers may
    # compare against the literal string.
    TOO_SPARSE = "too sparse"
    """Message code for when multiclass data has values that are too sparsely populated."""

    MISMATCHED_INDICES = "mismatched_indices"
    """Message code for when input target and features have mismatched indices."""

    MISMATCHED_INDICES_ORDER = "mismatched_indices_order"
    """Message code for when input target and features have mismatched indices order. The two inputs have the same index values, but shuffled."""

    MISMATCHED_LENGTHS = "mismatched_lengths"
    """Message code for when input target and features have different lengths."""

    DATETIME_HAS_NAN = "datetime_has_nan"
    """Message code for when input datetime columns contain NaN values."""

    NATURAL_LANGUAGE_HAS_NAN = "natural_language_has_nan"
    """Message code for when input natural language columns contain NaN values."""

    DATETIME_INFORMATION_NOT_FOUND = "datetime_information_not_found"
    """Message code for when datetime information cannot be found or is in an unaccepted format."""

    DATETIME_NO_FREQUENCY_INFERRED = "datetime_no_frequency_inferred"
    """Message code for when no frequency can be inferred in the datetime values through Woodwork's infer_frequency."""

    DATETIME_HAS_UNEVEN_INTERVALS = "datetime_has_uneven_intervals"
    """Message code for when the datetime values have uneven intervals."""

    DATETIME_HAS_REDUNDANT_ROW = "datetime_has_redundant_row"
    """Message code for when datetime information has more than one row per datetime."""

    DATETIME_IS_MISSING_VALUES = "datetime_is_missing_values"
    """Message code for when datetime feature has values missing between the start and end dates."""

    DATETIME_HAS_MISALIGNED_VALUES = "datetime_has_misaligned_values"
    """Message code for when datetime information has values that are not aligned with the inferred frequency."""

    DATETIME_IS_NOT_MONOTONIC = "datetime_is_not_monotonic"
    """Message code for when the datetime values are not monotonically increasing."""

    TIMESERIES_PARAMETERS_NOT_COMPATIBLE_WITH_SPLIT = (
        "timeseries_parameters_not_compatible_with_split"
    )
    """Message code for when the time series parameters are too large for the smallest data split."""

    TIMESERIES_TARGET_NOT_COMPATIBLE_WITH_SPLIT = (
        "timeseries_target_not_compatible_with_split"
    )
    """Message code for when any training and validation split of the time series target doesn't contain all classes."""