Source code for ml_grid.model_classes.gaussiannb_class

"""Defines the GaussianNB model class.."""

import logging
from typing import Any, Dict, List, Optional, Union

import pandas as pd
from sklearn.naive_bayes import GaussianNB
from skopt.space import Categorical, Real

from ml_grid.util import param_space
from ml_grid.util.global_params import global_parameters

# Runs once at import time: emit a debug record on the shared "ml_grid" logger
# so that module loading can be traced when debug logging is enabled.
logging.getLogger("ml_grid").debug("Imported gaussiannb class")


class GaussianNBWrapper(GaussianNB):
    """A wrapper for GaussianNB to handle integer-mapped priors for Bayesian search.

    Bayesian optimisation samples the ``priors`` hyperparameter as an integer
    category. This wrapper translates that integer into the corresponding list
    of class prior probabilities before handing it to ``GaussianNB``.
    """

    def set_params(self, **params: Any) -> "GaussianNBWrapper":
        """Sets the parameters of the estimator.

        If ``priors`` is supplied as an integer index it is replaced by the
        matching entry of the prior mapping. Any other value (``None``, a list,
        an array, ...) is forwarded unchanged, so already-decoded priors such
        as those returned by ``get_params()`` can be passed back in.

        Args:
            **params: Estimator parameters.

        Returns:
            The instance with updated parameters.

        Raises:
            KeyError: If ``priors`` is an integer that is not a known index.
        """
        # Function-scope import: only this method needs it.
        import numbers

        prior_mapping: Dict[int, Optional[List[float]]] = {
            0: None,  # Default priors (based on the class distribution in the dataset)
            1: [0.5, 0.5],  # Equal probabilities
            2: [0.6, 0.4],  # Slight imbalance favoring class 0
            3: [0.4, 0.6],  # Slight imbalance favoring class 1
            4: [0.7, 0.3],  # Moderate imbalance favoring class 0
            5: [0.3, 0.7],  # Moderate imbalance favoring class 1
            6: [0.8, 0.2],  # Strong imbalance favoring class 0
            7: [0.2, 0.8],  # Strong imbalance favoring class 1
            8: [0.9, 0.1],  # Extreme imbalance favoring class 0
            9: [0.1, 0.9],  # Extreme imbalance favoring class 1
        }

        if "priors" in params:
            priors = params["priors"]
            # Only integer codes are decoded. Looking up a list would raise
            # "unhashable type" and None would raise KeyError, even though both
            # are legitimate GaussianNB values for `priors`.
            if isinstance(priors, numbers.Integral):
                # int() normalises NumPy integer scalars to a plain int key.
                params["priors"] = prior_mapping[int(priors)]

        return super().set_params(**params)
class GaussianNBClassifierClass:
    """GaussianNB estimator bundled with its hyperparameter search space.

    Depending on ``global_parameters.bayessearch`` the instance holds either a
    plain ``GaussianNB`` with list-based grid-search candidates, or a
    ``GaussianNBWrapper`` with skopt distributions for Bayesian search.
    """

    def __init__(
        self,
        X: Optional[pd.DataFrame] = None,
        y: Optional[pd.Series] = None,
        parameter_space_size: Optional[str] = None,
    ) -> None:
        """Stores the data and builds the estimator and its parameter space.

        Args:
            X (Optional[pd.DataFrame]): The input features. Defaults to None.
            y (Optional[pd.Series]): The target variable. Defaults to None.
            parameter_space_size (Optional[str]): Forwarded to
                ``param_space.ParamSpace`` to obtain the ``"log_small"``
                candidates; only used when Bayesian search is disabled.
                Defaults to None.
        """
        self.X: Optional[pd.DataFrame] = X
        self.y: Optional[pd.Series] = y

        # Bayesian search samples `priors` as an integer code, which only the
        # wrapper knows how to decode; grid search uses the stock estimator.
        self.algorithm_implementation: Union[GaussianNB, GaussianNBWrapper] = (
            GaussianNBWrapper() if global_parameters.bayessearch else GaussianNB()
        )

        self.method_name: str = "GaussianNB"
        self.parameter_space: Dict[str, Any]

        if not global_parameters.bayessearch:
            # Traditional grid search: every hyperparameter is an explicit list.
            smoothing_candidates = list(
                param_space.ParamSpace(parameter_space_size).param_dict.get(
                    "log_small"
                )
            )
            smoothing_candidates.append(1e-9)
            self.parameter_space = {
                "priors": [
                    None,
                    [0.1, 0.9],
                    [0.9, 0.1],
                    [0.7, 0.3],
                    [0.3, 0.7],
                    [0.5, 0.5],
                    [0.6, 0.4],
                    [0.4, 0.6],
                ],
                "var_smoothing": smoothing_candidates,
            }
            return

        # BayesSearchCV: hyperparameters are skopt.space distributions.
        self.parameter_space = {
            "var_smoothing": Real(1e-9, 1e-2, prior="log-uniform"),
            # Integer codes 0-2, decoded by GaussianNBWrapper.set_params.
            "priors": Categorical([0, 1, 2]),
        }
        # Log parameter space for verification
        logging.getLogger("ml_grid").debug(
            f"Parameter Space: {self.parameter_space}"
        )