Source code for ml_grid.model_classes.logistic_regression_class

"""Logistic Regression Classifier.

This module contains the LogisticRegressionClass, a configuration class for
scikit-learn's LogisticRegression estimator. It provides parameter spaces for
grid search and Bayesian optimization.
"""

import logging
from typing import Any, Dict, List, Optional

import pandas as pd
from sklearn.linear_model import LogisticRegression
from skopt.space import Categorical, Integer, Real

from ml_grid.util import param_space
from ml_grid.util.global_params import global_parameters

logging.getLogger("ml_grid").debug("Imported logistic regression class")


class LogisticRegressionClass:
    """LogisticRegression with support for both Bayesian and non-Bayesian parameter spaces."""

    def __init__(
        self,
        X: Optional[pd.DataFrame] = None,
        y: Optional[pd.Series] = None,
        parameter_space_size: Optional[str] = None,
    ):
        """Initializes the LogisticRegressionClass.

        Args:
            X (Optional[pd.DataFrame]): Feature matrix for training.
                Defaults to None.
            y (Optional[pd.Series]): Target vector for training.
                Defaults to None.
            parameter_space_size (Optional[str]): Size of the parameter space
                for optimization. Defaults to None.

        Raises:
            ValueError: If `parameter_space_size` is not a valid key (though
                the current implementation does not explicitly raise this).
        """
        global_params = global_parameters

        self.X: Optional[pd.DataFrame] = X
        self.y: Optional[pd.Series] = y

        # Set the base implementation
        self.algorithm_implementation: LogisticRegression = LogisticRegression()
        self.method_name: str = "LogisticRegression"

        self.parameter_vector_space: param_space.ParamSpace = param_space.ParamSpace(
            parameter_space_size
        )
        self.parameter_space: List[Dict[str, Any]]

        # Define parameter space based on search type
        if global_params.bayessearch:
            # Bayesian search parameter definitions as a list of parameter spaces
            self.parameter_space = [
                # ElasticNet penalty: solver must be 'saga'
                {
                    "C": Real(1e-5, 1e-2, prior="log-uniform"),
                    "class_weight": Categorical([None, "balanced"]),
                    "dual": Categorical([False]),
                    "fit_intercept": Categorical([True]),
                    "intercept_scaling": Real(0.1, 10.0, prior="log-uniform"),
                    "l1_ratio": Real(0.0, 1.0, prior="uniform"),  # For elasticnet penalty only
                    "max_iter": Integer(100, 1000),
                    "multi_class": Categorical(["auto", "ovr", "multinomial"]),
                    "n_jobs": Categorical([None, -1]),
                    "penalty": Categorical(["elasticnet"]),
                    "solver": Categorical(["saga"]),
                    "tol": Real(1e-5, 1e-2, prior="log-uniform"),
                    "verbose": Categorical([0]),
                    "warm_start": Categorical([False]),
                },
                # L1 penalty: solver must be 'saga'
                {
                    "C": Real(1e-5, 1e-2, prior="log-uniform"),
                    "class_weight": Categorical([None, "balanced"]),
                    "dual": Categorical([False]),
                    "fit_intercept": Categorical([True]),
                    "intercept_scaling": Real(0.1, 10.0, prior="log-uniform"),
                    "l1_ratio": Categorical([None]),  # No l1_ratio for l1 penalty
                    "max_iter": Integer(100, 1000),
                    "multi_class": Categorical(["auto", "ovr", "multinomial"]),
                    "n_jobs": Categorical([None, -1]),
                    "penalty": Categorical(["l1"]),
                    "solver": Categorical(["saga"]),
                    "tol": Real(1e-5, 1e-2, prior="log-uniform"),
                    "verbose": Categorical([0]),
                    "warm_start": Categorical([False]),
                },
                # L2 penalty: solver can be 'saga', 'newton-cg', or 'lbfgs'
                {
                    "C": Real(1e-5, 1e-2, prior="log-uniform"),
                    "class_weight": Categorical([None, "balanced"]),
                    "dual": Categorical([False]),
                    "fit_intercept": Categorical([True]),
                    "intercept_scaling": Real(0.1, 10.0, prior="log-uniform"),
                    "l1_ratio": Categorical([None]),  # No l1_ratio for l2 penalty
                    "max_iter": Integer(100, 1000),
                    "multi_class": Categorical(["auto", "ovr", "multinomial"]),
                    "n_jobs": Categorical([None, -1]),
                    "penalty": Categorical(["l2"]),
                    "solver": Categorical(["newton-cg", "lbfgs", "saga"]),
                    "tol": Real(1e-5, 1e-2, prior="log-uniform"),
                    "verbose": Categorical([0]),
                    "warm_start": Categorical([False]),
                },
            ]
        else:
            # Grid search parameter definitions as a list of parameter spaces
            self.parameter_space = [
                {
                    "C": self.parameter_vector_space.param_dict.get("log_small"),
                    "class_weight": [None, "balanced"],
                    "dual": [False],
                    "fit_intercept": [True],
                    "intercept_scaling": [1],
                    "l1_ratio": [0.5],  # Only for elasticnet penalty
                    "max_iter": self.parameter_vector_space.param_dict.get(
                        "log_large_long"
                    ),
                    "multi_class": ["auto"],
                    "n_jobs": [None, -1],
                    "penalty": ["elasticnet"],
                    "solver": ["saga"],
                    "tol": self.parameter_vector_space.param_dict.get("log_small"),
                    "verbose": [0],
                    "warm_start": [False],
                },
                {
                    "C": self.parameter_vector_space.param_dict.get("log_small"),
                    "class_weight": [None, "balanced"],
                    "dual": [False],
                    "fit_intercept": [True],
                    "intercept_scaling": [1],
                    "l1_ratio": [None],  # No l1_ratio for l1 and l2 penalties
                    "max_iter": self.parameter_vector_space.param_dict.get(
                        "log_large_long"
                    ),
                    "multi_class": ["auto"],
                    "n_jobs": [None, -1],
                    "penalty": ["l1"],
                    "solver": ["saga"],
                    "tol": self.parameter_vector_space.param_dict.get("log_small"),
                    "verbose": [0],
                    "warm_start": [False],
                },
                {
                    "C": self.parameter_vector_space.param_dict.get("log_small"),
                    "class_weight": [None, "balanced"],
                    "dual": [False],
                    "fit_intercept": [True],
                    "intercept_scaling": [1],
                    "l1_ratio": [None],  # No l1_ratio for l2 penalty
                    "max_iter": self.parameter_vector_space.param_dict.get(
                        "log_large_long"
                    ),
                    "multi_class": ["auto"],
                    "n_jobs": [None, -1],
                    "penalty": ["l2"],
                    "solver": ["newton-cg", "lbfgs", "saga"],  # All solvers work for l2
                    "tol": self.parameter_vector_space.param_dict.get("log_small"),
                    "verbose": [0],
                    "warm_start": [False],
                },
            ]