Source code for ml_grid.model_classes.gradientboosting_classifier_class

from typing import Optional

import numpy as np
import pandas as pd
from ml_grid.util import param_space
from ml_grid.util.global_params import global_parameters
from sklearn.ensemble import GradientBoostingClassifier
from skopt.space import Categorical, Real, Integer

class GradientBoostingClassifier_class:
    """GradientBoostingClassifier with support for both Bayesian and non-Bayesian parameter spaces."""

    def __init__(
        self,
        X: Optional[pd.DataFrame] = None,
        y: Optional[pd.Series] = None,
        parameter_space_size: Optional[str] = None,
    ):
        """Initializes the GradientBoostingClassifier_class.

        Args:
            X (Optional[pd.DataFrame]): Feature matrix for training. Defaults to None.
            y (Optional[pd.Series]): Target vector for training. Defaults to None.
            parameter_space_size (Optional[str]): Size of the parameter space
                for optimization. Defaults to None.
        """
        global_params = global_parameters

        self.X = X
        self.y = y

        # Use the standard GradientBoostingClassifier directly
        self.algorithm_implementation = GradientBoostingClassifier()
        self.method_name = "GradientBoostingClassifier"

        # Define the parameter vector space
        self.parameter_vector_space = param_space.ParamSpace(parameter_space_size)

        if global_params.bayessearch:
            # Define the parameter space for Bayesian optimization
            self.parameter_space = {
                "ccp_alpha": Real(1e-5, 1e-1, prior="log-uniform"),
                "criterion": Categorical(["friedman_mse"]),
                "init": Categorical([None]),
                "learning_rate": Real(1e-5, 1e-1, prior="log-uniform"),
                "loss": Categorical(["log_loss", "exponential"]),
                # "max_depth": Integer(2, 10),  # Uncomment if needed
                "max_features": Categorical(["sqrt", "log2"]),
                # "max_leaf_nodes": Integer(10, 1000),  # Uncomment if needed
                # "min_impurity_decrease": Real(1e-5, 1e-1, prior="log-uniform"),  # Uncomment if needed
                # "min_samples_leaf": Integer(1, 10),  # Uncomment if needed
                # "min_samples_split": Integer(2, 20),  # Uncomment if needed
                # "min_weight_fraction_leaf": Real(0.0, 0.5, prior="uniform"),  # Uncomment if needed
                "n_estimators": Integer(50, 500),
                # "n_iter_no_change": Integer(5, 50),  # Uncomment if needed
                "subsample": Real(0.1, 1.0, prior="uniform"),
                "tol": Real(1e-5, 1e-1, prior="log-uniform"),
                "validation_fraction": Real(0.1, 0.3, prior="uniform"),
                "verbose": Categorical([0]),
                "warm_start": Categorical([False]),
            }
        else:
            # Define the parameter space for traditional grid search
            self.parameter_space = {
                "ccp_alpha": list(self.parameter_vector_space.param_dict.get("log_small")),
                "criterion": ["friedman_mse"],
                "init": [None],
                "learning_rate": list(self.parameter_vector_space.param_dict.get("log_small")),
                "loss": ["log_loss", "exponential"],
                # "max_depth": list(range(2, 11)),  # Uncomment if needed
                "max_features": ["sqrt", "log2"],
                # "max_leaf_nodes": list(range(10, 1001)),  # Uncomment if needed
                # "min_impurity_decrease": list(self.parameter_vector_space.param_dict.get("log_small")),  # Uncomment if needed
                # "min_samples_leaf": list(range(1, 11)),  # Uncomment if needed
                # "min_samples_split": list(range(2, 21)),  # Uncomment if needed
                # "min_weight_fraction_leaf": np.linspace(0.0, 0.5, 6).tolist(),  # Uncomment if needed
                "n_estimators": list(self.parameter_vector_space.param_dict.get("log_large_long")),
                # "n_iter_no_change": list(range(5, 51)),  # Uncomment if needed
                "subsample": list(np.delete(self.parameter_vector_space.param_dict.get("lin_zero_one"), 0)),
                "tol": list(self.parameter_vector_space.param_dict.get("log_small")),
                "validation_fraction": [0.1],
                "verbose": [0],
                "warm_start": [False],
            }
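A minimal usage sketch follows. It is not part of the module above: the toy data, the parameter_space_size="medium" argument, and the direct hand-off to BayesSearchCV / GridSearchCV are assumptions about how the surrounding ml_grid pipeline consumes algorithm_implementation and parameter_space, shown only to illustrate the two branches of the parameter space.

import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from skopt import BayesSearchCV

from ml_grid.model_classes.gradientboosting_classifier_class import (
    GradientBoostingClassifier_class,
)

# Toy data standing in for a real feature matrix and target vector.
X = pd.DataFrame(np.random.rand(100, 5), columns=[f"f{i}" for i in range(5)])
y = pd.Series(np.random.randint(0, 2, size=100))

# "medium" is a hypothetical size label; valid values depend on
# ml_grid.util.param_space.ParamSpace.
model_class = GradientBoostingClassifier_class(X=X, y=y, parameter_space_size="medium")

# When global_parameters.bayessearch is True, parameter_space holds skopt
# dimensions (Real/Integer/Categorical) and can be passed to BayesSearchCV:
search = BayesSearchCV(
    model_class.algorithm_implementation,
    model_class.parameter_space,
    n_iter=16,
    cv=3,
)

# Otherwise it holds plain lists of candidate values and fits sklearn's
# exhaustive grid search instead:
# search = GridSearchCV(
#     model_class.algorithm_implementation,
#     model_class.parameter_space,
#     cv=3,
# )

search.fit(X, y)
print(search.best_params_)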