Source code for ml_grid.model_classes.knn_classifier_class

"""KNeighbors Classifier.

This module contains KNeighborsClassifierClass, a configuration class
for scikit-learn's KNeighborsClassifier. It provides parameter spaces for
grid search and Bayesian optimization.
"""

import logging
from typing import Any, Dict, Optional

import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from skopt.space import Categorical, Integer

from ml_grid.util import param_space
from ml_grid.util.global_params import global_parameters

# Emit a debug-level trace on the "ml_grid" logger each time this module is
# imported, so import order can be followed in debug logs.
_package_logger = logging.getLogger("ml_grid")
_package_logger.debug("Imported KNeighborsClassifier class")


class KNeighborsClassifierClass:
    """KNeighborsClassifier with support for both Bayesian and non-Bayesian parameter spaces.

    After construction the instance exposes:

    * ``X`` / ``y``: the data passed to the constructor, stored unchanged.
    * ``algorithm_implementation``: a default-constructed
      ``KNeighborsClassifier``.
    * ``method_name``: the string ``"KNeighborsClassifier"``.
    * ``parameter_vector_space``: a ``param_space.ParamSpace`` built from
      ``parameter_space_size``.
    * ``parameter_space``: the hyperparameter search space, expressed as
      skopt dimensions when ``global_parameters.bayessearch`` is truthy and
      as plain lists otherwise.
    """

    def __init__(
        self,
        X: Optional[pd.DataFrame] = None,
        y: Optional[pd.Series] = None,
        parameter_space_size: Optional[str] = None,
    ) -> None:
        """Initializes the KNeighborsClassifierClass.

        Args:
            X (Optional[pd.DataFrame]): Feature matrix for training. When
                Bayesian search is enabled its row count sets the upper
                bound of the ``n_neighbors`` range, so it is required in
                that mode. Defaults to None.
            y (Optional[pd.Series]): Target vector for training. Stored on
                the instance only. Defaults to None.
            parameter_space_size (Optional[str]): Size of the parameter
                space for optimization, forwarded to
                ``param_space.ParamSpace``. Defaults to None.

        Raises:
            ValueError: If ``global_parameters.bayessearch`` is enabled and
                `X` is None or has fewer than three rows, because no valid
                ``n_neighbors`` range can be derived from it.
        """
        # The number of parallel jobs is a project-wide setting.
        knn_n_jobs: int = global_parameters.knn_n_jobs

        self.X: Optional[pd.DataFrame] = X
        self.y: Optional[pd.Series] = y

        # Estimator instance whose hyperparameters are searched over.
        self.algorithm_implementation: KNeighborsClassifier = KNeighborsClassifier()
        self.method_name: str = "KNeighborsClassifier"

        # Source of the predefined value grids used by the grid-search branch.
        self.parameter_vector_space: param_space.ParamSpace = param_space.ParamSpace(
            parameter_space_size
        )

        self.parameter_space: Dict[str, Any]

        if global_parameters.bayessearch:
            # Validate before building the space so a missing or tiny X
            # produces a descriptive error instead of an AttributeError on
            # ``None.shape`` or a generic bounds error from skopt.
            max_neighbors = self._max_neighbors(self.X)

            # Bayesian optimization: skopt Integer / Categorical dimensions.
            # NOTE(review): if the search cross-validates, each training fold
            # has fewer rows than X, so n_neighbors values near the upper
            # bound may exceed the fitted sample count -- confirm how the
            # caller handles failed fits.
            self.parameter_space = {
                "algorithm": Categorical(["auto", "ball_tree", "kd_tree", "brute"]),
                "leaf_size": Integer(10, 100),
                "metric": Categorical(["minkowski"]),
                "metric_params": Categorical([None]),
                # Single-choice dimension pins n_jobs to the global setting.
                "n_jobs": Categorical([knn_n_jobs]),
                "n_neighbors": Integer(1, max_neighbors),
                # Power parameter of the Minkowski metric.
                "p": Integer(1, 5),
                "weights": Categorical(["uniform", "distance"]),
            }
        else:
            # Traditional grid search: every hyperparameter is an explicit
            # list of candidate values.
            param_dict = self.parameter_vector_space.param_dict
            self.parameter_space = {
                "algorithm": ["auto", "ball_tree", "kd_tree", "brute"],
                "leaf_size": list(param_dict.get("log_large_long")),
                "metric": ["minkowski"],
                "metric_params": [None],
                "n_jobs": [knn_n_jobs],
                "n_neighbors": list(param_dict.get("log_med")),
                "p": list(param_dict.get("log_med")),
                "weights": ["uniform", "distance"],
            }

    @staticmethod
    def _max_neighbors(X: Optional[pd.DataFrame]) -> int:
        """Return the upper bound of the Bayesian ``n_neighbors`` range.

        The bound is ``X.shape[0] - 1``. It must be strictly greater than
        the lower bound of 1 for the skopt ``Integer`` dimension to be valid.

        Raises:
            ValueError: If `X` is None or has fewer than three rows.
        """
        if X is None:
            raise ValueError(
                "X must be provided when global_parameters.bayessearch is "
                "enabled: the n_neighbors range is derived from its row count."
            )

        upper_bound = X.shape[0] - 1
        if upper_bound <= 1:
            raise ValueError(
                "X must contain at least 3 rows to build the Bayesian "
                f"n_neighbors range; got {X.shape[0]}."
            )
        return upper_bound