"""KNeighbors Classifier.
This module contains KNeighborsClassifierClass, a configuration class for
scikit-learn's KNeighborsClassifier. It provides parameter spaces for
grid search and Bayesian optimization.
"""
import logging
from typing import Any, Dict, Optional
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from skopt.space import Categorical, Integer
from ml_grid.util import param_space
from ml_grid.util.global_params import global_parameters
# Emit a DEBUG record on the package logger when this module is imported.
_package_logger = logging.getLogger("ml_grid")
_package_logger.debug("Imported KNeighborsClassifier class")
[docs]
class KNeighborsClassifierClass:
    """KNeighborsClassifier with support for both Bayesian and non-Bayesian parameter spaces.

    Instances bundle an unfitted ``KNeighborsClassifier`` with the
    hyperparameter search space to explore for it. Which kind of space is
    built depends on ``global_parameters.bayessearch``: skopt dimensions
    when it is truthy, plain lists of candidate values otherwise.
    """

    def __init__(
        self,
        X: Optional[pd.DataFrame] = None,
        y: Optional[pd.Series] = None,
        parameter_space_size: Optional[str] = None,
    ):
        """Initializes the KNeighborsClassifierClass.

        Args:
            X (Optional[pd.DataFrame]): Feature matrix for training.
                Defaults to None. Required when
                ``global_parameters.bayessearch`` is enabled, because the
                ``n_neighbors`` range is derived from its row count.
            y (Optional[pd.Series]): Target vector for training.
                Defaults to None.
            parameter_space_size (Optional[str]): Size of the parameter space for
                optimization; passed straight to ``param_space.ParamSpace``.
                Defaults to None.

        Raises:
            ValueError: If Bayesian search is enabled and `X` is None or has
                fewer than three rows, so no valid ``n_neighbors`` range
                can be built.
        """
        # Number of parallel jobs comes from the global configuration.
        knn_n_jobs: int = global_parameters.knn_n_jobs

        self.X: Optional[pd.DataFrame] = X
        self.y: Optional[pd.Series] = y

        # Unfitted estimator with scikit-learn's default settings.
        self.algorithm_implementation: KNeighborsClassifier = KNeighborsClassifier()
        self.method_name: str = "KNeighborsClassifier"

        # Source of the candidate-value lists used by the grid-search branch.
        self.parameter_vector_space: param_space.ParamSpace = param_space.ParamSpace(
            parameter_space_size
        )

        self.parameter_space: Dict[str, Any]
        if global_parameters.bayessearch:
            # Validate X up front so a missing or too-small feature matrix
            # fails with a clear message instead of an AttributeError or an
            # opaque bounds error from skopt.
            # NOTE(review): the bound is the full row count minus one; with
            # cross-validation the training folds are smaller, so the largest
            # values may still be rejected at fit time -- confirm with the
            # search driver.
            max_neighbors = self._max_n_neighbors(self.X)

            # Bayesian optimization: skopt Integer and Categorical dimensions.
            self.parameter_space = {
                "algorithm": Categorical(["auto", "ball_tree", "kd_tree", "brute"]),
                "leaf_size": Integer(10, 100),
                "metric": Categorical(["minkowski"]),
                "metric_params": Categorical([None]),  # no extra metric arguments
                "n_jobs": Categorical([knn_n_jobs]),  # fixed to the global setting
                "n_neighbors": Integer(1, max_neighbors),
                "p": Integer(1, 5),  # Minkowski power parameter
                "weights": Categorical(["uniform", "distance"]),
            }
        else:
            # Traditional grid search: every parameter is a list of candidates,
            # with the numeric ones taken from ParamSpace.param_dict.
            param_dict = self.parameter_vector_space.param_dict
            self.parameter_space = {
                "algorithm": ["auto", "ball_tree", "kd_tree", "brute"],
                "leaf_size": list(param_dict.get("log_large_long")),
                "metric": ["minkowski"],
                "metric_params": [None],
                "n_jobs": [knn_n_jobs],
                "n_neighbors": list(param_dict.get("log_med")),
                "p": list(param_dict.get("log_med")),
                "weights": ["uniform", "distance"],
            }

    @staticmethod
    def _max_n_neighbors(X: Optional[pd.DataFrame]) -> int:
        """Return the upper bound of the Bayesian ``n_neighbors`` range.

        The bound is the number of rows in `X` minus one. The range starts
        at 1 and must contain more than one value, so at least three rows
        are needed.

        Args:
            X (Optional[pd.DataFrame]): Feature matrix whose row count sets
                the bound.

        Returns:
            int: ``X.shape[0] - 1``.

        Raises:
            ValueError: If `X` is None or has fewer than three rows.
        """
        if X is None:
            raise ValueError(
                "X must be provided to build the Bayesian parameter space: "
                "the n_neighbors range depends on the number of samples."
            )
        n_samples = X.shape[0]
        upper_bound = n_samples - 1
        if upper_bound < 2:
            raise ValueError(
                "X must contain at least 3 samples to define an n_neighbors "
                f"range; got {n_samples}."
            )
        return upper_bound