"""Keras Neural Network Classifier Wrapper.
This module provides a scikit-learn compatible wrapper for a Keras Sequential
neural network for binary classification.
"""
from typing import Optional
import numpy as np
import tensorflow as tf
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import accuracy_score
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential, clone_model
class NeuralNetworkClassifier(BaseEstimator, ClassifierMixin):
    """A scikit-learn compatible wrapper for a Keras Sequential neural network.

    This class builds a simple feed-forward neural network for binary
    classification and wraps it to be compatible with scikit-learn's API,
    allowing it to be used in pipelines and hyperparameter tuning tools like
    GridSearchCV.
    """

    def __init__(
        self,
        hidden_layer_sizes: tuple[int, ...] = (64, 64),
        dropout_rate: float = 0.3,
        learning_rate: float = 0.001,
        activation_func: str = "relu",
        epochs: int = 10,
        batch_size: int = 32,
        early_stopping_patience: int = 3,
        random_state: Optional[int] = None,
    ):
        """Initializes the NeuralNetworkClassifier.

        Args:
            hidden_layer_sizes (tuple[int, ...]): The number of units per hidden layer.
                May also arrive as a list or a string like "(64, 32)" when round-tripped
                through hyperparameter-search tools (e.g. skopt); it is normalized
                lazily in :meth:`build_model`.
            dropout_rate (float): Dropout rate for the dropout layers.
            learning_rate (float): Learning rate for the Adam optimizer.
            activation_func (str): Activation function for the hidden layers.
            epochs (int): Number of epochs to train the model.
            batch_size (int): Number of samples per gradient update.
            early_stopping_patience (int): Number of epochs with no improvement
                on validation loss after which training will be stopped.
            random_state (Optional[int]): Seed for reproducibility. Defaults to None.
        """
        # Per sklearn convention, __init__ only stores the raw parameters so
        # get_params()/set_params() and clone() work correctly. Validation and
        # normalization (e.g. of hidden_layer_sizes) happen at build time.
        self.hidden_layer_sizes = hidden_layer_sizes
        self.dropout_rate = dropout_rate
        self.learning_rate = learning_rate
        self.activation_func = activation_func
        # BUGFIX: `epochs` was previously never stored on the instance, so
        # fit() crashed with AttributeError when reading self.epochs.
        self.epochs = epochs
        self.batch_size = batch_size
        self.early_stopping_patience = early_stopping_patience
        self.random_state = random_state
        # Fitted state: the compiled Keras model and the observed class labels.
        self.model: Optional[Sequential] = None
        self.classes_: Optional[np.ndarray] = None
        # Seed both NumPy and TensorFlow so weight init / shuffling are
        # reproducible when a random_state is supplied.
        if self.random_state is not None:
            np.random.seed(self.random_state)
            tf.random.set_seed(self.random_state)

    def _normalize_hidden_layer_sizes(self) -> tuple:
        """Convert hidden_layer_sizes to a tuple regardless of input format.

        This method handles three cases:
        1. Already a tuple: return as-is
        2. String representation from skopt: parse with ast.literal_eval
        3. List: convert to tuple

        Returns:
            tuple: Normalized hidden layer sizes as a tuple of integers.

        Raises:
            ValueError: If the value is an unsupported type or an unparseable
                string.
        """
        if isinstance(self.hidden_layer_sizes, tuple):
            # Already a tuple, use it directly.
            return self.hidden_layer_sizes
        elif isinstance(self.hidden_layer_sizes, str):
            # String coming back from a search tool (e.g. skopt) — parse it
            # safely with literal_eval rather than eval.
            import ast

            try:
                parsed = ast.literal_eval(self.hidden_layer_sizes)
            except (ValueError, SyntaxError) as e:
                raise ValueError(
                    f"Could not parse hidden_layer_sizes string: '{self.hidden_layer_sizes}'. "
                    f"Expected format like '(64, 32)'. Error: {e}"
                )
            # Ensure it's a tuple (could have been parsed as a list).
            if isinstance(parsed, (list, tuple)):
                return tuple(parsed)
            raise ValueError(
                f"Parsed hidden_layer_sizes is not a sequence: {parsed}"
            )
        elif isinstance(self.hidden_layer_sizes, list):
            # Convert list to tuple.
            return tuple(self.hidden_layer_sizes)
        else:
            raise ValueError(
                f"hidden_layer_sizes must be a tuple, list, or string, "
                f"got {type(self.hidden_layer_sizes)}: {self.hidden_layer_sizes}"
            )

    def build_model(self, input_dim: int) -> Sequential:
        """Builds and compiles the Keras Sequential model.

        Args:
            input_dim (int): The number of input features.

        Returns:
            Sequential: The compiled Keras model.
        """
        # Normalize hidden_layer_sizes here, right before use, so it is
        # always a proper tuple whatever form it was stored in.
        hidden_sizes = self._normalize_hidden_layer_sizes()
        model = Sequential()
        # First hidden layer also declares the input dimensionality.
        model.add(
            Dense(
                units=hidden_sizes[0],
                activation=self.activation_func,
                input_dim=input_dim,
            )
        )
        model.add(Dropout(rate=self.dropout_rate))
        # Remaining hidden layers, each followed by dropout.
        for units in hidden_sizes[1:]:
            model.add(Dense(units=units, activation=self.activation_func))
            model.add(Dropout(rate=self.dropout_rate))
        # Single sigmoid unit: this wrapper is strictly a binary classifier.
        model.add(Dense(units=1, activation="sigmoid"))
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
        model.compile(
            loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"]
        )
        return model

    def fit(self, X: np.ndarray, y: np.ndarray, **kwargs) -> "NeuralNetworkClassifier":
        """Fits the neural network model to the training data.

        Args:
            X (np.ndarray): The training input samples.
            y (np.ndarray): The target values (binary).
            **kwargs: Extra keyword arguments forwarded to ``keras.Model.fit``
                (e.g. ``validation_data``, which also enables early stopping).

        Returns:
            NeuralNetworkClassifier: The fitted estimator.
        """
        # Clear the previous TF session to avoid layer-name conflicts when
        # fitting repeatedly (e.g. inside cross-validation).
        tf.keras.backend.clear_session()
        # Keras expects numerical labels, not pandas categoricals.
        # If y is a categorical Series, convert it to its numerical codes.
        if hasattr(y, "dtype") and str(y.dtype) == "category":
            y = y.cat.codes.to_numpy()
        # Store class labels (sklearn API requirement).
        self.classes_ = np.unique(y)
        # BUGFIX: always build a fresh model from the current data. The old
        # re-fit path used clone_model(), which clones the *previous*
        # architecture — including its input_dim — and so crashed when the
        # estimator was re-fitted on data with a different number of
        # features. clone_model() re-initializes weights anyway, so
        # rebuilding is behaviorally equivalent for a fresh fit.
        self.model = self.build_model(input_dim=X.shape[1])
        # Early stopping only makes sense when a validation set is supplied.
        callbacks = []
        if "validation_data" in kwargs and self.early_stopping_patience > 0:
            callbacks.append(
                tf.keras.callbacks.EarlyStopping(
                    monitor="val_loss",
                    patience=self.early_stopping_patience,
                    restore_best_weights=True,
                )
            )
        self.model.fit(
            X,
            y,
            epochs=self.epochs,
            batch_size=self.batch_size,
            callbacks=callbacks,
            verbose=0,
            **kwargs,
        )
        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predicts class labels for samples in X.

        Args:
            X (np.ndarray): The input samples to predict.

        Returns:
            np.ndarray: The predicted class labels (0 or 1), shape (n, 1).

        Raises:
            RuntimeError: If the model has not been fitted yet.
        """
        if self.model is None:
            raise RuntimeError(
                "The model has not been fitted yet. Call fit() before predict()."
            )
        # Predict sigmoid outputs, then threshold at 0.5 via rounding.
        # NOTE(review): output keeps Keras' (n, 1) column shape rather than
        # sklearn's flat (n,) — preserved for backward compatibility.
        y_pred = self.model.predict(X, verbose=0)
        return np.round(y_pred).astype(int)

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """Predicts class probabilities for samples in X.

        Args:
            X (np.ndarray): The input samples.

        Returns:
            np.ndarray: P(class 1) for each sample, shape (n, 1).
                NOTE(review): sklearn's convention is an (n, 2) array with
                one column per class; preserved as a single column for
                backward compatibility with existing callers.

        Raises:
            RuntimeError: If the model has not been fitted yet.
        """
        if self.model is None:
            raise RuntimeError(
                "The model has not been fitted yet. Call fit() before predict_proba()."
            )
        return self.model.predict(X, verbose=0)

    def score(self, X: np.ndarray, y: np.ndarray) -> float:
        """Returns the mean accuracy on the given test data and labels.

        Args:
            X (np.ndarray): Test samples.
            y (np.ndarray): True labels for X.

        Returns:
            float: Mean accuracy of self.predict(X) wrt. y.
        """
        y_pred = self.predict(X)
        return accuracy_score(y, y_pred)