"""
Hyperparameter analysis plotting module for ML results analysis.
Focuses on visualizing the impact of hyperparameters on model performance.
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from typing import List, Optional, Tuple, Dict, Any
import warnings
import ast
import scipy.stats as stats
from sklearn.metrics import r2_score
from ml_grid.results_processing.core import get_clean_data
class HyperparameterAnalysisPlotter:
"""Analyzes and visualizes the impact of hyperparameters on model performance.
This class extracts hyperparameter settings from model string representations
in the results data, allowing for detailed analysis of how different
hyperparameters affect a given performance metric.
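Example:
A minimal, illustrative workflow (assumes ``results_df`` is a results
DataFrame with an 'algorithm_implementation' column and an 'auc' metric
column; the algorithm name shown is only an example):
>>> plotter = HyperparameterAnalysisPlotter(results_df)
>>> plotter.get_available_algorithms()
>>> plotter.plot_hyperparameter_importance('RandomForestClassifier', metric='auc')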
"""
# Algorithms to ignore for hyperparameter parsing, as they do not expose their parameters in a parsable string format.
_ALGOS_TO_IGNORE = ['CatBoostClassifier', 'KNNWrapper', 'knn_wrapper_class']
def __init__(self, data: pd.DataFrame):
"""Initializes the plotter by cleaning the data and parsing hyperparameters.
Args:
data (pd.DataFrame): Results data containing an
'algorithm_implementation' column.
Raises:
ValueError: If the 'algorithm_implementation' column is missing.
"""
if 'algorithm_implementation' not in data.columns:
raise ValueError("Data must contain an 'algorithm_implementation' column for hyperparameter analysis.")
self.clean_data = get_clean_data(data)
# Extract algorithm name from algorithm_implementation
self.clean_data['algorithm_name'] = self.clean_data['algorithm_implementation'].apply(
lambda x: x.split('(')[0].strip() if isinstance(x, str) and '(' in x else None
)
# Filter out ignored algorithms before parsing
self.clean_data = self.clean_data[~self.clean_data['algorithm_name'].isin(self._ALGOS_TO_IGNORE)]
# Parse parameters
self.clean_data['params_dict'] = self.clean_data['algorithm_implementation'].apply(
self._parse_model_string_to_params
)
# Drop rows where parsing failed
self.clean_data = self.clean_data.dropna(subset=['params_dict', 'algorithm_name']).copy()
plt.style.use('default')
sns.set_palette("muted")
@staticmethod
def _parse_model_string_to_params(model_str: str) -> Optional[Dict[str, Any]]:
"""Parses a scikit-learn model's string representation into a parameter dictionary.
This method uses Abstract Syntax Trees (AST) to safely parse the
string representation of a model (e.g., "RandomForestClassifier(n_estimators=100)")
and extract its hyperparameters into a dictionary.
Args:
model_str (str): The string representation of the model.
Returns:
Optional[Dict[str, Any]]: A dictionary of the model's hyperparameters,
or None if parsing fails.
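Example:
Parsing a typical estimator repr:
>>> HyperparameterAnalysisPlotter._parse_model_string_to_params(
...     "RandomForestClassifier(n_estimators=100, max_depth=None)"
... )
{'n_estimators': 100, 'max_depth': None}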
"""
if not isinstance(model_str, str) or '(' not in model_str:
return None
try:
# Handle sklearn-style string representations
tree = ast.parse(model_str, mode='eval')
if not isinstance(tree.body, ast.Call):
return None
params = {}
for kw in tree.body.keywords:
try:
params[kw.arg] = ast.literal_eval(kw.value)
except (ValueError, SyntaxError):
if isinstance(kw.value, ast.Name):
val_id = kw.value.id
if val_id == 'True':
params[kw.arg] = True
elif val_id == 'False':
params[kw.arg] = False
elif val_id == 'None':
params[kw.arg] = None
else:
params[kw.arg] = val_id
elif isinstance(kw.value, ast.Constant):
params[kw.arg] = kw.value.value
else:
# Store as string representation
params[kw.arg] = ast.unparse(kw.value)
return params
except Exception as e:
warnings.warn(f"Failed to parse model string '{model_str[:50]}...'. Error: {e}")
return None
def get_available_algorithms(self) -> List[str]:
"""Gets a list of available, parsable algorithms from the data.
Returns:
List[str]: A sorted list of unique algorithm names.
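Example:
Illustrative output; the exact names depend on the results data:
>>> plotter.get_available_algorithms()
['LogisticRegression', 'RandomForestClassifier']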
"""
return sorted(self.clean_data['algorithm_name'].unique())
def _plot_single_performance_vs_hyperparameter(self, ax: plt.Axes, algo_data: pd.DataFrame, hyperparameter: str, metric: str):
"""Helper to plot performance vs a single hyperparameter on a given axis.
Args:
ax (plt.Axes): The matplotlib axis to plot on.
algo_data (pd.DataFrame): The data for the specific algorithm.
hyperparameter (str): The name of the hyperparameter to plot.
metric (str): The name of the performance metric.
"""
# Extract hyperparameter value for each run
plot_data = algo_data.copy()
plot_data[hyperparameter] = plot_data['params_dict'].apply(lambda p: p.get(hyperparameter) if p else None)
plot_data = plot_data.dropna(subset=[hyperparameter, metric])
if plot_data.empty:
ax.text(0.5, 0.5, f"No data for\n'{hyperparameter}'", ha='center', va='center', transform=ax.transAxes)
ax.set_title(hyperparameter, fontsize=11)
return
# Determine if the hyperparameter is numeric or categorical
param_values = plot_data[hyperparameter]
is_numeric = pd.api.types.is_numeric_dtype(param_values)
is_float = pd.api.types.is_float_dtype(param_values)
# Treat as continuous if it's a float, or an integer with many unique values
if is_numeric and (is_float or param_values.nunique() > 8):
# Use log scale for wide ranges
if param_values.min() > 0 and param_values.max() / param_values.min() > 100:
ax.set_xscale('log')
x_label = f'{hyperparameter} (log scale)'
else:
x_label = hyperparameter
# Check if outcome_variable exists for coloring
scatter_kwargs = {"alpha": 0.6}
if 'outcome_variable' in plot_data.columns and plot_data['outcome_variable'].nunique() > 1:
n_outcomes = plot_data['outcome_variable'].nunique()
if n_outcomes <= 10:
scatter_kwargs['hue'] = 'outcome_variable'
scatter_kwargs['style'] = 'outcome_variable'
else:
warnings.warn(
f"Number of unique outcomes ({n_outcomes}) exceeds the limit of 10 for color encoding. "
"Plotting without outcome-based colors.",
stacklevel=3
)
sns.scatterplot(data=plot_data, x=hyperparameter, y=metric, ax=ax, **scatter_kwargs)
if 'hue' in scatter_kwargs:
ax.legend(title='Outcome', fontsize='small')
ax.set_xlabel(x_label, fontsize=10)
ax.set_title(f'{metric.upper()} vs. {hyperparameter}', fontsize=11, fontweight='bold')
else: # Categorical or discrete numeric
if is_numeric:
# Sort numerically for discrete numeric types
order = sorted(param_values.unique())
sns.boxplot(data=plot_data, x=hyperparameter, y=metric, order=order, ax=ax)
else:
# Sort alphabetically for categorical types
param_values_str = param_values.astype(str)
plot_data[hyperparameter + '_str'] = param_values_str
order = sorted(param_values_str.unique())
sns.boxplot(data=plot_data, x=hyperparameter + '_str', y=metric, order=order, ax=ax)
ax.tick_params(axis='x', rotation=45, labelsize=9)
plt.setp(ax.get_xticklabels(), ha='right')
ax.set_xlabel(hyperparameter, fontsize=10)
ax.set_title(f'{metric.upper()} by {hyperparameter}', fontsize=11, fontweight='bold')
ax.set_ylabel(metric.upper(), fontsize=10)
ax.grid(True, alpha=0.3)
def _get_continuous_hyperparameter_correlations(self,
algorithm_name: str,
metric: str,
method: str = 'pearson') -> Optional[pd.DataFrame]:
"""Helper to calculate correlations between continuous hyperparameters and a metric.
Args:
algorithm_name (str): The name of the algorithm to analyze.
metric (str): The performance metric to correlate against.
method (str, optional): The correlation method ('pearson' or
'spearman'). Defaults to 'pearson'.
Returns:
Optional[pd.DataFrame]: A DataFrame with correlation results, or
None if no continuous hyperparameters are found.
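The frame has one row per continuous hyperparameter, with the columns
'hyperparameter', 'correlation', 'abs_correlation', 'p_value' and
'n_samples'.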
"""
algo_data = self.clean_data[self.clean_data['algorithm_name'] == algorithm_name].copy()
if algo_data.empty:
return None
all_params = set().union(*(d.keys() for d in algo_data['params_dict'] if d))
correlations = []
for param in sorted(list(all_params)):
param_values = algo_data['params_dict'].apply(lambda p: p.get(param) if p else None).dropna()
is_numeric = pd.api.types.is_numeric_dtype(param_values)
is_float = pd.api.types.is_float_dtype(param_values)
# Consider a hyperparameter continuous if it's float, or an integer with many unique values
if not param_values.empty and is_numeric and (is_float or param_values.nunique() > 8):
# Create a temporary DataFrame for correlation calculation
temp_df = pd.DataFrame({
'param': param_values,
'metric': algo_data.loc[param_values.index, metric]
}).dropna()
if len(temp_df) < 2:
continue
if method == 'pearson':
correlation, p_value = stats.pearsonr(temp_df['param'], temp_df['metric'])
elif method == 'spearman':
correlation, p_value = stats.spearmanr(temp_df['param'], temp_df['metric'])
else:
raise ValueError("Correlation method must be 'pearson' or 'spearman'.")
if not pd.isna(correlation):
correlations.append({
'hyperparameter': param,
'correlation': correlation,
'abs_correlation': abs(correlation),
'p_value': p_value,
'n_samples': len(temp_df)
})
if not correlations:
return None
return pd.DataFrame(correlations)
def plot_hyperparameter_importance(self,
algorithm_name: str,
metric: str = 'auc',
top_n_percent: int = 20,
figsize: Optional[Tuple[int, int]] = None):
"""Plots hyperparameter distributions for top models vs. all models.
This method provides insight into which hyperparameter values are more
prevalent in high-performing models compared to the overall distribution
of values explored during the search.
Args:
algorithm_name (str): The name of the algorithm to analyze.
metric (str, optional): The metric used to define "top" models.
Defaults to 'auc'.
top_n_percent (int, optional): The percentage of top models to
compare against. Defaults to 20.
figsize (Optional[Tuple[int, int]], optional): The figure size for
the plot. Defaults to None.
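Example:
Compare the top 10% of runs against all runs (``plotter`` is an instance
of this class; the algorithm name is only illustrative):
>>> plotter.plot_hyperparameter_importance('RandomForestClassifier',
...                                         metric='auc', top_n_percent=10)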
"""
algo_data = self.clean_data[self.clean_data['algorithm_name'] == algorithm_name].copy()
if algo_data.empty:
available_algos = self.get_available_algorithms()
print(f"No data found for algorithm: {algorithm_name}")
print(f"Available algorithms: {available_algos}")
return
# Check if metric exists
if metric not in algo_data.columns:
print(f"Metric '{metric}' not found. Available metrics: {algo_data.select_dtypes(include=[np.number]).columns.tolist()}")
return
# Identify top models
threshold = algo_data[metric].quantile(1 - (top_n_percent / 100.0))
top_models = algo_data[algo_data[metric] >= threshold]
if top_models.empty:
print(f"No models found in the top {top_n_percent}% for algorithm '{algorithm_name}'.")
return
# Get all hyperparameters
all_params = set()
for params in algo_data['params_dict']:
if params:
all_params.update(params.keys())
hyperparameters = sorted(all_params)
if not hyperparameters:
print(f"No hyperparameters found for {algorithm_name}.")
return
n_params = len(hyperparameters)
cols = min(3, n_params)
rows = (n_params + cols - 1) // cols
fig_size = figsize or (cols * 5, rows * 4)
fig, axes = plt.subplots(rows, cols, figsize=fig_size, squeeze=False)
axes = axes.flatten()
for i, param in enumerate(hyperparameters):
ax = axes[i]
# Extract param values
all_values = algo_data['params_dict'].apply(lambda p: p.get(param) if p else None).dropna()
top_values = top_models['params_dict'].apply(lambda p: p.get(param) if p else None).dropna()
if all_values.empty:
ax.text(0.5, 0.5, 'No Data', ha='center', va='center', transform=ax.transAxes)
ax.set_title(param, fontsize=11)
continue
is_numeric = pd.api.types.is_numeric_dtype(all_values)
is_float = pd.api.types.is_float_dtype(all_values)
# Case 1: Continuous numeric data -> Scatter plot of value vs. metric
# Treat as continuous if it's a float, or an integer with many unique values
if is_numeric and (is_float or all_values.nunique() > 8):
# Add the hyperparameter as a column to the dataframe for easy plotting
plot_df = algo_data.copy()
plot_df[param] = plot_df['params_dict'].apply(lambda p: p.get(param) if p else None)
plot_df = plot_df.dropna(subset=[param, metric])
if plot_df.empty:
ax.text(0.5, 0.5, 'No Numeric Data', ha='center', va='center', transform=ax.transAxes)
ax.set_title(param, fontsize=11)
continue
# Add a column to distinguish top models
plot_df['is_top'] = plot_df[metric] >= threshold
sns.scatterplot(data=plot_df, x=param, y=metric, hue='is_top', ax=ax, alpha=0.7, style='is_top', s=50)
ax.set_title(f'{metric.upper()} vs. {param}', fontsize=11, fontweight='bold')
ax.set_xlabel(param)
ax.set_ylabel(metric.upper())
# Use log scale if range is large
if not plot_df.empty and plot_df[param].min() > 0 and plot_df[param].max() / plot_df[param].min() > 100:
ax.set_xscale('log')
ax.set_xlabel(f'{param} (log scale)')
# Customize legend
leg = ax.get_legend()
if leg:
leg.set_title('Performance Tier')
for t in leg.get_texts():
if t.get_text() == 'False': t.set_text(f'Bottom {100-top_n_percent}%')
if t.get_text() == 'True': t.set_text(f'Top {top_n_percent}%')
# Case 2: Discrete numeric or Categorical data
else:
# Sub-case: Discrete numeric data -> Line plot to show trend
if is_numeric:
all_counts = all_values.value_counts(normalize=True)
top_counts = top_values.value_counts(normalize=True)
df_plot = pd.concat([all_counts.rename('All'), top_counts.rename('Top')],
axis=1).fillna(0).sort_index()
df_plot.plot(kind='line', marker='o', ax=ax, linestyle='-')
ax.set_title(f'{param}', fontsize=11, fontweight='bold')
ax.set_ylabel('Proportion')
ax.set_xlabel(param)
# Show every discrete tick, format the numbers compactly, and rotate the labels
ax.set_xticks(df_plot.index)
ax.get_xaxis().set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x:g}'))
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
# Sub-case: Categorical data -> Bar plot
else:
all_counts = all_values.value_counts(normalize=True)
top_counts = top_values.value_counts(normalize=True)
df_plot = pd.concat([all_counts.rename('All'), top_counts.rename('Top')],
axis=1).fillna(0).sort_index()
df_plot.plot(kind='bar', ax=ax, width=0.8)
ax.set_title(f'{param}', fontsize=11, fontweight='bold')
ax.set_ylabel('Proportion')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
ax.legend()
# Hide unused subplots
for j in range(i + 1, len(axes)):
axes[j].set_visible(False)
plt.suptitle(f'Hyperparameter Analysis: {algorithm_name}\n(All vs. Top {top_n_percent}% by {metric.upper()})',
fontsize=16, fontweight='bold')
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()
def plot_hyperparameter_correlations(self,
algorithm_name: str,
metric: str = 'auc',
method: str = 'pearson',
figsize: Optional[Tuple[int, int]] = None,
show_correlation_stats: bool = True):
"""Plots correlation between continuous hyperparameters and a performance metric.
This method creates scatter plots to visualize the relationship between
each continuous hyperparameter and the target metric, including a
regression line and correlation statistics.
Args:
algorithm_name (str): The name of the algorithm to analyze.
metric (str, optional): The performance metric. Defaults to 'auc'.
method (str, optional): The correlation method ('pearson' or 'spearman').
Defaults to 'pearson'.
figsize (Optional[Tuple[int, int]], optional): The figure size.
Defaults to None.
show_correlation_stats (bool, optional): Whether to print a summary
table of correlations. Defaults to True.
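Example:
Spearman correlations with a printed summary table (``plotter`` is an
instance of this class; the algorithm name is only illustrative):
>>> plotter.plot_hyperparameter_correlations('RandomForestClassifier',
...                                           metric='auc', method='spearman')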
"""
algo_data = self.clean_data[self.clean_data['algorithm_name'] == algorithm_name].copy()
if algo_data.empty:
available_algos = self.get_available_algorithms()
print(f"No data found for algorithm: {algorithm_name}")
print(f"Available algorithms: {available_algos}")
return
# Check if metric exists
if metric not in algo_data.columns:
print(f"Metric '{metric}' not found. Available metrics: {algo_data.select_dtypes(include=[np.number]).columns.tolist()}")
return
if method not in ['pearson', 'spearman']:
raise ValueError("Method must be 'pearson' or 'spearman'")
# Get correlations
correlation_results_df = self._get_continuous_hyperparameter_correlations(algorithm_name, metric, method)
if correlation_results_df is None or correlation_results_df.empty:
print(f"No continuous hyperparameters found for {algorithm_name}.")
return
n_params = len(correlation_results_df)
cols = min(3, n_params)
rows = (n_params + cols - 1) // cols
fig_size = figsize or (cols * 6, rows * 5)
fig, axes = plt.subplots(rows, cols, figsize=fig_size, squeeze=False)
axes = axes.flatten()
for i, row in correlation_results_df.iterrows():
param = row['hyperparameter']
ax = axes[i]
# Extract param values and create plotting dataframe
plot_df = algo_data.copy()
plot_df[param] = plot_df['params_dict'].apply(lambda p: p.get(param) if p else None)
plot_df = plot_df.dropna(subset=[param, metric])
if plot_df.empty:
ax.text(0.5, 0.5, 'No Data', ha='center', va='center', transform=ax.transAxes)
ax.set_title(param, fontsize=12)
continue
x_values = plot_df[param]
y_values = plot_df[metric]
# Calculate correlation statistics
correlation = row['correlation']
p_value = row['p_value']
# Create scatter plot
sns.scatterplot(data=plot_df, x=param, y=metric, ax=ax, alpha=0.6, s=50)
# Add trend line
z = np.polyfit(x_values, y_values, 1)
p = np.poly1d(z)
x_trend = np.linspace(x_values.min(), x_values.max(), 100)
ax.plot(x_trend, p(x_trend), "r--", alpha=0.8, linewidth=2)
# Use log scale if range is large
if x_values.min() > 0 and x_values.max() / x_values.min() > 100:
ax.set_xscale('log')
param_label = f'{param} (log scale)'
else:
param_label = param
corr_label = 'r' if method == 'pearson' else 'ρ'
# Create title with correlation info
if show_correlation_stats:
title = f'{param}\n{corr_label} = {correlation:.3f}'
if p_value < 0.001:
title += ' (p < 0.001)'
elif p_value < 0.01:
title += ' (p < 0.01)'
else:
title += f' (p = {p_value:.3f})'
else:
title = param
ax.set_title(title, fontsize=11, fontweight='bold')
ax.set_xlabel(param_label, fontsize=10)
ax.set_ylabel(metric.upper(), fontsize=10)
ax.grid(True, alpha=0.3)
# Hide unused subplots
for j in range(i + 1, len(axes)):
axes[j].set_visible(False)
plt.suptitle(f'Hyperparameter Correlations with {metric.upper()}: {algorithm_name}',
fontsize=16, fontweight='bold')
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()
# Print correlation summary
if show_correlation_stats:
print(f"\nCorrelation Summary for {algorithm_name}:")
print("-" * 60)
corr_df = correlation_results_df.sort_values('abs_correlation', ascending=False)
corr_label = 'r' if method == 'pearson' else 'ρ'
for _, row in corr_df.iterrows():
significance = ""
if row['p_value'] < 0.001:
significance = "***"
elif row['p_value'] < 0.01:
significance = "**"
elif row['p_value'] < 0.05:
significance = "*"
print(f"{row['hyperparameter']:20s}: {corr_label} = {row['correlation']:6.3f}{significance:3s} "
f"(p = {row['p_value']:.3f}, n = {row['n_samples']})")
print("\nSignificance levels: *** p<0.001, ** p<0.01, * p<0.05")
def plot_top_correlations(self,
algorithm_name: str,
metric: str = 'auc',
method: str = 'pearson',
top_n: int = 5,
figsize: Tuple[int, int] = (15, 10)) -> Optional[pd.DataFrame]:
"""Plots only the top N most correlated hyperparameters with the metric.
Args:
algorithm_name (str): The name of the algorithm to analyze.
metric (str, optional): The performance metric. Defaults to 'auc'.
method (str, optional): The correlation method ('pearson' or 'spearman').
Defaults to 'pearson'.
top_n (int, optional): The number of top correlated hyperparameters
to plot. Defaults to 5.
figsize (Tuple[int, int], optional): The figure size.
Defaults to (15, 10).
Returns:
Optional[pd.DataFrame]: The top correlated hyperparameters with their
correlation statistics, or None if no data or continuous
hyperparameters are available (nothing is plotted in that case).
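Example:
Plot the three most strongly correlated hyperparameters and keep the
summary table (``plotter`` is an instance of this class; the algorithm
name is only illustrative):
>>> top = plotter.plot_top_correlations('RandomForestClassifier',
...                                      metric='auc', top_n=3)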
"""
algo_data = self.clean_data[self.clean_data['algorithm_name'] == algorithm_name].copy()
if algo_data.empty:
available_algos = self.get_available_algorithms()
print(f"No data found for algorithm: {algorithm_name}")
print(f"Available algorithms: {available_algos}")
return
# Check if metric exists (mirrors the validation in the other plotting methods)
if metric not in algo_data.columns:
print(f"Metric '{metric}' not found. Available metrics: {algo_data.select_dtypes(include=[np.number]).columns.tolist()}")
return
if method not in ['pearson', 'spearman']:
raise ValueError("Method must be 'pearson' or 'spearman'")
# Get correlations and take top N
correlations_df = self._get_continuous_hyperparameter_correlations(algorithm_name, metric, method)
if correlations_df is None or correlations_df.empty:
print(f"No continuous hyperparameters found for {algorithm_name} to plot correlations.")
return
top_correlations = correlations_df.sort_values('abs_correlation', ascending=False).head(top_n)
# Plot top correlations
n_plots = len(top_correlations)
cols = min(3, n_plots)
rows = (n_plots + cols - 1) // cols
fig, axes = plt.subplots(rows, cols, figsize=figsize, squeeze=False)
axes = axes.flatten()
for i, (_, row) in enumerate(top_correlations.iterrows()):
ax = axes[i]
param = row['hyperparameter']
# Create plotting dataframe
plot_df = algo_data.copy()
plot_df[param] = plot_df['params_dict'].apply(lambda p: p.get(param) if p else None)
plot_df = plot_df.dropna(subset=[param, metric])
x_values = plot_df[param]
y_values = plot_df[metric]
# Create scatter plot color-coded by performance. seaborn returns the Axes
# and forwards c/cmap to matplotlib's scatter, so the color-mapped
# PathCollection (the colorbar mappable) ends up in ax.collections.
sns.scatterplot(data=plot_df, x=param, y=metric, ax=ax,
c=plot_df[metric], cmap='viridis', alpha=0.7, s=60)
# Add trend line
z = np.polyfit(x_values, y_values, 1)
p = np.poly1d(z)
x_trend = np.linspace(x_values.min(), x_values.max(), 100)
ax.plot(x_trend, p(x_trend), "r-", alpha=0.8, linewidth=3)
# Use log scale if needed
if x_values.min() > 0 and x_values.max() / x_values.min() > 100:
ax.set_xscale('log')
param_label = f'{param} (log scale)'
else:
param_label = param
# Title with ranking and correlation
significance = ""
if row['p_value'] < 0.001:
significance = "***"
elif row['p_value'] < 0.01:
significance = "**"
elif row['p_value'] < 0.05:
significance = "*"
corr_label = 'r' if method == 'pearson' else 'ρ'
title = f'#{i+1}: {param}\n{corr_label} = {row["correlation"]:.3f}{significance}'
ax.set_title(title, fontsize=12, fontweight='bold')
ax.set_xlabel(param_label, fontsize=11)
ax.set_ylabel(metric.upper(), fontsize=11)
ax.grid(True, alpha=0.3)
# Add a colorbar to the first plot, using the scatter's PathCollection as the mappable
if i == 0 and ax.collections:
cbar = plt.colorbar(ax.collections[0], ax=ax)
cbar.set_label(metric.upper(), fontsize=10)
# Hide unused subplots
for j in range(len(top_correlations), len(axes)):
axes[j].set_visible(False)
plt.suptitle(f'Top {min(top_n, len(top_correlations))} Hyperparameter Correlations: {algorithm_name}',
fontsize=16, fontweight='bold')
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()
return top_correlations