Source code for ml_grid.pipeline.data_scale

from typing import List

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.compose import make_column_selector as selector
from sklearn.preprocessing import StandardScaler


class data_scale_methods:
    """A class for applying scaling methods to data."""

    def __init__(self) -> None:
        """Initializes the data_scale_methods class."""
        pass

    def standard_scale_method(self, X: pd.DataFrame) -> pd.DataFrame:
        """Applies StandardScaler to numeric columns of a DataFrame.

        Columns whose dtype is not ``object`` are treated as numeric and are
        standardised with a freshly fitted ``StandardScaler``. Columns with
        ``object`` dtype are passed through without modification.

        Args:
            X (pd.DataFrame): The input DataFrame to scale.

        Returns:
            pd.DataFrame: A new DataFrame carrying the same row index as
            ``X``, with the scaled numeric columns first (as floats) followed
            by the untouched non-numeric columns.

        Note:
            The column order may change: numeric columns come first, then
            non-numeric columns, each group in its original relative order.
        """
        # Separate numeric (non-object dtype) and non-numeric columns.
        numeric_cols: List[str] = selector(dtype_exclude=object)(X)
        numeric_lookup = set(numeric_cols)  # O(1) membership test per column
        non_numeric_cols: List[str] = [
            col for col in X.columns if col not in numeric_lookup
        ]

        # Define transformers; non-numeric columns are only registered when
        # present so they are carried through unchanged.
        transformers = [("scaler", StandardScaler(), numeric_cols)]
        if non_numeric_cols:
            transformers.append(("passthrough", "passthrough", non_numeric_cols))

        # Fit and apply the transformations in one step.
        preprocessor = ColumnTransformer(transformers)
        scaled_values = preprocessor.fit_transform(X)

        # Convert back to a DataFrame. The original index is re-attached so
        # rows stay aligned with any target/labels indexed like X; without it
        # the result would silently get a fresh RangeIndex.
        scaled_df = pd.DataFrame(
            scaled_values,
            columns=numeric_cols + non_numeric_cols,
            index=X.index,
        )

        # When object columns are passed through alongside the scaled ones,
        # the stacked result is a single object-dtype array, which would leave
        # the scaled columns typed as object. Restore a float dtype for them.
        if numeric_cols and non_numeric_cols:
            scaled_df = scaled_df.astype(dict.fromkeys(numeric_cols, float))

        return scaled_df