# Source code for pat2vec.pat2vec_get_methods.get_method_news

from typing import Callable, Dict, Optional, Tuple

import numpy as np
import pandas as pd
from IPython.display import display

from pat2vec.util.filter_dataframe_by_timestamp import \
    filter_dataframe_by_timestamp
from pat2vec.util.get_start_end_year_month import get_start_end_year_month



[docs]
def compute_feature_stats(
    data: pd.DataFrame, column: str, feature_name: str, config_obj: object
) -> Dict:
    """Computes summary statistics for a feature column in the NEWS dataset.

    Values in ``column`` are coerced to numbers before aggregation. Entries
    that cannot be parsed as a number are treated as missing instead of
    raising, so a single non-numeric observation does not abort feature
    extraction for the whole patient.

    Args:
        data (pd.DataFrame): Subset of patient data for the feature.
        column (str): Column to compute stats from (e.g.,
            'observation_valuetext_analysed').
        feature_name (str): Base name for the output feature columns.
        config_obj (object): Configuration object with `negate_biochem` attribute.

    Returns:
        Dict: A dictionary of calculated feature statistics keyed as
            ``{feature_name}_{mean,median,std,max,min,n}``. When there are no
            usable numeric values, every key maps to NaN if
            ``config_obj.negate_biochem`` is truthy; otherwise the dictionary
            is empty. ``std`` uses the pandas default (sample standard
            deviation), so it is NaN when only one value is available.
    """
    if len(data) > 0:
        # errors="coerce" turns unparseable text into NaN, which dropna() then
        # removes; astype(float) keeps the output dtype stable for int input.
        values = (
            pd.to_numeric(data[column], errors="coerce").dropna().astype(float)
        )
        if len(values) > 0:
            return {
                f"{feature_name}_mean": values.mean(),
                f"{feature_name}_median": values.median(),
                f"{feature_name}_std": values.std(),
                f"{feature_name}_max": values.max(),
                f"{feature_name}_min": values.min(),
                f"{feature_name}_n": values.shape[0],
            }

    # No usable values: emit explicit NaN placeholders only when configured,
    # so the output columns stay aligned across patients.
    if config_obj.negate_biochem:
        return {
            f"{feature_name}_{suffix}": np.nan
            for suffix in ("mean", "median", "std", "max", "min", "n")
        }
    return {}




[docs]
def get_news(
    current_pat_client_id_code: str,
    target_date_range: Tuple,
    pat_batch: pd.DataFrame,
    config_obj: Optional[object] = None,
    cohort_searcher_with_terms_and_search: Optional[Callable] = None,
) -> pd.DataFrame:
    """Retrieves NEWS/NEWS2 features for a patient within a date range.

    This function fetches NEWS (National Early Warning Score) observation data,
    either from a pre-loaded batch or by searching. It then calculates summary
    statistics (mean, median, std, etc.) for each component of the NEWS score.

    Args:
        current_pat_client_id_code (str): The client ID code of the patient.
        target_date_range (Tuple): A tuple representing the target date range.
        pat_batch (pd.DataFrame): The DataFrame containing patient data for batch
            mode. Only used when `config_obj.batch_mode` is truthy.
        config_obj (Optional[object]): Configuration object. Although it defaults
            to None, it is required in practice: `batch_mode`, `verbosity`,
            `negate_biochem` and (in search mode) `client_idcode_term_name` are
            read from it unconditionally.
        cohort_searcher_with_terms_and_search (Optional[Callable]): The function for
            cohort searching. Must be provided when `config_obj.batch_mode` is
            falsy. Defaults to None.

    Returns:
        pd.DataFrame: A single-row DataFrame containing `client_idcode` plus the
            NEWS feature statistics for the specified patient.
    """
    value_column = "observation_valuetext_analysed"
    name_column = "obscatalogmasteritem_displayname"

    start_year, start_month, end_year, end_month, start_day, end_day = get_start_end_year_month(
        target_date_range, config_obj=config_obj
    )

    if config_obj.batch_mode:
        current_pat_raw_news = filter_dataframe_by_timestamp(
            pat_batch,
            start_year,
            start_month,
            end_year,
            end_month,
            start_day,
            end_day,
            "observationdocument_recordeddtm",
        )
    else:
        current_pat_raw_news = cohort_searcher_with_terms_and_search(
            index_name="observations",
            fields_list=[
                "observation_guid",
                "client_idcode",
                "obscatalogmasteritem_displayname",
                "observation_valuetext_analysed",
                "observationdocument_recordeddtm",
                "clientvisit_visitidcode",
            ],
            term_name=config_obj.client_idcode_term_name,
            entered_list=[current_pat_client_id_code],
            search_string=(
                'obscatalogmasteritem_displayname:("NEWS" OR "NEWS2") AND '
                f"observationdocument_recordeddtm:[{start_year}-{start_month}-{start_day} "
                f"TO {end_year}-{end_month}-{end_day}]"
            ),
        )

    # Always start with client_idcode
    news_features = {"client_idcode": current_pat_client_id_code}

    # Define mappings between display names and feature names
    feature_map = {
        "NEWS2_Score": "news_score",
        "NEWS_Systolic_BP": "news_systolic_bp",
        "NEWS_Diastolic_BP": "news_diastolic_bp",
        "NEWS_Respiration_Rate": "news_respiration_rate",
        "NEWS_Heart_Rate": "news_heart_rate",
        "NEWS_Oxygen_Saturation": "news_oxygen_saturation",
        "NEWS Temperature": "news_temperature",
        "NEWS_AVPU": "news_avpu",
        "NEWS_Supplemental_Oxygen": "news_supplemental_oxygen",
        "NEWS2_Sp02_Target": "news_sp02_target",
        "NEWS2_Sp02_Scale": "news_sp02_scale",
        "NEWS_Pulse_Type": "news_pulse_type",
        "NEWS_Pain_Score": "news_pain_score",
        "NEWS Oxygen Litres": "news_oxygen_litres",
        "NEWS Oxygen Delivery": "news_oxygen_delivery",
    }

    for display_name, feature_name in feature_map.items():
        is_feature = current_pat_raw_news[name_column] == display_name
        subset = current_pat_raw_news[is_feature].dropna(subset=[value_column])

        # Special case: keep only NEWS2 scores within [-20, 20], inclusive.
        # 20 is the highest aggregate NEWS2 score, so the upper bound must not
        # be excluded. Values are coerced once; anything non-numeric becomes
        # NaN, fails the range test and is dropped instead of raising.
        if feature_name == "news_score" and len(subset) > 0:
            scores = pd.to_numeric(subset[value_column], errors="coerce")
            subset = subset[scores.between(-20, 20)]

        news_features.update(
            compute_feature_stats(subset, value_column, feature_name, config_obj)
        )

    news_features_df = pd.DataFrame([news_features])

    if config_obj.verbosity >= 6:
        display(news_features_df)

    return news_features_df