Source code for pat2vec.pat2vec_get_methods.get_method_news

from typing import Callable, Dict, Optional, Tuple

import numpy as np
import pandas as pd
from IPython.display import display

from pat2vec.util.filter_dataframe_by_timestamp import \
    filter_dataframe_by_timestamp
from pat2vec.util.get_start_end_year_month import get_start_end_year_month


def compute_feature_stats(
    data: pd.DataFrame, column: str, feature_name: str, config_obj: object
) -> Dict:
    """Compute summary statistics for one feature column of the NEWS data.

    Values in ``column`` are coerced to numbers; entries that are missing or
    cannot be parsed as a number (e.g. free-text observations) are ignored
    rather than raising.

    Args:
        data (pd.DataFrame): Subset of patient data for the feature.
        column (str): Column to compute stats from
            (e.g., 'observation_valuetext_analysed').
        feature_name (str): Base name for the output feature columns.
        config_obj (object): Configuration object with a `negate_biochem`
            attribute.

    Returns:
        Dict: ``{feature_name}_mean``, ``_median``, ``_std``, ``_max``,
        ``_min`` and ``_n`` when at least one numeric value is present.
        Otherwise the same keys mapped to NaN if
        ``config_obj.negate_biochem`` is truthy, or an empty dict if not.
    """
    suffixes = ("mean", "median", "std", "max", "min", "n")
    stats = {}

    if len(data) > 0:
        # errors="coerce" turns unparseable text into NaN so that a single
        # non-numeric observation cannot abort the whole computation.
        values = (
            pd.to_numeric(data[column], errors="coerce").dropna().astype(float)
        )
        if len(values) > 0:
            stats[f"{feature_name}_mean"] = values.mean()
            stats[f"{feature_name}_median"] = values.median()
            # pandas' default sample std: NaN when only one value is present.
            stats[f"{feature_name}_std"] = values.std()
            stats[f"{feature_name}_max"] = values.max()
            stats[f"{feature_name}_min"] = values.min()
            stats[f"{feature_name}_n"] = values.shape[0]
            return stats

    # No usable values: optionally emit explicit NaN placeholders so the
    # output keeps a stable set of columns.
    if config_obj.negate_biochem:
        for suffix in suffixes:
            stats[f"{feature_name}_{suffix}"] = np.nan
    return stats
def get_news(
    current_pat_client_id_code: str,
    target_date_range: Tuple,
    pat_batch: pd.DataFrame,
    config_obj: Optional[object] = None,
    cohort_searcher_with_terms_and_search: Optional[Callable] = None,
) -> pd.DataFrame:
    """Retrieve NEWS/NEWS2 features for a patient within a date range.

    NEWS (National Early Warning Score) observations are taken either from
    the pre-loaded batch (``config_obj.batch_mode``) or from a search, and
    summary statistics (mean, median, std, max, min, n) are computed for
    each component listed in the display-name mapping below.

    Args:
        current_pat_client_id_code (str): The client ID code of the patient.
        target_date_range (Tuple): A tuple representing the target date range.
        pat_batch (pd.DataFrame): Patient observations, used in batch mode.
        config_obj (Optional[object]): Configuration object; this function
            reads `batch_mode`, `client_idcode_term_name` and `verbosity`
            and passes the object on to the helpers it calls.
        cohort_searcher_with_terms_and_search (Optional[Callable]): Search
            function used when not in batch mode.

    Returns:
        pd.DataFrame: A single-row DataFrame holding `client_idcode` and the
        computed NEWS feature statistics for the patient.
    """
    name_col = "obscatalogmasteritem_displayname"
    value_col = "observation_valuetext_analysed"

    start_year, start_month, end_year, end_month, start_day, end_day = (
        get_start_end_year_month(target_date_range, config_obj=config_obj)
    )

    if config_obj.batch_mode:
        current_pat_raw_news = filter_dataframe_by_timestamp(
            pat_batch,
            start_year,
            start_month,
            end_year,
            end_month,
            start_day,
            end_day,
            "observationdocument_recordeddtm",
        )
    else:
        current_pat_raw_news = cohort_searcher_with_terms_and_search(
            index_name="observations",
            fields_list=[
                "observation_guid",
                "client_idcode",
                "obscatalogmasteritem_displayname",
                "observation_valuetext_analysed",
                "observationdocument_recordeddtm",
                "clientvisit_visitidcode",
            ],
            term_name=config_obj.client_idcode_term_name,
            entered_list=[current_pat_client_id_code],
            search_string=(
                'obscatalogmasteritem_displayname:("NEWS" OR "NEWS2") AND '
                f"observationdocument_recordeddtm:[{start_year}-{start_month}-{start_day} "
                f"TO {end_year}-{end_month}-{end_day}]"
            ),
        )

    # Always start with client_idcode
    news_features = {"client_idcode": current_pat_client_id_code}

    # Define mappings between display names and feature names
    feature_map = {
        "NEWS2_Score": "news_score",
        "NEWS_Systolic_BP": "news_systolic_bp",
        "NEWS_Diastolic_BP": "news_diastolic_bp",
        "NEWS_Respiration_Rate": "news_respiration_rate",
        "NEWS_Heart_Rate": "news_heart_rate",
        "NEWS_Oxygen_Saturation": "news_oxygen_saturation",
        "NEWS Temperature": "news_temperature",
        "NEWS_AVPU": "news_avpu",
        "NEWS_Supplemental_Oxygen": "news_supplemental_oxygen",
        "NEWS2_Sp02_Target": "news_sp02_target",
        "NEWS2_Sp02_Scale": "news_sp02_scale",
        "NEWS_Pulse_Type": "news_pulse_type",
        "NEWS_Pain_Score": "news_pain_score",
        "NEWS Oxygen Litres": "news_oxygen_litres",
        "NEWS Oxygen Delivery": "news_oxygen_delivery",
    }

    display_names = current_pat_raw_news[name_col]
    for display_name, feature_name in feature_map.items():
        # Boolean-mask selection and dropna both return new frames, so the
        # source data is never modified.
        subset = current_pat_raw_news[display_names == display_name]
        subset = subset.dropna(subset=[value_col])

        # Special case: keep only NEWS2 scores inside [-20, 20]. The bounds
        # are inclusive so that a score of exactly 20 is retained; rows
        # whose score cannot be parsed as a number are discarded as well.
        if feature_name == "news_score":
            scores = pd.to_numeric(subset[value_col], errors="coerce")
            subset = subset[scores.between(-20, 20)]

        stats = compute_feature_stats(
            subset, value_col, feature_name, config_obj
        )
        news_features.update(stats)

    news_features_df = pd.DataFrame([news_features])

    if config_obj.verbosity >= 6:
        display(news_features_df)

    return news_features_df