# Source code for pat2vec.pat2vec_get_methods.get_method_pat_annotations

from typing import Callable, Optional, Tuple

import pandas as pd
from IPython.display import display

from pat2vec.util.filter_dataframe_by_timestamp import \
    filter_dataframe_by_timestamp
from pat2vec.util.get_start_end_year_month import get_start_end_year_month
from pat2vec.util.methods_annotation import \
    calculate_pretty_name_count_features
from pat2vec.util.methods_get import update_pbar


def get_current_pat_annotations(
    current_pat_client_id_code: str,
    target_date_range: Tuple,
    batch_epr_docs_annotations: Optional[pd.DataFrame],
    config_obj: Optional[object] = None,
    t: Optional[object] = None,
    cohort_searcher_with_terms_and_search: Optional[Callable] = None,
    cat: Optional[object] = None,
) -> pd.DataFrame:
    """Retrieves and processes EPR document annotations for a patient.

    The supplied batch of EPR document annotations is restricted to the
    target date range (using its ``updatetime`` column) and the remaining
    rows are passed to ``calculate_pretty_name_count_features``. No filtering
    by patient identifier happens here; presumably the caller supplies a
    batch that already belongs to the patient (confirm against the caller).

    Args:
        current_pat_client_id_code (str): The unique identifier for the
            patient. Used for progress reporting and for the fallback frame.
        target_date_range (Tuple): The date range to filter annotations by;
            it is decoded by ``get_start_end_year_month``.
        batch_epr_docs_annotations (Optional[pd.DataFrame]): DataFrame of
            EPR document annotations, or None when there is nothing to
            process.
        config_obj (Optional[object]): Configuration object. This function
            reads ``start_time``, ``skipped_counter`` and ``verbosity`` from
            it. Although it defaults to None, it is required.
        t (Optional[object]): A progress bar object forwarded to
            ``update_pbar``. Defaults to None.
        cohort_searcher_with_terms_and_search (Optional[Callable]):
            Placeholder, unused in this implementation. Defaults to None.
        cat (Optional[object]): Placeholder, unused in this implementation.
            Defaults to None.

    Returns:
        pd.DataFrame: The output of ``calculate_pretty_name_count_features``
        when at least one annotation falls inside the date range. Otherwise
        (no batch given, or nothing left after filtering) a single-row
        DataFrame holding only the ``client_idcode`` column.

    Raises:
        ValueError: If `config_obj` is None.
        TypeError: If `batch_epr_docs_annotations` is provided and is not a
            pandas DataFrame.
    """
    # Validate inputs before touching any attribute of config_obj.
    if config_obj is None:
        raise ValueError(
            "config_obj cannot be None. Please provide a valid configuration. "
            "(get_current_pat_annotations)"
        )

    if batch_epr_docs_annotations is not None and not isinstance(
        batch_epr_docs_annotations, pd.DataFrame
    ):
        raise TypeError("batch_epr_docs_annotations must be a pd.DataFrame.")

    start_time = config_obj.start_time
    p_bar_entry = "annotations_epr"

    update_pbar(
        current_pat_client_id_code,
        start_time,
        0,
        p_bar_entry,
        t,
        config_obj,
        config_obj.skipped_counter,
    )

    # Extract start and end dates from the target date range. This runs even
    # when no batch was supplied, exactly as in the original call order.
    start_year, start_month, end_year, end_month, start_day, end_day = (
        get_start_end_year_month(target_date_range, config_obj=config_obj)
    )

    if batch_epr_docs_annotations is not None:
        # Keep only annotations whose "updatetime" is inside the date range.
        filtered_batch_epr_docs_annotations = filter_dataframe_by_timestamp(
            batch_epr_docs_annotations,
            start_year,
            start_month,
            end_year,
            end_month,
            start_day,
            end_day,
            "updatetime",
            dropna=True,
        )

        if len(filtered_batch_epr_docs_annotations) > 0:
            # Calculate pretty name count features for the filtered rows.
            df_pat_target = calculate_pretty_name_count_features(
                filtered_batch_epr_docs_annotations
            )
        else:
            # Nothing left in the window: fall back to an id-only frame.
            if config_obj.verbosity >= 6:
                print(
                    "len(filtered_batch_epr_docs_annotations)>0",
                    len(filtered_batch_epr_docs_annotations) > 0,
                )
            df_pat_target = pd.DataFrame(
                data=[current_pat_client_id_code], columns=["client_idcode"]
            )
    else:
        # No batch was supplied at all: fall back to an id-only frame.
        df_pat_target = pd.DataFrame(
            data=[current_pat_client_id_code], columns=["client_idcode"]
        )

    if config_obj.verbosity >= 6:
        # Show the resulting frame at verbosity level 6 or higher.
        display(df_pat_target)

    return df_pat_target