Source code for pat2vec.pat2vec_get_methods.get_method_textual_obs_annotations

from typing import Callable, Optional, Tuple

import pandas as pd
from IPython.display import display
from IPython.utils import io

from pat2vec.util.filter_dataframe_by_timestamp import \
    filter_dataframe_by_timestamp
from pat2vec.util.get_start_end_year_month import get_start_end_year_month
from pat2vec.util.methods_annotation import \
    calculate_pretty_name_count_features
from pat2vec.util.methods_get import update_pbar


[docs] def get_current_pat_textual_obs_annotations( current_pat_client_id_code: str, target_date_range: Tuple, textual_obs_annotations: Optional[pd.DataFrame], config_obj: Optional[object] = None, t: Optional[object] = None, cohort_searcher_with_terms_and_search: Optional[Callable] = None, cat: Optional[object] = None, ) -> pd.DataFrame: """Retrieves and processes textual observation annotations for a patient. This function filters a batch of pre-existing textual observation annotations for a specific patient within a given date range. It then calculates count-based features from the 'pretty_name' of the annotations. Args: current_pat_client_id_code (str): The unique identifier for the patient. target_date_range (Tuple): The date range to filter annotations by. textual_obs_annotations (Optional[pd.DataFrame]): DataFrame containing textual observation annotations for a batch of patients. config_obj (Optional[object]): Configuration object with settings such as `verbosity` and `start_time`. Defaults to None. t (Optional[object]): A progress bar object for updating status. Defaults to None. cohort_searcher_with_terms_and_search (Optional[Callable]): Placeholder for a cohort searcher function, unused in this implementation. Defaults to None. cat (Optional[object]): Placeholder for a MedCAT object, unused in this implementation. Defaults to None. Returns: pd.DataFrame: A DataFrame containing the calculated annotation features for the specified patient. If no annotations are found, a DataFrame with only the 'client_idcode' is returned. Raises: ValueError: If `config_obj`, `textual_obs_annotations`, `current_pat_client_id_code`, or `target_date_range` is None. """ if config_obj is None: raise ValueError( "config_obj cannot be None. Please provide a valid configuration." ) if textual_obs_annotations is None: raise ValueError( "textual_obs_annotations cannot be None. Please provide a valid DataFrame." ) if current_pat_client_id_code is None: raise ValueError( "current_pat_client_id_code cannot be None. Please provide a valid client ID code." ) if target_date_range is None: raise ValueError( "target_date_range cannot be None. Please provide a valid target date range." ) # get the start and end time for the provided target date range if config_obj is None: raise ValueError( "config_obj cannot be None. Please provide a valid configuration." ) start_time = config_obj.start_time p_bar_entry = "annotations_textual_obs" try: update_pbar( current_pat_client_id_code, start_time, 0, p_bar_entry, t, config_obj, config_obj.skipped_counter, ) except Exception as e: print(e) start_year, start_month, end_year, end_month, start_day, end_day = ( get_start_end_year_month(target_date_range, config_obj=config_obj) ) # filter the textual observation annotations based on the provided target date range if textual_obs_annotations is not None: filtered_textual_obs_annotations = filter_dataframe_by_timestamp( textual_obs_annotations, start_year, start_month, end_year, end_month, start_day, end_day, "basicobs_entered", dropna=True, ) # if there are any filtered annotations, calculate the pretty name count features if len(filtered_textual_obs_annotations) > 0: processed_annotations = calculate_pretty_name_count_features( filtered_textual_obs_annotations ) else: # if there are no filtered annotations, create a DataFrame with the client ID code if config_obj.verbosity >= 6: print( "len(filtered_report_annotations)>0", len(filtered_textual_obs_annotations) > 0, ) processed_annotations = pd.DataFrame( data=[current_pat_client_id_code], columns=["client_idcode"] ) else: # if the textual observation annotations are None, create a DataFrame with the client ID code processed_annotations = pd.DataFrame( data=[current_pat_client_id_code], columns=["client_idcode"] ) # display the processed annotations if the verbosity level is 6 or higher if config_obj.verbosity >= 6: display(processed_annotations) return processed_annotations # Return the processed annotation vector