#' Data Pre-Processor
#'
#' @description A helper function to assist in pre-processing the user-supplied
#' input data in a standardized format for use with other functions in the \code{bp} package.
#' See Vignette for further details.
#'
#' @param data User-supplied dataset containing blood pressure data. Must
#' contain data for Systolic blood pressure and Diastolic blood pressure at a
#' minimum.
#'
#' @param bp_type Required argument specifying which of the three BP data types
#' ("HBPM", "ABPM", or "AP") the input data is. Default \code{bp_type} set to "HBPM".
#' This argument determines which processing steps are necessary to yield sensible
#' output.
#'
#' HBPM - Home Blood Pressure Monitor | ABPM - Ambulatory Blood Pressure Monitor | AP - Arterial Pressure
#'
#' NOTE: \code{bp_type} impacts blood pressure staging in \code{bp_stages} if \code{guidelines = "AHA"}, for which the
#' cutoffs for each blood pressure stage are automatically adjusted according to \code{bp_type}.
#'
#' @param ap (For AP data only) Required column name (character string) corresponding
#' to continuous Arterial Pressure (AP) (mmHg). Note that this is a required argument
#' so long as bp_type = "AP". Ensure that bp_type is set accordingly.
#'
#' @param time_elap (For AP data only) Column name corresponding to the time elapsed
#' for the given AP waveform data.
#'
#' @param sbp Required column name (character string) corresponding to Systolic Blood
#' Pressure (mmHg)
#'
#' @param dbp Required column name (character string) corresponding to Diastolic Blood
#' Pressure (mmHg)
#'
#' @param date_time Optional column name (character string) corresponding to Date/Time,
#' but HIGHLY recommended to supply if available.
#'
#' For DATE-only columns (with no associated time), leave date_time = NULL. DATE-only
#' adjustments are automatic. Dates can be automatically calculated off DATE_TIME column
#' provided that it is called "DATE_TIME" exactly.
#'
#' @param id Optional column name (character string) corresponding to subject ID. Typically
#' needed for data corresponding to more than one subject. For one-subject datasets, ID
#' will default to 1 (if ID column not found in dataset)
#'
#' @param group Optional column name (character string) corresponding to an additional
#' grouping variable that can be used to further break down data. NOTE that this simply
#' sets the column as "GROUP" so that other functions recognize which column to use as
#' the grouping variable.
#'
#' @param wake Optional column name (character string) corresponding to sleep status. A
#' WAKE value of 1 indicates that the subject is awake and 0 implies asleep.
#'
#' @param visit Optional column name (character string) corresponding to Visit number
#'
#' @param hr Optional column name (character string) corresponding to Heart Rate (bpm)
#'
#' @param pp Optional column name (character string) corresponding to Pulse Pressure
#' (SBP - DBP). If not supplied, it will be calculated automatically.
#'
#' @param map Optional column name (character string) corresponding to Mean Arterial
#' Pressure
#'
#' @param rpp Optional column name (character string) corresponding to Rate Pressure
#' Product (RPP = SBP * HR). If not supplied, but HR column available, then
#' RPP will be calculated automatically.
#'
#' @param guidelines A string designation for the guidelines to follow when mapping BP
#' readings to a respective BP stage. \code{guidelines} can take on either "Lee_2020" corresponding to staging in Lee et al (2020), "AHA" corresponding to
#' guidelines by the American Heart Association, see e.g. Muntner et al (2019), or "Custom" based on user-defined cutoffs in \code{bp_cutoffs}. By default, AHA guidelines adjust \code{bp_cutoffs} depending on the BP type.
#'
#' @param bp_cutoffs A list containing two vectors corresponding to SBP and DBP cutoffs, respectively.
#' Each vector contains 5 values.
#'
#' The SBP vector (100, 120, 130, 140, 180) corresponds to the upper limits for the following stages:
#' Low (0-100), Normal (100-120), Elevated (120-130), Stage 1 Hypertension (130-140), Stage 2 Hypertension
#' (140-180). When utilizing Lee et al (2020) guidelines,
#' additional stages are included: Isolated Systolic Hypertension
#' for Stage 1 (ISH - S1) (130-140), Isolated Diastolic Hypertension for Stage 1 (IDH - S1) (0-130), ISH - S2
#' (140-180), and IDH - S2 (0-140).
#'
#' The DBP vector (60, 80, 80, 90, 120) corresponds to the upper limits for the following stages:
#' Low (0-60), Normal (60-80), Elevated (0-80), Stage 1 Hypertension (80-90), Stage 2 Hypertension
#' (90-120). The upper limit of the "Elevated" category repeats in the DBP vector and matches that of Normal.
#' This is because according to most guidelines, there is no distinction between DBP cutoffs for Normal and
#' Elevated - these stages are discerned by SBP, not DBP. When utilizing Lee et al (2020) guidelines,
#' additional stages are included: Isolated Systolic Hypertension
#' for Stage 1 (ISH - S1) (0-80), Isolated Diastolic Hypertension for Stage 1 (IDH - S1) (80-90), ISH - S2
#' (0-90), and IDH - S2 (90-120).
#'
#' Any SBP reading below 100 or DBP reading below 60 is considered Hypotension ("Low").
#' Any SBP reading above 180 or DBP reading above 120 is considered a Crisis.
#'
#' When \code{guidelines = "AHA"}, the cutoffs are automatically adjusted to match BP type without user input. For \code{bp_type = "hbpm"},
#' SBP = (100, 120, 130, 135, 160), DBP = (60, 80, 80, 85, 110). For \code{bp_type = "abpm"}, SBP = (100, 115, 125, 130, 160), DBP = (60, 75, 75, 80, 105).
#' These choices can be overwritten by the user by directly changing \code{bp_cutoffs}, in which case the guidelines are ignored and automatically treated as "Custom".
#'
#'
#' If \code{inc_low = FALSE}, although an upper limit value is still required in the SBP vector, the "Low"
#' stage will be omitted in the final output. Similarly, if \code{inc_crisis = FALSE}, then the "Crisis"
#' category will be omitted from the final output.
#'
#' @param DoW Optional column name (character string) corresponding to the Day of the Week.
#' If not supplied, but DATE or DATE_TIME columns available, then DoW will be created
#' automatically. DoW values must be abbreviated as such \code{c("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat")}
#'
#' @param ToD_int Optional vector of length 4; acceptable values are from 0 to 23, in an order corresponding to the starting hour for (Morning, Afternoon, Evening, Night). This vector allows overriding the default interval for the Time-of-Day periods: if NULL, the Morning, Afternoon, Evening, and Night periods are set at 6, 12, 18, 0 respectively,
#' where 0 corresponds to the 24th hour of the day (i.e. Midnight).
#' For example, ToD_int = c(5, 13, 18, 23) would correspond to a period for
#' Morning starting at 5:00 (until 13:00), Afternoon starting at 13:00 (until 18:00),
#' Evening starting at 18:00 (until 23:00), and Night starting at 23:00 (until 5:00)
#'
#' @param eod Optional argument to adjust the delineation for the end of day (eod). The supplied value should be a character string with 4 characters representing the digits of 24-hour time, e.g. "1310" corresponds to 1:10pm. For individuals who
#' do not go to bed early or work night-shifts, this argument adjusts the \code{DATE} column so that the days change at specified time. \code{eod = "0000"} means no change. \code{eod = "1130"} will adjust the date of the readings up to 11:30am to the previous date. \code{eod = "1230"} will adjust the date of the readings after 12:30pm to the next date.
#'
#' @param data_screen Optional logical argument; default set to TRUE. Screens for extreme values in the data
#' for both \code{SBP} and \code{DBP} according to Omboni, et al (1995) paper - Calculation of Trough:Peak
#' Ratio of Antihypertensive Treatment from Ambulatory Blood Pressure: Methodological Aspects
#'
#' @param SUL Systolic Upper Limit (SUL). If \code{data_screen = TRUE}, then \code{SUL} sets the upper limit by which
#' to exclude any \code{SBP} values that exceed this threshold. The default is set to 240 per Omboni, et al (1995)
#' paper - Calculation of Trough:Peak Ratio of Antihypertensive Treatment from Ambulatory Blood Pressure:
#' Methodological Aspects
#'
#' @param SLL Systolic Lower Limit (SLL). If \code{data_screen = TRUE}, then \code{SLL} sets the lower limit by which
#' to exclude any \code{SBP} values that fall below this threshold. The default is set to 50 per Omboni, et al (1995)
#' paper - Calculation of Trough:Peak Ratio of Antihypertensive Treatment from Ambulatory Blood Pressure:
#' Methodological Aspects
#'
#' @param DUL Diastolic Upper Limit (DUL). If \code{data_screen = TRUE}, then \code{DUL} sets the upper limit by which
#' to exclude any \code{DBP} values that exceed this threshold. The default is set to 140 per Omboni, et al (1995)
#' paper - Calculation of Trough:Peak Ratio of Antihypertensive Treatment from Ambulatory Blood Pressure:
#' Methodological Aspects
#'
#' @param DLL Diastolic Lower Limit (DLL). If \code{data_screen = TRUE}, then \code{DLL} sets the lower limit by which
#' to exclude any \code{DBP} values that fall below this threshold. The default is set to 40 per Omboni, et al (1995)
#' paper - Calculation of Trough:Peak Ratio of Antihypertensive Treatment from Ambulatory Blood Pressure:
#' Methodological Aspects
#'
#' @param HRUL Heart Rate Upper Limit (HRUL). If \code{data_screen = TRUE}, then \code{HRUL} sets the upper limit
#' by which to exclude any \code{HR} values that exceed this threshold. The default is set to 220 per the upper limit
#' of the common max heart rate formula: 220 - age
#'
#' see https://www.cdc.gov/physicalactivity/basics/measuring/heartrate.htm
#'
#' @param HRLL Heart Rate Lower Limit (HRLL). If \code{data_screen = TRUE}, then \code{HRLL} sets the lower limit
#' by which to exclude any \code{HR} values that fall below this threshold. The default is set to 27 per Guinness
#' World Records - lowest heart rate (https://www.guinnessworldrecords.com/world-records/lowest-heart-rate)
#'
#' @param inc_low Optional logical argument dictating whether or not to include the "Low" category for BP
#' classification column (and the supplementary SBP/DBP Category columns). Default set to TRUE.
#'
#' @param inc_crisis Optional logical argument dictating whether or not to include the "Crisis" category for BP
#' classification column (and the supplementary SBP/DBP Category columns). Default set to TRUE.
#'
#' @param agg Optional argument specifying whether or not to aggregate the data based on the amount of time
#' between observations. If \code{agg = TRUE} then any two (or more) observations within the amount of
#' time allotted by the agg_thresh argument will be averaged together.
#'
#' @param agg_thresh Optional argument specifying the threshold of how many minutes can pass between readings
#' (observations) and still be considered part of the same sitting. The default is set to 3 minutes. This implies
#' that if two or more readings are within 3 minutes of each other, they will be averaged together (if agg is
#' set to TRUE).
#'
#' @param collapse_df Optional argument that collapses the dataframe to eliminate repeating rows after
#' aggregation.
#'
#' @param dt_fmt Optional argument that specifies the input date/time format (dt_fmt). Default set to "ymd HMS"
#' but can take on any format specified by the lubridate package.
#'
#' @param chron_order Optional argument that specifies whether to order the data in chronological (Oldest
#' dates & times at the top / first) or reverse chronological order (Most recent dates & times at the top / first).
#' TRUE refers to chronological order; FALSE refers to reverse chronological order. The default is set to
#' FALSE (i.e. most recent observations listed first in the dataframe).
#'
#' See https://lubridate.tidyverse.org/reference/parse_date_time.html for more details on the date/time formats accepted by \code{dt_fmt}.
#'
#' @param tz Optional argument denoting the respective time zone. Default time zone set to "UTC".
#' Use \code{OlsonNames()} for a complete listing of all available time zones that can be used in this
#' argument.
#'
#' @return A processed dataframe object with standardized column names and formats to use with the rest of bp package functions. The following standardized column names are used throughout
#' \item{BP_TYPE}{One of AP, HBPM or ABPM}
#' \item{ID}{Subject ID}
#' \item{SBP}{Systolic Blood Pressure}
#' \item{DBP}{Diastolic Blood Pressure}
#' \item{SBP_CATEGORY}{Ordinal, SBP characterization into "Low" < "Normal"<"Elevated"<"Stage 1"< "Stage 2" < "Crisis". "Low" is not included if \code{inc_low = FALSE}. "Crisis" is not included if \code{inc_crisis = FALSE}.}
#' \item{DBP_CATEGORY}{Ordinal, DBP characterization into "Low" < "Normal"<"Elevated"<"Stage 1"< "Stage 2" < "Crisis". "Low" is not included if \code{inc_low = FALSE}. "Crisis" is not included if \code{inc_crisis = FALSE}.}
#' \item{BP_CLASS}{Blood pressure categorization based on paired values (SBP, DBP) into one of the 8 stages according to Lee et al. 2020. See \code{\link{bp_scatter}}}
#' \item{HR}{Heart Rate}
#' \item{MAP}{Mean Arterial Pressure}
#' \item{PP}{Pulse Pressure, SBP-DBP}
#' \item{DATE_TIME}{Date and time in POSIXct format}
#' \item{DATE}{Date only in Date format}
#' \item{MONTH}{Month, integer from 1 to 12}
#' \item{DAY}{Day, integer from 1 to 31}
#' \item{YEAR}{Four digit year}
#' \item{DAY_OF_WEEK}{Ordinal, with "Sun"<"Mon"<"Tue"<"Wed"<"Thu"<"Fri"<"Sat"}
#' \item{TIME}{Time in character format}
#' \item{HOUR}{Integer, from 0 to 23}
#' \item{TIME_OF_DAY}{One of "Morning", "Afternoon", "Evening" or "Night"}
#'
#' @references
#' Lee H, Yano Y, Cho SMJ, Park JH, Park S, Lloyd-Jones DM, Kim HC. Cardiovascular risk of isolated
#' systolic or diastolic hypertension in young adults. \emph{Circulation}. 2020; 141:1778–1786.
#' \doi{10.1161/CIRCULATIONAHA.119.044838}
#'
#' Muntner, P., Carey, R. M., Jamerson, K., Wright Jr, J. T., & Whelton, P. K. (2019). Rationale for ambulatory and home blood pressure monitoring thresholds in the 2017 American College of Cardiology/American Heart Association Guideline. Hypertension, 73(1), 33-38. \doi{10.1161/HYPERTENSIONAHA.118.11946}
#'
#'
#' Omboni, S., Parati, G*., Zanchetti, A., Mancia, G. Calculation of trough: peak ratio of
#' antihypertensive treatment from ambulatory blood pressure: methodological aspects
#' \emph{Journal of Hypertension}. October 1995 - Volume 13 - Issue 10 - p 1105-1112
#' \doi{10.1097/00004872-199510000-00005}
#'
#' Unger, T., Borghi, C., Charchar, F., Khan, N. A., Poulter, N. R., Prabhakaran, D., ... & Schutte,
#' A. E. (2020). 2020 International Society of Hypertension global hypertension practice guidelines.
#' \emph{Hypertension}, 75(6), 1334-1357.
#' \doi{10.1161/HYPERTENSIONAHA.120.15026}
#'
#' @export
#'
#' @examples
#' # Load bp_hypnos
#' data("bp_hypnos")
#'
#' # Process data for bp_hypnos
#' hypnos_proc <- process_data(bp_hypnos,
#' bp_type = 'abpm',
#' sbp = 'syst',
#' dbp = 'diast',
#' date_time = 'date.time',
#' hr = 'hr',
#' pp = 'PP',
#' map = 'MaP',
#' rpp = 'Rpp',
#' id = 'id',
#' visit = 'Visit',
#' wake = 'wake',
#' data_screen = FALSE)
#'
#' hypnos_proc
#'
#'
#' # Load bp_jhs data
#' data("bp_jhs")
#'
#' # Process data for bp_jhs
#' # Note that bp_type defaults to "hbpm" and is therefore not specified
#' jhs_proc <- process_data(bp_jhs,
#' sbp = "Sys.mmHg.",
#' dbp = "Dias.mmHg.",
#' date_time = "DateTime",
#' hr = "Pulse.bpm.")
#'
#' jhs_proc
#'
process_data <- function(data,
                         # Home Blood Pressure Monitor (HBPM) | Ambulatory Blood Pressure Monitor (ABPM) | Arterial Pressure (AP)
                         bp_type = c("hbpm", "abpm", "ap"),
                         guidelines = c("Lee_2020", "AHA", "Custom"),
                         bp_cutoffs = list( c(100, 120, 130, 140, 180), c(60, 80, 80, 90, 120)),
                         # For AP data
                         ap = NULL,
                         time_elap = NULL,
                         # For all other data (HBPM, ABPM)
                         sbp = NULL,
                         dbp = NULL,
                         date_time = NULL,
                         id = NULL,
                         group = NULL,
                         wake = NULL,
                         visit = NULL,
                         hr = NULL,
                         pp = NULL,
                         map = NULL,
                         rpp = NULL,
                         # Options
                         DoW = NULL,
                         ToD_int = NULL,
                         eod = NULL,
                         data_screen = TRUE,
                         SUL = 240,
                         SLL = 50,
                         DUL = 140,
                         DLL = 40,
                         HRUL = 220,
                         HRLL = 27,
                         inc_low = TRUE,
                         inc_crisis = TRUE,
                         agg = FALSE,
                         agg_thresh = 3,
                         collapse_df = FALSE,
                         dt_fmt = "ymd HMS",
                         chron_order = FALSE,
                         tz = "UTC") {

  # Bind, then immediately unbind, the bare column names that appear unquoted
  # in the dplyr calls further down. After rm() none of them exist as local
  # variables. NOTE(review): presumably this is here to keep static checks
  # quiet about "undefined" symbols -- confirm.
  SBP <- DBP <- HR <- SBP_Category <- DBP_Category <- TIME_OF_DAY <- BP_CLASS <- NULL
  rm(list = c("SBP", "DBP", "HR", "SBP_Category", "DBP_Category", "TIME_OF_DAY", "BP_CLASS"))

  # Normalize the BP type: accept any letter case from the caller, validate it
  # against the choices in the signature, and carry it forward in upper case
  # ("HBPM", "ABPM" or "AP"). match.arg() must receive the bare argument name.
  bp_type <- tolower(bp_type)
  bp_type <- match.arg(bp_type)
  bp_type <- toupper(bp_type)

  # Only data frames are processed; a matrix is converted (with a warning),
  # anything else is rejected.
  if (!is.data.frame(data)) {
    if (!is.matrix(data)) {
      stop('Invalid data type. Please use either data.frame or matrix\n')
    }
    warning('Converted matrix data type to data.frame\n')
    data <- as.data.frame(data)
  }

  # Upper-case every column name for consistency
  colnames(data) <- toupper(colnames(data))

  if (bp_type == "AP") {

    # ---- Arterial Pressure (AP) ------------------------------------------- #

    # AP values
    data <- ap_adj(data = data, ap = ap)

    # Elapsed-time column: numeric values representing a fraction of time that
    # has elapsed (i.e. 0.25 minutes, 1.75 seconds, etc); typically the
    # sampling rate column
    data <- time_adj(data = data, time_elap = time_elap)

    # Subject ID, then grouping variable
    data <- id_adj(data = data, id = id)
    data <- group_adj(data = data, group = group)

    # Record which BP type this data set holds
    data$BP_TYPE <- bp_type

  } else if (bp_type %in% c("ABPM", "HBPM")) {

    # ---- Ambulatory (ABPM) / Home (HBPM) monitoring ----------------------- #

    # Both pressure columns are mandatory for these types
    if (is.null(sbp) || is.null(dbp)) {
      stop('Both "SBP" and "DBP" column names must be specified.\n')
    }

    # Upper-case every column name for consistency
    colnames(data) <- toupper(colnames(data))

    # Systolic and diastolic pressure, screened against their upper / lower
    # limits when data_screen is on
    data <- sbp_adj(data = data, sbp = sbp, data_screen = data_screen, SUL = SUL, SLL = SLL)
    data <- dbp_adj(data = data, dbp = dbp, data_screen = data_screen, DUL = DUL, DLL = DLL)

    # Subject ID, grouping variable and visit number
    data <- id_adj(data = data, id = id)
    data <- group_adj(data = data, group = group)
    data <- visit_adj(data = data, visit = visit)

    # Date/time handling only runs when a date/time column was named
    if (!is.null(date_time)) {
      data <- date_time_adj(
        data = data,
        date_time = date_time,
        dt_fmt = dt_fmt,
        ToD_int = ToD_int,
        chron_order = chron_order,
        tz = tz
      )
    }

    # End-of-day argument: calibrate dates, only when requested
    if (!is.null(eod)) {
      data <- eod_adj(data = data, eod = eod)
    }

    # WAKE indicator and day of the week
    data <- wake_adj(data = data, wake = wake, bp_type = bp_type)
    data <- dow_adj(data = data, DoW = DoW)

    # Heart Rate (HR), Pulse Pressure (PP), Rate Pressure Product (RPP) and
    # Mean Arterial Pressure (MAP) -- in this order
    data <- hr_adj(data = data, hr = hr, data_screen = data_screen, HRUL = HRUL, HRLL = HRLL)
    data <- pp_adj(data = data, pp = pp)
    data <- rpp_adj(data = data, rpp = rpp)
    data <- map_adj(data = data, map = map)

    # When an HR column is present, place it directly after DBP
    if ("HR" %in% colnames(data)) {
      data <- dplyr::relocate(data, HR, .after = DBP)
    }

    # Optional aggregation of the data
    if (agg == TRUE) {
      data <- agg_adj(
        data = data,
        bp_type = bp_type,
        agg_thresh = agg_thresh,
        collapse_df = collapse_df
      )
    }

    # Record which BP type this data set holds
    data$BP_TYPE <- bp_type

    # BP stages. SBP / DBP were already adjusted above, so bp_stages() is
    # called with adj_sbp_dbp = FALSE.
    data <- bp_stages(
      data = data,
      bp_type = bp_type,
      sbp = sbp,
      dbp = dbp,
      inc_low = inc_low,
      inc_crisis = inc_crisis,
      data_screen = data_screen,
      SUL = SUL,
      SLL = SLL,
      DUL = DUL,
      DLL = DLL,
      adj_sbp_dbp = FALSE,
      guidelines = guidelines,
      bp_cutoffs = bp_cutoffs
    )

    # Move the classification column right after DBP. Relocating the
    # SBP_CATEGORY / DBP_CATEGORY columns is currently disabled:
    #   dplyr::relocate(SBP_CATEGORY, .after = BP_CLASS)
    #   dplyr::relocate(DBP_CATEGORY, .after = SBP_CATEGORY)
    data <- dplyr::relocate(data, BP_CLASS, .after = DBP)
  }

  # Sanity check for future additions to this function: make sure every column
  # name is still upper case on the way out
  colnames(data) <- toupper(colnames(data))

  # Hand the result back as a plain data frame
  as.data.frame(data)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.