Nothing
#' @title Find exam data within a given timeframe using parallel CPU computing.
#' @export
#'
#' @description Finds all, the earliest or the closest examinations to a given timepoint using parallel computing. A progress bar is also reported in the terminal to show the progress of the computation.
#'
#' @param d_from data table, the database which is searched to find examinations within the timeframe.
#' @param d_to data table, the database to which we wish to find examinations within the timeframe.
#' @param d_from_ID string, column name of the patient ID column in d_from. Defaults to \emph{ID_MERGE}.
#' @param d_to_ID string, column name of the patient ID column in d_to. Defaults to \emph{ID_MERGE}.
#' @param d_from_time string, column name of the time variable column in d_from. Defaults to \emph{time_rad_exam}.
#' @param d_to_time string, column name of the time variable column in d_to. Defaults to \emph{time_enc_admit}.
#' @param time_diff_name string, column name of the new column created which holds the time difference between the exam and the time provided by d_to. Defaults to \emph{timediff_exam_to_db}.
#' @param before boolean, should times before the given time be considered. Defaults to \emph{TRUE}.
#' @param after boolean, should times after the given time be considered. Defaults to \emph{TRUE}.
#' @param time integer, the timeframe considered between the exam and the d_to timepoints. Defaults to \emph{1}.
#' @param time_unit string, the unit of time used. Time variables in d_to and d_from are truncated to the supplied time unit.
#' For example: "2005-09-18 08:15:01 PDT" would be truncated to "2005-09-18 PDT" if \emph{time_unit} is set to days.
#' Then the time difference is calculated using \emph{difftime}, passing the argument to \emph{units}.
#' The following time units are supported: "secs", "mins", "hours", "days", "months" and "years". Defaults to \emph{days}.
#' @param multiple string, which exams to give back. \emph{closest} gives back the exam closest to the time provided by d_to.
#' \emph{all} gives back all occurrences within the timeframe. \emph{earliest} the earliest exam within the timeframe.
#' In case of ties for \emph{closest} or \emph{earliest}, all are returned. Defaults to \emph{closest}.
#' @param add_column string, a column name in d_to to add to the output. Defaults to \emph{NULL}.
#' @param keep_data boolean, whether to include empty rows with only the \emph{d_from_ID} column filled out for cases that have data in the \emph{d_from}, but not within the time range. Defaults to \emph{FALSE}.
#' @param nThread integer, number of threads to use for parallelization. If it is set to 1, then no parallel backends are created and the function is executed sequentially.
#' @param shared_RAM boolean, deprecated from version 1.1.0 onwards, only kept for compatibility, as the bigmemory package has issues running on different operating systems.
#' Now all computations are run using the memory usage specifications of the parallelization strategy.
#'
#' @return data table, with \emph{d_from} filtered to ones only within the timeframe. The columns of \emph{d_from} are returned with the corresponding time column in \emph{d_to}
#' where the rows are instances which comply with the time constraints specified by the function. An additional column specified in \emph{time_diff_name} is also returned,
#' which shows the time difference between the time column in \emph{d_from} and \emph{d_to} for that given case.
#' Also the time column from \emph{d_to} specified by \emph{d_to_time} is returned under the name of \emph{time_to_db}.
#' An additional column specified in \emph{add_column} may be added from \emph{d_to} to the data table.
#'
#' @encoding UTF-8
#'
#' @examples \dontrun{
#' #Filter encounters for first emergency visits at one of MGH's ED departments
#' data_enc_ED <- data_enc[enc_clinic == "MGH EMERGENCY 10020010608"]
#' data_enc_ED <- data_enc_ED[!duplicated(data_enc_ED$ID_MERGE)]
#'
#' #Find all radiological examinations within 3 days of the ED registration
#' rdt_ED <- find_exam(d_from = data_rdt, d_to = data_enc_ED,
#' d_from_ID = "ID_MERGE", d_to_ID = "ID_MERGE",
#' d_from_time = "time_rdt_exam", d_to_time = "time_enc_admit", time_diff_name = "time_diff_ED_rdt",
#' before = TRUE, after = TRUE, time = 3, time_unit = "days", multiple = "all",
#' nThread = 2)
#'
#' #Find earliest radiological examinations within 3 days of the ED registration
#' rdt_ED <- find_exam(d_from = data_rdt, d_to = data_enc_ED,
#' d_from_ID = "ID_MERGE", d_to_ID = "ID_MERGE",
#' d_from_time = "time_rdt_exam", d_to_time = "time_enc_admit", time_diff_name = "time_diff_ED_rdt",
#' before = TRUE, after = TRUE, time = 3, time_unit = "days", multiple = "earliest",
#' nThread = 2)
#'
#' #Find closest radiological examinations on or after 1 day of the ED registration
#' #and add primary diagnosis column from encounters
#' rdt_ED <- find_exam(d_from = data_rdt, d_to = data_enc_ED,
#' d_from_ID = "ID_MERGE", d_to_ID = "ID_MERGE",
#' d_from_time = "time_rdt_exam", d_to_time = "time_enc_admit", time_diff_name = "time_diff_ED_rdt",
#' before = FALSE, after = TRUE, time = 1, time_unit = "days", multiple = "earliest",
#' add_column = "enc_diag_princ", nThread = 2)
#'
#' #Find closest radiological examinations on or after 1 day of the ED registration
#' #but also provide empty rows for patients with exam data but not within the timeframe
#' rdt_ED <- find_exam(d_from = data_rdt, d_to = data_enc_ED,
#' d_from_ID = "ID_MERGE", d_to_ID = "ID_MERGE",
#' d_from_time = "time_rdt_exam", d_to_time = "time_enc_admit", time_diff_name = "time_diff_ED_rdt",
#' before = FALSE, after = TRUE, time = 1, time_unit = "days", multiple = "earliest",
#' add_column = "enc_diag_princ", keep_data = TRUE, nThread = 2)
#' }
find_exam <- function(d_from, d_to,
                      d_from_ID = "ID_MERGE", d_to_ID = "ID_MERGE",
                      d_from_time = "time_rad_exam", d_to_time = "time_enc_admit",
                      time_diff_name = "timediff_exam_to_db", before = TRUE, after = TRUE, time = 1, time_unit = "days",
                      multiple = "closest", add_column = NULL, keep_data = FALSE, nThread = parallel::detectCores()-1, shared_RAM = FALSE) {
  # Thin wrapper: forwards every argument except shared_RAM to find_exam_ram()
  # inside progressr::with_progress() and returns that call's result.
  #
  # shared_RAM is accepted only so that existing calls keep working; its value
  # is never read or forwarded.

  # The default, parallel::detectCores() - 1, evaluates to NA when the number
  # of cores cannot be determined and to 0 on a single-core machine. Neither
  # is a usable thread count, so fall back to a single thread in that case.
  # Non-scalar or non-numeric values are passed through untouched.
  if (length(nThread) == 1L &&
      (is.na(nThread) || (is.numeric(nThread) && nThread < 1))) {
    nThread <- 1L
  }

  # The assignment happens inside the with_progress() expression so the result
  # does not depend on what with_progress() itself returns.
  progressr::with_progress({
    out <- find_exam_ram(
      d_from = d_from, d_to = d_to,
      d_from_ID = d_from_ID, d_to_ID = d_to_ID,
      d_from_time = d_from_time, d_to_time = d_to_time,
      time_diff_name = time_diff_name,
      before = before, after = after,
      time = time, time_unit = time_unit,
      multiple = multiple, add_column = add_column,
      keep_data = keep_data, nThread = nThread
    )
  })

  out
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.