R/dog_combine_accession_numbers.R

Defines functions dog_combine_accession_numbers

Documented in dog_combine_accession_numbers

#' Combines a ID variable
#'
#' If some ID variable, for the same person are within a specified distance they get combined into one.
#'
#' @param df A data frame, which is in long format i.e. multiple rows for every Accession number.
#' @param Accession_number An ID for the service.
#' @param Study_date_time_PACS Timestamp for the service. Must be as a datetime format.
#' @param CPR ID for the individual person.
#' @param time_passed Ho much time i minuttes are allowed to pass between the timestamps.
#'
#' @return A data frame with the original columns and some new columns:
#' \itemize{
#' \item \code{diff:} Tests if the conditin if satisfied.
#' \item \code{diff2:} Tests if timestamp is the same as the one above, i.e. if an accession number has multiple rows.
#' \item \code{diff3:} A combination of diff and diff2.
#' \item \code{Accession_number2:} This is the important column and we can count this instead of Accession_number.
#'  }
#' @export
#' @importFrom rlang .data
#'
#' @examples
#' dog_combine_accession_numbers(df = rispacs_left, CPR = Patient_RK, time_passed = 30)
dog_combine_accession_numbers <- function(df, Accession_number = Accession_number,
                                          Study_date_time_PACS = Study_date_time_PACS,
                                          CPR,
                                          time_passed = 60) {

  Accession_number <- rlang::enquo(Accession_number)
  Study_date_time_PACS <- rlang::enquo(Study_date_time_PACS)
  CPR <- rlang::enquo(CPR)


  out <-  df %>%
  #select(Patient_RK, Accession_number, Study_date_time_PACS ) %>%
  dplyr::group_by(!!CPR) %>%
  dplyr::arrange(!!CPR, !!Study_date_time_PACS, .by_group=TRUE) %>%
  dplyr::mutate(

    diff = dplyr::case_when(
    as.numeric(difftime(!!Study_date_time_PACS, dplyr::lag(!!Study_date_time_PACS), units="mins")) <= time_passed
    & !!Study_date_time_PACS != lag(!!Study_date_time_PACS) ~ 1,
    TRUE ~ 0),

    diff2 = dplyr::case_when(
      !!Study_date_time_PACS ==lag(!!Study_date_time_PACS)  ~ 1,
      TRUE ~ 0),

    diff3 = dplyr::case_when(
      diff==1 ~ 1,
     (diff2 == 1 & lag(diff) == 1) | (diff2 == 1 & lag(diff, n =2L) == 1) | (diff2 == 1 & lag(diff, n =3L) == 1) ~ 1,
      TRUE ~ 0),

    Accession_number2 = dplyr::case_when(
      diff3 == 0  ~ !!Accession_number,
      diff3== 1   ~ NA_character_,
      TRUE ~ !!Accession_number)) %>%
  # tidyr::fill(.data$Accession_number2, .direction = "down") %>%
  dplyr::ungroup() %>%
  dplyr::arrange(!!CPR, !!Study_date_time_PACS) %>%
  dplyr::select(!!Accession_number, .data$Accession_number2, !!Study_date_time_PACS, !!CPR, .data$diff, .data$diff2, .data$diff3, dplyr::everything() )

  out$Accession_number2 <-  zoo::na.locf(out$Accession_number2)

  return(out)

}

# a <- df %>%
# group_by(grp = data.table::rleid(df$diff)) %>%
# filter(n() >= 1 & all(diff == 1)) %>%
# ungroup()
# df[df$cpr %in% a$cpr,]
davidbaniadam/rispacs documentation built on Nov. 4, 2019, 9:43 a.m.