R/dataPrep.R

Defines functions tsData shift

shift <- function(x,n){
  length <- length(x)
  c(rep(NA,n),x)[1:length]
}

# Data preperation function:
tsData <- function(data,
                     vars,
                     beepvar,
                     dayvar,
                     idvar,
                   lags = 1,
                     scale = TRUE,
                     centerWithin = TRUE,
                   deleteMissings = TRUE){
  . <- NULL

  data <- as.data.frame(data)
  
  # Add subject:
  if (missing(idvar)){
    idvar <- "ID"
    data[[idvar]] <- 1
  }
  
  # Add day:
  if (missing(dayvar)){
    dayvar <- "DAY"
    data[[dayvar]] <- 1
  }

  # Add beepvar:
  if (missing(beepvar)){
    beepvar <- "BEEP"
    data <- data %>% dplyr::group_by(.data[[dayvar]],.data[[idvar]]) %>% 
      dplyr::mutate(BEEP = seq_len(n()))
  }
  
  # Vars:
  if (missing(vars)){
    vars <- names(data[!names(data)%in%c(idvar,dayvar,beepvar)])
  }
  
  
  # Only retain important columns:
  data <- data[,c(vars,idvar,dayvar,beepvar)]
  
  # Center and scale data:
  for (v in vars){
    data[,v] <- as.numeric(scale(data[,v], TRUE, scale))
  }
  
  # Obtain person specific means:
  MeansData <- data %>% dplyr::group_by(.data[[idvar]]) %>% dplyr::summarise_at(list(~mean(.,na.rm=TRUE)),.vars = vars)
  
  # Within-person center:
  if (centerWithin){
    # Only if N > 1 (very minimal floating point error can lead to different layout to older version otherwise)
    if (length(unique(data[[idvar]])) > 1){
      data <- data %>% dplyr::group_by(.data[[idvar]]) %>% dplyr::mutate_at(funs(scale(.,center=TRUE,scale=FALSE)),.vars = vars)          
    }
  }

  # From mlVAR: Augment data:
  # Augment the data
  augData <- data
  
  # Add missing rows for missing beeps

  
  # Check for errors in data:
  beepsummary <- data %>% group_by(.data[[idvar]],.data[[dayvar]],.data[[beepvar]]) %>% tally
  if (any(beepsummary$n!=1)){
    print_and_capture <- function(x)
    {
      paste(capture.output(print(x)), collapse = "\n")
    }
    
    warning(paste0("Some beeps are recorded more than once! Results are likely unreliable.\n\n",print_and_capture(
      beepsummary %>% filter(.data[["n"]]!=1) %>% select(.data[[idvar]],.data[[dayvar]],.data[[beepvar]]) %>% as.data.frame
    )))
  }
  
  beepsPerDay <-  dplyr::summarize(data %>% group_by(.data[[idvar]],.data[[dayvar]]), 
                                                    first = min(.data[[beepvar]],na.rm=TRUE),
                                                    last = max(.data[[beepvar]],na.rm=TRUE))
 
  # all beeps:
  allBeeps <- expand.grid(unique(data[[idvar]]),unique(data[[dayvar]]),seq(min(data[[beepvar]],na.rm=TRUE),max(data[[beepvar]],na.rm=TRUE))) 
  names(allBeeps) <- c(idvar,dayvar,beepvar)
  
  # Left join the beeps per day:

  # },  list(BEEP = as.name(beepvar))))
  allBeeps <- allBeeps %>% dplyr::left_join(beepsPerDay, by = c(idvar,dayvar)) %>% 
      dplyr::group_by(.data[[idvar]],.data[[dayvar]]) %>% dplyr::filter(.data[[beepvar]] >= .data$first, .data[[beepvar]] <= .data$last)%>%
      dplyr::arrange(.data[[idvar]],.data[[dayvar]],.data[[beepvar]])
  
  
  # Enter NA's:
  augData <- augData %>% dplyr::right_join(allBeeps, by = c(idvar,dayvar,beepvar)) %>%
    arrange(.data[[idvar]],.data[[dayvar]],.data[[beepvar]])
  
  # Obtain data_c (slice away first row per day/subject):
  data_c <- augData %>% ungroup %>% dplyr::select(all_of(vars))
  
  # Lagged datasets:
  data_l <- do.call(cbind,lapply(lags, function(l){
    data_lagged <- augData %>% dplyr::group_by(.data[[idvar]],.data[[dayvar]]) %>% dplyr::mutate_at(funs(shift),.vars = vars) %>% ungroup %>% dplyr::select(all_of(vars))
    names(data_lagged) <- paste0(vars,"_lag",l)
    data_lagged
  }))

  # # Remove rows with missings:
  if (deleteMissings){
    isNA <- rowSums(is.na(data_c)) > 0 | rowSums(is.na(data_l)) > 0
    data_c <- data_c[!isNA,]
    data_l <- data_l[!isNA,]
    
    if (nrow(data_l) == 0 || nrow(data_c) == 0){
      stop("No data or all data has been deleted")
    }
  }

  # Return datasets:
  Results <- list(
    data = augData,
    data_c = data_c[,vars],
    data_l = cbind(1,data_l),
    data_means = MeansData,
    vars=vars,
    idvar=idvar,
    dayvar=dayvar,
    beepvar=beepvar,
    lags = lags
  )
  
  class(Results) <- "tsData"
  return(Results)
}

Try the graphicalVAR package in your browser

Any scripts or data that you put into this service are public.

graphicalVAR documentation built on Oct. 18, 2023, 9:09 a.m.