shift <- function(x,n){
length <- length(x)
c(rep(NA,n),x)[1:length]
}
# Data preperation function:
tsData <- function(data,
vars,
beepvar,
dayvar,
idvar,
lags = 1,
scale = TRUE,
centerWithin = TRUE,
deleteMissings = TRUE){
. <- NULL
data <- as.data.frame(data)
# Add subject:
if (missing(idvar)){
idvar <- "ID"
data[[idvar]] <- 1
}
# Add day:
if (missing(dayvar)){
dayvar <- "DAY"
data[[dayvar]] <- 1
}
# Add beepvar:
if (missing(beepvar)){
beepvar <- "BEEP"
data <- data %>% dplyr::group_by(.data[[dayvar]],.data[[idvar]]) %>%
dplyr::mutate(BEEP = seq_len(n()))
}
# Vars:
if (missing(vars)){
vars <- names(data[!names(data)%in%c(idvar,dayvar,beepvar)])
}
# Only retain important columns:
data <- data[,c(vars,idvar,dayvar,beepvar)]
# Center and scale data:
for (v in vars){
data[,v] <- as.numeric(scale(data[,v], TRUE, scale))
}
# Obtain person specific means:
MeansData <- data %>% dplyr::group_by(.data[[idvar]]) %>% dplyr::summarise_at(list(~mean(.,na.rm=TRUE)),.vars = vars)
# Within-person center:
if (centerWithin){
# Only if N > 1 (very minimal floating point error can lead to different layout to older version otherwise)
if (length(unique(data[[idvar]])) > 1){
data <- data %>% dplyr::group_by(.data[[idvar]]) %>% dplyr::mutate_at(funs(scale(.,center=TRUE,scale=FALSE)),.vars = vars)
}
}
# From mlVAR: Augment data:
# Augment the data
augData <- data
# Add missing rows for missing beeps
# Check for errors in data:
beepsummary <- data %>% group_by(.data[[idvar]],.data[[dayvar]],.data[[beepvar]]) %>% tally
if (any(beepsummary$n!=1)){
print_and_capture <- function(x)
{
paste(capture.output(print(x)), collapse = "\n")
}
warning(paste0("Some beeps are recorded more than once! Results are likely unreliable.\n\n",print_and_capture(
beepsummary %>% filter(.data[["n"]]!=1) %>% select(.data[[idvar]],.data[[dayvar]],.data[[beepvar]]) %>% as.data.frame
)))
}
beepsPerDay <- dplyr::summarize(data %>% group_by(.data[[idvar]],.data[[dayvar]]),
first = min(.data[[beepvar]],na.rm=TRUE),
last = max(.data[[beepvar]],na.rm=TRUE))
# all beeps:
allBeeps <- expand.grid(unique(data[[idvar]]),unique(data[[dayvar]]),seq(min(data[[beepvar]],na.rm=TRUE),max(data[[beepvar]],na.rm=TRUE)))
names(allBeeps) <- c(idvar,dayvar,beepvar)
# Left join the beeps per day:
# }, list(BEEP = as.name(beepvar))))
allBeeps <- allBeeps %>% dplyr::left_join(beepsPerDay, by = c(idvar,dayvar)) %>%
dplyr::group_by(.data[[idvar]],.data[[dayvar]]) %>% dplyr::filter(.data[[beepvar]] >= .data$first, .data[[beepvar]] <= .data$last)%>%
dplyr::arrange(.data[[idvar]],.data[[dayvar]],.data[[beepvar]])
# Enter NA's:
augData <- augData %>% dplyr::right_join(allBeeps, by = c(idvar,dayvar,beepvar)) %>%
arrange(.data[[idvar]],.data[[dayvar]],.data[[beepvar]])
# Obtain data_c (slice away first row per day/subject):
data_c <- augData %>% ungroup %>% dplyr::select(all_of(vars))
# Lagged datasets:
data_l <- do.call(cbind,lapply(lags, function(l){
data_lagged <- augData %>% dplyr::group_by(.data[[idvar]],.data[[dayvar]]) %>% dplyr::mutate_at(funs(shift),.vars = vars) %>% ungroup %>% dplyr::select(all_of(vars))
names(data_lagged) <- paste0(vars,"_lag",l)
data_lagged
}))
# # Remove rows with missings:
if (deleteMissings){
isNA <- rowSums(is.na(data_c)) > 0 | rowSums(is.na(data_l)) > 0
data_c <- data_c[!isNA,]
data_l <- data_l[!isNA,]
if (nrow(data_l) == 0 || nrow(data_c) == 0){
stop("No data or all data has been deleted")
}
}
# Return datasets:
Results <- list(
data = augData,
data_c = data_c[,vars],
data_l = cbind(1,data_l),
data_means = MeansData,
vars=vars,
idvar=idvar,
dayvar=dayvar,
beepvar=beepvar,
lags = lags
)
class(Results) <- "tsData"
return(Results)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.