R/modularCount.R

Defines functions modularCount

#' Discrete Counts for Three-Level Data
#' @author E. N. Aslinger
#' @param df Dataframe with nest_1, nest_2, and nest_3 columns (date/POSIX (yy-mm-dd) or numeric data must be in nest_2 and nest_3 columns)
#' @param nest_1 Column name (string) within data frame for top level (e.g. person)
#' @param nest_2 Column name (string) for nested level (e.g. day)
#' @param nest_3 Column name (string) for lowest nested level (e.g. moments). Only necessary if continuous == TRUE.
#' @param type Character vector or string to specify return of continuous and/or discrete counts. Default is c('continuous', 'discrete') to return both.
#' @return Dataframe with within-nesting unit (cycling) counts for bottom level (e.g. moments within day within person)
#' @export
#' 
modularCount <- function(df, nest_1, nest_2, nest_3 = NULL, type = c('continuous', 'discrete')) {
  
  ## Appropriate date conversions
  if (is.character(df[, nest_2])) { # if in character format, try to convert to date
    if (grepl('-', df[, nest_2][1])) format <- '%Y-%m-%d' else if (grepl('/', df[, nest_2][1])) format <- '%Y/%m/%d' else stop('Not a recognized date format.')
    df[, nest_2] <- as.POSIXct(strptime(df$date, format)) # convert to POSIX
  }
  
  ## Create lists of rows belonging to each nested level
  nest_1_rows <- sapply(unique(df[, nest_1]), function(i) which(df[, nest_1] == i)) # list of rows belonging to each level on nesting unit 1 (top)
  nest_rows <- structure(lapply(nest_1_rows, function(i) { # iterate across elements of above list (levels on nesting unit 1)
    sapply(unique(df[i, nest_2]), function(j) which(df[i, nest_2] == j)) # iterate across unit 2 levels within unit 1
  }), names = unique(df[, nest_1])) # list of lists of rows (for unit 1, then unit 2 within unit 1) named after unit 1 levels 
  
  ## Create discrete within-level counts (sequences)
  if ('DISCRETE' %in% toupper(type)) {
    df <- data.frame(df, data.table::rbindlist(sapply(1:length(nest_rows), function(i) { # iterate across nest 1 groups of rows
      data.frame(nest_1 = i, data.table::rbindlist(sapply(1:length(nest_rows[[i]]), function(j) { # iterate across groups of nest_2 rows w/i ith nest_1
        data.frame(nest_2 = nest_rows[[i]][[j]], count = 1:length(nest_rows[[i]][[j]])) # make data frame
      }, simplify = FALSE)))
    }, simplify = FALSE)))
    colnames(df)[which(colnames(df) == 'nest_1')] <- paste0(nest_1, '_discrete') # rename discrete version of variable based on user-specification of original name
    colnames(df)[which(colnames(df) == 'nest_2')] <- paste0(nest_2, '_discrete') # rename discrete version of variable based on user-specification of original name
  }
  
  ## Create continuous within-level counts
  if ('CONTINUOUS' %in% toupper(type)) {
    continuous <- data.table::rbindlist(sapply(1:length(nest_rows), function(i) { # iterate across nest 1 groups of rows
      data.frame(nest_1 = names(nest_rows)[i], # unit 1 id
                 data.table::rbindlist(sapply(1:length(nest_rows[[i]]), function(j) { # iterate across groups of nest_2 rows w/i ith nest_1
                   lagged_col <- c(df[unlist(nest_rows[[i]][[j]][1]), nest_3], df[unlist(nest_rows[[i]][[j]]), nest_3])
                   lagged_col <- lagged_col[-length(lagged_col)] # remove final time point (b/c t-1 shift)
                   n_2 <- df[nest_rows[[i]][[j]], nest_2][1] # nest 2 id
                   data.frame(nest_2 = n_2, nest_3 = (df[unlist(nest_rows[[i]][[j]]), nest_3] - lagged_col)) # continuous time difference (as vector)
                 }, simplify = FALSE)))
    }, simplify = FALSE)) # data frame of continuous time on lowest level (nest_3) and nest_1 and nest_2 columns
    colnames(continuous)[which(colnames(continuous) == 'nest_3')] <- paste0(nest_3, '_continuous') # rename continuous variable based on user-specified name
    colnames(continuous)[which(colnames(continuous) == 'nest_1')] <- 'n1' # rename to avoid redundancy with user-specification of original name
    colnames(continuous)[which(colnames(continuous) == 'nest_2')] <- 'n2' # rename to avoid redundancy with user-specification of original name
    df <- data.frame(df, continuous) # bind day-level data together
    # Ensure proper order of df merge
    if (any(as.character(df[, nest_1]) != as.character(df[, 'n1']))) {
      warning('ID mismatch in continuous data merge at level of nest_1. Note that date variables may result in spurious warning here.') 
    }
    if (any(as.character(df[, nest_2]) != as.character(df[, 'n2']))) {
      warning('Potential ID mismatch in continuous data merge at level of nest_2. Note that date variables may result in spurious warning here.')
    }
  }
  
  return(df)
  
}
enaY15/MultilevelFunctions documentation built on Aug. 22, 2020, 4:42 p.m.