R/wrap.generate.R

Defines functions wrap.generate

Documented in wrap.generate

#' Generate columns
#'
#' @description Searches the data frame for pairs of columns whose names are identical
#' except for specific strings that you've embedded within them--such as "T1" versus "T2"
#' or "Participant1" versus "Participant2--and then generates new columns by computing
#' sums, differences, or means between pairs of corresponding columns.
#'
#' @param df The data frame
#' @param string1,string2 Strings embedded within the names of corresponding columns
#' @param operation String to specify the operation: must be \code{"sum"}, \code{"difference"}, or
#' \code{"mean"}
#'
#' @examples
#' ## Computing differences between T1 responses and T2 responses
#' wrap.generate(df = bdata, string1 = "T1", string2 = "T2", operation = "difference")
#'
#' @import stringr
#' @importFrom varhandle check.numeric
#' @export
wrap.generate <- function(df,string1,string2,operation) {

  # Error checks
  if(is.null(string1)==T|is.null(string2)==T) {return("Must enter character strings for parameters string1 and string2")}
  if(is.character(string1)==F|is.character(string2)==F) {return("Must enter character strings for parameters string1 and string2")}
  if(operation!="difference"&operation!="sum"&operation!="mean") {return("operation must be set equal to \"difference\", \"sum\", or \"mean\".")}
  if(length(string1)>1|length(string2)>1) {return("Parameters string1 and string2 must have one element each.")}

  fargs <- as.list(match.call(expand.dots = TRUE)); for (i in 1:length(fargs)) {fargs[i] <- gsub("*","",fargs[i],fixed=T); fargs[i] <- gsub("-","",fargs[i],fixed=T); fargs[i] <- gsub(" ", "", fargs[i],fixed=T)}
  newcolumns = 0; overwritten = 0

  # Add double back-slash when the first character is a period, to ensure it searches correctly.
  for (i in 1:length(string1)) {
    if(substr(string1[[i]],1,1)==".") {
      string1[[i]] <- paste("\\",string1[[i]],sep="")
    }
  }

  for (i in 1:length(string2)) {
    if(substr(string2[[i]],1,1)==".") {
      string2[[i]] <- paste("\\",string2[[i]],sep="")
    }
  }

  # Find matching columns and compute sums, differences, or means
  if(is.null(df)==F&length(string1)==1&length(string2)==1) {
    temp1 <- colnames(df)
    temp2 <- gsub(string1,"",temp1)
    temp3 <- gsub(string2,"",temp1)
    for (i in 1:length(temp1)) {
      for (j in 1:length(temp1)) {
        if(temp2[i]==temp3[j]&is.numeric(df[,i])==T&is.numeric(df[,j])==T&i!=j&temp1[i]!=temp2[i]&temp1[j]!=temp3[j]) {
          newcolumns = newcolumns+1
          if(operation=="mean") {
            if(gsub(string1,operation,temp1[i]) %in% names(df)) {overwritten <- overwritten + 1}
            df[gsub(string1,operation,temp1[i])] <- (df[,i]+df[,j])/2
          }
          if(operation=="difference") {
            if(gsub(string1,operation,temp1[i]) %in% names(df)) {overwritten <- overwritten + 1}
            df[gsub(string1,operation,temp1[i])] <- (df[,i]-df[,j])
          }
          if(operation=="sum") {
            if(gsub(string1,operation,temp1[i]) %in% names(df)) {overwritten <- overwritten + 1}
            df[gsub(string1,operation,temp1[i])] <- (df[,i]+df[,j])
          }
        }

        # If the column is not numeric, try going row by row instead
        if(temp2[i]==temp3[j]&(is.numeric(df[,i])==F|is.numeric(df[,j])==F)&i!=j&temp1[i]!=temp2[i]&temp1[j]!=temp3[j]) {
          newcolumns = newcolumns+1
          if(gsub(string1,operation,temp1[i]) %in% names(df)) {overwritten <- overwritten + 1}
          df[gsub(string1,operation,temp1[i])] <- NA
          for (k in 1:nrow(df)) {
            if(check.numeric(paste(df[k,i]))==T&is.na(df[k,i])==F&is.null(df[k,i])==F&df[k,i]!=""&check.numeric(paste(df[k,j]))==T&is.na(df[k,j])==F&is.null(df[k,j])==F&df[k,j]!="") {
              if(operation=="mean") {
                df[k,gsub(string1,operation,temp1[i])] <- (as.numeric(paste(df[k,i]))+as.numeric(paste(df[k,j])))/2
              }
              if(operation=="difference") {
                df[k,gsub(string1,operation,temp1[i])] <- (as.numeric(paste(df[k,i]))-as.numeric(paste(df[k,j])))
              }
              if(operation=="sum") {
                df[k,gsub(string1,operation,temp1[i])] <- (as.numeric(paste(df[k,i]))+as.numeric(paste(df[k,j])))
              }
            }
          }
        }
      }
    }
  }

  if(operation=="mean"|operation=="sum") {
    print(paste("Note: Generated ",newcolumns," columns.",sep=""))
    if(overwritten>0) {print("While performing this operation, note that the function overwrote one or more columns that already existed in the data frame.")}
  }
  if(operation=="difference") {
    print(paste("Note: Generated ",newcolumns," columns. Computed differences as ",string1," minus ",string2,".",sep=""))
    if(overwritten>0) {print("While performing this operation, note that the function overwrote one or more columns that already existed in the data frame.")}
  }
  assign(fargs[[2]],df,.GlobalEnv)
}
michaelkardas/behavioralwrappers documentation built on Jan. 2, 2020, 7:46 a.m.