#' Generate columns
#'
#' @description Searches the data frame for pairs of columns whose names are identical
#' except for specific strings that you've embedded within them--such as "T1" versus "T2"
#' or "Participant1" versus "Participant2--and then generates new columns by computing
#' sums, differences, or means between pairs of corresponding columns.
#'
#' @param df The data frame
#' @param string1,string2 Strings embedded within the names of corresponding columns
#' @param operation String to specify the operation: must be \code{"sum"}, \code{"difference"}, or
#' \code{"mean"}
#'
#' @examples
#' ## Computing differences between T1 responses and T2 responses
#' wrap.generate(df = bdata, string1 = "T1", string2 = "T2", operation = "difference")
#'
#' @import stringr
#' @importFrom varhandle check.numeric
#' @export
wrap.generate <- function(df,string1,string2,operation) {
# Error checks
if(is.null(string1)==T|is.null(string2)==T) {return("Must enter character strings for parameters string1 and string2")}
if(is.character(string1)==F|is.character(string2)==F) {return("Must enter character strings for parameters string1 and string2")}
if(operation!="difference"&operation!="sum"&operation!="mean") {return("operation must be set equal to \"difference\", \"sum\", or \"mean\".")}
if(length(string1)>1|length(string2)>1) {return("Parameters string1 and string2 must have one element each.")}
fargs <- as.list(match.call(expand.dots = TRUE)); for (i in 1:length(fargs)) {fargs[i] <- gsub("*","",fargs[i],fixed=T); fargs[i] <- gsub("-","",fargs[i],fixed=T); fargs[i] <- gsub(" ", "", fargs[i],fixed=T)}
newcolumns = 0; overwritten = 0
# Add double back-slash when the first character is a period, to ensure it searches correctly.
for (i in 1:length(string1)) {
if(substr(string1[[i]],1,1)==".") {
string1[[i]] <- paste("\\",string1[[i]],sep="")
}
}
for (i in 1:length(string2)) {
if(substr(string2[[i]],1,1)==".") {
string2[[i]] <- paste("\\",string2[[i]],sep="")
}
}
# Find matching columns and compute sums, differences, or means
if(is.null(df)==F&length(string1)==1&length(string2)==1) {
temp1 <- colnames(df)
temp2 <- gsub(string1,"",temp1)
temp3 <- gsub(string2,"",temp1)
for (i in 1:length(temp1)) {
for (j in 1:length(temp1)) {
if(temp2[i]==temp3[j]&is.numeric(df[,i])==T&is.numeric(df[,j])==T&i!=j&temp1[i]!=temp2[i]&temp1[j]!=temp3[j]) {
newcolumns = newcolumns+1
if(operation=="mean") {
if(gsub(string1,operation,temp1[i]) %in% names(df)) {overwritten <- overwritten + 1}
df[gsub(string1,operation,temp1[i])] <- (df[,i]+df[,j])/2
}
if(operation=="difference") {
if(gsub(string1,operation,temp1[i]) %in% names(df)) {overwritten <- overwritten + 1}
df[gsub(string1,operation,temp1[i])] <- (df[,i]-df[,j])
}
if(operation=="sum") {
if(gsub(string1,operation,temp1[i]) %in% names(df)) {overwritten <- overwritten + 1}
df[gsub(string1,operation,temp1[i])] <- (df[,i]+df[,j])
}
}
# If the column is not numeric, try going row by row instead
if(temp2[i]==temp3[j]&(is.numeric(df[,i])==F|is.numeric(df[,j])==F)&i!=j&temp1[i]!=temp2[i]&temp1[j]!=temp3[j]) {
newcolumns = newcolumns+1
if(gsub(string1,operation,temp1[i]) %in% names(df)) {overwritten <- overwritten + 1}
df[gsub(string1,operation,temp1[i])] <- NA
for (k in 1:nrow(df)) {
if(check.numeric(paste(df[k,i]))==T&is.na(df[k,i])==F&is.null(df[k,i])==F&df[k,i]!=""&check.numeric(paste(df[k,j]))==T&is.na(df[k,j])==F&is.null(df[k,j])==F&df[k,j]!="") {
if(operation=="mean") {
df[k,gsub(string1,operation,temp1[i])] <- (as.numeric(paste(df[k,i]))+as.numeric(paste(df[k,j])))/2
}
if(operation=="difference") {
df[k,gsub(string1,operation,temp1[i])] <- (as.numeric(paste(df[k,i]))-as.numeric(paste(df[k,j])))
}
if(operation=="sum") {
df[k,gsub(string1,operation,temp1[i])] <- (as.numeric(paste(df[k,i]))+as.numeric(paste(df[k,j])))
}
}
}
}
}
}
}
if(operation=="mean"|operation=="sum") {
print(paste("Note: Generated ",newcolumns," columns.",sep=""))
if(overwritten>0) {print("While performing this operation, note that the function overwrote one or more columns that already existed in the data frame.")}
}
if(operation=="difference") {
print(paste("Note: Generated ",newcolumns," columns. Computed differences as ",string1," minus ",string2,".",sep=""))
if(overwritten>0) {print("While performing this operation, note that the function overwrote one or more columns that already existed in the data frame.")}
}
assign(fargs[[2]],df,.GlobalEnv)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.