#Data frame manipulation functions
#' @title Paste that removes NAs
#'
#' @description Use this Function to ignore NAs in paste
#'
#' @name paste2
#'
#' @param sep character to separate paste2 arguments
#'
#' @return Character string
#'
#' @source https://stackoverflow.com/questions/13673894/suppress-nas-in-paste
#'
#' @export
paste2 <- function(...,sep=", ") {
L <- list(...)
L <- lapply(L,function(x) {x[is.na(x)] <- ""; x})
gsub(paste0("(^",sep,"|",sep,"$)"),"",
gsub(paste0(sep,sep),sep,
do.call(paste,c(L,list(sep=sep)))))
}
#' @title Index or Column name
#'
#' @description Generally internal funciton mostly used to see if vector
#' passed specifies column by character name or index
#'
#' @name index.o.coln
#'
#' @param vec vector we are testing
#' @param v.size expected size of vec
#' @param v.name argument name of vec in its function call
#' @param name.col name of all column vectors
#'
#' @return integer vector of size v.size
#'
#' @export
index.o.coln <- function(vec, v.size, v.name, name.col) {
if(length(vec)!=v.size) {
stop(paste(v.name,"must be of length 1", sep = " "))
}
if(is.character(vec)|is.numeric(vec)) {
if(is.character(vec)){
index <- vector("integer", length = v.size)
for(i in 1:v.size) {
if(any(vec[i]==name.col)) {
index[i] <- which(vec[i]==name.col)
} else {
stop(paste("ERROR: at least 1 element of",v.name,"does not match the column names of df",sep = " "))
}
}
}
if(is.numeric(vec)){
if(all((vec%%1)==0)){
index <- vec
} else {
stop(paste("ERROR:",v.name,"was not given whole integers", sep = " "))
}
}
} else {
stop(paste(v.name,"de.atomic must be character vector of column name or column index",sep = " "))
}
return(index)
}
#' @title Find positions of a particular pattern.
#'
#' @description designed to find the position of a particular character
#' in a string. Useful with substr function
#'
#' @name char.position
#'
#' @param x Character vector to search through
#' @param pattern character string that the function will attempt to
#' find in x
#' @param position positive or negative integer used to modulate the position
#' returned if pattern is found in x, char.position returns the exact position by
#' default. -1 returns position before pattern and 1 returns position after pattern.
#' @param instance positive integer used to modulate which instance of the pattern
#' is returned. Default is first instance. Assign "last" to return last instance in
#' string
#'
#' @return Number vector of position of pattern found, returns NA if not found
#'
#' @export
char.position <- function(x, pattern, position = 0, instance = 1){
if(!is.character(x)){
stop("x must be a character")
}
if(!is.character(pattern)) {
stop("pattern must be a character")
}
if(!is.numeric(position) && ((position %% 1)!=0)) {
stop("position must be an integer")
}
r <- nchar(x)
pat.length <- nchar(pattern)
inst <- instance
if(!is.numeric(inst)){
if(is.character(inst) && inst != "last") {
stop("instance must be a positive integer or be the string \"last\"")
} else {
inst <- max(r)
}
} else{
if((inst %% 1) !=0 || inst <= 0) {
stop("instance must be a positive integer or be the string \"last\"")
}
}
pos <- matrix(nrow = length(x), ncol = max(r))
for(j in 1:length(x)){
b <- 0
c <- 0
a <- r[j]
for(i in 1:a) {
if(substr(x[j],i,(i+pat.length-1)) == pattern) {
b <- b + 1
if(((position + i) <= 0)|((position + i) >= a)) {
if((position + i) <= 0) {
pos[j,c(b:max(r))] <- 1
}
if((position + i) >= a) {
pos[j,c(b:max(r))] <- a
}
c <- 1
} else{
pos[j, c(b:max(r))] <- i + position
c <- 1
}
} else {
if(c == 0){
pos[j] <- NA
}
}
}
}
p <- pos[,inst]
return(p)
}
#' @title Find Common Column Names
#'
#' @description Used to find common column names of two data frames.
#' Useful for large data sets
#'
#' @name commoncol
#'
#' @param x data frame
#' @param y data frame
#' @param join Logical, augments the return. Default set to FALSE. Still
#' a work in progress
#'
#' @return If join = FALSE this will return character string vector of the
#' common names. If join = TRUE this will try to return character string in
#' format used for innerjoin.
#'
#' @export
commoncol <- function(x, y, join=FALSE) {
if(!is.data.frame(x)) {
stop("x must be a data frame")
}
if(!is.data.frame(y)){
stop("y must be a data frame")
}
if(!is.logical(join)){
stop("join must be logical")
}
xcolnames <- colnames(x)
ycolnames <- colnames(y)
x.n <- length(xcolnames)
y.n <- length(ycolnames)
if(x.n<=y.n){
n <- x.n
small <- xcolnames
large <- ycolnames
} else {
n <- y.n
small <- ycolnames
large <- xcolnames
}
vec <- rep(NA, n)
crctn <- 0
for(i in 1:n) {
if(any(small[i]==large)){
vec[i-crctn] <- small[i]
} else{
crctn <- crctn+1
}
}
m <- n-crctn
vec <- vec[1:m]
if(join==TRUE){
new.vec <- rep(NA,length(vec))
for(i in 1:length(vec)){
new.vec[i] <- paste(vec[i],"=",vec[i], sep ="")
}
return(new.vec)
}else {
return(vec)
}
}
#' @title Make data frame atomic
#'
#' @description Used when one column has nested values that
#' make the data frame not atomic
#'
#' @name make.atomic
#'
#' @param df data frame
#' @param de.atomic Index of the column or character column
#' name to make atomic
#' @param sep character pattern delimiter for nested factors
#' @param new.name Name of the atomic column
#'
#' @return denested data frame
#'
#' @export
make.atomic <- function(df, de.atomic, sep, new.name = "atomic") {
if(!is.character(new.name)) {
stop("new.name must be a character vector")
}
name.col <- colnames(df)
index <- index.o.coln(vec = de.atomic,v.size = 1,v.name = "de.atomic",name.col = name.col)
df <- df %>%
mutate(atomic=(strsplit(as.character(df[,name.col[index]]),split = sep))) %>%
unnest(atomic)
colnames(df) <- c(name.col, new.name)
return(df)
}
#' @title Mode Type
#'
#' @description Find the mode of the variables of a df as
#' dictated by str
#'
#' @name mode.type
#'
#' @param df data frame
#'
#' @return Return character vector of mode types
#'
#' @export
mode.type <- function(df) {
output <- capture.output(str(df))
output <- output[-1]
output <- substr(x = output,
start = justinmisc::char.position(x = output,
pattern = ":",
position = 2,
instance = 1),
stop = str_length(output))
output <- substr(x = output,
start = 1,
stop = justinmisc::char.position(x = output,
pattern = " ",
position = -1,
instance = 1))
return(output)
}
#' @title correct mode types
#'
#' @description Function to correct modes after df
#' manipulation changes columns to characters
#'
#' @name correct.mode
#'
#' @param df data frame
#' @param mode.vec designed to take output of function mode.type(),
#' mode.vec must be one of the following: "num","int","Factor",
#' "cplx","chr","logi","raw", "ignore". Mode type is changed by index.
#' @param space_replace character to replace " " characters in column names.
#'
#' @return df with mode types
#'
#' @export
correct.mode <- function(df, mode.vec, space_replace = "_") {
if(ncol(df)!=length(mode.vec)) {
stop("ERROR: number of data frame columns does not equal length of mode.vec ")
}
modes <- c("num","int","Factor","cplx","chr","logi","raw","ignore")
for(j in 1:(length(mode.vec))) {
if(!any(mode.vec[j]==modes)) {
stop("mode.vec must be one of the following: \"num\",\"int\",\"Factor\",\"cplx\",\"chr\",\"logi\",\"raw\", \"ignore\"")
}
}
cnames <- colnames(df)
if(any(str_detect(cnames, " "))) {
warning("Colnames contain spaces, replaceing spaces with \"_\"")
cnames <- gsub(" ", space_replace, cnames)
colnames(df) <- cnames
}
for(i in 1:(ncol(df))){
if(mode.vec[i]=="num") {
eval(parse(text = paste("df$",colnames(df)[i],
" <- as.numeric(as.character(",
"df$",colnames(df)[i],
"))",
sep = "")))
next
}
if(mode.vec[i]=="int") {
eval(parse(text = paste("df$",colnames(df)[i],
" <- as.integer(as.character(",
"df$",colnames(df)[i],
"))",
sep = "")))
next
}
if(mode.vec[i]=="Factor") {
eval(parse(text = paste("df$",colnames(df)[i],
" <- as.factor(as.character(",
"df$",colnames(df)[i],
"))",
sep = "")))
next
}
if(mode.vec[i]=="chr") {
eval(parse(text = paste("df$",colnames(df)[i],
" <- as.character(",
"df$",colnames(df)[i],
")",
sep = "")))
next
}
if(mode.vec[i]=="cplx") {
eval(parse(text = paste("df$",colnames(df)[i],
" <- as.cplx(as.character(",
"df$",colnames(df)[i],
"))",
sep = "")))
next
}
if(mode.vec[i]=="logi") {
eval(parse(text = paste("df$",colnames(df)[i],
" <- as.logical(as.character(",
"df$",colnames(df)[i],
"))",
sep = "")))
next
}
if(mode.vec[i]=="raw") {
eval(parse(text = paste("df$",colnames(df)[i],
" <- as.raw(as.character(",
"df$",colnames(df)[i],
"))",
sep = "")))
next
}
}
return(df)
}
#' @title reshape count data into frequency data
#'
#' @description Change count data into frequency data by repeating
#' lines of data frame via the count column.
#'
#' @name count.to.frequency
#'
#' @param df data frame
#' @param count Column vector name (character) or index (integer)
#' that specifies count data
#' @param drop.na Logical: Default set to FALSE for speed, NA may
#' affect output thus set to TRUE if unsure.
#'
#' @return df with mode types
#'
#' @export
count.to.frequency <- function(df,count,drop.na = FALSE){
count <- index.o.coln(vec = count,
v.size = 1,
v.name = "count",
name.col = colnames(df))
if(drop.na==TRUE){
df <- df[!is.na(df[,count]),]
}
df.work <- df[,-count]
df.mode <- mode.type(df = df.work)
count <- df[,count]
if(any(is.na(count))){
stop("count vector cannot contain NA")
}
if(sum(count%%1)>=(1e-15*length(count))){
stop("count vector must have only integers")
}
coln <- colnames(df.work)
df.new <- rep(df.work[,coln[1]], count )
for(i in 2:length(coln)){
vec <- rep(as.character(df.work[,coln[i]]), count)
df.new <- cbind(df.new, vec)
}
colnames(df.new) <- coln
df.new <- as.data.frame(df.new)
df.new <- correct.mode(df = df.new, mode.vec = df.mode)
return(df.new)
}
#' @title Reorder factors according to simple funciton
#'
#' @description Will reorder factors based off of a group subset
#'
#' @name reorderFactor.by
#'
#' @param data data frame
#' @param factor.to.order column name or index of factors that will be reordered
#' @param factor.col column name or index that contains 'by.factor'
#' @param by.factor character name of factor in factor.col that data will
#' be subsetted for calculations and group desired for order
#' @param FUN Simple function that takes a vector of numbers and returns a single
#' output. Default set to NULL, must assign a function if group.by is not NULL.
#' @param applied.on Numeric vector used as inputs to FUN. Will ultamitly
#' determine order of factor.to.order
#' @param group.by column names or indexes that further groups by.factor subset to
#' which FUN is applied. Default set to NULL.
#' @param decreasing logical, default set to FALSE. Determines if order is increasing
#' or decreasing.
#'
#' @return returns data with factor.to.order reordered by FUN of by.factor subsetted through group.by.
#'
#' @export
reorderFactor.by <- function(data, factor.to.order,factor.col, by.factor, FUN=NULL, applied.on,group.by=NULL, decreasing = FALSE) {
df <- data
names <- colnames(df)
index.t.o <- index.o.coln(vec = factor.to.order, v.size = 1, v.name = "factor.to.order", name.col = names)
index.f <- index.o.coln(vec = factor.col, v.size = 1, v.name = "factor.col", name.col = names)
index.on <- index.o.coln(vec = applied.on, v.size = 1, v.name = "applied.on", name.col = names)
if(!is.character(by.factor)){
stop("by.factor must be a character")
}
if(!is.factor(df[,index.t.o])) {
stop("factor.to.order must be a factor vector")
}
if(!is.numeric(df[,index.on])) {
stop("applied.on must be a numeric vector")
}
a <- df[df[,index.f]==by.factor,]
if(nrow(a)==0){
stop(paste("by.factor was not found in",names[index.f], sep = " "))
}
a$temp.vector.ignore <- a[,index.on]
if(!is.null(group.by)){
if(is.null(FUN)) {
stop("Please assign a function when using group.by")
}
index.gb <- index.o.coln(vec = group.by, v.size = length(group.by), v.name = "group.by", name.col = names)
a$id <- interaction(a[,index.gb])
factor.list <- levels(a$id)
a.build <- subset(a, id %in% factor.list[1])
a.build <- a.build %>%
mutate(new.order=FUN(a.build$temp.vector.ignore))
for(i in 2:length(factor.list)){
a.work <- subset(a, id %in% factor.list[i])
a.work <- a.work %>%
mutate(new.order=FUN(as.numeric(a.work$temp.vector.ignore)))
a.build <- rbind(a.build, a.work)
}
} else {
a.build <- a
a.build$new.order <- a[,index.on]
}
df[,index.t.o] <- factor(df[,index.t.o], levels = unique(a.build[order(a.build$new.order, decreasing = decreasing),index.t.o]))
return(df)
}
#' @name merge_multi
#'
#' @title merge_multi
#'
#' @description extends merge function to 3 or more vectors
#'
#' @param ... vectors to be merged. argument names become column names in returned value.
#' If no arguments specified, default column names set to LETTERS set.
#'
#' @return data frame of combinded vectors.
#'
#' @examples
#'
#' merge_multi(x = c(1,2,3), y = c(4,5,6), z = c(7,8,9))
#' merge_multi(c(1,2,3),c(4,5,6),c(7,8,9))
#'
#' @export
merge_multi <- function(...) {
z <- list(...)
modes <- mode.type(z)
if(is.null(names(z))) {
name <- letters[seq(1,length(z))]
} else {
name <- names(z)
}
z.new <- interaction(merge(z[[1]],z[[2]]),sep = ",")
if(length(z)>2) {
for(i in 3:(length(z))) {
z.new <- interaction(merge(z.new,z[[i]]),sep = ",")
}
}
z.new <- as.data.frame(z.new)
colnames(z.new) <- "combind"
new <- separate(z.new,col = "combind",into = name,sep = ",")
new <- correct.mode(df = new,mode.vec = modes)
return(new)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.