#' Extract random subset of a dataframe
#'
#' Currently only works with data with 2-levels. Notes that if the size of the dataframe is too small to accomodate n.sample then with.rep is set to TRUE.
#'
#' @param dat R dataframe to randomly subset
#' @param my.code Unique code, such as your email address. Must contain only letters and numbers and not the at symbol or other punctuation.
#' @param cat.var Categorical variable by which to split the data up
#' @param n.sample Number of samples per group to randomly select. 2*n.sample will be the total size of the dataframe.
#' @param with.rep Should sampling be with replacement? If with.rep = TRUE then individuals can occur more than once within the dataframe.
#'
#'
#' @examples
#' #Subset frogarms data
#' data(frogarms)
#' my.frogs <- make_my_data2L(dat = frogarms,
#' my.code = "nlb24",
#' cat.var = "sex",
#' n.sample = 20,
#' with.rep = FALSE)
#'
#' summary(my.frogs)
#'
#' @export
make_my_data2L <- function(dat,
my.code = "nlb24",
cat.var,
n.sample = 20,
with.rep = FALSE){
# warning("Codes should only contain letters and numbers")
#
# if(n.sample*2 > nrow(dat)){
# warning("\n WARNING: number of samples requested per group exceeds size of dataframe.
# \n Setting with.rep == TRUE will alleviate this.")
# }
#split string into character vector
my.code <- unlist(strsplit(my.code,split = ""))
#extract any numeric parts of code
i.num <- grep("[01-9]",my.code)
my.letters <- my.code[-i.num]
my.numbers <- as.numeric(my.code[i.num])
#turn letters to numeric position in alaphabet
my.letters <- match(my.letters,letters)
#create vector of numbers
my.code <- c(my.letters,my.numbers)
#ceate unique variable for seet
my.seed <- (min(my.code, na.rm = TRUE)+1)^max(my.code, na.rm = TRUE)
# warning("\nYour special code is ", my.seed)
# warning("\n(You don't really need to know this, though).", my.seed)
#set seed
set.seed(seed = my.seed)
# warning("\n")
# warning("NOTE: This function only works properly for data with TWO levels to the categorical var.")
# warning("\n eg male vs. female; it doesn't work for >2 levels (eg red vs blue vs. green)")
#extract levels of categorical varibale
cat.var.1 <- levels(dat[,cat.var])[1]
#define 1st subgroup
i.group1 <- which(dat[,cat.var] == cat.var.1)
#create numeric row index if it doesn't already exist
dat$i.rows.working <- 1:nrow(dat)
#extract groups
group1 <- sample(dat$i.rows.working[i.group1],size = n.sample,replace = with.rep)
group2 <- sample(dat$i.rows.working[-i.group1],size = n.sample,replace = with.rep)
#create new dataframe
my.dat <- rbind(dat[group1,],
dat[group2,])
return(my.dat)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.