split_by <- function(x, split = NULL,
factors = NULL,
split.mode = c('minor', 'minor.equal', 'major.equal')
) {
g.name <- as.character(substitute(x)[[3]])
if(!is.null(factors)) {
if (length(factors) != 2)
{stop("\n\nYou must name exactly two factors, otherwise let me do it for you ;-)")}
low.factor <- factors[1]
high.factor <- factors[2]
} else {
low.factor <- as.character(paste('low.', g.name, sep = ''))
high.factor <- paste('high.', g.name, sep = '')
}
name <- NA
if(is.null(split) || split == 'median') {
if(missing(split.mode)) {
name[x <= median(x, na.rm = T)] <- low.factor
name[x > median(x, na.rm = T)] <- high.factor
} else
if(split.mode == 'minor.equal') {
name[x <= median(x, na.rm = T)] <- low.factor
name[x > median(x, na.rm = T)] <- high.factor
} else
if(split.mode == 'minor') {
name[x < median(x, na.rm = T)] <- low.factor
name[x > median(x, na.rm = T)] <- high.factor
} else
if(split.mode == 'major.equal') {
name[x < median(x, na.rm = T)] <- low.factor
name[x >= median(x, na.rm = T)] <- high.factor
} else {stop("\n\nYou should define how the split has to be performed, otherwise let me do it for you ;-)")}
}
if(is.numeric(split)) {
if(missing(split.mode)) {
name[x <= split] <- low.factor
name[x > split] <- high.factor
} else
if(split.mode == 'minor.equal') {
name[x <= split] <- low.factor
name[x > split] <- high.factor
} else
if(split.mode == 'minor') {
name[x < split] <- low.factor
name[x > split] <- high.factor
} else
if(split.mode == 'major.equal') {
name[x < split] <- low.factor
name[x >= split] <- high.factor
} else {stop("\n\nYou should define how the split has to be performed, otherwise let me do it for you ;-)")}
}
if(split == 'mean') {
if(missing(split.mode)) {
name[x <= mean(x, na.rm = T)] <- low.factor
name[x > mean(x, na.rm = T)] <- high.factor
} else
if(split.mode == 'minor.equal') {
name[x <= mean(x, na.rm = T)] <- low.factor
name[x > mean(x, na.rm = T)] <- high.factor
} else
if(split.mode == 'minor') {
name[x < mean(x, na.rm = T)] <- low.factor
name[x > mean(x, na.rm = T)] <- high.factor
} else
if(split.mode == 'major.equal') {
name[x < mean(x, na.rm = T)] <- low.factor
name[x >= mean(x, na.rm = T)] <- high.factor
} else {stop("\n\nYou should define how the split has to be performed, otherwise let me do it for you ;-)")}
}
if(split == 'fivenum') {
if(!is.null(factors)) {stop('\n\nLet me select your factor names for you ;-)')}
if(!missing(split.mode)) {stop("\n\n No way to select 'split.mode' in here")}
g1 <- paste(g.name, '_1', sep = '')
g2 <- paste(g.name, '_2', sep = '')
g3 <- paste(g.name, '_3', sep = '')
g4 <- paste(g.name, '_4', sep = '')
name <- symnum(x, legend = F, corr = FALSE, na = FALSE,
cutpoints = c(fivenum(x, na.rm = TRUE)[1],
fivenum(x, na.rm = TRUE)[2],
fivenum(x, na.rm = TRUE)[3],
fivenum(x, na.rm = TRUE)[4],
fivenum(x, na.rm = TRUE)[5]),
symbols = c(g1, g2, g3, g4))
}
name <- as.factor(name)
return(name)
}
# split_by(mtcars$mpg, 'median')
# split_by(mtcars$mpg, 'mean')
# split_by(mtcars$mpg, 0, factors = c('jhg', 'kjhg'))
# split_by(mtcars$mpg, split = 'fivenum')
# split_by(mtcars$mpg , factors = c('belowMedian', 'aboveMedian'))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.