#' A function to create stacked density plot using ggplot
#'
#'
#' @param data Data frame containing columns for density calculation.
#' @param densityColNames The column names of those passed to density calculation.
#' @param stackFactor A factor to create the stacks, length equalt to number of rows in data.
#' @param stackColor Default use ggplot2 gg_color_hue colour palette, but can be specified manually.
#' @param kernel kernel applied for density calculation, includes "gaussian", "epanechnikov", "rectangular", "triangular", "biweight", "cosine", "optcosine".
#' @param bw The smoothing bandwidth to be used. The default, "nrd0", has remained the default for historical and compatibility reasons, rather than as a general recommendation, where e.g., "SJ" would rather fit.
#' @param adjust The bandwidth used is actually adjust*bw.
#' @param reomoveOutliers If coerce the outliers to 1.5*IQR level.
#' @param stackRotation How many degrees to rotate the density stacks.
#' @param stackSeperation Adjust the stack height intervels, use "auto" for automatic adjustment; or specify a value form 0 to 1 times the median height of to all density plots.
#' @param x_text_size Adjust the text size on x ticks.
#' @param strip_text_size Adjust the text size on strip.
#' @param legend_text_size Adjust the text size on legend.
#' @param legendRow The number of rows of the legend.
#' @param legend_title The title of the legend.
#' @param legend_title_size The size of the legend title.
#' @param legend_key_size Adjust the size of the legend key.
#'
#' @import ggplot2
#' @importFrom plyr ldply
#' @importFrom reshape2 melt
#'
#' @return a ggplot object
#' @export
#'
#' @examples
#' data <- iris[ ,1:4]
#' densityColNames <- colnames(data)
#' stackFactor <- iris[ ,5]
#' stackedDenistyPlot(data, densityColNames, stackFactor, strip_text_size = 9, legend_text_size = 0.8, legend_key_size = 0.15)
stackedDenistyPlot <- function(data, densityColNames, stackFactor,
stackColor,
kernel = c("gaussian", "epanechnikov", "rectangular",
"triangular", "biweight",
"cosine", "optcosine"),
bw = "nrd0", adjust = 1,
reomoveOutliers = FALSE,
stackRotation = 0,
stackSeperation = "auto",
scaleHeight = FALSE,
scaleWidth = FALSE,
x_text_size = 2,
strip_text_size = 8,
legend_text_size = 0.8,
legendRow = 1,
legend_title = "stackName",
legend_title_size = 1,
legend_key_size = 0.15){
if(missing(stackFactor)){
warning("No stackFactor was provided!")
stackFactor <- rep("stack", length = nrow(data))
}else if(length(stackFactor) != nrow(data)){
stop("Length of stackFactor unequal row number of input data!")
}
if(missing(densityColNames)){
densityColNames <- colnames(data)
}else if(any(!(densityColNames %in% colnames(data)))){
stop("Unmatch densityColNames found:", paste(densityColNames[!(densityColNames %in% colnames(data))], collapse = " "))
}
kernel <- match.arg(kernel)
if(!is.numeric(stackRotation)){
stop("stackRotation must be a numeric number")
}else if(stackRotation < 0 || stackRotation > 90){
stop("stackRotation must be a numeric number in range 0-90")
}
stackCount <- length(unique(stackFactor))
densityCount <- length(densityColNames)
data <- data.frame(data[ ,densityColNames, drop=FALSE],
stackFactor = stackFactor,
check.names = FALSE)
densData <- densityCal(data, kernel = kernel, bw = bw, adjust = adjust,
scaleHeight = scaleHeight, scaleWidth = scaleWidth,
reomoveOutliers = reomoveOutliers)
## dataframe densData contains {stackName, x , y , densityName}
xStat <- aggregate(x ~ stackName + densityName, densData, max)
yStat <- aggregate(y ~ stackName + densityName, densData, max)
if(stackSeperation == "auto"){
stackIntervals <- aggregate(y ~ densityName, yStat, function(x){0.8*median(x) * (1-(stackRotation/90)^0.2)^2})
}else if(stackSeperation < 0 || stackSeperation > 1){
stop("stackSeperation must be value in range 0-1")
}else{
stackIntervals <- aggregate(y ~ densityName, yStat, function(x){median(x)*stackSeperation})
}
stackShifts <- aggregate(x ~ densityName, xStat, function(x){max(x) * (stackRotation/90)})
densData$stack_x <- densData$x + (as.numeric(densData$stackName)-1) * stackShifts$x[match(densData$densityName, stackShifts$densityName)]
densData$stack_y <- densData$y + (as.numeric(densData$stackName)-1) * stackIntervals$y[match(densData$densityName, stackIntervals$densityName)]
## segment lines, x tick, x label
alignSegments <- ldply(split(densData$x, densData$densityName),
function(x){seq(min(x), max(x), length.out=5)},
.id = "densityName")
alignSegments <- melt(alignSegments, id.vars="densityName", variable.name="x_tick", value.name = "x")
alignSegments$y <- min(densData$y)
alignSegments$xend <- alignSegments$x + (length(unique(densData$stackName))-1) * stackShifts$x[match(alignSegments$densityName, stackShifts$densityName)]
alignSegments$yend <- min(densData$y) + (length(unique(densData$stackName))-1) * stackIntervals$y[match(alignSegments$densityName, stackIntervals$densityName)]
densityHeights <- aggregate(y ~ densityName, yStat, max)
alignSegments$tickXend <- alignSegments$x
alignSegments$tickYend <- alignSegments$y - densityHeights$y[match(alignSegments$densityName, densityHeights$densityName)] * 0.01
## Automatic adjust the tick text on x axis, default 5 ticks
tickText <- alignSegments$x
if(mean(tickText) < 0.0001 || mean(tickText) > 10000){
tickText <- format(tickText, scientific=TRUE, digits=3)
}else if(mean(tickText) > 0.1){
tickText <- round(tickText, digits = 2)
}
alignSegments$tickText <- tickText
alignSegments$textY <- alignSegments$y - densityHeights$y[match(alignSegments$densityName, densityHeights$densityName)] * 0.03
cat(" Plotting ...\n")
stackDensityPlot_theme <- theme(legend.position = "top",
legend.title = element_text(size = rel(legend_title_size)),
legend.text = element_text(size = rel(legend_text_size)),
legend.key.size = unit(legend_key_size, "in"),
strip.text = element_text(size=strip_text_size, lineheight=1, hjust = 0.5, vjust = 0.5),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
strip.background=element_rect(fill = "grey90", colour = NA))
gp <- ggplot(densData, aes(x=stack_x, y=stack_y)) +
geom_segment(data = alignSegments,
aes(x = x, y = y, xend = xend, yend = yend),
color = "grey80", size=0.3) +
geom_segment(data = alignSegments,
aes(x = x, y = y, xend = tickXend, yend = tickYend),
color = "grey20", size=0.3) +
geom_text(data = alignSegments, aes(x = x, y = textY, label = tickText),
hjust = 0.3, vjust = 1.1, size = x_text_size) +
geom_polygon(aes(fill=stackName, color=stackName), alpha = 0.15) +
facet_wrap(~densityName, scale = "free") +
xlab("") + ylab("")
## set the color of stacks
if(!missing(stackColor)){
if(length(stackColor) == stackCount){
gp <- gp + scale_fill_manual(values=stackColor) + scale_colour_manual(values=stackColor)
}else{
warning("Color provided for stacks doesn't match the number of stacks, change to use default colour palette!")
}
}
gp <- gp + guides(col = guide_legend(title = legend_title, nrow = legendRow, byrow = TRUE),
fill = guide_legend(title = legend_title, nrow = legendRow, byrow = TRUE)) +
theme_bw() + stackDensityPlot_theme
return(gp)
}
#' Internal density calculation function serves for \code{stackDenistyPlot}
#'
#'
#' @param data Input data frame including variable columns and stackFactor column.
#' @param kernel Kernel applied for density calculation, includes "gaussian", "epanechnikov", "rectangular", "triangular", "biweight", "cosine", "optcosine".
#' @param bw The smoothing bandwidth to be used. The default, "nrd0", has remained the default for historical and compatibility reasons, rather than as a general recommendation, where e.g., "SJ" would rather fit.
#' @param adjust The bandwidth used is actually adjust*bw.
#' @param reomoveOutliers If coerce the outliers to 1.5*IQR level.
#'
#' @return data frame containing stackName, x, y and densityName
#' @importFrom plyr ldply
#' @importFrom scales rescale
#'
#' @examples
#' data <- iris
#' data$stackFactor <- data$Species
#' data$Species <- NULL
#' dr <- densityCal(data = data)
densityCal <- function(data, kernel = c("gaussian", "epanechnikov", "rectangular",
"triangular", "biweight", "cosine", "optcosine"),
bw = "nrd0", adjust = 1, scaleHeight = FALSE, scaleWidth = FALSE, reomoveOutliers = FALSE){
cat(" Calculating Density for each stack column...\n")
kernel <- match.arg(kernel)
#print(table(data$stackFactor))
dataBystackFactor <- split(subset(data, select = -stackFactor), data$stackFactor)
densityWrap <- function(d, ...){
resOut <- NULL
for(i in colnames(d)){
x <- d[,i]
if(reomoveOutliers){
cat(" Remove outliers...\n")
x_IQR <- IQR(x)
x_lowLimit <- quantile(x, 0.25) - 1.5 * x_IQR
x_highLimit <- quantile(x, 0.75) + 1.5 * x_IQR
x <- x[x >= x_lowLimit && x <= x_highLimit]
}
dens <- density(x, ...)
dx <- dens$x
dy <- dens$y
if(scaleWidth){
dx <- rescale(dx, c(0,5))
}
if(scaleHeight){
dy <- rescale(dy, c(0,1))
}
densOut <- data.frame(x=dx, y=dy, densityName = i)
resOut <- rbind(resOut, densOut)
}
return(resOut)
}
r <- ldply(dataBystackFactor, densityWrap,
kernel = kernel, bw = bw, adjust = adjust,
.progress = "text",
.id = "stackName")
return(r)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.