#' Two histograms and one plot
#'
#' Prints two histograms.
#'
#' @param x a vector.
#' @param y a vector.
#' @param k number of bins in histgrams.
#'
#' @references
#' \url{http://stackoverflow.com/questions/11193901/plotting-two-histograms-together}
#'
#' @importFrom graphics hist barplot box
#' @export
twohist <- function(x, y, k=20, main="", col=c("lightgrey", "darkorange"),
border="white", box=TRUE, ...) {
breaks <- pretty(range(c(x, y)), n=k)
X <- hist(x, breaks=breaks, plot=FALSE)$counts/length(x)
Y <- hist(y, breaks=breaks, plot=FALSE)$counts/length(y)
dat <- rbind(X, Y)
colnames(dat) <- paste(breaks[-length(breaks)], breaks[-1], sep="-")
md <- max(dat, na.rm=TRUE)
barplot(dat, beside=TRUE, space=c(0, 0.1), las=2, col=col,
main=main, ylim=c(0, md+(md/15)), border=border, ...)
if (box) box()
}
#' Empirical cumulative distribution function
#'
#' Plots empirical cumulative distribution function.
#'
#' @param x a vector.
#' @param add if true adds this plot to previously plotted.
#'
#' @references
#' \url{http://chemicalstatistician.wordpress.com/2013/06/25/exploratory-data-analysis-2-ways-of-plotting-empirical-cumulative-distribution-functions-in-r/}
#'
#' @importFrom graphics lines plot
#' @export
cdfplot <- function(x, add=FALSE, ylab="F(x)", xlab="",
lwd=1, main="", ...) {
x <- sort(x)
n <- length(x)
nx <- (1:n)/n
if (!add) {
plot(x, nx, ylab=ylab, xlab=xlab,
type="s", lwd=lwd, main=main, ...)
} else lines(x, nx, lwd=lwd, ...)
invisible(tapply(nx, x, sum))
}
#' Boxplots for many variables
#'
#' Plot boxplots for multiple variables.
#'
#' @param data a data.frame.
#' @param ranq a criterion for sorting the plots.
#' @param col color for plots.
#'
#' @references
#' \url{http://www.r-bloggers.com/r-version-of-an-exploratory-technique-for-visualizing-the-distributions-of-100-variables/}
#'
#' @export
plotMany <- function(data, ranq=0.75, col="steelblue") {
require(reshape2)
require(ggplot2)
data <- data[, !sapply(data, function(x) is.character(x) || is.factor(x))]
for (i in 1:ncol(data)) {
tmp <- as.numeric(data[, i])
data[, i] <- (tmp-min(tmp, na.rm=T))/(max(tmp, na.rm=T)-min(tmp, na.rm=T))
}
ranks <- names(sort(rank(sapply(colnames(data), function(x) {
as.numeric(quantile(data[,x], ranq, na.rm=T))
}))))
data_m <- melt(as.matrix(data))
data_m$Var2 <- factor(data_m$Var2, ranks)
gg <- ggplot(data_m, aes(x=Var2, y=value))
gg <- gg + geom_boxplot(fill=col, notch=TRUE, outlier.size=1)
gg <- gg + labs(x="", y="")
gg <- gg + theme_bw()
gg <- gg + theme(panel.grid=element_blank())
gg <- gg + theme(axis.text.x=element_text(angle=-45, hjust=0.001))
gg
}
#' Plots summarising whole data frame
#'
#' @importFrom stats density
#' @importFrom graphics plot par barplot
#' @export
summary_plots <- function(df) {
mar_tmp <- par()$mar
uniq <- sapply(df, function(x) length(unique(x)))
df <- df[, uniq > 1]
k <- ceiling(sqrt(ncol(df)))
if (!is.null(colnames(df))) {
nam <- colnames(df)
} else {
nam <- NULL
}
par(mfrow=c(k, k), cex=0.8, mar=c(2.5, 2.5, 2, 0.8))
for (i in 1:ncol(df)) {
if (length(unique(df[, i])) < 2) next()
if (is.factor(df[, i]) | is.logical(df[, i])) {
barplot(table(df[, i]), xlab="", ylab="", main=nam[i])
} else if (is.numeric(df[, i])) {
plot(density(na.omit(df[, i])), xlab="", ylab="", main=nam[i])
}
}
par(mfrow=c(1, 1), cex=1, mar=mar_tmp)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.