R/plots.R

Defines functions twohist cdfplot plotMany summary_plots

Documented in cdfplot plotMany summary_plots twohist

#' Two histograms and one plot
#'
#' Prints two histograms.
#' 
#' @param x a vector.
#' @param y a vector.
#' @param k number of bins in histgrams.
#' 
#' @references
#' \url{http://stackoverflow.com/questions/11193901/plotting-two-histograms-together}
#' 
#' @importFrom graphics hist barplot box
#' @export

twohist <- function(x, y, k=20, main="", col=c("lightgrey", "darkorange"),
                    border="white", box=TRUE, ...) {
  breaks <- pretty(range(c(x, y)), n=k)
  X <- hist(x, breaks=breaks, plot=FALSE)$counts/length(x)
  Y <- hist(y, breaks=breaks, plot=FALSE)$counts/length(y)
  dat <- rbind(X, Y)
  colnames(dat) <- paste(breaks[-length(breaks)], breaks[-1], sep="-")
  md <- max(dat, na.rm=TRUE)
  barplot(dat, beside=TRUE, space=c(0, 0.1), las=2, col=col,
          main=main, ylim=c(0, md+(md/15)), border=border, ...)
  if (box) box()
}


#' Empirical cumulative distribution function
#'
#' Plots empirical cumulative distribution function.
#' 
#' @param x a vector.
#' @param add if true adds this plot to previously plotted.
#' 
#' @references
#' \url{http://chemicalstatistician.wordpress.com/2013/06/25/exploratory-data-analysis-2-ways-of-plotting-empirical-cumulative-distribution-functions-in-r/}
#' 
#' @importFrom graphics lines plot
#' @export

cdfplot <- function(x, add=FALSE, ylab="F(x)", xlab="",
                            lwd=1, main="", ...) {
  x <- sort(x)
  n <- length(x)
  nx <- (1:n)/n
  if (!add) {
    plot(x, nx, ylab=ylab, xlab=xlab,
         type="s", lwd=lwd, main=main, ...)
  } else lines(x, nx, lwd=lwd, ...)
  invisible(tapply(nx, x, sum))
}


#' Boxplots for many variables
#'
#' Plot boxplots for multiple variables.
#' 
#' @param data a data.frame.
#' @param ranq a criterion for sorting the plots.
#' @param col color for plots.
#' 
#' @references
#' \url{http://www.r-bloggers.com/r-version-of-an-exploratory-technique-for-visualizing-the-distributions-of-100-variables/}
#' 
#' @export

plotMany <- function(data, ranq=0.75, col="steelblue") {  
  require(reshape2)
  require(ggplot2)
  
  data <- data[, !sapply(data, function(x) is.character(x) || is.factor(x))]
  
  for (i in 1:ncol(data)) {
    tmp <- as.numeric(data[, i])
    data[, i] <- (tmp-min(tmp, na.rm=T))/(max(tmp, na.rm=T)-min(tmp, na.rm=T))
  }
  
  ranks <- names(sort(rank(sapply(colnames(data), function(x) {
    as.numeric(quantile(data[,x], ranq, na.rm=T))
  }))))
  
  data_m <- melt(as.matrix(data))
  
  data_m$Var2 <- factor(data_m$Var2, ranks)
  
  gg <- ggplot(data_m, aes(x=Var2, y=value))
  gg <- gg + geom_boxplot(fill=col, notch=TRUE, outlier.size=1)
  gg <- gg + labs(x="", y="")
  gg <- gg + theme_bw()
  gg <- gg + theme(panel.grid=element_blank())
  gg <- gg + theme(axis.text.x=element_text(angle=-45, hjust=0.001))
  gg
  
}


#' Plots summarising whole data frame
#' 
#' @importFrom stats density
#' @importFrom graphics plot par barplot 
#' @export

summary_plots <- function(df) {
  mar_tmp <- par()$mar
  
  uniq <- sapply(df, function(x) length(unique(x)))
  df <- df[, uniq > 1]
  k <- ceiling(sqrt(ncol(df)))
  
  if (!is.null(colnames(df))) {
    nam <- colnames(df)
  } else {
    nam <- NULL
  }
  
  par(mfrow=c(k, k), cex=0.8, mar=c(2.5, 2.5, 2, 0.8))
  for (i in 1:ncol(df)) {
    if (length(unique(df[, i])) < 2) next()
    
    if (is.factor(df[, i]) | is.logical(df[, i])) {
      barplot(table(df[, i]), xlab="", ylab="", main=nam[i])
    } else if (is.numeric(df[, i])) {
      plot(density(na.omit(df[, i])), xlab="", ylab="", main=nam[i])
    }
  }
  par(mfrow=c(1, 1), cex=1, mar=mar_tmp)
}
twolodzko/twextras documentation built on May 3, 2019, 1:52 p.m.