R/plot_landscape.R

Defines functions densityplot plot_landscape

Documented in densityplot plot_landscape

#' @title Landscape Plot
#' @description Wrapper for visualizing sample similarity landscape
#' ie. sample density in various 2D projections.
#' @param x \code{\link{phyloseq-class}} object or a data matrix 
#' (samples x features; eg. samples vs. OTUs). If the input x is a 2D matrix
#' then it is plotted as is.
#' @param method Ordination method, see phyloseq::plot_ordination; or "PCA",
#'        or "t-SNE" (from the \pkg{Rtsne} package)
#' @param distance Ordination distance, see phyloseq::plot_ordination; for
#'        method = "PCA", only euclidean distance is implemented now.
#' @param transformation Transformation applied on the input object x
#' @param col Variable name to highlight samples (points) with colors
#' @param main title text
#' @param x.ticks Number of ticks on the X axis
#' @param rounding Rounding for X axis tick values
#' @param add.points Plot the data points as well
#' @param adjust Kernel width adjustment
#' @param size point size
#' @param legend plot legend TRUE/FALSE
#' @param shading Add shading in the background.
#' @return A \code{\link{ggplot}} plot object.
#' @export
#' @details For consistent results, set random seet (set.seed) before
#' function call. Note that the distance and transformation arguments may
#' have a drastic effect on the outputs.
#' @examples
#'
#' data(dietswap)
#'
#' # PCoA
#' p <- plot_landscape(transform(dietswap, "compositional"),
#'    distance = "bray", method = "PCoA")
#'
#  # t-SNE
#' p <- plot_landscape(dietswap, method = "t-SNE", distance = "bray",
#'        transformation = "compositional")
#'
#' # PCA
#' p <- plot_landscape(dietswap, method = "PCA", transformation = "clr")
#'
#' @keywords utilities
plot_landscape <- function(x, method="PCoA", distance="bray",
    transformation = "identity", 
    col=NULL, main=NULL, x.ticks=10, rounding=0, add.points=TRUE,
    adjust=1, size=1, legend=FALSE, shading=TRUE) {

    if (is.matrix(x) || is.data.frame(x)) {

        if (ncol(x) == 2) {
            proj <- as.data.frame(x)
        } else if (ncol(x) > 2) {
            # Convert the matrix into phyloseq object
            x <- phyloseq(otu_table(t(x), taxa_are_rows = TRUE))
        }
    }

    if (is.phyloseq(x)) {

        x <- transform(x, transformation)

        if (method == "PCA") {

            # TODO: add option to calculate PCA with different
            # distances using the distance argument
            # (now assumes the default and cannot be altered)
            d <- t(abundances(x))
            pca <- princomp(d)
            proj <- pca$scores[, c(1,2)]
            rownames(proj) <- sample_names(x)

            # TODO add robust PCA
            #(pc.rob <- princomp(stackloss, covmat = MASS::cov.rob(stackloss)))
        
        } else if (method == "t-SNE") {

            dm <- vegdist(t(otu_table(x)), distance)

            ## Run TSNE
            tsne_out <- Rtsne(dm, dims = 2)
            proj <- tsne_out$Y
            rownames(proj) <- sample_names(x)

        } else {
    
            #quiet(proj <- get_ordination(x, method, distance))
            quiet(x.ord <- ordinate(x, method, distance))
            # Pick the projected data (first two columns + metadata)
            quiet(proj <- phyloseq::plot_ordination(x, x.ord, justDF=TRUE))
            # Rename the projection axes
        } 
        
    }

    proj <- as.data.frame(proj)
    colnames(proj) <- paste0("PC", c(1,2))

    guide.title <- "color"
    if (is.null(col)) {
        proj$col <- as.factor(rep("black", nrow(proj)))
    } else if (length(col) == 1 && col %in% names(meta(x))) {
        proj$col <- meta(x)[, col]
        guide.title <- col
    } else {
        proj$col <- col
    }

    p <- densityplot(proj[, c(1,2)], main=NULL, x.ticks=10,
            rounding=0, add.points=add.points, 
            adjust=1, size=size, col=proj$col,
            legend = TRUE,
            shading=shading) +
            guides(color = guide_legend(title = guide.title))

    p
    
}





#' @title Density Plot
#' @description Density visualization for data points overlaid on cross-plot.
#' @param x Data matrix to plot. The first two columns will be visualized as a
#'    cross-plot.
#' @param main title text
#' @param x.ticks Number of ticks on the X axis
#' @param rounding Rounding for X axis tick values
#' @param add.points Plot the data points as well
#' @param col Color of the data points. NAs are marked with darkgray.
#' @param adjust Kernel width adjustment
#' @param size point size
#' @param legend plot legend TRUE/FALSE
#' @param shading Shading
#' @return ggplot2 object
#' @examples
#' # p <- densityplot(cbind(rnorm(100), rnorm(100)))
#' @references See citation('microbiome') 
#' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
#' @keywords utilities
densityplot <- function(x, main=NULL, x.ticks=10, rounding=0,
    add.points=TRUE, col="black",
    adjust=1, size=1,
    legend=FALSE, shading=TRUE) {

    df <- x
    if (!is.data.frame(df)) {
        df <- as.data.frame(as.matrix(df))
    }
    
    # Avoid warnings
    x <- y <- ..density.. <- color <- NULL
    
    # If colors are NA then mark them dark gray
    lev <- levels(col)
    if (!is.numeric(col) & any(is.na(col))) {
        col <- as.character(col)
        col[unname(which(is.na(col)))] <- "darkgray"
        col <- factor(col, levels = unique(c(lev, "darkgray")))    
    }
    
    xvar <- colnames(df)[[1]]
    yvar <- colnames(df)[[2]]
    df[["x"]] <- df[, 1]
    df[["y"]] <- df[, 2]

    df[["color"]] <- col
    df[["size"]] <- size
    
    # Remove NAs
    df <- df[!(is.na(df[["x"]]) | is.na(df[["y"]])), ]
    
    # Determine bandwidth for density estimation
    bw <- adjust * c(bwi(df[["x"]]), bwi(df[["y"]]))
    if (any(bw == 0)) {
        warning("Zero bandwidths 
                (possibly due to small number of observations). 
                Using minimal bandwidth.")
        bw[bw == 0]=bw[bw == 0] + min(bw[!bw == 0])
    }
    
    # Construct the figure
    p <- ggplot(df)

    if (shading) {
        p <- p + stat_density2d(aes(x, y, fill=..density..),
                geom="raster", h=bw, 
                contour=FALSE)
        p <- p + scale_fill_gradient(low="white", high="black")             
    }
    
    if (add.points) {

        if (length(unique(df$color)) == 1 && length(unique(df$size)) == 1) {    
            p <- p + geom_point(aes(x=x, y=y),
            col=unique(df$color), size=unique(df$size))
        } else if (length(unique(df$color)) == 1 &&
            length(unique(df$size)) > 1) {
            p <- p + geom_point(aes(x=x, y=y, size=size),
            col=unique(df$color))
        } else if (length(unique(df$color)) > 1 &&
            length(unique(df$size)) == 1) {
            p <- p + geom_point(aes(x=x, y=y, col=color),
            size=unique(df$size))
        } else {
            p <- p + geom_point(aes(x=x, y=y, col=color, size=size))
        }
    }
    
    p <- p + xlab(xvar) + ylab(yvar)
    
    if (!legend) {
        p <- p + theme(legend.position="none")
    }
    
    p <- p + scale_x_continuous(breaks=round(seq(floor(min(df[["x"]])),
        ceiling(max(df[["x"]])), length=x.ticks), rounding))
    
    if (!is.null(main)) {
        p <- p + ggtitle(main)
    }
    
    p
    
}



# Bandwidth
# As in MASS::bandwidth.nrd but rewritten. Internal.
bwi <- function (x) {
    r <- quantile(x, c(0.25, 0.75))
    4 * 1.06 * min(sd(x), (r[[2]] - r[[1]])/1.34) * length(x)^(-.2)
}

Try the microbiome package in your browser

Any scripts or data that you put into this service are public.

microbiome documentation built on Nov. 8, 2020, 5:08 p.m.