R/plotscores.R

Defines functions plotscores

Documented in plotscores

#' Plotting PCA, PLS or OPLS model scores
#' @export
#' @aliases plotscores
#' @description Function for plotting PCA, PLS or OPLS model scores.
#' @param model PCA, PLS or OPLS model of the package \emph{MetaboMate}.
#' @param pc Specifies which model components should be plotted on abscissa and ordinate (see Details).
#' @param an Colour and label specification (see Details).
#' @param title Plot title.
#' @param qc Row indices of QC samples. Can be left NA, however, if specified QC samples will be highlighted in the plot.
#' @param legend Position of the plot legend, set to NA if legend should be outside of the plotting area.
#' @param cv.scores Logical value (TRUE or FALSE) indicating if cross-validated scores should be plotted for the predictive component(s) (only for PLS and O-PLS).
#' @param ... Additional values passed on to \code{\link[ggplot2]{scale_colour_gradientn}}.
#' @details Scores colouring is specified with the argument \code{an}, which is a list of three elements. The first list element specifies the colour (class factor required for categorical variables), the second list element specifies a point labeling (class character or factor) and the third list element specifies point shape. The Hotelling's \out{T<sup>2</sup>} ellipse is automatically included and calculated for the dimensions selected by the \code{pc} argument.
#' @references Trygg J. and Wold, S. (2002) Orthogonal projections to latent structures (O-PLS). \emph{Journal of Chemometrics}, 16.3, 119-128.
#' @references Hotelling, H. (1931) The generalization of Student’s ratio. \emph{Ann. Math. Stat.}, 2, 360-378.
#' @return This function returns a \emph{ggplot2} plot object.
#' @author Torben Kimhofer \email{tkimhofer@@gmail.com}
#' @seealso \code{\link{pca}} \code{\link{opls}} \code{\link{PCA_MetaboMate-class}} \code{\link{OPLS_MetaboMate-class}}
#' @importFrom stats cov
#' @importFrom ellipse ellipse
#' @importFrom RColorBrewer brewer.pal
#' @importFrom ggplot2 ggplot aes geom_polygon geom_hline geom_vline geom_point scale_colour_manual ggtitle labs theme labs scale_colour_gradientn scale_x_continuous
#' @importFrom colorRamps matlab.like
#' @importFrom ggrepel geom_text_repel
#' @importFrom graphics plot
#' @importFrom reshape2 melt
#' @importFrom scales pretty_breaks
#' @importFrom grid unit
plotscores <- function(model, pc = c(1, 2), an, title = "", qc = NA, legend = "in", cv.scores = T, ...) {
    if (missing(an)) 
        an <- list("NA")
    # define plot dimensions
    switch(class(model), pcaRes = {
        x <- model@scores[, pc[1]]
        y <- model@scores[, pc[2]]
    }, PCA_MetaboMate = {
        x <- model@t[, pc[1]]
        y <- model@t[, pc[2]]
    }, OPLS_MetaboMate = {
        if (cv.scores == T) {
            x <- model@t_cv[, 1]
            y <- model@t_orth_cv[, pc[1]]
        } else {
            x <- model@t_pred[, 1]
            y <- model@t_orth[, pc[1]]
        }
    })
    ###### DATA PREP
    if (is.null(names(an))) {
        cat("No facet, colour and linetype names given. See an argument in ?specOverlay\n")
        names(an) <- paste("an", 1:length(an), sep = "")
    }
    # names of an list
    if ("" %in% names(an)) {
        idx <- which(names(an) == "")
        names(an)[idx] <- paste("an", idx, sep = "")
    }
    names(an) <- gsub(" ", ".", names(an))
    # create scores df for ggplot function
    an1 <- do.call(cbind.data.frame, an)
    # prep df for ggplot2
    sc <- data.frame(x, y, an1)
    colnames(sc)[-c(1:2)] <- names(an)
    # if(length(an)==2){ sc$shape=factor(an[[2]])}else{ sc$shape=16} if(length(an)==3){ sc$labs=an[[3]]} prep sub-df for QC samples
    if (!is.na(qc[1])) {
        df.qc <- data.frame(x = x[qc], y = y[qc], cl = an[[1]][qc])
    }
    # Calculate Hotellings T2 elipse
    SD <- cov(cbind(x, y))
    el <- ellipse(SD, centre = colMeans(cbind(x, y)), level = 0.95)
    colnames(el) <- c("V1", "V2")
    # define axis ranges
    xlim <- c(min((c(el[, 1], x))), max((c(el[, 1], x))))
    xlim <- xlim + c(diff(range(xlim)) * -0.05, diff(range(xlim)) * +0.05)
    ylim <- c(min((c(el[, 2], y))), max((c(el[, 2], y))))
    ylim <- ylim + c(diff(range(ylim)) * -0.05, diff(range(ylim)) * +0.05)
    # prep df for Hotelltings T2 ellipse and axis label positions
    df <- as.data.frame(el)
    y.labb <- diff(range(ylim)) * 0.015
    x.labb <- diff(range(xlim)) * 0.015
    df1 <- sc
    df1[, 1] <- df1[, 1] + x.labb
    df1[, 2] <- df1[, 2] + y.labb
    ###### COLOUR PREP populate colours if number of levels exceeds colours in palette (n=8), fix point shape minimum nb of factors for categorical
    ###### variables in 17
    if (class(an[[1]]) != "numeric" & length(table(an[[1]])) < 17) {
        type <- "categorical"
        if (length(unique(an[[1]])) > 8) {
            cols <- c(brewer.pal(8, "Dark2"), brewer.pal(8, "Set1")[1:(length(unique(an[[1]])) - 8)])
        } else {
            cols <- brewer.pal(8, "Dark2")[1:length(unique(an[[1]]))]
        }
    } else {
        type <- "continuous"
    }
    ###### FURTHER GGPLOT COMMANDS prep ggplot2 skeleton
    g <- ggplot() + geom_polygon(data = df, aes_string(x = "V1", y = "V2"), fill = "white", alpha = 0.4) + geom_hline(yintercept = 0, colour = "gray70") + 
        geom_vline(xintercept = 0, colour = "gray70") + ggtitle(title) + labs(colour = names(an)[1])
    # define colour schemes and create ggplot2 objects if categorical colour vector has too many levels (>=17) groups will be converted to numeric
    # and plotted with a colour scale following if expression is for coloured points, no shape or label specifications
    if (length(an) == 1) {
        switch(type, categorical = {
            g <- g + geom_point(data = sc, aes_string("x", "y", colour = names(an)[1]), alpha = 0.7, shape = 16) + scale_colour_manual(values = cols) + 
                labs(colour = names(an)[1])
        }, continuous = {
            g <- g + geom_point(data = sc, aes_string("x", "y", colour = names(an)[1]), shape = 16, alpha = 1) + scale_colour_gradientn(colours = matlab.like(10), 
                breaks = pretty_breaks(), ...) + ggtitle(title)
        })
    }
    if (length(an) == 2) {
        switch(type, categorical = {
            g <- g + geom_point(data = sc, aes_string("x", "y", colour = names(an)[1], shape = names(an)[2]), alpha = 0.7) + scale_colour_manual(values = cols) + 
                labs(colour = names(an)[1], shape = names(an)[2])
        }, continuous = {
            g <- g + geom_point(data = sc, aes_string("x", "y", colour = names(an)[1], shape = names(an)[2]), alpha = 0.7) + scale_colour_gradientn(colours = matlab.like(10), 
                breaks = pretty_breaks(), ...) + ggtitle(title)
        })
    }
    if (length(an) == 3) {
        switch(type, categorical = {
            if (length(unique(sc$shape)) == 1) {
                g <- g + geom_point(data = sc, aes_string("x", "y", colour = names(an)[1]), alpha = 0.7) + scale_colour_manual(values = cols) + 
                  geom_text_repel(data = sc, aes_string("x", "y", label = names(an)[3])) + labs(colour = names(an)[1], shape = names(an)[2])
            } else {
                g <- g + geom_point(data = sc, aes_string("x", "y", colour = names(an)[1], shape = names(an)[2]), alpha = 0.7) + scale_colour_manual(values = cols) + 
                  geom_text_repel(data = sc, aes_string("x", "y", label = names(an)[3])) + labs(colour = names(an)[1], shape = names(an)[2])
            }
        }, continuous = {
            if (length(unique(sc$shape)) == 1) {
                g <- g + geom_point(data = sc, aes_string("x", "y", colour = names(an)[1]), alpha = 0.7) + scale_colour_gradientn(colours = matlab.like(10), 
                  breaks = pretty_breaks(), ...) + geom_text_repel(data = sc, aes_string("x", "y", label = names(an)[3]), min.segment.length = unit(0, 
                  "lines")) + labs(colour = names(an)[1], shape = names(an)[2])
            } else {
                g <- g + geom_point(data = sc, aes_string("x", "y", colour = names(an)[1], shape = names(an)[2]), alpha = 0.7) + scale_colour_gradientn(colours = matlab.like(10), 
                  breaks = pretty_breaks(), ...) + geom_text_repel(data = sc, aes_string("x", "y", label = names(an)[3]), min.segment.length = unit(0, 
                  "lines")) + labs(colour = names(an)[1], shape = names(an)[2])
            }
        })
    }
    if (!is.na(qc[1])) {
        g <- g + geom_point(data = df.qc, aes_string("x", "y"), alpha = 0.7, colour = "black")
    }
    ###### AXIS LABELLING
    switch(class(model), pcaRes = {
        g <- g + scale_x_continuous(name = paste("PC ", pc[1], " (", round(model@R2[pc[1]] * 100, 1), " %)", sep = "")) + scale_y_continuous(name = paste("PC ", 
            pc[2], " (", round(model@R2[pc[2]] * 100, 1), " %)", sep = ""))
    }, PCA_MetaboMate = {
        g <- g + scale_x_continuous(name = paste("PC ", pc[1], " (", round(model@R2[pc[1]] * 100, 1), " %)", sep = "")) + scale_y_continuous(name = paste("PC ", 
            pc[2], " (", round(model@R2[pc[2]] * 100, 1), " %)", sep = ""))
    }, OPLS_MetaboMate = {
        if (ncol(model@t_orth) > 1) {
            comp <- "orthogonal components"
        } else {
            comp <- "orthogonal component"
        }
        if (cv.scores == F) {
            if (model@type == "DA") {
                g <- g + scale_x_continuous(name = expression(t[pred])) + scale_y_continuous(name = expression(t[orth])) + ggtitle(paste("OPLS - ", 
                  model@nPC, " ", comp, " (R2X=", round(model@summary$R2X[model@nPC], 2), ", R2Y=", round(model@summary$R2Y[model@nPC], 2), ", Q2=", 
                  round(model@summary$Q2[model@nPC], 2), ", AUROC=", round(model@summary$AUROC[model@nPC], 2), ")", sep = "")) + theme(plot.title = element_text(size = 10))
            } else {
                g <- g + scale_x_continuous(name = expression(t[pred])) + scale_y_continuous(name = expression(t[orth])) + ggtitle(paste("OPLS - ", 
                  model@nPC, " ", comp, " (R2X=", round(model@summary$R2X[model@nPC], 2), ", R2Y=", round(model@summary$R2Y[model@nPC], 2), ", Q2=", 
                  round(model@summary$Q2[model@nPC], 2), ")", sep = "")) + theme(plot.title = element_text(size = 10))
            }
        } else {
            if (model@type == "DA") {
                g <- g + scale_x_continuous(name = expression(t[pred][","][cv])) + scale_y_continuous(name = expression(t[orth][","][cv])) + ggtitle(paste("OPLS - ", 
                  model@nPC, " ", comp, " (R2X=", round(model@summary$R2X[model@nPC], 2), ", R2Y=", round(model@summary$R2Y[model@nPC], 2), ", Q2=", 
                  round(model@summary$Q2[model@nPC], 2), ", AUROC=", round(model@summary$AUROC[model@nPC], 2), ")", sep = "")) + theme(plot.title = element_text(size = 10))
            } else {
                g <- g + scale_x_continuous(name = expression(t[pred][","][cv])) + scale_y_continuous(name = expression(t[orth][","][cv])) + ggtitle(paste("OPLS - ", 
                  model@nPC, " ", comp, " (R2X=", round(model@summary$R2X[model@nPC], 2), ", R2Y=", round(model@summary$R2Y[model@nPC], 2), ", Q2=", 
                  round(model@summary$Q2[model@nPC], 2), ")", sep = "")) + theme(plot.title = element_text(size = 10))
            }
        }
    })
    if (legend == "in") {
        g <- g + theme(legend.position = c(1.01, 1.02), legend.justification = c(1, 1))
    }
    return(g)
}
kimsche/MetaboMate documentation built on Aug. 8, 2020, 1:14 a.m.