R/check_confounders.r

Defines functions variance.plots factorize.bmi factorize.metadata get.names confounders.descriptive.plots confounders.glm.auroc.plot confounders.glm.reg.pval.plot confounders.glm.reg.coef.plot confounders.build.glms confounders.corrplot check.confounders

Documented in check.confounders

#!/usr/bin/Rscript
### SIAMCAT - Statistical Inference of Associations between
### Microbial Communities And host phenoTypes R flavor EMBL
### Heidelberg 2012-2018 GNU GPL 3.0

#' @title Check for potential confounders in the metadata
#' @description Checks potential confounders in the metadata and produces
#'     some visualizations
#' @usage check.confounders(siamcat, fn.plot, meta.in = NULL,
#' feature.type='filtered', verbose = 1)
#' @param siamcat an object of class \link{siamcat-class}
#' @param fn.plot string, filename for the pdf-plot
#' @param meta.in vector, specific metadata variable names to analyze,
#'     defaults to NULL (all metadata variables will be analyzed)
#'@param feature.type string, on which type of features should the function
#' work? Can be either \code{c()"original", "filtered", or "normalized")}.
#' Please only change this paramter if you know what you are doing!
#' @param verbose integer, control output: \code{0} for no output at all,
#'     \code{1} for only information about progress and success, \code{2} for
#'     normal level of information and \code{3} for full debug information,
#'     defaults to \code{1}
#' @keywords SIAMCAT check.confounders
#' @details This function checks for associations between class labels and
#'     potential confounders (e.g. Age, Sex, or BMI) that are present in the
#'     metadata. Statistical testing is performed with Fisher's exact test or
#'     Wilcoxon test, while associations are visualized either as barplot or
#'     Q-Q plot, depending on the type of metadata.
#'
#'     Additionally, it evaluates associations among metadata variables using
#'     conditional entropy and associations with the label using generalized
#'     linear models, producing a correlation heatmap and appropriate
#'     quantitative barplots, respectively.
#' @export
#' @return Does not return anything, but outputs plots to specified pdf file
#' @examples
#' # Example data
#' data(siamcat_example)
#'
#' # Simple working example
#' check.confounders(siamcat_example, './conf_plot.pdf')

check.confounders <- function(siamcat, fn.plot, meta.in = NULL,
    feature.type='filtered', verbose = 1) {


    if (verbose > 1) message("+ starting check.confounders")
    s.time <- proc.time()[3]
    label <- label(siamcat)
    meta <- meta(siamcat)
    # get features
    if (feature.type == 'original'){
        feat <- get.orig_feat.matrix(siamcat)
    } else if (feature.type == 'filtered'){
        if (is.null(filt_feat(siamcat, verbose=0))){
            stop('Features have not yet been filtered, exiting...\n')
        }
        feat <- get.filt_feat.matrix(siamcat)
    } else if (feature.type == 'normalized'){
        if (is.null(norm_feat(siamcat, verbose=0))){
            stop('Features have not yet been normalized, exiting...\n')
        }
        feat <- get.norm_feat.matrix(siamcat)
    }
    if (is.null(meta)) {
        stop('SIAMCAT object does not contain any metadata.\nExiting...')
    }
    meta <- factorize.metadata(meta, verbose) # creates data.frame

    # check validity of input metadata conditions
    if (!is.null(meta.in)){
    if (!all(meta.in %in% colnames(meta))){
        meta.in <- meta.in[which(meta.in %in% colnames(meta))]
        warning(paste0("Some specified metadata were not in metadata file.\n",
                        "Continuing with: ",  paste(c(meta.in), collapse=" ")))
    }
        meta <- meta[,meta.in]
    }

    # remove nested variables
    indep <- vapply(colnames(meta), FUN=function(x) {
        return(condentropy(label$label, discretize(meta[,x])))},
        FUN.VALUE = numeric(1))
    if ((verbose >= 1) & (length(names(which(indep == 0))) > 0)){
        message("++ metadata variables:\n\t",
                paste(c(names(which(indep == 0))), collapse = " & "),
                "\n++ are nested inside the label and ",
                "have been removed from this analysis")
    }
    meta <- meta[,names(which(indep != 0))]

    # remove metavariables with less than 2 levels
    n.levels <- vapply(meta,
        FUN = function(x){length(unique(x))},
        FUN.VALUE = integer(1))
    if (any(n.levels < 2)){
        s.name <- names(which(n.levels < 2))
        if (verbose >= 1){
            message("++ remove metadata variables, since all ",
                "subjects have the same value\n\t", s.name)
        }
        meta <- meta[,which(n.levels > 1)]
    }

    if (ncol(meta) > 10){
        warning(paste0("The recommended number of metadata variables is 10.\n",
                    "Please be aware that some visualizations may not work."))
    }

    pdf(fn.plot, paper = 'special', height = 8.27, width = 11.69)
    # FIRST PLOT - conditional entropies for metadata variables
    if (verbose > 1)
        message("+++ plotting conditional entropies for metadata variables")
    confounders.corrplot(meta, label)

    # SECOND PLOT - glm regression coefficients + significance + AU-ROCs
    layout(matrix(c(1, 2, 3), nrow = 1, ncol = 3),
            widths = c(0.5, 0.3, 0.2), heights = c(1, 1, 1))
    if (verbose > 2)
        message("+++ building logistic regression classifiers for metadata")
    glm.data <- confounders.build.glms(meta, label)
    if (verbose > 2)
        message("+++ plotting regression coefficients")
    confounders.glm.reg.coef.plot(glm.data)
    if (verbose > 2)
        message("+++ plotting regression coefficient significance")
    confounders.glm.reg.pval.plot(glm.data)
    if (verbose > 2)
        message("+++ plotting au-roc values")
    confounders.glm.auroc.plot(glm.data)

    # THIRD PLOT(S) - original confounder check descriptive stat plots
    confounders.descriptive.plots(meta(siamcat)[,colnames(meta)],
        label, verbose)

    # FOURTH PLOT(S) - variance explained by label versus variance explained by
    # metadata plots
    variance.plots(meta, label, feat, verbose)

    dev.off()

    e.time <- proc.time()[3]
    if (verbose > 1) {
        message(paste("+ finished check.confounders in",
                        formatC(e.time - s.time, digits = 3), "s"))
    }
    if (verbose == 1) {
        message(paste("Finished checking metadata for confounders,",
            "results plotted to:", fn.plot))
    }
}

#'@keywords internal
confounders.corrplot <- function(meta, label) {

    meta.temp <- data.frame(meta, Label = label$label)
    entropies <- matrix(NA, nrow=ncol(meta)+1, ncol=ncol(meta)+1,
                        dimnames=list(c(colnames(meta), 'Label'),
                                    c(colnames(meta), 'Label')))

    for (n in seq_along(colnames(meta.temp))) {
        entropies[,n] <- vapply(colnames(meta.temp), FUN=function(x) {
            idx <- intersect(which(!is.na(meta.temp[,n]) == TRUE),
                            which(!is.na(meta.temp[,x]) == TRUE))
            return(condentropy(discretize(meta.temp[,x][idx]),
                            discretize(meta.temp[idx,n])))},
            FUN.VALUE = numeric(1))}

    fix.names <- vapply(colnames(entropies), FUN=function(x) {
        x <- tolower(x)
        return(gsub('[_.-]', ' ', paste(toupper(substring(x, 1, 1)),
                                        substring(x, 2), sep = "")))},
        FUN.VALUE = character(1))
    colnames(entropies) <- fix.names
    rownames(entropies) <- fix.names

    col = c(rev(colorRampPalette(brewer.pal(9, 'Reds'))(100)),
                rev(colorRampPalette(brewer.pal(9, 'Blues'))(100)))

    corrplot(entropies,
            is.corr = FALSE, order = 'FPC', method = 'color',
            pch.cex = 0.9,
            cl.lim = c(min(entropies), ceiling(max(entropies))),
            tl.col = 'black', tl.srt = 45,
            col=col,
            number.cex = 0.7, number.digits = 2, addCoef.col = 'black',
            mar = c(3.1, 2.1, 5.1, 4.1))
    title(main = 'Conditional Entropies\n H(Row | Col)', cex = 0.7)

}

#'@keywords internal
confounders.build.glms <- function(meta, label) {

    reg.coef <- vector()
    reg.ci   <- list()
    reg.pval <- vector()
    rocs     <- list()
    aucs     <- vector()
    rm       <- vector()

    for (m in seq_along(colnames(meta))) {
        d <- data.frame(x = as.ordered(meta[,m]),
                        y = factor(label$label, levels = c(-1,1),
                                labels = c(0,1)))

        model <- glm(y ~ x, family = binomial("logit"), data = d)

        # check glms; conditional entropy should catch
        if (model$converged == TRUE) {
            reg.coef[m] <- model$coefficients[2]
            reg.ci[[m]] <- confint(profile(model))[2,]
            reg.pval[m] <- coef(summary(model))[2,4]
            rocs[[m]]   <- roc(y ~ x, data=d, direction='<',
                                ci=TRUE, auc=TRUE, levels=c(0,1))
            aucs[m]     <- as.numeric(rocs[[m]]$auc)}
        else {rm <- c(rm, colnames(meta)[m])}}

    idx <- !(colnames(meta) %in% rm)
    names(reg.ci)   <- colnames(meta)
    names(reg.coef) <- colnames(meta)
    names(reg.pval) <- colnames(meta)
    names(rocs)     <- colnames(meta)
    names(aucs)     <- colnames(meta)

    # order on plot(s) by regression significance
    order <- names(sort(reg.pval, decreasing=TRUE, na.last=FALSE))

    return(list("reg.ci" = reg.ci[which(lapply(reg.ci, is.null) == FALSE)],
                "reg.coef" = reg.coef[!(names(reg.coef) %in% rm)],
                "reg.pval" = reg.pval[!(names(reg.pval) %in% rm)],
                "rocs" = rocs[which(lapply(rocs, is.null) == FALSE)],
                "aucs" = aucs[!(names(aucs) %in% rm)],
                "colors" = rep('slategrey', length(colnames(meta))),
                "order" = order[!(order %in% rm) == TRUE]))
}

#'@keywords internal
confounders.glm.reg.coef.plot <- function(glm.data) {

    order <- glm.data$order
    x.min <- min(unlist(lapply(glm.data$reg.ci, min)), na.rm=TRUE, finite=TRUE)
    x.max <- max(unlist(lapply(glm.data$reg.ci, max)), na.rm=TRUE, finite=TRUE)
    margins <- c(floor(x.min), ceiling(x.max))
    x.ticks <- seq(margins[1], margins[2])
    if (length(x.ticks) < 10){
        x.ticks <- seq(margins[1], margins[2], by=0.5)
    }
    x.tick.labels <- formatC(x.ticks, digits=2)
    y.ticks <- seq(1, length(order))

    text.names <- vapply(order, FUN=function(x) {
        x <- tolower(x)
        return(gsub('[_.-]', ' ',
                        paste(toupper(substring(x, 1, 1)),
                        substring(x, 2), sep = "")))
        }, FUN.VALUE = character(1))

    par(mar = c(5.1, 10.1, 4.1, 1.1))
    plot(NULL, xlab = '', ylab = '', xaxs = 'i', yaxs = 'i', axes = FALSE,
            xlim = c(margins[1], margins[2]),
            ylim = c(0.5, length(order) + 0.5),
            type = 'n')
    abline(v = x.ticks, lty = 3, col = 'lightgrey')
    abline(v = 0, lty = 3, col = 'lightgrey')
    axis(side = 1, at = x.ticks, labels = x.tick.labels, cex.axis = 0.9)
    axis(side = 2, at = y.ticks, labels = text.names, las=2)
    title(main = 'Single Covariate Logistic Regression',
            xlab = 'Coefficients', cex=0.9)

    for (i in seq_along(order)) {
        if (!is.na(glm.data$reg.coef[order[i]])) {
            segments(x0 = glm.data$reg.ci[[order[i]]][1],
                    x1 = glm.data$reg.ci[[order[i]]][2],
                    y0 = i, col = 'darkgrey', lwd = 1.5)
            points(glm.data$reg.coef[order[i]], i, pch = 18,
                    col=glm.data$colors[i])
            points(glm.data$reg.coef[order[i]], i, pch = 5,
                    col=glm.data$colors[i])}}
}

#'@keywords internal
confounders.glm.reg.pval.plot <- function(glm.data) {

    order <- glm.data$order
    pvals.log <- -log10(glm.data$reg.pval[order])
    x.max <- max(ceiling(abs(range(pvals.log, na.rm=TRUE, finite=TRUE))))
    x.ticks <- seq(0, x.max)
    x.tick.labels <- parse(text = paste0('10^',
        signif(x.ticks*-1, digits = 2)))

    par(mar = c(5.1, 2.1, 4.1, 1.1))
    plot(NULL, xlab = '', ylab = '', xaxs = 'i', yaxs = 'i', axes = FALSE,
        xlim = c(0,x.max), ylim = c(0.5, length(order) + 0.5),
        type = 'n')
    abline(v = x.ticks, lty = 3, col = 'lightgrey')
    axis(side = 1, at = x.ticks, labels = x.tick.labels, cex.axis = 0.9)
    title(main = 'Coefficient Significance',
        xlab = 'P Value', cex=0.9)

    for (i in seq_along(pvals.log)) {
        if (!is.na(pvals.log[i])) {
            rect(0, i-0.1, pvals.log[i], i+0.1, col=glm.data$colors[i])}}
    abline(v = -log10(0.5), lty = 1, col = 'red')
}

#'@keywords internal
confounders.glm.auroc.plot <- function(glm.data) {

    x.ticks <- seq(0, 1, length.out = 5)
    x.tick.labels <- formatC(x.ticks, digits = 2)
    order <- glm.data$order

    par(mar = c(5.1, 2.1, 4.1, 4.1))
    plot(NULL, xlab = '', ylab = '', xaxs = 'i', yaxs = 'i', axes = FALSE,
        xlim = c(0,1), ylim = c(0.5, length(order) + 0.5), type = 'n')
    abline(v = x.ticks, lty = 3, col = 'lightgrey')
    abline(v = 0.5, lty = 1, col = 'darkgrey')
    axis(side = 1, at = x.ticks, labels = x.tick.labels, cex.axis = 0.9)
    title(main = 'ROC Analysis', xlab = 'AU-ROC', cex=0.9)

    for (i in seq_along(order)) {
        if (!is.na(glm.data$aucs[order[i]])) {
            segments(x0 = glm.data$rocs[[order[i]]]$ci[1],
                    x1 = glm.data$rocs[[order[i]]]$ci[3],
                    y0 = i, col = 'lightgrey', lwd = 1.5)
            points(glm.data$rocs[[order[i]]]$ci[2], i, pch = 18,
                    col=glm.data$colors[i])
            points(glm.data$rocs[[order[i]]]$ci[2], i, pch = 5,
                    col=glm.data$colors[i])}}
}

#'@keywords internal
confounders.descriptive.plots <- function(meta, label, verbose) {

    cases <- which(label$label == max(label$info))
    controls <- which(label$label == min(label$info))
    p.lab <- names(which(label$info == max(label$info)))
    n.lab <- names(which(label$info == min(label$info)))
    var.level.names <- get.names(meta) # for contingency plots & legends

    for (m in seq_along(meta)) {
        mname <- gsub("[_.-]", " ", colnames(meta)[m])
        mname <- paste(toupper(substring(mname, 1, 1)), substring(mname, 2),
            sep = "")
        if (verbose > 1)
            message(paste("+++ checking",mname,"as a potential confounder"))
        mvar <- meta[[m]]
        if (is.character(mvar)) mvar <- as.factor(mvar)
        mvar <- as.numeric(mvar)
        names(mvar) <- rownames(meta)
        u.val <- sort(unique(mvar)[!is.na(unique(mvar))])
        colors <- brewer.pal(6, "Spectral")
        histcolors <- brewer.pal(9, "YlGnBu")

        if (length(u.val) == 1) {
            if (verbose > 1) {
                message("+++ skipped because all subjects have the",
                    "same value")}
        } else if (length(u.val) <= 6) {
            if (verbose > 1) message("++++ discrete variable, using a bar plot")

            # create contingency table
            ct <- vapply(u.val, FUN = function(x) {
                # deal with NAs...?
                return(c(length(intersect(which(mvar == x), controls)),
                        length(intersect(which(mvar == x), cases))))},
                USE.NAMES = FALSE,
                FUN.VALUE = integer(2))

            freq <- t(ct / rowSums(ct))
            mvar <- factor(mvar, levels = sort(unique(na.omit(mvar))),
                            labels = var.level.names[[m]])

            if (verbose > 2)
                message("++++ plotting barplot")

            layout(matrix(c(1, 1, 2)))

            # barplot
            par(mar = c(4.1, 9.1, 4.1, 9.1))
            vps <- baseViewports()
            pushViewport(vps$figure)
            vp1 <- plotViewport()
            bar.plot <- barplot(freq, ylim = c(0, 1), main = mname,
                                names.arg = names(label$info), col = colors)
            legend(2.5, 1, legend = var.level.names[[m]],
                    xpd = NA, lwd = 2, col = colors,
                    inset = 0.5, bg = "grey96", cex = 0.8)
            ifelse(length(u.val) > 4,
                    p.val <- fisher.test(ct, simulate.p.value = TRUE,
                                        B = 2000)$p.value,
                    p.val <- fisher.test(ct)$p.value)
            mtext(
                paste("Fisher Test P Value:", format(p.val, digits = 4)),
                cex = 0.8, side = 1, line = 2)
            popViewport()

            if (verbose > 2)
                message("++++ drawing contingency table")

            # contingency table
            plot.new()
            vps <- baseViewports()
            pushViewport(vps$figure)
            niceLabel <- factor(label$label, levels = label$info,
                                labels = names(label$info))
            vp1 <- plotViewport()
            t <- addmargins(table(mvar, niceLabel, dnn = c(mname, "Label")))
            grid.table(t, theme = ttheme_minimal())
            popViewport()
            par(mfrow = c(1, 1), bty = "o")
        } else {
            if (verbose > 1)
                message("++++ continuous variable, using a Q-Q plot")

            layout(rbind(c(1, 2), c(3, 4)))

            if (verbose > 2)
                message("++++ panel 1/4: Q-Q plot")
            par(mar = c(4.5, 4.5, 2.5, 1.5), mgp = c(2.5, 1, 0))
            ax.int <- c(min(mvar, na.rm = TRUE), max(mvar, na.rm = TRUE))
            qqplot(mvar[controls], mvar[cases], xlim = ax.int,
                    ylim = ax.int, pch = 16, cex = 0.6, xlab = n.lab,
                    ylab = p.lab, main = paste("Q-Q plot for", mname))
            abline(0, 1, lty = 3)
            p.val <- wilcox.test(mvar[controls], mvar[cases],
                                exact = FALSE)$p.value
            text(ax.int[1] + 0.9 * (ax.int[2] - ax.int[1]),
                ax.int[1] + 0.1 * (ax.int[2] - ax.int[1]),
                cex = 0.8,paste("MWW Test P Value:",
                                format(p.val, digits = 4)),
                pos = 2)

            if (verbose > 2)
                message("++++ panel 2/4: X histogram")
            par(mar = c(4, 2.5, 3.5, 1.5))
            hist(mvar[controls], main = n.lab, xlab = mname,
                col = histcolors, breaks = seq(min(mvar, na.rm = TRUE),
                                                max(mvar, na.rm = TRUE),
                                                length.out = 10))
            mtext(paste("N =", length(mvar[controls])), cex = 0.6,
                    side = 3, adj = 1, line = 1)

            if (verbose > 2)
                message("++++ panel 3/4: X boxplot")
            par(mar = c(2.5, 4.5, 2.5, 1.5))
            boxplot(mvar ~ label$label, range = 1.5,
                    use.cols = TRUE, names = names(label$info),
                    ylab = mname, main = paste("Boxplot for", mname),
                    col = histcolors, outpch = NA)
            stripchart(mvar ~ label$label, vertical = TRUE, add = TRUE,
                    method = "jitter", pch = 20)

            if (verbose > 2)
                message("++++ panel 4/4: Y histogram")
            par(mar = c(4.5, 2.5, 3.5, 1.5))
            hist(mvar[cases], main = p.lab, xlab = mname,
                    col = histcolors, breaks = seq(min(mvar, na.rm = TRUE),
                                                max(mvar, na.rm = TRUE),
                                                length.out = 10))
            mtext(paste("N =", length(mvar[cases])), cex = 0.6,
                    side = 3, adj = 1, line = 1)
            par(mfrow = c(1, 1))
        }
            }
}

#'@keywords internal
get.names <- function(meta) {
    temp <- lapply(meta, FUN=function(x) {
        if (is.numeric(x) | is.character(x)) {
            if (length(unique(x)) <= 6) {return(levels(as.factor(x)))}
            else {return(NULL)}}
        else if (is.factor(x)) {return(levels(x))}
        else {return(levels(x))}})
}

#'@keywords internal
factorize.metadata <- function(meta, verbose) {

    if ('BMI' %in% toupper(colnames(meta))) {
        idx <- match('BMI', toupper(colnames(meta)))
        meta[,idx] <- factorize.bmi(meta[,idx])}

    factorized <- as.data.frame(lapply(meta, FUN=function(x) {
        if (is.numeric(x) & (length(unique(x)) > 5)) {
            quart <- quantile(x, probs = seq(0, 1, 0.25), na.rm = TRUE)
            temp <- cut(x, unique(quart), include.lowest = TRUE)
            return(factor(temp, labels = seq_along(levels(temp))))}
        else {(return(as.factor(x)))}}))
    rownames(factorized) <- rownames(meta)

    # check for IDs and other metavariable with too many levels
    n.levels <- vapply(colnames(factorized), FUN=function(x){
        length(levels(factorized[[x]]))}, FUN.VALUE = integer(1))
    if (any(n.levels > 0.9*nrow(meta))){
        remove.meta <- names(which(n.levels > 0.9*nrow(meta)))
        if (verbose > 1){
            message("++ metadata variables:\n\t",
                paste(remove.meta, collapse = " & "),
                "\n++ have too many levels and ",
                "have been removed from this analysis")
        }
        factorized <- factorized[,-which(colnames(factorized) %in% remove.meta)]
    }
    return(factorized)
}

#'@keywords internal
factorize.bmi <- function(bmi) {
    # ranges taken from CDC
    # https://www.cdc.gov/healthyweight/assessing/bmi/adult_bmi/index.html

    if (!is.matrix(bmi)) bmi <- as.matrix(bmi)
    temp <- vapply(bmi, FUN=function(x) {
        if (is.na(x)) {return(as.character(NA))}
        else if (x < 18.5) {return("Underweight")}
        else if ((x >= 18.5) & (x <= 24.9)) {return("Healthy")}
        else if ((x > 24.9) & (x <= 29.9)) {return("Overweight")}
        else if (x > 29.9) {return("Obese")}},
        FUN.VALUE = character(1), USE.NAMES = TRUE)
    #names(temp) <- rownames(bmi)
    return(as.factor(temp))
}

#'@keywords internal
variance.plots <- function(meta, label, feat, verbose){
    if (verbose > 2){message('+++ computing variance explained by label')}
    stopifnot(all(colnames(feat)==names(label$label)))
    var.label <- vapply(rownames(feat), FUN=function(x){
        x <- feat[x,]
        x <- rank(x)/length(x)
        ss.tot <- sum((x - mean(x))^2)/length(x)
        ss.o.i <- sum(vapply(unique(label$label), function(s){
        sum((x[label$label==s] - mean(x[label$label==s]))^2)
        }, FUN.VALUE = double(1)))/length(x)
        return(1-ss.o.i/ss.tot)
    }, FUN.VALUE = double(1))
    if (any(is.infinite(var.label))){
        var.label[is.infinite(var.label)] <- NA
    }
    par(mfrow=c(2,2), mar=c(4.1, 4.1, 2.1, 2.1))
    for (variable in colnames(meta)){
        if (verbose > 2){message('+++ computing variance explained by ',
            variable)}
        temp <- meta[[variable]]
        names(temp) <- rownames(meta)
        if (any(is.na(temp))){
            temp <- temp[!is.na(temp)]
        }
        var.batch <- vapply(rownames(feat), FUN=function(x){
        x <- feat[x,names(temp)]
        x <- rank(x)/length(x)
        ss.tot <- sum((x - mean(x))^2)/length(x)
        ss.o.i <- sum(vapply(levels(temp), function(s){
            sum((x[temp==s] - mean(x[temp==s]))^2)
        }, FUN.VALUE = double(1)))/length(x)
        return(1-ss.o.i/ss.tot)
        }, FUN.VALUE = double(1))
    if (any(is.infinite(var.batch))){
        var.batch[is.infinite(var.batch)] <- NA
    }
    lim <- round(max(var.label, var.batch, na.rm=TRUE), digits = 2)
    r.mean <- rowMeans(log10(feat+1e-05))
    mean.size <- (r.mean + 5) * 8/5 + 1
    plot(var.label, var.batch, type='n',
        xlab='Variance explained by label',
        ylab=paste0('Variance expained by ', variable),
        xlim=c(0,lim), ylim=c(0,lim))
    symbols(x=var.label, y=var.batch, circles=mean.size, inches=1/9,
        bg=alpha("darkgrey", 0.4), fg=alpha('black', 0.7), add=TRUE)
    abline(0,1, lty=3, col='black')
    }
}

Try the SIAMCAT package in your browser

Any scripts or data that you put into this service are public.

SIAMCAT documentation built on Nov. 8, 2020, 5:14 p.m.