plots.R
In DiscoRhythm: Interactive Workflow for Discovering Rhythmicity in Biological Data

# Get internal package color palette 
colors <- DiscoRhythm:::discoColors

##### ggplot Theme #####

sharedtheme <- DiscoRhythm:::theme_disco

##### Inter-Sample Correlation #####

plotHeatMCor <- function(mat, k, ...) {
    diag(mat) <- NA
    heatmaply::heatmaply(mat,
        colors = colors$quantRamp(20),
        k_row = k,
        k_col = k,
        ...
        )
}

plotAvgCor <- function(Metadata, meanCor, corCut = 0.95, tUnit = "CT") {
    library(ggplot2)

    set.seed(666)
    df <- as.data.frame(meanCor)
    colnames(df) <- "MeanCor"
    df$ID <- rownames(df)
    df$toRemove <- df$MeanCor < corCut
    df$Time <- Metadata$Time

    tmp <- sort(unique(Metadata$Time))
    tmp <- factor(tmp[seq(1, length(tmp), 2)])
  # make 2 neighbor times different color
    df$colorF <- 3
    df$colorF[df$toRemove == FALSE & df$Time %in% tmp] <- 1
    df$colorF[df$toRemove == FALSE & !df$Time %in% tmp] <- 2

    ggplot(df, aes(Time, MeanCor)) +
    # geom_violin(fill = colors$neutral) +
    geom_jitter(aes(
        color = factor(colorF),
        alpha = toRemove,
        shape = toRemove,
        group = ID
        ),
    size = 2, alpha = 0.8, width = 0.25
    ) +
    labs(
        x = paste0("Sample Time (", tUnit, ")"),
        y = "Average Inter-Sample Correlation"
        ) +
    scale_shape_manual(values = c("FALSE" = 16, "TRUE" = 4)) +
    scale_alpha_manual(values = c(0.95, 0.3)) +
    scale_color_manual(values = c(colors$neutral, colors$neutral2,
        colors$outlier)) +
    sharedtheme() +
    theme(legend.position = "none") +
    geom_hline(yintercept = corCut, color = colors$outlier)
}


#####  Filtering Summary #####

plotObsDistribution <- function(data, selectedSamples, ylab = "Value") {
    selcol <- colors$discoMain2
    unselcol <- colors$neutral2

    col <- rep(unselcol, ncol(data))
    col[selectedSamples] <- selcol
    boxplot(data, col = col, las = 2, ylab = ylab)
    legend("bottomright",
        inset = c(0, 1), xpd = TRUE, horiz = TRUE, bty = "n",
        legend = c("TRUE", "FALSE"), pch = 15,
        col = c(selcol, unselcol), title = "Selected"
        )
}

#####  PCA #####

plotPCAdists <- function(pca, SDfactor, pcToUse = "", npcs = 10) {
    suppressPackageStartupMessages({library(dplyr)})
    set.seed(1) # For consistent jitter in ggplot
    
    # Exclude last PC from visualization as it interferes with
    # the violin plotting
    npcs <- min(npcs, ncol(pca$rotation)-1)

    df <- reshape2::melt(pca$x[, 1:npcs])
    df2 <- df
    df2$usePC = df2$Var2 %in% pcToUse
    df2 <- df2 %>%
        group_by(Var2) %>%
        mutate(SD = sd(value)) %>%
        mutate(outlier = abs(value - mean(value)) >= SD * SDfactor) %>%
        mutate(PCn = as.numeric(substr(Var2, 3, 7)))

    p <- df2 %>%
    ggplot(aes(Var2, value, alpha = usePC)) +
    labs(x = "", y = "PC Score") +
    scale_alpha_manual(values = c(0.5, 1),
                       limits=c(FALSE,TRUE)) +
    geom_hline(yintercept = 0) +
    geom_violin(fill = colors$neutral2)

    # MC: I think this causes errors if not split like this
    if(any(df2$outlier)){
        p <- p + geom_point(
            data = df2 %>% subset(outlier), aes(group = Var1),
            position = position_jitter(width = 0.25), shape = 4, size = 2,
            color = colors$outlier
            )
    }

    p <- p +
    geom_line(data = df2, aes(PCn, SD * SDfactor), linetype = 3,
        color = colors$outlier, alpha = 1) +
    geom_line(data = df2, aes(PCn, -SD * SDfactor), linetype = 3,
        color = colors$outlier, alpha = 1) +
    sharedtheme() +
    theme(legend.position = "none")
    return(p)
}


plotPCAWithShape <- function(x, Metadata, col, pcA = 1, pcB = 2,
    outids = NULL, plotTitle = "") {
    KEY <- rownames(x)
    COLOR <- as.factor(Metadata[which(Metadata$ID %in% KEY), col])
    da <- data.frame(x[, c(as.numeric(pcA), as.numeric(pcB))])
    da$isoutlier <- FALSE
    if (!is.null(outids)) da$isoutlier[outids] <- TRUE
    if (pcA == pcB) warning("X and Y are the same principal component")
    data <- da 
    colnames(data) <- c("A", "B", "isoutlier")

    withcolor <- length(COLOR) != 0

    if (!withcolor) {
        p <- ggplot(data, aes(A, B, key = KEY)) +
        geom_point(aes(shape = isoutlier), size = 2, alpha = 0.8,
            color = colors$neutral)
    } else {
        p <- ggplot(data, aes(A, B, color = COLOR, key = KEY)) +
        geom_point(aes(shape = isoutlier), size = 2, alpha = 0.8)
    }
    p <- p +
    labs(
        x = paste("PC", as.numeric(pcA)),
        y = paste("PC", as.numeric(pcB)),
        color = NULL,
        title = plotTitle
        ) +
    sharedtheme() +
    theme(legend.position = "none") +
    scale_shape_manual(values = c("TRUE"=4,"FALSE"=16))
    if (withcolor) {
        p <- p + scale_color_manual(
          values=colors$qualRamp(length(unique(COLOR))))
    }
    p
}

# Plot variance explained from two different PCAs 
# (before and after removing outliers)
# Label which PCs were used for outlier detection
plotPCAstats <- function(pcaBeforeTable, pcaAfterTable, pcToUse="",
                         npcs=10 # Deprecated
                         ) {
    
    if(npcs!=10){
      warning("The npcs argument is no longer used and will be removed in 
              DiscoRhythm 1.8.0")
    }
  
    npcs <- nrow(pcaBeforeTable)
  
    master_tab <- rbind(
      data.frame(pcaAfterTable, status = "After"),
      data.frame(pcaBeforeTable, status = "Before")
    )
    
    plotdf <- master_tab
    plotdf$PC <- factor(plotdf$PC,levels=unique(plotdf$PC))
    plotdf$Proportion.of.Variance = as.numeric(plotdf$Proportion.of.Variance)
    plotdf$usePC = (master_tab$PC %in% pcToUse)
    plotdf$status=factor(plotdf$status,levels=c("Before","After"))
      
    ggplot(plotdf,aes(PC, Proportion.of.Variance, fill = status,
                      # Only apply alpha to unused "Before" PCs
                      alpha=!(!usePC & status=="Before") 
                      )) +
    geom_bar(
        color = "black",
        stat = "identity", position = "dodge"
        ) +
    labs(
        x = "", y = "Proportion of Variance",
        title = "",
        fill = ""
        ) +
    scale_alpha_manual(
        values = c(0.2,1),
        limits=c(FALSE,TRUE)
        )+
    scale_fill_manual(
        values = c(colors$neutral2, colors$outlier),
        limits = c("Before", "After")
        ) +
    scale_y_continuous(labels = scales::percent) +
    labs(alpha="usePC") +
    sharedtheme() +
    scale_x_discrete(limits = paste0("PC", seq_len(min(npcs,10))))
}

##### Before Outlier Removal
plotPCAPairs <- function(x,pairsPC) {
    x2 <- x[,pairsPC]
pairs(x2, col=adjustcolor(colors$neutral2, 0.5), pch=20, lwd=0, gap=0, asp=1,
      panel=function(...) {grid(nx=5, ny=5, lty=1, col="grey90");
        points(...)},
      font.labels=2, oma=c(1,1,3,1), xaxt="n", yaxt="n"
      )
}

##### After Outlier Removal
plotPCAPairsAfter <- function(x,pairsPC,outliers) {
    if (sum(outliers) == 0) {
        p <- errorPlot(main = "No Outliers\nTry reducing threshold",
            textcol = "black")
    } else if (sum(!outliers) < 2) {
        p <- errorPlot(
          main = "0 or 1 Samples Remaining\nTry increasing threshold",
            textcol = "black")
    } else {
        if (!all(pairsPC %in% colnames(x))) {
            pairsPC <- pairsPC[pairsPC %in% colnames(x)]
        }
      x2 <- x[,pairsPC]
      p <- pairs(x2, col=adjustcolor(colors$neutral2, 0.5), pch=20, lwd=0,
                 gap=0, asp=1,
      panel=function(...) {grid(nx=5, ny=5, lty=1, col="grey90");
        points(...)},
      font.labels=2, oma=c(1,1,3,1), xaxt="n", yaxt="n"
      )
    }
    p
}

#####  Row Selection #####

ObsVsExpSNR <- function(allStats, anovaFstatCut) {
    perSig <- formatC(mean(allStats$statistic > anovaFstatCut) * 100)
    nSig <- sum(allStats$statistic > anovaFstatCut)
    myTitle <- paste0("Signal to Noise ratio, ", perSig,
        "% Remaining (n=", nSig, ")")
    tmpdf <- data.frame(statistic = stats::rf(1e3, allStats$df.between[1],
        allStats$df.within[1]))
    plyr::rbind.fill(
        data.frame(tmpdf, Distribution = "Theoretical"),
        data.frame(allStats, Distribution = "Observed")
        ) %>%
    mutate(Distribution = factor(Distribution,
        levels = c("Observed", "Theoretical"))) %>%
    ggplot(aes(Distribution, log10(statistic), fill = Distribution)) +
    sharedtheme() +
    guides(fill = FALSE) +
    ggtitle(myTitle) +
    geom_violin(alpha = 0.7, color = colors$neutral2) +
    geom_hline(yintercept = log10(anovaFstatCut)) +
    scale_fill_manual(values = c("Observed" = colors$sig,
        "Theoretical" = colors$neutral))
}

##### Period Detection #####

plotPeriodDetect <- function(dat, tUnit = "hours") {
    dat %>%
    ggplot(aes(as.factor(Var2), value)) +
    sharedtheme() +
    ylab("Cosinor R-squared") +
    geom_boxplot(fill = colors$`neutral-light`,
        outlier.colour = colors$neutral2) +
    xlab("Cosinor Period")
}

plotOVpcaScatter <- function(OVpca, regressionMeta, OVperiodSelect,
    tUnit = "CT", PCsToUse=NULL) {
    time <- regressionMeta$Time
    
    if(is.null(PCsToUse)){
      PCsToUse <- ifelse(ncol(OVpca$x) > 10, 1:10, 1:ncol(OVpca$x))  
    } else{
      PCsToUse <- as.numeric(substr(PCsToUse,3,20))
    }
    
    per <- as.numeric(OVperiodSelect)

  # Shade by fit period/2
  # rects <- data.frame(xmin = per / 2, xmax = per, ymin = -Inf, ymax = Inf)

  # Make plotting data.frame
    plotdf <- reshape2::melt(data.frame("time"=regressionMeta$Time,
        OVpca$x[,PCsToUse]),id.var="time")
    colnames(plotdf) <- c("time","PC","value")
    plotdf$nPC <- as.numeric(gsub("PC","",plotdf$PC))
    plotdf$PC <- reorder(plotdf$PC, plotdf$nPC)

    p <- ggplot(plotdf,aes(time, value)) +
    # geom_rect(data = rects, aes(xmin = xmin, ymin = ymin, xmax = xmax,
    #     ymax = ymax),inherit.aes = FALSE, alpha = 0.2) +
    sharedtheme() +
    geom_point(color = colors$neutral) +
    labs(x = sprintf("Time (%s)",tUnit), y = "PC Score") +
    geom_smooth(method = "lm",
        formula = y ~ sin(x / per * 2 * pi) + cos(x / per * 2 * pi),
        color = colors$neutral2) +
    facet_wrap(~PC)

    return(p)
}

#####  Oscillation Detection #####

plotPvalues <- function(pv, nBin = 80, cutSignificant = 0.05, title = "") {
  perSig <- formatC(mean(pv < cutSignificant) * 100)
    nSig <- formatC(sum(pv < cutSignificant))
    p <- ggplot(data.frame(pv), aes(pv)) +
    geom_histogram(color = colors$neutral2, fill = colors$neutral,
        breaks = 0:nBin / nBin) +
    sharedtheme() +
    labs(
        x = "p-value", y = "Number of Observations",
        title = paste0(title, ", \n", perSig, "% Significant (", formatC(nSig),
                       "/",formatC(length(pv)), ")")
        ) +
    geom_vline(aes(xintercept = as.numeric(cutSignificant)),
        colour = colors$sig)
    p
}
plotPQValueHist <- function(discoODAres, bg, nbreaks, cutoff, id2name) {
    plist <- list()
    for (mcod in names(discoODAres)) {
        plist[[mcod]] <- plotPvalues(
            discoODAres[[mcod]]$pvalue[bg],
            nbreaks,
            cutoff,
            id2name[mcod]
            )
    }
  # arrange plots
    plist[["ncol"]] <- length(discoODAres)
    do.call(gridExtra::grid.arrange, plist)
}

plotAcroHist <- function(vec, binNum, name = "", rose = TRUE, per = 24) {
    numbins <- as.numeric(binNum)
    if (!rose) {
    # convert vector to dataframe
        vec <- data.frame(acrophase = vec)
        p <- ggplot(vec, aes(x = acrophase)) +
        geom_histogram(data = vec, position = "identity", bins = numbins,
            color = colors$neutral2, fill = colors$neutral) +
        scale_x_discrete("", limits = c(0, per)) +
        sharedtheme()
    } else {
    
    # value for centering the bin intervals (i.e. half bin width)  
    offset <- per / numbins / 2
    
    # round acro to nearest bin
        tmp <- floor((vec%%per) / per * numbins) / numbins * per
        cnts <- table(tmp)
        plotdf <- data.frame(
            "acrophase" = as.numeric(names(cnts)),
            "Count" = as.numeric(cnts)
            )
        
    # fill in zeros
    poss_vals <- seq(numbins)/numbins*per-offset*2
    zero_idx <- which(!(poss_vals %in% plotdf$acrophase))
    plotdf <- rbind(plotdf,data.frame("acrophase"=poss_vals[zero_idx],
                                      "Count"=0))

    # find limits and labels for the y axis
        ymax <- max(cnts)
        int <- pretty(c(0, ymax))[2]
        breaks <- if (ymax %/% int > 5) {
            seq(int * 2, ymax, int * 2)
        } else {
            seq(int, ymax, int)
        }
        labels <- formatC(breaks, format = "d", big.mark = ",")
        mcnt <- sum(cnts) / numbins

    # draw
        p <-
          ggplot() +
        geom_rect(data = data.frame(1), ymin = mcnt, ymax = Inf, xmin = 0,
            xmax = per, fill = colors$discoMain, alpha = 0.2) +
        geom_rect(data = data.frame(1), ymin = 0, ymax = mcnt, xmin = 0,
            xmax = per, fill = colors$discoSec, alpha = 0.2) +
        geom_bar(data = plotdf, aes(x = acrophase + offset, y = Count),
            stat = "identity", colour = colors$neutral2,
            position=position_identity(),
            fill = colors$neutral,width = offset) +
        geom_hline(yintercept = c(0, breaks), colour = colors$neutral,
            size = 0.2) +
        scale_x_continuous(name = "", limits = c(0, per),
            breaks = seq(0, per, length.out = 5)) +
        scale_y_continuous(name = "Count", limits = c(-ymax * 0.5, NA),
            breaks = c(0, as.numeric(labels))) +
        annotate("text", x = 0, y = -ymax * 0.5, label = "Acrophase",
            size = 2.5) +
        sharedtheme() +
        theme(
            panel.grid.major.y = element_blank(),
            panel.grid.minor.y = element_blank(),
            panel.grid = element_blank(),
            panel.border = element_blank(),
            aspect.ratio = 1
            ) +
        coord_polar()
    }
    p + ggtitle(name)
}
plotAllAcroHist <- function(discoODAres, fg, nbreaks, per,
    asrose = TRUE, id2name) {
    acrolist <- list()
    for (mcod in names(discoODAres)) {
        id <- fg[[mcod]]
        thismethod <- id2name[mcod]
        thisphase <- discoODAres[[mcod]]$acrophase[id]
        if (length(thisphase) > 1) {
            acrolist[[mcod]] <- plotAcroHist(
                thisphase,
                nbreaks,
                thismethod,
                asrose,
                per
                )
        } else {
      # error plot
            acrolist[[mcod]] <- errorPlot("Not Enough Significant Data") +
            ggtitle(thismethod)
        }
    }
    acrolist[["ncol"]] <- length(discoODAres)
    do.call(gridExtra::grid.arrange, acrolist)
}


plotAmpliHist <- function(discoODAres, bg, fg, binNum, obsUnit = "") {

  if(is.null(discoODAres[["CS"]])){
   return(errorPlot(main = "No Cosinor Results\n
                    No Amplitudes Available"))
  }

    tmp <- data.frame(
        amplitude = discoODAres[["CS"]]$amplitude,
        SigOrNot = "Not Significant", stringsAsFactors = FALSE
        )

    if (length(fg) > 0) {
        tmp$SigOrNot[fg] <- "Significant"
        vec <- tmp[bg, ]
        sigMean <- mean(vec$amplitude[vec$SigOrNot == "Significant"])
    } else {
        vec <- tmp[bg, ]
        sigMean <- NULL
    }

  # Binning the data manually so plot annotation can use count directly
    nfact <- max(vec$amplitude) - min(vec$amplitude)
    plotdf <- vec %>%
    mutate(bin = ceiling((amplitude - min(amplitude)) / nfact * binNum)) %>%
    group_by(bin, SigOrNot) %>%
    summarise(N = dplyr::n(), value = (bin * nfact)[1] / binNum)

    gg <- ggplot(plotdf, aes(x = value, y = N, fill = SigOrNot)) +
    geom_bar(stat = "identity") +
    ggtitle(paste("Amplitudes Estimated by", "OLS/Cosinor")) +
    labs(fill = "", x = paste0("Amplitude (", obsUnit, ")")) +
    scale_fill_manual(values = c("Significant" = colors$sig,
        "Not Significant" = colors$neutral)) +
    sharedtheme()

    if (!is.null(sigMean)) {
        gg <- gg +
        geom_segment(
            x = sigMean, xend = sigMean, y = 0, yend = max(plotdf$N) * 0.8,
            colour = colors$sig, linetype = 2, size = 0.5, alpha = 0.1
            ) +
        annotate(
            geom = "text", x = as.numeric(sigMean), y = max(plotdf$N) * 0.87,
            label = paste0("Mean\n", formatC(sigMean)), color = colors$sig
            )
    }
    gg
}

allIntersect <- function(fg) {
    if(sum(sapply(fg,length)>0)<2) {
        return(errorPlot("Multiple methods needed\n for intersection"))
    }
    library(UpSetR)
    upset(fromList(fg), order.by = "freq")
}

##### Fisher's Exact #####

plotFisherExact <- function(var1, var2, var1Name, var2Name) {

    col1 <- c(sum(var1 & var2), sum(var1 & !var2),
        sum(var1 & var2) + sum(var1 & !var2))
    col2 <- c(sum(!var1 & var2), sum(!var1 & !var2),
        sum(!var1 & var2) + sum(!var1 & !var2))
    col3 <- col1 + col2
    col3[3] <- 0
    rowNames <- c(
        paste(var2Name, "Significant"),
        paste(var2Name, "Non-Significant"),
        "Sum"
        )
    colNames <- c(
        paste(var1Name, "Significant"),
        paste(var1Name, "Non-Significant"),
        "Sum"
        )
    df <- data.frame(col1, col2, col3)
    df[3, 3] <- ""
    rownames(df) <- rowNames
    colnames(df) <- colNames
    xtable::xtable(df)
  # grid.table(df,rows = rowNames, cols = colNames)
}

vennFisherExact <- function(var1, var2, var1Name, var2Name) {
    a <- sum(var1)
    b <- sum(var2)
    c <- sum(var1 & var2)
    grid.draw(draw.pairwise.venn(a, b, c,
        category = c(paste("significant", var1Name),
            paste("significant", var2Name)),
        lty = rep("blank", 2),
        fill = c(colors$neutral, colors$neutral2), alpha = rep(0.25, 2),
        cat.pos = c(0, 0), cat.dist = rep(0.025, 2)
        ))
}

##### Individual Models #####

plotRowFit <- function(times, y, CS = TRUE, withErrorBar = TRUE, per = 24,
    tunit = "CT", ylab = "Value") {
    se <- function(x) {
        return(sd(x) / sqrt(length(x)))
    }
    # rects <- data.frame(xmin = per / 2, xmax = per, ymin = -Inf, ymax = Inf)

    if (withErrorBar) {
        plotdf <- data.frame(times = times, y) %>%
        group_by(times) %>%
        summarise(vals = mean(y), se = se(y))
    } else {
        plotdf <- data.frame(times = times, vals = y)
    }

    p <- ggplot(plotdf, aes(times, vals))

    if (CS) {
        p <- p + geom_smooth(
            method = "lm",
            formula = y ~ sin(x / per * 2 * pi) + cos(x / per * 2 * pi),
            se = !withErrorBar, fullrange = TRUE, color = colors$neutral2
            )
    } else {
        p <- p + geom_smooth(se = !withErrorBar, fullrange = TRUE,
                             method="loess",formula=y~x,
            color = colors$neutral2)
    }
    p <- p +
    # geom_rect(data = rects,
    #     aes(xmin = xmin, ymin = ymin, xmax = xmax, ymax = ymax),
    #     inherit.aes = FALSE, alpha = 0.2) +
    geom_point(data = plotdf, aes(y = vals, x = times), inherit.aes = FALSE,
        color = colors$neutral) +
    sharedtheme() +
    labs(y = ylab, x = paste0("Sample Time (", tunit, ")"))

    if (withErrorBar) {
        p + geom_errorbar(data = plotdf, aes(ymin = vals - se * 1.96,
            ymax = vals + se * 1.96),
        color = colors$neutral)
    } else {
        p
    }
}

#####  Error Plot #####

errorPlot <- function(main = "ERROR", top = "", bottom = "", textcol = "red") {
    df <- data.frame(
        x = c(1, 1, 1),
        y = c(1.5, 1, 0.5),
        text = c(top, main, bottom)
        )
    ggplot(df, aes(x, y)) +
    scale_y_discrete("", limits = 0:24) +
    labs(x = "") +
    geom_text(aes(label = text), colour = textcol, vjust = "inward",
        hjust = "inward", size = c(8, 8, 8)) +
    sharedtheme()+
    theme(
        legend.position = "none",
        axis.line = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank()
        )
}
Any scripts or data that you put into this service are public.
DiscoRhythm documentation built on Nov. 8, 2020, 7:32 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
DiscoRhythm
Interactive Workflow for Discovering Rhythmicity in Biological Data

inst/app/code/server/plots.R
In DiscoRhythm: Interactive Workflow for Discovering Rhythmicity in Biological Data

Try the DiscoRhythm package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DiscoRhythm Interactive Workflow for Discovering Rhythmicity in Biological Data

inst/app/code/server/plots.R In DiscoRhythm: Interactive Workflow for Discovering Rhythmicity in Biological Data

Try the DiscoRhythm package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DiscoRhythm
Interactive Workflow for Discovering Rhythmicity in Biological Data

inst/app/code/server/plots.R
In DiscoRhythm: Interactive Workflow for Discovering Rhythmicity in Biological Data