## Load required packages suppressPackageStartupMessages({ library(knitr) library(SummarizedExperiment) library(limma) library(tidyr) library(scales) library(plotly) })
## Import variables md5sum <- params$md5sum data_samples <- params$data_samples data_counts <- params$data_counts data_annotation <- params$data_annotation setGeneName <- params$setGeneName cpm_value <- params$cpm_value cpm_perc <- params$cpm_perc excluded_samples <- params$excluded_samples design_base <- params$design_base design_value <- params$design_value matrix_v1 <- params$matrix_v1 matrix_v2 <- params$matrix_v2 alpha <- params$alpha
## Values of variables
md5sum
cpm_value
cpm_perc
excluded_samples
alpha
design_base
design_value
matrix_v1
matrix_v2
## Match sample sheet & count data data_samples <- data_samples[!rownames(data_samples) %in% excluded_samples, , drop = FALSE] data_counts <- data_counts[, !colnames(data_counts) %in% excluded_samples] ## Read data as Summarized Experiment se <- readCountsFromTable(data_counts, data_samples) se <- addSamplesFromTableToSE(se, data_samples) ## Alignment summary plot_data <- alignment_summary(se) bar_plot( df = plot_data, x = "count", y = "sample", group = design_base, fill = "feature", rev = TRUE, title = "Count assignments", xlab = "Counts", ylab = "" ) ## Complexity plot_data <- complexity(se) line_plot( df = plot_data, x = "rank", y = "fraction", group = design_base, plot = "complexity", title = "Gene complexity", xlab = "Cumulative reads per number of genes", ylab = "Number of genes" ) ## Create final Summarized Experiment se <- readCountsFromTable(data_counts[!grepl('^__', rownames(data_counts)), ], data_samples) se <- addSamplesFromTableToSE(se, data_samples) if (!is.null(data_annotation)) { se <- addAnnotationsFromTableToSE(se, data_annotation) }
## Create DGE list dge <- DGEList(counts = assay(se), samples = colData(se), genes = rowData(se)) row.names(dge$genes) <- row.names(dge$counts) ## Remove genes with all 0 counts dge <- dge[rowSums(abs(dge$counts)) > 1, ] ## Count distribution tempDge <- dge tempDge$counts <- cpm(dge, log = TRUE, prior.count = 1) plot_data <- count_dist(tempDge) line_plot( df = plot_data, x = "x", y = "y", group = design_base, title = "Gene count distribution", xlab = "Log2CPM", ylab = "Density" )
## Filter data on low expressed genes limmaV <- calcNormFactors(dge, method = "TMM") counts <- cpm(limmaV, log = TRUE, prior.count = 1) selectedFeatures <- rownames(limmaV)[apply(counts, 1, function(v) sum(v >= cpm_value)) >= (cpm_perc / 100) * ncol(counts)] ## Get high expressed genes highExprDge <- dge[selectedFeatures, , keep.lib.sizes = FALSE] ## Normalize data normDge <- calcNormFactors(highExprDge, method = "TMM") ## Count distribution tempDge <- normDge tempDge$counts <- cpm(normDge, log = TRUE, prior.count = 1) plot_data <- count_dist(tempDge) line_plot( df = plot_data, x = "x", y = "y", group = design_base, title = "Gene count distribution", xlab = "Log2CPM", ylab = "Density" ) ## PCA plot_data <- pca_data(tempDge) scatter_plot( df = plot_data, x = "PC1", y = "PC2", group = design_base, size = 4, title = "PCA", xlab = paste0("PC1", " (", plot_data$percent[1], "%)"), ylab = paste0("PC2", " (", plot_data$percent[2], "%)") )
## Create design get_design <- createDesign(normDge$samples, design_base, design_value, matrix_v1, matrix_v2) get_design normDge <- relevelSamples(normDge, design_base, design_value, matrix_v1, matrix_v2) design <- model.matrix(eval(parse(text = get_design)), normDge$samples) design ## Get contrast values get_matrix1 <- createMatrix(normDge, design_base, design_value, matrix_v1) get_matrix2 <- createMatrix(normDge, design_base, design_value, matrix_v2) get_matrix1 get_matrix2 ## Create contrast contrast <- createContrast(design, get_matrix1, get_matrix2) contrast
## Run analysis voom <- voom(normDge, design) vfit <- lmFit(voom) vfit <- contrasts.fit(vfit, contrasts = contrast) efit <- eBayes(vfit) ## Get DE values efit$DE <- decideTests(efit, p.value = alpha) summary(efit$DE)
## Get DE tables deTab <- data.frame(topTable(efit, coef = 1, n = Inf)) deTab <- deTab[order(rownames(deTab)), ] deTab$DE <- c(efit$DE[order(rownames(efit$DE)), ]) ## Reorder & rename table values deTab$t <- NULL deTab$B <- NULL deTab <- rename(deTab, "avgLog2CPM" = "AveExpr") deTab <- rename(deTab, "avgLog2FC" = "logFC") deTab <- rename(deTab, "FDR" = "adj.P.Val") deOrder <- c("avgLog2CPM", "avgLog2FC", "P.Value", "FDR", "DE") ## Reorder annotation (if present) if (!is.null(data_annotation)) { deTab <- deTab[colnames(deTab) [c(1, match(deOrder, names(deTab)), 2:(ncol(deTab) - 5))]] } else { deTab <- deTab[deOrder] } ## Add gene symbol if (setGeneName == "symbol" && !is.null(data_annotation)) { tempCol <- rownames(deTab) rownames(deTab) <- make.names(deTab$geneName, unique = TRUE) deTab$geneName <- tempCol colnames(deTab)[1] <- "geneId" rownames(normDge$counts) <- rownames(deTab) }
## MA index <- round(seq(1, nrow(deTab), length.out = 1000)) plot_data <- ma(deTab) scatter_plot( df = plot_data, x = "avgLog2CPM", y = "avgLog2FC", group = "DE", index = index, title = "MA Plot", xlab = "Average Log2CPM", ylab = "Average Log2FC" ) ## P-value plot_data <- pvalue_data(deTab) bar_plot( df = plot_data, x = "p", y = "x", title = "P-Value plot", xlab = "P-Value", ylab = "Count" )
## Save data files deTab <- deTab[order(deTab$FDR),] normDge$counts <- cpm(normDge, log = TRUE, prior.count = 1) save(deTab, normDge, file = "analysis.RData")
## Session info sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.