#' Differential Expression Analysis using DESeq2
#'
#' @param disease String; name of a disease
#' @param sample_number Integer; the number of samples which are simulated
#' @param output_directory String; directory of the output files
#' @param description String; indicating which two results are compared "NT_PT" or "NT_Simulated"
#' @param count_matrix_control String; first Matrix with gene expression values, control group
#' @param count_matrix_test String; second Matrix with gene expression values, test group
#' @param file_prefix String; name of the results output file
#'
#' @return
#' @export
#'
#' @examples
DEA <- function(disease, sample_number, output_directory, count_matrix_control, count_matrix_test, description, file_prefix){
cat("Do Differential Expression Analysis: \n")
if(!file.exists(file.path("cache", disease, paste("DEA_results", disease, "_description=", description, "_sample=", sample_number, ".rds", sep="")))){
# Merge Matrices
count_matrix <- merge(count_matrix_control[, 1:sample_number], count_matrix_test[, 1:sample_number], by = "row.names", all = TRUE)
count_matrix <- count_matrix[complete.cases(count_matrix), ]
# Set correct rownames for merged Matrix
row.names(count_matrix) <- count_matrix[, 1]
count_matrix <- count_matrix[, 2:ncol(count_matrix)]
count_matrix <- as.matrix(count_matrix)
# Extract Sample-Names
names <- colnames(count_matrix)
# Extract Gene-Symbols
genes <- rownames(count_matrix)
# Choose Groups
group <- as.factor(rep(c("Control", "Test"), times = c(sample_number, sample_number)))
#group <- as.factor(rep(1, times = sample_number))
# Create col_data
col_data <- data.frame(samples = names, group = group)
# IMPORTANT! Change mode of matrix --> only Integer as counts allowed
mode(count_matrix) <- "integer"
# Create ddsMat
ddsMat <- DESeq2::DESeqDataSetFromMatrix(countData = count_matrix, colData = col_data, design = ~ group)
# Filter samples with low expression
dds_filtered <- ddsMat[ , colSums(counts(ddsMat)) > 5]
# Filter genes with low expression
dds_filtered <- dds_filtered[rowSums(counts(dds_filtered)) > 1, ]
# Normalization
dds_filtered = DESeq2::estimateSizeFactors(dds_filtered)
# Get p-Values
prep <- DESeq2::DESeq(dds_filtered, fitType = 'local', quiet = TRUE, parallel = TRUE)
res <- DESeq2::results(prep)
print(res)
# Write DGE analysis data to csv files
if(!file.exists(file.path(output_directory, disease, description))){
dir.create(file.path(output_directory, disease, description))
}
count_matrix <- cbind(count_matrix, res$pvalue)
write.table(count_matrix, file = file.path(output_directory, disease, description, paste(file_prefix, "_CountMatrix_", description, ".csv", sep="")), quote=FALSE, sep =";", row.names = TRUE, col.names = NA)
write.table(res, file = file.path(output_directory, disease, description, paste(file_prefix, "_DGE_Results_", description, ".csv", sep="")), quote=FALSE, sep =";", row.names = TRUE, col.names = NA)
# log-Transforamtion (vst instead of rlog, because it is faster (vst needs >= 1000 rows))
#dds_transformed = DESeq2::varianceStabilizingTransformation(dds_filtered, fitType = 'local')
# Build heatmap
#mat <- SummarizedExperiment::assay(dds_transformed)
#mat <- mat - rowMeans(mat)
#annotation_c <- as.data.frame(SummarizedExperiment::colData(dds_transformed)[,c("group")])
#colnames(annotation_c) <- c("Group")
#rownames(annotation_c) <- colnames(dds_transformed)
#pheatmap::pheatmap(mat, annotation_col = annotation_c, filename = file.path(output_directory, paste(description, "_pheatmap_exprVal_", disease, "_sample=", sample_number, ".pdf", sep="")))
# Save as RDS file in cache
saveRDS(res, file.path("cache", disease, paste("DEA_results", disease, "_description=", description, "_sample=", sample_number, ".rds", sep="")))
cat("--> DONE. \n")
}
else{
res <- readRDS(file.path("cache", disease, paste("DEA_results", disease, "_description=", description, "_sample=", sample_number, ".rds", sep="")))
cat("Loaded from save file.\n")
}
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.