#' Finds differentially expressed genes by comparing neighboring genes
#'
#' @param nearest_neighbours How many nearest neighbours within 1 Mb window to evaluate?
#' @param pSmrExpt SummarizedExperiment object
#' @param pDesign design formula
#' @param pValue_cut cut off value for adjusted p-value
#' @param pLogFold_cut cut off value for relative log fold change compared to neighbouring genes
#' @return A data.frame with top significant genes with the following columns:
#'
#' relative.logFC: relative logFC compared to neighbouring genes
#'
#' P.Value: raw p-value
#'
#' adj.P.Value: adjusted p-value
#'
#' B: log-odds that the gene is differentially expressed
#'
#' @export
#' @importFrom dplyr %>% left_join
#' @importFrom stats model.matrix
#' @importFrom DESeq2 DESeqDataSet estimateSizeFactors counts
#' @importFrom SummarizedExperiment assays assays<- rowData colData
#' @examples count_matrix <- as.matrix(read.table(file = system.file("extdata",
#' "tooth_RNASeq_counts.txt",
#' package = "DELocal")))
#' colData <- data.frame(condition=gsub("\\..*",x=colnames(count_matrix),
#' replacement = ""))
#' gene_location <- read.table(file = system.file("extdata", "gene_location.txt",
#' package = "DELocal"))
#' smrExpt <- SummarizedExperiment::SummarizedExperiment(
#' assays=list(counts=count_matrix),
#' rowData = gene_location,
#' colData=colData)
#' contrast= c("condition","ME13","ME14")
#' require(dplyr)
#' x_genes <- SummarizedExperiment::rowData(smrExpt) %>%
#' as.data.frame() %>%
#' filter(chromosome_name=="X") %>% rownames()
#' DELocal_result <- DELocal(pSmrExpt = smrExpt[x_genes,],
#' nearest_neighbours = 5, pDesign = ~ condition,
#' pValue_cut = 0.05, pLogFold_cut = 0)
DELocal<-
function(pSmrExpt,nearest_neighbours,pDesign,pValue_cut = 0.05, pLogFold_cut = 0){
stopifnot("`pSmrExpt` must be a SummarizedExperiment object" = inherits(pSmrExpt,"SummarizedExperiment"))
if("neighbors_start" %in% (rowData(pSmrExpt) %>% colnames() )){
message("User provided neighborhood will be used")
} else {
message("Default 1Mb neighborhood will be used")
}
pSmrExpt <- DESeqDataSet(pSmrExpt, design = pDesign)
pSmrExpt <- estimateSizeFactors(pSmrExpt)
assays(pSmrExpt)$normalized_counts <- counts(pSmrExpt, normalized = TRUE)
exp_mat <- as.data.frame(assays(pSmrExpt)$normalized_counts)
sample_names <- colnames(exp_mat)
exp_mat$Xgene_id <- rownames(exp_mat)
xRowData <- rowData(pSmrExpt) %>% as.data.frame()
xRowData$Xgene_id <- rownames(xRowData)
exp_mat <- exp_mat %>% left_join(
xRowData,
by = c("Xgene_id" = "Xgene_id")
)
linear_model <- .LocalizedLinearModel(
exp_mat, sample_names,nearest_neighbours + 1
)
design_matrix <- model.matrix(pDesign,colData(pSmrExpt))
DELocal_table <- .DELocal_topTable(pLinear_model = linear_model, pDesign_matrix = design_matrix,
pLogFold_cut = pLogFold_cut,pValue_cut = pValue_cut)
colnames(DELocal_table)[1] <- "relative.logFC"
return(DELocal_table[,c("relative.logFC","P.Value","adj.P.Val","B")])
}
# Table of Top Genes from DELocal Linear Model Fit
#' @importFrom limma topTable lmFit contrasts.fit eBayes
.DELocal_topTable <- function(pLinear_model,pDesign_matrix,pLogFold_cut,pValue_cut) {
fit <- lmFit(pLinear_model,pDesign_matrix)
fit2 <- contrasts.fit(fit, coefficients = ncol(pDesign_matrix))
fit3 <- eBayes(fit2)
top <- topTable(fit3,adjust.method="BH",number=nrow(pLinear_model),lfc = pLogFold_cut)
pos <- which(top[,"adj.P.Val"]<=pValue_cut)
return(top[pos,])
}
#' Returns median expression from different conditions of genes from a neighbourhood of a gene of interest
#'
#' @param pSmrExpt SummarizedExperiment object
#'
#' @param pNearest_neighbours How many nearest neighbours within 1 Mb window to plot
#' @param pDesign design formula
#' @param colorFactor The coloring factor
#' @param pGene_id The gene of interest
#' @return a list which contains both the data from the neighbourhood and a ggplot object
#' @export
#'
#' @importFrom DESeq2 DESeqDataSet estimateSizeFactors counts
#' @importFrom SummarizedExperiment assays assays<- rowData colData
#' @importFrom dplyr %>% left_join pull filter
#' @importFrom matrixStats rowMedians
#' @importFrom reshape2 melt
#' @import ggplot2
#' @examples count_matrix <- as.matrix(read.table(file = system.file("extdata",
#' "tooth_RNASeq_counts.txt",
#' package = "DELocal")))
#' colData <- data.frame(condition=gsub("\\..*",x=colnames(count_matrix),
#' replacement = ""))
#' gene_location <- read.table(file = system.file("extdata", "gene_location.txt",
#' package = "DELocal"))
#' smrExpt <- SummarizedExperiment::SummarizedExperiment(assays=list(counts=count_matrix),
#' rowData = gene_location,
#' colData = colData)
#' contrast= c("condition","ME13","ME14")
#' require(dplyr)
#' x_genes <- SummarizedExperiment::rowData(smrExpt) %>%
#' as.data.frame() %>%
#' filter(chromosome_name=="X") %>% rownames()
#' DELocal::plotNeighbourhood(pSmrExpt = smrExpt, pGene_id = "ENSMUSG00000059401")
plotNeighbourhood<- function(pSmrExpt, pNearest_neighbours=5, pDesign = ~ condition,
colorFactor = "condition", pGene_id){
stopifnot("`pSmrExpt` must be a SummarizedExperiment object" = inherits(pSmrExpt,"SummarizedExperiment"))
if(!(pGene_id %in% rownames(pSmrExpt))){
message(paste(pGene_id," does not exist"))
return()
}
pSmrExpt <- DESeqDataSet(pSmrExpt , design = pDesign)
pSmrExpt <- estimateSizeFactors(pSmrExpt)
assays(pSmrExpt)$normalized_counts <- counts(pSmrExpt, normalized = TRUE)
selected_gene <- pSmrExpt[pGene_id] %>% rowData()
if ("neighbors_start" %in% (rowData(pSmrExpt) %>% colnames())) {
message("User provided neighborhood will be used")
} else {
# message("Default 1Mb neighborhood will be used")
selected_gene$neighbors_start <- selected_gene$start_position-500000
selected_gene$neighbors_end <- selected_gene$start_position+500000
}
neighborsSmExp <- .getNeighbours(pSmrExpt = pSmrExpt,
pSelected_gene = selected_gene,
pNearest_neighbours = pNearest_neighbours )
exp_mat <- as.data.frame(assays(neighborsSmExp)$normalized_counts)
exp_mat$ensembl_gene_id <- rownames(exp_mat)
sample_names <- colnames(exp_mat)
states <- colData(pSmrExpt) %>% as.data.frame() %>% pull(colorFactor) %>% unique()
for(i in states){
xx <-colData(pSmrExpt) %>% as.data.frame() %>% filter(!!sym(colorFactor)==i) %>% rownames()
exp_mat[,i] <- exp_mat[,xx] %>% as.matrix() %>% rowMedians()
}
result_data <- exp_mat[,c("ensembl_gene_id",as.character(states))] %>%
left_join(as.data.frame(rowData(neighborsSmExp)),
by = c("ensembl_gene_id" = "ensembl_gene_id"))
results <- list()
results$data <- melt(result_data[, c(as.character(states),
c("start_position","ensembl_gene_id", "chromosome_name"))],
id = c("start_position","ensembl_gene_id", "chromosome_name"))
results$plot <- ggplot(results$data,aes(x = start_position, y = value, text = ensembl_gene_id)) +
geom_point(aes(colour = variable),size = 3) +
scale_x_continuous("Gene start distance ",
breaks = results$data$start_position,
labels = results$data$ensembl_gene_id,
limits = c(selected_gene$neighbors_start, selected_gene$neighbors_end)) +
ylab("Normalized rna-seq") + ggtitle(paste(selected_gene$ensembl_gene_id)) +
theme_bw() + theme(axis.text.y = element_text(size = 15),
axis.text.x = element_text(size = 10,angle = 45,hjust = 1),
legend.position = "bottom",
legend.title = element_blank())
message(results$plot)
return(results)
}
#' @importFrom stats na.omit
#' @importFrom stats lm median
#' @importFrom dplyr filter
.LocalizedLinearModel<-
function(gene_xprsn_annotation,sample_names,nearest_neighbours){
gene_xprsn_annotation <- gene_xprsn_annotation[order(gene_xprsn_annotation$chromosome_name,
gene_xprsn_annotation$start_position),]
linear_models <- data.frame(matrix(vector(), 0,length(sample_names),
dimnames=list(c(), sample_names)), stringsAsFactors=FALSE)
for (i in seq_len(nrow(gene_xprsn_annotation))){
current_chromosom <- gene_xprsn_annotation[i,]$chromosome_name
current_ensembl_gene_id <- gene_xprsn_annotation[i,]$Xgene_id
if ("neighbors_start" %in% colnames(gene_xprsn_annotation)) {
current_neighbors_start <- gene_xprsn_annotation[i,]$neighbors_start
current_neighbors_end <- gene_xprsn_annotation[i,]$neighbors_end
neighbors <- gene_xprsn_annotation %>% filter(
chromosome_name == current_chromosom &
start_position >= current_neighbors_start &
start_position <= current_neighbors_end
)
} else{
current_loc <- gene_xprsn_annotation[i,]$start_position
neighbors <- gene_xprsn_annotation[gene_xprsn_annotation$chromosome_name == current_chromosom
& abs(gene_xprsn_annotation$start_position - current_loc) < 500000, ]
}
neighbors$distance <- neighbors$start_position - gene_xprsn_annotation[i,]$start_position
neighbors <- neighbors[order(abs(neighbors$distance)),]
neighbors <- na.omit(neighbors)
if(dim(neighbors)[1] >= 2){
num_neighbours <- nearest_neighbours
max_neighbours <- dim(neighbors)[1]
if(max_neighbours < nearest_neighbours){
num_neighbours <- max_neighbours
}
y <- as.numeric( (neighbors[ 1,c(sample_names)]))
median_sample_expression <- as.numeric (apply( (neighbors[2:num_neighbours,sample_names]),2,median))
tryCatch( {lm<-lm( y ~ median_sample_expression)
linear_models[i,] <- lm$residuals
}, error= function(err){
message(paste(i,current_ensembl_gene_id,sep=" "));
})
}else {
linear_models[i,] <- rep(NA,length(sample_names))
}
rownames(linear_models)[i] <- as.character(current_ensembl_gene_id)
}
return(linear_models)
}
#' @importFrom stats na.omit
#' @importFrom SummarizedExperiment rowData
.getNeighbours <- function(pSmrExpt,pSelected_gene,pNearest_neighbours){
neighborsSmExp <- subset(pSmrExpt, subset = (chromosome_name == pSelected_gene$chromosome_name &
start_position >= pSelected_gene$neighbors_start &
start_position <= pSelected_gene$neighbors_end))
neighbors <- rowData(neighborsSmExp) %>% as.data.frame()
neighbors$distance <- neighbors$start_position - pSelected_gene$start_position
neighbors <- neighbors[order(abs(neighbors$distance)),]
neighbors <- na.omit(neighbors)
if(dim(neighborsSmExp)[1] > pNearest_neighbours ){
neighbors<-neighbors[seq_len(pNearest_neighbours),]
}
return(neighborsSmExp[neighbors$ensembl_gene_id,])
}
utils::globalVariables(c("chromosome_name", "start_position", "value",
"ensembl_gene_id","variable"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.