library(DESeq2)
library(ggplot2)
library(pheatmap)
library(wesanderson)
library(data.table)
library(org.Hs.eg.db)
library(ggridges)
library(ggpubr)
library(patchwork)
setwd("/home/joh/Projet/PED/April2022")
library(future)
options(future.fork.enable = FALSE)
supportsMulticore()
availableCores() #40 cores
availableWorkers()
plan()
plan("multiprocess", workers = 5)
#source("./signature_generator.R")
dataDIR <- "raw_data/agilent_data"
##-------- Load Counts and metadata
samples_counts.1 <- read.delim(paste0("./",dataDIR,"/NGS2021_4678.count.genes.txt"),skip = 1)
samples_counts.1 <- samples_counts.1[,-c(2:6)]
rownames(samples_counts.1) <- samples_counts.1$Geneid
#samples_counts.1$Geneid <- NULL
colnames(samples_counts.1)
samples_counts <- read.delim(paste0("./",dataDIR,"/NGS2022_5083.count.genes.txt"),skip = 1)
samples_counts <- samples_counts[,-c(2:6)]
rownames(samples_counts) <- samples_counts$Geneid
#samples_counts$Geneid <- NULL
colnames(samples_counts)
samples_counts <- merge(samples_counts.1, samples_counts)
colnames(samples_counts)
rownames(samples_counts) <- samples_counts$Geneid
samples_counts$Geneid <- NULL
samples_meta <- read.csv(paste0(dataDIR,"/TF_SampleID_JO.csv"))
#samples_meta$BulkName <- gsub("-","_",samples_meta$BulkName)
#samples_meta$BulkName <- gsub(" ","_",samples_meta$BulkName)
#write.csv(samples_meta, paste0(dataDIR,"/TF_SampleID_JO.csv"))
colnames(samples_counts) %in% samples_meta$BulkName
#---- Load 116 gene signature
covid_genes <- read.csv("./raw_data/MM_list.csv")
random_genes <- covid_genes # save for additional genes
covid_genes <- covid_genes[which(covid_genes$Group == "Myosig"),]
covid_genes <- covid_genes$Signature
covid_genes <- unique(covid_genes)
length(covid_genes)
length(random_genes$Signature)
#---- Load 105 genes
reduced_genes <- read.csv("./raw_data/public_data/MM_list_reduced.csv")
reduced_random_genes <- reduced_genes # save for additional genes
reduced_genes <- reduced_genes[which(reduced_genes$Group == "Myosig"),]
reduced_genes <- reduced_genes$Signature
reduced_genes <- unique(reduced_genes)
length(reduced_genes)
length(reduced_random_genes$Signature)
#--- MISC paper gene
top25_genes_paper <- read.csv("./raw_data/MM_sig_paper.csv")
top25_genes_paper <- top25_genes_paper[1:25,"SignatureMyo"]
#################################################################################
# Distribution
#################################################################################
samples_counts
samples_meta
message("** Selecting highly variable genes in 3 Chips")
rv <- as.data.frame(rowVars(as.matrix(samples_counts)))
rownames(rv) <- rownames(samples_counts)
colnames(rv) <- "Variance"
rv$geneid <- rownames(rv)
# select the ntop genes by variance
ntop <- 1000
select <- rv[order(rv$Variance, decreasing=TRUE),]
select <- select[1:ntop,]
message("** Converting Ensemble to Symbol")
symbols <- suppressMessages(mapIds(org.Hs.eg.db, keys = rownames(select), keytype = "ENSEMBL", column="SYMBOL"))
master_gene_table <- as.data.frame(symbols)
master_gene_table$ensembl <- rownames(master_gene_table)
master_gene_table$symbols <- ifelse(is.na(master_gene_table$symbols), master_gene_table$ensembl, master_gene_table$symbols)
message("** Finding tageted genes (214) + adding 100 non targeted highly variable genes")
length(random_genes$Signature)
master_gene_table$target <- ifelse(master_gene_table$symbols %in% random_genes$Signature, "target","non")
table(master_gene_table$target)
selected_gene_table <- master_gene_table[which(master_gene_table$target=="non"),]
selected.gene <- selected_gene_table[1:100,]$symbols
selected_gene_table <- master_gene_table[which(master_gene_table$symbols %in% c(selected.gene, random_genes$Signature)),]
selected_gene_table$symbols <- factor(selected_gene_table$symbols, levels = c(random_genes$Signature, selected.gene))
nrow(selected_gene_table)
separate_data <- function(counts, samples_metadata, selected_group){
samples_metadata <- samples_metadata[which(samples_metadata$Chip == selected_group),]
counts <- counts[,samples_metadata$BulkName]
message(paste0("Selected chip : ", unique(samples_metadata$Chip)))
return(counts)
}
generate_normalised_data_byGene <- function(datasetname, counts, samples_metadata, selected_group,
selected.gene.table, genetype){
tmp.data <- separate_data(counts, samples_metadata, selected_group)
if(genetype=="ensembl"){
tmp.data <- as.data.frame(t(tmp.data[selected.gene.table$ensembl,]))
}else{
tmp.data <- as.data.frame(t(tmp.data[selected.gene.table$symbols,]))
}
tmp.data$BulkName <- rownames(tmp.data)
data.plt <- list()
n <- ncol(tmp.data) -1
for(i in 1:n){
d <- tmp.data[,c(i,ncol(tmp.data))]
d$Gene <- colnames(d)[1]
colnames(d) <- c("value","BulkName", "Gene")
data.plt[[i]] <- d
}
final.data <- Reduce(rbind, data.plt)
final.data$Chip <- selected_group
final.data$dataset <- datasetname
return(final.data)
}
generate_data_for_plot <- function(datasetname, counts, samples_metadata, selected_group,
selected.gene.table, genetype, master_gene_table){
message("** Generating data for plots")
data <- generate_normalised_data_byGene(datasetname, counts, samples_metadata, selected_group, selected.gene.table, genetype)
data$symbols <- master_gene_table$symbols[match(data$Gene, master_gene_table$ensembl)]
data$symbols <- factor(data$symbols , levels=rev(levels(selected.gene.table$symbols)))
message("** Define targeted genes")
data$target <- ifelse(data$symbols %in% random_genes$Signature, "target", "no")
data$target <- as.factor(data$target)
return(data)
}
plot_distribution <- function(data, title, saveFile){
message(paste0("Plotting : ", title))
plt <- ggplot(data, aes(value, symbols, fill=target))+
geom_density_ridges(aes(value, symbols, fill=target)) +
theme_ridges() +
ggtitle(title) +
ylab("Gene")+
theme(axis.text.y = element_text(size = 10))
message("Saved plots")
ggsave(plt, filename = saveFile, height = 35, width = 15, bg = "white")
return(plt)
}
message("** Plotting")
data.chip1 <- generate_data_for_plot("Agilent", samples_counts, samples_meta, 1, selected_gene_table, "ensembl", master_gene_table)
unique(data.chip1$BulkName)
data.chip2 <- generate_data_for_plot("Agilent", samples_counts, samples_meta, 2, selected_gene_table, "ensembl", master_gene_table)
unique(data.chip2$BulkName)
data.chip3 <- generate_data_for_plot("Agilent", samples_counts, samples_meta, 3, selected_gene_table, "ensembl", master_gene_table)
unique(data.chip3$BulkName)
plot_distribution(data.chip1, "Chip 1", "./check_agilent/distribution_chip1.png")
plot_distribution(data.chip2, "Chip 2", "./check_agilent/distribution_chip2.png")
plot_distribution(data.chip3, "Chip 3", "./check_agilent/distribution_chip3.png")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.