#!/usr/bin/env Rscript
library(dplyr)
library(stringr)
library(optparse)
library(DESeq2)
#------------------------------------------------------------------------------#
# handling command line input
arguments <- parse_args(OptionParser(usage = "",
description="",
option_list=list(
make_option(c("-g","--gene_expression"),
default = "",
help="the gene expression file"),
make_option(c("-s","--splice_dat_file"),
default = "",
help="splicemutr file"),
make_option(c("-k","--kmer_counts"),
default = "",
help="the kmer counts file"),
make_option(c("-j","--junc_expr_file"),
default = "",
help="junction expression file"),
make_option(c("-o","--out"),
default = "",
help="output_file_prefix"))))
opt=arguments
gene_expression_file <- opt$gene_expression
splice_dat_file <- opt$splice_dat_file
kmer_counts_file <- opt$kmer_counts
junc_expr_file <- opt$junc_expr_file
out<-opt$out
#------------------------------------------------------------------------------#
# internal functions
calc_gene_expression <- function(gene_tar,gene_expression){
gene_expr_target <- gene_expression[gene_tar,,drop=F]
if (length(gene_tar)>1){
gene_expr_tar_min <- apply(gene_expr_target,2,min)
} else {
gene_expr_tar_min <- gene_expr_target
}
gene_expr_tar_min[is.na(gene_expr_tar_min)]<-0
return(gene_expr_tar_min)
}
count_kmers <- function(vals){
vapply(strsplit(vals,":"),function(val){
length(which(val != "NAZZZZZZZ"))
},numeric(1))
}
#------------------------------------------------------------------------------#
# local play
#
# gene_expression_file <- "/media/theron/My_Passport/TCGA_junctions/TCGA_cancers/LUAD/gene_expression_vst.rds"
# splice_dat_file <- "/media/theron/My_Passport/TCGA_junctions/TCGA_cancers/LUAD/LUAD_splicemutr_dat.txt"
# kmer_counts_file <- "/media/theron/My_Passport/TCGA_junctions/TCGA_cancers/LUAD/kmer_counts_all.rds"
# vst <-1
# junc_expr_file <- "/media/theron/My_Passport/TCGA_junctions/TCGA_cancers/LUAD/junc_expr_combined_vst_7.rds"
# tcga<-T
#------------------------------------------------------------------------------#
# reading in the files
gene_expression <- readRDS(gene_expression_file)
if (str_detect(splice_dat_file,".txt")){
splice_dat <- read.table(splice_dat_file,header=T,sep="\t")
} else {
splice_dat <- readRDS(splice_dat_file)
}
if (str_detect(kmer_counts_file,".txt")){
kmer_counts <- read.table(kmer_counts_file)
} else {
kmer_counts <- readRDS(kmer_counts_file)
}
kmer_counts<-kmer_counts[,!duplicated(colnames(kmer_counts))]
kmer_counts[,1]<-as.numeric(kmer_counts[,1])
kmer_counts[,seq(3,ncol(kmer_counts))] <- apply(kmer_counts[,seq(3,ncol(kmer_counts))],2,count_kmers)
kmer_counts[,seq(3,ncol(kmer_counts))] <- mutate_all(kmer_counts[,seq(3,ncol(kmer_counts))], function(x) as.numeric(x))
splice_dat$deltapsi <- as.numeric(splice_dat$deltapsi)
splice_dat <- splice_dat %>% dplyr::filter(deltapsi>0 & protein_coding=="Yes")
splice_dat_strands <- matrix(unlist(strsplit(splice_dat$cluster,"_")),byrow=T,nrow=nrow(splice_dat))[,3]
splice_dat$juncs <- sprintf("%s:%s",splice_dat$juncs,splice_dat_strands)
splice_dat_juncs <- as.data.frame(matrix(unlist(strsplit(splice_dat$juncs,":")),byrow=T,nrow=nrow(splice_dat)))
splice_dat$juncs <- sprintf("%s:%s-%s:%s",splice_dat_juncs$V1,
splice_dat_juncs$V2,splice_dat_juncs$V3,
splice_dat_juncs$V4)
splice_dat$X <- as.numeric(splice_dat$X)
junc_expr_comb <- readRDS(junc_expr_file)
junc_expr_comb <- mutate_all(junc_expr_comb, function(x) as.numeric(x))
splice_dat_filt <- splice_dat[!duplicated(splice_dat[,seq(1,ncol(splice_dat)-1)]),]
kmer_counts_filt <- kmer_counts %>% dplyr::filter(row %in% splice_dat_filt$X)
samples <- colnames(kmer_counts_filt)[seq(3,ncol(kmer_counts_filt))]
samples <- samples[which(samples %in% colnames(junc_expr_comb))]
gene_expression_filt <- gene_expression[,samples,drop=F]
junc_expr_comb_filt <- unique(junc_expr_comb[splice_dat_filt$juncs,samples,drop=F])
kmer_counts_filt <- kmer_counts_filt[,c("row","cluster",samples),drop=F]
# rm(gene_expression)
# rm(kmer_counts)
# rm(splice_dat)
#------------------------------------------------------------------------------#
# calculating per-gene metric
genes <- unique(splice_dat_filt$gene)
gene_metric_mean <- as.data.frame(t(vapply(genes,function(gene_tar){
g<<-gene_tar
splice_dat_small <- splice_dat_filt %>% dplyr::filter(gene==gene_tar)
kmer_counts_small <- kmer_counts_filt %>% dplyr::filter(as.numeric(kmer_counts_filt[,1]) %in% splice_dat_small$X)
kmer_count <- kmer_counts_small[,samples,drop=F]
gene_split <- strsplit(gene_tar,"-")[[1]]
gene_expr <- calc_gene_expression(gene_split,gene_expression_filt)
junc_expr <- junc_expr_comb_filt[splice_dat_small$juncs,,drop=F]
dup_num <-nrow(splice_dat_small)
gene_expr_dup <- as.data.frame(matrix(rep(as.numeric(gene_expr),dup_num),byrow=T,nrow=dup_num))
a<-as.numeric(apply((kmer_count*junc_expr)/gene_expr_dup,2,mean))
},numeric(length(samples)))))
colnames(gene_metric_mean)<-samples
gene_metric_max <- as.data.frame(t(vapply(genes,function(gene_tar){
splice_dat_small <- splice_dat_filt %>% dplyr::filter(gene==gene_tar)
kmer_counts_small <- kmer_counts_filt %>% dplyr::filter(as.numeric(kmer_counts_filt[,1]) %in% splice_dat_small$X)
kmer_count <- kmer_counts_small[,samples,drop=F]
gene_split <- strsplit(gene_tar,"-")[[1]]
gene_expr <- calc_gene_expression(gene_split,gene_expression_filt)
junc_expr <- junc_expr_comb_filt[splice_dat_small$juncs,,drop=F]
dup_num <-nrow(splice_dat_small)
gene_expr_dup <- as.data.frame(matrix(rep(as.numeric(gene_expr),dup_num),byrow=T,nrow=dup_num))
a<-as.numeric(apply((kmer_count*junc_expr)/gene_expr_dup,2,max))
},numeric(length(samples)))))
colnames(gene_metric_max)<-samples
#------------------------------------------------------------------------------#
# saving gene metric data
saveRDS(gene_metric_mean,file=sprintf("%s_gene_metric_mean.rds",out))
write.table(gene_metric_mean,
file=sprintf("%s_gene_metric_mean.txt",out),quote=F, col.names = T, row.names = T, sep = "\t")
saveRDS(gene_metric_max,file=sprintf("%s_gene_metric_max.rds",out))
write.table(gene_metric_max,
file=sprintf("%s_gene_metric_max.txt",out),quote=F, col.names = T, row.names = T, sep = "\t")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.