#!/usr/bin/env Rscript
library(dplyr)
library(stringr)
library(optparse)
library(DESeq2)
#------------------------------------------------------------------------------#
# handling command line input
arguments <- parse_args(OptionParser(usage = "",
description="",
option_list=list(
make_option(c("-g","--gene_expression"),
default = "",
help="the gene expression file"),
make_option(c("-s","--splice_dat_file"),
default = "",
help="splicemutr file"),
make_option(c("-k","--kmer_counts"),
default = "",
help="the kmer counts file"),
make_option(c("-j","--junc_expr_file"),
default = "",
help="junction expression file"),
make_option(c("-o","--out"),
default = "",
help="output_file_prefix"))))
opt=arguments
gene_expression_file <- opt$gene_expression
splice_dat_file <- opt$splice_dat_file
kmer_counts_file <- opt$kmer_counts
junc_expr_file <- opt$junc_expr_file
out<-opt$out
#------------------------------------------------------------------------------#
# internal functions
calc_gene_expression <- function(gene_tar,gene_expression){
gene_expr_target <- gene_expression[gene_tar,]
gene_expr_tar_min <- as.data.frame(t(apply(gene_expr_target,2,min)))
gene_expr_tar_min[is.na(gene_expr_tar_min)]<-0
return(gene_expr_tar_min[1,])
}
#------------------------------------------------------------------------------#
# local play
#
# gene_expression_file <- "/media/theron/My_Passport/head_and_neck_DARIA/data/featurecounts_out/featurecounts_all_vst.rds"
# splice_dat_file <- "/media/theron/My_Passport/head_and_neck_DARIA/data/splicemutr_05_26_2021/full_splicemutr_dat.rds"
# kmer_counts_file <- "/media/theron/My_Passport/head_and_neck_DARIA/data/splicemutr_05_26_2021/full_kmers.rds"
# vst <-1
# junc_expr_file <- "/media/theron/My_Passport/head_and_neck_DARIA/data/junc_expr_combined_vst.rds"
#------------------------------------------------------------------------------#
# reading in the files
gene_expression <- readRDS(gene_expression_file)
if (str_detect(splice_dat_file,".txt")){
splice_dat <- read.table(splice_dat_file,header=T,sep="\t")
} else {
splice_dat <- readRDS(splice_dat_file)
}
if (str_detect(kmer_counts_file,".txt")){
kmer_counts <- read.table(kmer_counts_file)
} else {
kmer_counts <- readRDS(kmer_counts_file)
}
kmer_counts$rows<-as.numeric(kmer_counts$rows)
kmer_counts[,seq(3,ncol(kmer_counts))] <- mutate_all(kmer_counts[,seq(3,ncol(kmer_counts))], function(x) as.numeric(x))
splice_dat$rows <- as.numeric(splice_dat$rows)
junc_expr_comb <- readRDS(junc_expr_file)
junc_expr_comb <- mutate_all(junc_expr_comb, function(x) as.numeric(x))
splice_dat_filt <- splice_dat[!duplicated(splice_dat[,seq(1,ncol(splice_dat)-1)]),]
kmer_counts_filt <- kmer_counts %>% dplyr::filter(rows %in% splice_dat_filt$rows)
samples <- colnames(kmer_counts_filt)[seq(3,ncol(kmer_counts_filt))]
gene_expression_filt <- gene_expression[,samples]
if ("juncs" %in% colnames(splice_dat_filt)){
junc_expr_comb_filt <- unique(junc_expr_comb[splice_dat_filt$juncs,samples])
} else {
strands <- as.data.frame(matrix(unlist(str_split(splice_dat_filt$cluster,"_")),byrow=T,nrow=nrow(splice_dat_filt)))[,3]
splice_dat_filt$juncs <- sprintf("%s:%s:%s:%s",splice_dat_filt$chr,splice_dat_filt$start,splice_dat_filt$end,strands)
junc_expr_comb_filt <- unique(junc_expr_comb[splice_dat_filt$juncs,samples])
}
rm(gene_expression)
rm(kmer_counts)
rm(splice_dat)
#------------------------------------------------------------------------------#
# calculating per-gene metric
genes <- unique(splice_dat_filt$gene)
gene_metric_mean <- as.data.frame(t(vapply(genes,function(gene_tar){
splice_dat_small <- splice_dat_filt %>% dplyr::filter(gene==gene_tar)
kmer_counts_small <- kmer_counts_filt %>% dplyr::filter(rows %in% splice_dat_small$rows)
kmer_counts <- kmer_counts_small[,samples]
gene_split <- strsplit(gene_tar,"-")[[1]]
gene_expr <- calc_gene_expression(gene_split,gene_expression_filt)
junc_expr <- junc_expr_comb_filt[splice_dat_small$juncs,]
dup_num <-nrow(splice_dat_small)
gene_expr_dup <- as.data.frame(matrix(rep(as.numeric(gene_expr),dup_num),byrow=T,nrow=dup_num))
a<-as.numeric(apply((kmer_counts*junc_expr)/gene_expr_dup,2,mean))
},numeric(length(samples)))))
colnames(gene_metric_mean)<-samples
gene_metric_max <- as.data.frame(t(vapply(genes,function(gene_tar){
splice_dat_small <- splice_dat_filt %>% dplyr::filter(gene==gene_tar)
kmer_counts_small <- kmer_counts_filt %>% dplyr::filter(rows %in% splice_dat_small$rows)
kmer_counts <- kmer_counts_small[,samples]
gene_split <- strsplit(gene_tar,"-")[[1]]
gene_expr <- calc_gene_expression(gene_split,gene_expression_filt)
junc_expr <- junc_expr_comb_filt[splice_dat_small$juncs,]
dup_num <-nrow(splice_dat_small)
gene_expr_dup <- as.data.frame(matrix(rep(as.numeric(gene_expr),dup_num),byrow=T,nrow=dup_num))
a<-as.numeric(apply((kmer_counts*junc_expr)/gene_expr_dup,2,max))
},numeric(length(samples)))))
colnames(gene_metric_max)<-samples
#------------------------------------------------------------------------------#
# saving gene metric data
saveRDS(gene_metric_mean,file=sprintf("%s_gene_metric_mean.rds",out))
write.table(gene_metric_mean,
file=sprintf("%s_gene_metric_mean.txt",out),quote=F, col.names = T, row.names = T, sep = "\t")
saveRDS(gene_metric_max,file=sprintf("%s_gene_metric_max.rds",out))
write.table(gene_metric_max,
file=sprintf("%s_gene_metric_max.txt",out),quote=F, col.names = T, row.names = T, sep = "\t")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.