#!/usr/bin/env Rscript
library(dplyr)
library(ggplot2)
library(stringr)
library(optparse)
#------------------------------------------------------------------------------#
# handling command line input
arguments <- parse_args(OptionParser(usage = "",
description="",
option_list=list(
make_option(c("-g","--genotypes_file"),
default = sprintf("%s",getwd()),
help="the genotypes file"),
make_option(c("-s","--summary_dir"),
default = sprintf("%s",getwd()),
help="summary file dir"),
make_option(c("-d","--splice_dat_file"),
default = sprintf("%s",getwd()),
help="splicemutr file"),
make_option(c("-c","--counts_file"),
default = sprintf("%s",getwd()),
help="sample junc file"))))
opt=arguments
genotypes_file <- opt$genotypes_file
summary_dir <- opt$summary_dir
splice_dat_file <- opt$splice_dat_file
counts_file <- opt$counts_file
#------------------------------------------------------------------------------#
# local directories and file inputs for testing
# genotypes_file <- "/media/theron/My_Passport/Valsamo/genotypes/genotypes.rds"
# summary_dir <- "/media/theron/My_Passport/Valsamo/mhcnuggets_out/run_12072021/predictions_1"
# splice_dat_file <- "/media/theron/My_Passport/Valsamo/analysis/splicemutr_output/run_12072021/data_splicemutr.txt"
# counts_file <- "/media/theron/My_Passport/Valsamo/juncs/Q21777-Plate-1-A01_L15.filt.junc"
#------------------------------------------------------------------------------#
# reading in the data necessary for creating specific splicemutr data
genotypes <- readRDS(genotypes_file)
splice_dat <- read.table(splice_dat_file,header=T,sep=" ")
counts <- read.table(counts_file)
#------------------------------------------------------------------------------#
# formatting counts file
counts$V2 <- as.numeric(counts$V2)
counts$V3 <- as.numeric(counts$V3)
counts$V5 <- as.numeric(counts$V5)
counts$juncs <- sprintf("%s:%s:%s:%s",counts$V1,counts$V2,counts$V3,counts$V6)
#------------------------------------------------------------------------------#
# reading in the genotype for the specific sample
sample <- basename(str_replace(counts_file,".filt.junc",""))
sample_geno <- unname(genotypes[[which(names(genotypes) == sample)]])
class_1 <- c("HLA-A","HLA-B","HLA-C")
sample_geno <- str_replace(sample_geno[which(str_detect(sample_geno,class_1[1]) |
str_detect(sample_geno,class_1[2]) |
str_detect(sample_geno,class_1[3]))], ":","-")
sample_kmers <- data.frame(seq(nrow(splice_dat)))
colnames(sample_kmers) <- "rows"
rownames(sample_kmers) <- sample_kmers$rows
sample_kmers$kmers <- NA
sample_kmers_ret <- vapply(seq(length(sample_geno)),function(geno_val){
HLA<-sample_geno[geno_val]
HLA_summ_file <- sprintf("%s/%s_tx_dict_summary_perc.txt",summary_dir,HLA)
HLA_summ <- read.table(HLA_summ_file,header=F,sep="\t")
HLA_summ$V1 <- as.numeric(HLA_summ$V1)+1
sample_kmers[HLA_summ$V1,"kmers"] <<- vapply(seq(nrow(HLA_summ)),function(row_val){
dat_row_val <- HLA_summ[row_val,"V1"]
if (is.na(sample_kmers$kmers[dat_row_val])){
HLA_summ[row_val,"V2"]
} else {
paste(c(sample_kmers$kmers[dat_row_val],HLA_summ[row_val,"V2"]),collapse=":")
}
},character(1))
return(TRUE)
},logical(1))
#------------------------------------------------------------------------------#
# annotating sample_kmers with junctions
juncs <- sprintf("%s:%s:%s:%s",splice_dat$chr,splice_dat$start,splice_dat$end,splice_dat$strand)
splice_dat$juncs <- juncs
sample_kmers$juncs <- juncs
rownames(counts)<-counts$juncs
sample_kmers$counts <- counts[sample_kmers$juncs,"V5"]
#------------------------------------------------------------------------------#
# saving samples_kmers file
sample_kmers_file <- sprintf("%s/%s_splicemutr_kmers.rds",dirname(counts_file),sample)
saveRDS(sample_kmers,file=sample_kmers_file)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.