"_PACKAGE"
#' MutSet class
#'
#' @docType class
#' @import R6
#' @return Object of class \code{MutSet}
#' @format An \code{\link{R6Class}} object.
#' @examples
#' \dontrun{
#' MutSet$new()
#' }
#' @slot mut table from MAF
#' @slot gid list of gene id for analysis
#' @slot exome table contains all possible mutations
#' @slot exomeGene table contains all posiible mutations for each gene.
#' @slot covar table contains covariates for each gene
#' @slot mutContext The context of 96 mutations
#' @slot samples data.frame store sample info. if BED colunm is specified,
#' the background process will be using the regions limited to bed file.
#' @slot incosistantAnno Variants with incosistant gene annotation from
#' GSMuta reference data .
#' \describe{
#' \item{\code{new()}}{Create a MutSet }
#' \item{\code{nonsilent()}}{Get nonsilent mutations table}
#' \item{\code{silent()}}{Get silent mutations table}
#' \item{\code{nc()}}{Get noncoding mutations table}
#' }
#'
#' @name MutSet
#' @docType class
#' @exportClass MutSet
#'
MutSet = R6::R6Class(
'MutSet',
portable = FALSE,
public = list(
mut = NULL,
gid = NULL,
exome = NULL,
exomeGene = NULL,
covar = NULL,
mutContext = NULL,
incosistantAnno = NULL,
samples = NULL,
nonsilent = function(gid) {
mut[mut$Sil == 1,]
},
silent = function() {
mut[mut$Sil == 2,]
},
nc = function() {
mut[mut$Sil == 3,]
},
get_nonsil_passengers = function(fraction){
nosam = aggregate(data=mut[mut$Sil == 1,], Tumor_Sample_Barcode~ Ensembl_gene_id, function(x) length(unique(x)))
mut.gene = nosam[with(nosam,order(Tumor_Sample_Barcode)),1]
nonsilent_passenger_gene=gid[! gid %in% mut.gene[round(length(mut.gene)* fraction):length(mut.gene)]]
return(nonsilent_passenger_gene)
}
)
)
#' read and load maf file and necessary reference file
#' @param mutf Path to maf file. Detail see \code{mafCleanup}.
#' @param exomef Path to exome file
#' @param covarf Path to covariable file
#' @param mutContextf Path to mutContext file.
#' @param samplef Path to sample file or data.frame. The column name to specify bed file must be BED
#' @param ref Path to reference genome
#' @export
#' @return a MutSet
readData = function(mutf, exomef, covarf, mutContextf, ref, samplef= NULL, includeincon = FALSE){
# load mutation context file
mutContext = read.table(mutContextf)
colnames(mutContext) = c("triMut", "rev_triMut", "triMut_code", "rev_triMut_code", "tag")
# load covariant file
covar = read.table(covarf, header=T, row.names=2, sep="\t")
if(all(!grepl('chr',covar$Chromosome))) covar$Chromosome = paste0("chr", covar$Chromosome) ## fix encounter old covar file.
# load exome file
## format of exome file
## pos <\t> tri-nucleotide<\t>A <\t> C <\t> G <\t> T <\t> gene <\t> amino_acid_pos/protein_length
exome = loadfile(exomef)
colnames(exome) = c("Chromosome","pos", "seq_code","A", "C", "G", "T","gid", "aa_pos");
# get valide gid
gid = intersect(rownames(covar)[covar$Chromosome %in% paste0("chr",c(1:22,"X", "Y"))],exome$gid)
# read in maf file
mafTable = mafCleanup(mutf, gid = gid)
mafTable = retrieve_context(mafTable, ref)
mafTable$tag = mutContext[match(mafTable[,"Context"],mutContext[,"triMut_code"]),"tag"]
# remove inconsistant annotation
inconMut = Get_incon_mut(mafTable, exome)
cat(sprintf("Number of inconsistant annotation mutation: %s out of total %s mutation \n",sum(inconMut), length(inconMut)))
# if(includeincon){
# cat("include the inconsistant mutations")
# }
incosistantAnno = mafTable[inconMut, ]
mafTable = mafTable[!inconMut, ]
# load sample file
if(is.null(samplef)){
samples = data.frame(SampleID = unique(mafTable$Tumor_Sample_Barcode),
stringsAsFactors = FALSE)
}else if(is.character(samplef)){
samples = read.delim(samplef, stringsAsFactors = FALSE)
}else if("data.frame" %in% class(samplef)){
samples = samplef
}
## check all samples contain at least one mutation.
samples = checkMutC(samples, mafTable)
# load exomeGene file
## format of exome.gene #################
## EnsembleGeneID <\t> tri-nucleotide+change <\t> consequence <\t> count
# consequence coding
# 0 - silent
# 1 - miss-sense
# 2 - nonsense
# 3 - nonstop
# 4 - TSS
# 5 - splice
if("BED" %in% colnames(samples)){
cat("Bed file is specified for samples\n")
if(length(unique(samples$BED)) == 1){
cat("\tAll samples have the same bed regions\n")
require(GenomicRanges)
exomeGene = get_exomeGene(exome, Bed = unique(samples$BED), mutContext = mutContext)
gid = intersect(gid, exomeGene$gid) ## need to update gid to bed file.
MutBed = data.frame(Chromosome = mafTable[, "Chromosome"],
Start = mafTable[, "Start_Position"],
End = mafTable[,"End_Position"])
subMut = OverLap(MutBed, regions= unique(samples$BED))
cat(sprintf("Reduce number of mutation from: %s to %s \n",
nrow(MutBed), length(subMut)))
mafTable = mafTable[subMut,]
## check all samples contain at least one mutation.
}else{
cat("\tSamples' bed file are different. exomeGene for each sample will be generated\n")
exomeGene = lapply(samples$BED, function(x){get_exomeGene(exome, Bed = x, mutContext = mutContext)})
gids = lapply(exomeGene, function(x){intersect(gid, x$gid)})
names(exomeGene) = names(gids) = samples$SampleID
gid = gids
tmp = lapply(split(mafTable, mafTable$Tumor_Sample_Barcode),
function(x){MutBed = data.frame(Chromosome = x[, "Chromosome"],
Start = x[, "Start_Position"],
End = x[,"End_Position"])
subMut = OverLap(MutBed, regions= unique(samples$BED))
out = x[subMut,]
return(out)})
cat(sprintf("Reduce number of mutation from: %s to %s \n", nrow(mafTable), nrow(do.call(rbind, tmp))))
mafTable = do.call(rbind, tmp)
}
}else{
cat("Bed file is not specified for samples. exomeGene for whole exome will be generated.\n")
exomeGene = get_exomeGene(exome, mutContext = mutContext)
}
samples = checkMutC(samples, mafTable)
# generate mutSet
sset = MutSet$new()
sset$mut = mafTable
sset$gid = gid
sset$exome = exome
sset$exomeGene = exomeGene
sset$covar = covar
sset$mutContext = mutContext
sset$samples = samples
sset$incosistantAnno = incosistantAnno
cat(sprintf(paste0("Total gene analyzed: ",length(gid))),"\n")
return(sset)
}
#'calMut
#' provide observed and predicted mutation rate
#' @param Data MutSet class. Detail see \code{MutSet}.
#' @param params output from fit_model. Detail see \code{fit_model}
#' @param sampleN Sample names
#' @param gids gene IDs
#' @param bed path to bed file.
#' @importFrom data.table setkey
#' @export
#' @return a summary of predicted and expected mutation rate.
calMut = function(Data, params, sampleN = NULL, gids = NULL, bed = NULL){
subData = Data$clone()
if(is.null(sampleN)) sampleN = unique(subData$samples$SampleID)
if(!is.null(bed)){
exomeGene = get_exomeGene(subData$exome, Bed = bed, mutContext = subData$mutContext)
gid = intersect(subData$gid, exomeGene$gid) ## need to update gid to bed file.
MutBed = data.frame(Chromosome = subData$mut[, "Chromosome"],
Start = subData$mut[, "Start_Position"],
End = subData$mut[,"End_Position"])
subMut = OverLap(MutBed, regions= bed)
subData$mut = subData$mut[subMut,]
subData$exomeGene = subData$exomeGene
subData$gid = subData$gid
}
if(is.null(gids)) gids = subData$gid
### get parameters from modeling
gene.mu = params[["geneMu"]]
gene.phi = params[["genePhi"]]
o_scale = params[["o_scale"]]
MRtriProb = params[['MRtriProb']]
zero_p = params[["geneZeroP"]]
if(!is.null(bed)){ ## if bed file provided, parameter need to recalculated.
MR_p = MRtriProb$MR_p # relative mutation frequency for each tri-nucleotide context #
### get gene.prob
geneProb = getGeneBgProb(subData$exomeGene, MR_p)
# count number of mutation per patient#
mutPerP = count_Mut(subData$mut)
mutPerP$count = mutPerP$count/get_glen(subData$exomeGene, selGid = names(subData$gid)) * 1000000
### get gene length
geneLen = get_glen(subData$exomeGene, byGene= TRUE)
}else{
### get gene.prob
geneProb = params[["geneProb"]]
### get mutation per pateint
mutPerP = params[["mutPerP"]]
### get gene length
geneLen = params[["geneLen"]]
}
if("data.table" %in% class(geneProb)){
geneProb = data.table(geneProb)
}
setkey(geneProb, gid, consequence)
## get mutation for non-silent/silent mutation ##
if(!is.null(zero_p)){
## zero inflated model
mut_pre_nonsil = as.matrix((1-zero_p) * gene.mu * ((geneProb[J(gids,1),]$prob + geneLen[gids]*MRtriProb$MR_p["indel"]) %*%
t(as.matrix(mutPerP[match(sampleN, mutPerP$Tumor_Sample_Barcode),"count"]))))/exp(o_scale)
mut_pre_sil = as.matrix((1-zero_p) * gene.mu * ((geneProb[J(gids,0),]$prob) %*%
t(as.matrix(mutPerP[match(sampleN, mutPerP$Tumor_Sample_Barcode),"count"]))))/exp(o_scale)
}else{
mut_pre_nonsil = as.matrix(gene.mu * (( geneProb[J(gids,1),]$prob + geneLen[gids]*MRtriProb$MR_p["indel"]) %*%
t(as.matrix(mutPerP[match(sampleN, mutPerP$Tumor_Sample_Barcode),"count"]))))/exp(o_scale)
mut_pre_sil = as.matrix(gene.mu * ((geneProb[J(gids,0),]$prob) %*%
t(as.matrix(mutPerP[match(sampleN, mutPerP$Tumor_Sample_Barcode),"count"]))))/exp(o_scale)
}
rownames(mut_pre_nonsil) = rownames(mut_pre_sil) = names(gene.mu)
colnames(mut_pre_nonsil) = colnames(mut_pre_sil) = sampleN
## get observed mutation count for non-silent and silent mutation ##
mut_obs_sil = count_Mut(Data$silent(), gid = gids, sampleN = sampleN)
mut_obs_nonsil = count_Mut(Data$nonsilent(), gid = gids, sampleN = sampleN)
out = melt(mut_obs_sil)
colnames(out) = c("gid", "sample","mut_obs_sil")
out$mut_obs_nonsil = melt(mut_obs_nonsil)$value
out$mut_pre_sil = melt(mut_pre_sil)$value
out$mut_pre_nonsil = melt(mut_pre_nonsil)$value
out$geneLen = geneLen[as.character(out$gid)]
out$geneMu = gene.mu[as.character(out$gid)]
out$geneProb0 = geneProb[J(as.character(out$gid),0),]$prob
out$geneProb1 = geneProb[J(as.character(out$gid),1),]$prob
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.