example.r
In MOP: Mutation Order Prediction

library(MOP)
#library(gtools)
#library(alabama)
#library(doSNOW) # for parallel computing
#doSNOW::registerDoSNOW()
#MC <- multicore:::detectCores()

parallel <- FALSE
N <- 4
# N is the number of times that the optimization with different inital values is repeated.
# If the variable "parallel" is TRUE, then the repeated optimization is done using parallel
# computing R package doMC. If it is not possible, let "parallel" be FALSE, then the
# optimization is repeated using for loop. We recommend N to be at least 10.

data("Lung_sample_gene_mutation")
sample.gene.mutation <- Lung_sample_gene_mutation
# Read lung cancer data. It is a matrix with columns representing genes and rows representing
# samples. The value for the i'th row and j'th column is the number of mutations that
# occurred in gene j in sample i.
sample.gene.mutation <- as.matrix(sample.gene.mutation)

sample.gene.mutation <- sample.gene.mutation[rowSums(sample.gene.mutation)>0, ] # remove samples with no mutations
MAX <- 4 # assume the same distribution after MAX events. In this case, assume P_{k,i}=P_{5,i} for k>5
lower <- 0.05  # for 90% CI
upper <- 0.95
Early <- 3  # consider mutations occuring by "Early"th step as early mutations


### mle calculation #####################################################

mle <- order_estimate(sample.gene.mutation, N, parallel, MAX=MAX) # mle of P_{k,i}, the prob of kth mutation occurring in gene i
a <- round(mle,3)

tab =NULL
for(i in 1:ncol(a))
{
    tab=cbind(tab,c(i,a[,i]))
}
tab=cbind(c("event",colnames(sample.gene.mutation)),tab)

write.table(tab,file="MLE.txt" ,sep="\t", quote=F,row.names=F,col.names=F) # table of mle of P_{k,i}

mle <- read.table("../MLE.txt", sep="\t", header=TRUE, row.names=1)

delta <- as.matrix(a - mle[, 1:7])
summary(as.vector(delta))
plot(sort(as.vector(delta)))
hist(delta)

if(FALSE) {
### Bootstrap for calculating confidence interval ######################
bootstrap.no <- 200  #number of bootstraps
bootstrap.result <- vector("list", bootstrap.no)
no.gene <- ncol(sample.gene.mutation)

for (k in 1:bootstrap.no)
{
  # bootstrap samples
  boot.sample.gene.mutation <- sample.gene.mutation[sample(1:nrow(sample.gene.mutation), replace=TRUE),] 
  bootstrap.result[[k]] <- order_estimate(boot.sample.gene.mutation, N, parallel)
}

jackknife.result <- vector("list", nrow(sample.gene.mutation))

for(k in 1:nrow(sample.gene.mutation)) {
  jackknife.result[[k]] <- order_estimate(sample.gene.mutation[-k,], 4, parallel) # why N=4?
}


###################################################################
# construct CI for the prob of kth mutation in gene i (P_{k,i}) ###


x <- array(NA,dim=c(no.gene, MAX+1, length(bootstrap.result)))
for(i in 1:length(bootstrap.result)) {
  if(is.matrix(bootstrap.result[[i]])) {
    x[,,i] <- bootstrap.result[[i]][,1:(MAX+1)]
  }
}

jack <- array(NA, dim=c(no.gene,(MAX+1), nrow(sample.gene.mutation)))
for(i in 1:nrow(sample.gene.mutation)) {
  if(is.matrix(jackknife.result[[i]])) {
    jack[,,i] <- jackknife.result[[i]][,1:(MAX+1)]
  }
}

# CI for P_{k,i} ###
tab <- ConfInterval(x, mle[,1:(MAX+1)], jack, sample.gene.mutation, lower, upper)
tab <- tab[order(tab[,1]),]

write.table(tab, file="table_MLE_with_CI.txt" , quote=FALSE, row.names=FALSE, col.names=FALSE) 
#  table of mle of P_{k,i} with its CI

#####################################################################
# construct CI for the conditional prob of early or late mutations ##

x1 <- x2 <- array(NA, dim=c(no.gene, 1, length(bootstrap.result)))
for(i in 1:length(bootstrap.result)) {
  if(is.matrix(bootstrap.result[[i]])) {
    # conditional prob of early mutation
    x1[,,i] <- Union(bootstrap.result[[i]][,1:Early])/total(bootstrap.result[[i]])
    # conditional prob of late mutation
    x2[,,i] <- Union(bootstrap.result[[i]][,(Early+1):ncol(bootstrap.result[[i]])])/total(bootstrap.result[[i]]) 
  }
}

 # for calculating acceleration constant of BCa method
jack1 <- jack2 <- array(NA,dim=c(no.gene,1,nrow(sample.gene.mutation)))
for(i in 1:nrow(sample.gene.mutation)) {
  if(is.matrix(jackknife.result[[i]])) {
    # for early mutation
    jack1[,,i] <- Union(jackknife.result[[i]][,1:Early])/total(jackknife.result[[i]])
    # for late mutation
    jack2[,,i] <- Union(jackknife.result[[i]][,(Early+1):ncol(jackknife.result[[i]])])/total(jackknife.result[[i]]) 
  }
}

# mle for the conditional prob of early and late mutation
mle1 <- as.matrix(Union(mle[,1:Early])/total(mle))
mle2 <- as.matrix(Union(mle[,(Early+1):ncol(mle)])/total(mle))

tab1 <- ConfInterval(x1,mle1,jack1,sample.gene.mutation,lower,upper) # CI for the conditional prob of early mutation
tab2 <- ConfInterval(x2,mle2,jack2,sample.gene.mutation,lower,upper) # CI for the conditional prob of late mutation
tab <- cbind(tab1[,-ncol(tab1)],tab2[,-1])
tab <- tab[order(tab[,1]),]

write.table(tab,file="table_early_late_mutation.txt", quote=FALSE, row.names=FALSE, col.names=FALSE)  
# table of conditional prob of early and late mutations with CIs

} # FALSE commented out