R/RtreemixMethods.R

##################################################################################
## Function for fitting Rtreemix model to a given RtreemixData dataset.
## The data and the number of trees (K) have to be specified.
## The function estimates K-oncogenetic trees mixture model from the
## specified data (with a noise component).
## When K == 1, and noise == FALSE a single mutagenetic tree is fit to the data.
## When K == 1 and noise == TRUE a star mutagenetic tree is fit to the data.
## If K > 1, the first mutagenetic tree is always the star,
## i.e. the case K > 1 and noise == FALSE is not possible.
if(!isGeneric("fit"))
  setGeneric("fit", function(data, K, ...) standardGeneric("fit"))

setMethod("fit",
          signature(data = "RtreemixData", K = "numeric"),
          function(data, K, no.start.sol = 100,
                   eps = 0.01, weighing = FALSE, equal.edgeweights = TRUE,
                   seed = (-1), noise = TRUE) {

            ## Setting the true type
            K <- as.integer(K)
            no.start.sol <- as.integer(no.start.sol)
            equal.edgeweights <- as.integer(equal.edgeweights)
            eps <- as.numeric(eps)
            weighing <- as.integer(weighing)
            seed <- as.integer(seed)
            ## Check if the necessary parameters are provided and have the correct form.                
            if(K < 1)
              stop("The number of trees must be an integer greater than zero!")

            if((seed != -1) &&
               (!(is.numeric(seed)) || !(trunc(seed) == seed) || !(seed >= 0 )))
              stop("The seed must be a positive integer!")

            if(no.start.sol < 1)
              stop("The number of starting solutions for the k-means must be an integer greater than zero!")            
  
            ##Add the first column of ones in the sample.
            sample <- cbind(rep(1, sampleSize(data)), Sample(data))

            ## Fitting the corresponding RtreemixModel to the provided data.
            if(noise && (K > 1)) {
              fit.res <- .Call("R_fit", Events(data), sample, K,
                               no.start.sol, equal.edgeweights,
                               eps, weighing, seed)
    
              for(i in 1:K)
                edgeDataDefaults(fit.res$graphs.mixture[[i]], "weight") <- numeric(1)
              
              return(new("RtreemixModel",
                         ParentData = data,
                         Weights =  as.numeric(fit.res$alpha),
                         Resp = as.matrix(fit.res$resp),
                         CompleteMat = as.matrix(fit.res$pat.hat),
                         Star = TRUE,
                         Trees = fit.res$graphs.mixture))
            } else {
              if(K == 1) {
                if (!noise) {
                  ## Fit single RtreemixModel to the data.
                  fit.res <- .Call("R_fit1", Events(data), sample, eps, weighing)
                  
                  edgeDataDefaults(fit.res$graphs.mixture[[1]], "weight") <- numeric(1)
                  
                  return(new("RtreemixModel",
                             ParentData = data,
                             Weights =  as.numeric(fit.res$alpha),
                             Resp = as.matrix(t(fit.res$resp)),
                             Star = FALSE,
                             Trees = fit.res$graphs.mixture))
                } else {
                  ## Fit single star RtreemixModel to the data.
                  fit.res <- .Call("R_fit0", Events(data), sample,
                                   equal.edgeweights, weighing)
                  
                  edgeDataDefaults(fit.res$graphs.mixture[[1]], "weight") <- numeric(1)
                  
                  return(new("RtreemixModel",
                             ParentData = data,
                             Weights =  as.numeric(fit.res$alpha),
                             Resp = as.matrix(t(fit.res$resp)),
                             Star = TRUE,
                             Trees = fit.res$graphs.mixture))
                }
              } else
              stop("Mixture model with more than one tree and without noise component is not implemented!")
            }
          })

##################################################################################
## Function for fitting Rtreemix model to given data and
## analyzing its variance with the bootstrap method.
## The data and number of trees (K) have to be specified.
if(!isGeneric("bootstrap"))
  setGeneric("bootstrap", function(data, K, ...) standardGeneric("bootstrap"))

setMethod("bootstrap",
          signature(data = "RtreemixData", K = "numeric"),
          function(data, K, no.start.sol = 100,
                      eps = 0.0, weighing = FALSE, equal.edgeweights = TRUE,
                      seed = (-1), B = 1000,
                      conf.interval = 0.05) {

            ## Setting the true type
            K <- as.integer(K)
            no.start.sol <- as.integer(no.start.sol)
            equal.edgeweights <- as.integer(equal.edgeweights)
            eps <- as.numeric(eps)
            weighing <- as.integer(weighing)
            seed <- as.integer(seed)
            B <- as.integer(B)
            conf.interval <- as.numeric(conf.interval)
            events <- Events(data)
            ## Check if the necessary parameters are provided and have the correct form.    
            if(K < 1)
              stop("The number of trees must be an integer greater than zero!")
  
            if((seed != -1) &&
               (!(is.numeric(seed)) || !(trunc(seed) == seed) || !(seed >= 0 )))
              stop("The seed must be a positive integer!")
            
            if(no.start.sol < 1)
              stop("The number of starting solutions for the k-means must be an integer greater than zero!")

            if(B < 1)
              stop("The number of bootstrap samples integer greater than zero!")            
            
            ## Add the first column of ones in the sample.
            sample <- cbind(replicate(sampleSize(data), 1), Sample(data))

            ## Fit a model and analyze its variance with the bootstrap method.
            fit.boot <- .Call("R_bootstrap", events, sample, K,
                              no.start.sol, equal.edgeweights,
                              eps, weighing, seed,
                              B, conf.interval
                              )

            for(i in 1:K) {
              edgeDataDefaults(fit.boot$graphs.mixture[[i]], "weight") <- numeric(1)
              edgeDataDefaults(fit.boot$graphs.mixture[[i]], "ci") <- numeric(0)
            }
            
            return(new("RtreemixModel",
                       ParentData = data,
                       Weights =  as.numeric(fit.boot$alpha),
                       WeightsCI = fit.boot$alpha.ci,
                       Resp = as.matrix(fit.boot$resp),
                       CompleteMat = as.matrix(fit.boot$pat.hat),
                       Star = TRUE,
                       Trees = fit.boot$graphs.mixture
                       ))  
          })

####################################################################################
## Function for predicting the likelihoods of given data by using a given Rtreemix model.
## The model and the data have to be specified.
if(!isGeneric("likelihoods"))
  setGeneric("likelihoods", function(model, data) standardGeneric("likelihoods"))

setMethod("likelihoods",
          signature(model = "RtreemixModel", data = "RtreemixData"),
          function(model, data) {
            ## Check if the patterns have the correct length.
            if(eventsNum(model) !=  eventsNum(data)) {
              cat("The patterns are expected to have length: ", eventsNum(model), ". ")
              stop("\n")
            } else {
              ## Give the node names and edge weights in a list structure.             
              listG <- list()
              for(i in 1:numTrees(model)) {
                listG[[i]] <- list()
                listG[[i]][[1]] <- nodes(Trees(model)[[i]])
                listG[[i]][[2]] <- edgeWeights(Trees(model)[[i]])
              }
              
              ## Call to the C function for calculating the likelihoods.              
              res.like <- .Call("R_likelihood", eventsNum(model),
                                listG, Weights(model), cbind(rep(1, sampleSize(data)), Sample(data)))
    
              return(new("RtreemixStats",
                         Data = data,
                         Model = model,
                         LogLikelihoods = res.like[[2]],
                         WLikelihoods = res.like[[1]]))  
            }
          })

###################################################################################
## Function for predicting the GPS of given dataset by using a given Rtreemix model.
## The model has to be specified. If the dataset is missing the GPS is calculated for all possible patterns.
if(!isGeneric("gps"))
  setGeneric("gps", function(model, data, ...) standardGeneric("gps"))

## The dataset is specified as an RtreemixData object.
setMethod("gps",
          signature(model = "RtreemixModel", data = "RtreemixData"),
          function(model, data, sampling.mode = "exponential", sampling.param = 1,
                    no.sim = 10, seed = (-1)) {

            ## Setting the true type
            sampling.param <- as.numeric(sampling.param)
            no.sim <- as.integer(no.sim)
            seed <- as.integer(seed)           
            ## Check if the necessary parameters are provided and have the correct form.    
            if((seed != -1) &&
               (!(is.numeric(seed)) || !(trunc(seed) == seed) || !(seed >= 0 )))
              stop("The seed must be a positive integer!")
            
            if (!missing(sampling.mode)) {
              if (!(identical(sampling.mode, "constant") || identical(sampling.mode, "exponential")))
                stop("Please give correct sampling mode (constant or exponential)!")
            }
            
            if (no.sim < 1)
              stop("The number of iterations for the waiting time simulation must be an integer greater than zero!")            
                   
            mat <- cbind(rep(1, sampleSize(data)), Sample(data))
            ## Check if the patterns have the correct length.            
            if(eventsNum(model) !=  ncol(mat)) {
              cat("The patterns are expected to have length: ", eventsNum(model), ". \n")
              stop("\n")
            }       
                       
            ## Give the node names and edge weights in a list structure. 
            listG <- list()
            for(i in 1:numTrees(model)) {
              listG[[i]] <- list()
              listG[[i]][[1]] <- nodes(Trees(model)[[i]])
              listG[[i]][[2]] <- edgeWeights(Trees(model)[[i]])
            }
            ## Call to the C function for calculating the GPS.  
            res.GPS <- .Call("R_time", eventsNum(model), listG, Weights(model),
                             as.integer(ifelse(identical(sampling.mode, "constant"), 0, 1)),
                             sampling.param, no.sim, seed)
            ## All possible patterns.
            all.data <- new("RtreemixData", Sample = res.GPS[[1]][, -1])

            resp <- WLikelihoods(likelihoods(model = model, data = all.data))
            bind.gps <- sapply(res.GPS[[2]], rbind)
            bind.gps.bin <- apply(!apply(bind.gps, 2, is.na), 2, as.integer)
            filter.resp <- resp * bind.gps.bin
            resp <- filter.resp / apply(filter.resp, 1, sum)

            if(numTrees(model) > 1) {
              resp[which(apply(bind.gps.bin, 1, sum) == 0),] <- c(1, rep(0, numTrees(model) - 1))
              ## Take the mean for the noise.
              if (sampling.mode == "exponential")  
                res.GPS[[2]][[1]][which(is.na(res.GPS[[2]][[1]]))] <- 1.0/sampling.param
              else 
                res.GPS[[2]][[1]][which(is.na(res.GPS[[2]][[1]]))] <- sampling.param
                      
              for(i in 2:numTrees(model)) 
                res.GPS[[2]][[i]][which(is.na(res.GPS[[2]][[i]]))] <- numeric(1)        
            } else {
              for(i in 1:numTrees(model)) 
                res.GPS[[2]][[i]][which(is.na(res.GPS[[2]][[i]]))] <- numeric(1)
            }
              
            fin.GPS <- apply(t(resp) *  t(sapply(res.GPS[[2]], rbind)), 2, sum)
              
            ## Take the GPS of the patterns present in the given dataset.
            index <- apply(mat[,-1], 1, '%*%', 2^c(0:(ncol(mat) - 2))) + 1              
            res <- fin.GPS[index]
              
            return(new("RtreemixGPS",
                       Data = data,
                       Model = model,
                       SamplingMode = sampling.mode,
                       SamplingParam = sampling.param,
                       GPS = res))
          })

## The dataset is specified as a binary matrix.
setMethod("gps",
          signature(model = "RtreemixModel", data = "matrix"),
          function(model, data, sampling.mode = "exponential", sampling.param = 1,
                    no.sim = 10, seed = (-1)) {

            ## Setting the true type
            sampling.param <- as.numeric(sampling.param)
            no.sim <- as.integer(no.sim)
            seed <- as.integer(seed)           
            ## Check if the necessary parameters are provided and have the correct form.    
            if((seed != -1) &&
               (!(is.numeric(seed)) || !(trunc(seed) == seed) || !(seed >= 0)))
              stop("The seed must be a positive integer!")
            
            if (!missing(sampling.mode)) {
              if (!(identical(sampling.mode, "constant") || identical(sampling.mode, "exponential")))
                stop("Please give correct sampling mode (constant or exponential)!")
            }
            
            if (no.sim < 1)
              stop("The number of iterations for the waiting time simulation must be an integer greater than zero!")
            
            mat <- cbind(rep(1, nrow(data)), data)
            ## Check if the patterns have the correct length.            
            if(eventsNum(model) !=  ncol(mat)) {
              cat("The patterns are expected to have length: ", eventsNum(model), ". \n")
              stop("\n")
            }       
                       
            ## Give the node names and edge weights in a list structure. 
            listG <- list()
            for(i in 1:numTrees(model)) {
              listG[[i]] <- list()
              listG[[i]][[1]] <- nodes(Trees(model)[[i]])
              listG[[i]][[2]] <- edgeWeights(Trees(model)[[i]])
            }
            ## Call to the C function for calculating the GPS.  
            res.GPS <- .Call("R_time", eventsNum(model), listG, Weights(model),
                             as.integer(ifelse(identical(sampling.mode, "constant"), 0, 1)),
                             sampling.param, no.sim, seed)
            ## Generate all possible patterns.
            all.data <- new("RtreemixData", Sample = res.GPS[[1]][, -1])

            resp <- WLikelihoods(likelihoods(model = model, data = all.data))
            bind.gps <- sapply(res.GPS[[2]], rbind)
            bind.gps.bin <- apply(!apply(bind.gps, 2, is.na), 2, as.integer)
            filter.resp <- resp * bind.gps.bin
            resp <- filter.resp / apply(filter.resp, 1, sum)

            if(numTrees(model) > 1) {
              resp[which(apply(bind.gps.bin, 1, sum) == 0),] <- c(1, rep(0, numTrees(model) - 1))
              ## Take the mean for the noise.
              if (sampling.mode == "exponential")  
                res.GPS[[2]][[1]][which(is.na(res.GPS[[2]][[1]]))] <- 1.0/sampling.param
              else 
                res.GPS[[2]][[1]][which(is.na(res.GPS[[2]][[1]]))] <- sampling.param
              
              for(i in 2:numTrees(model)) 
                res.GPS[[2]][[i]][which(is.na(res.GPS[[2]][[i]]))] <- numeric(1)        
            } else {
              for(i in 1:numTrees(model)) 
                res.GPS[[2]][[i]][which(is.na(res.GPS[[2]][[i]]))] <- numeric(1)
            }
            
            fin.GPS <- apply(t(resp) *  t(sapply(res.GPS[[2]], rbind)), 2, sum)
            
            ## Take the GPS of the patterns present in the given dataset.
            index <- apply(mat[,-1], 1, '%*%', 2^c(0:(ncol(mat) - 2))) + 1              
            res <- fin.GPS[index]
            
            return(new("RtreemixGPS",
                       Model = model,
                       Data = new("RtreemixData", Sample = data),
                       SamplingMode = sampling.mode,
                       SamplingParam = sampling.param,
                       GPS = res))                        
          })

## The dataset is not specified. Calculate the GPS for all possible patterns.
setMethod("gps",
          signature(model = "RtreemixModel", data = "missing"),
          function(model, sampling.mode = "exponential", sampling.param = 1,
                    no.sim = 10, seed = (-1)) {
            ## Setting the true type
            sampling.param <- as.numeric(sampling.param)
            no.sim <- as.integer(no.sim)
            seed <- as.integer(seed)           
            ## Check if the necessary parameters are provided and have the correct form.    
            if((seed != -1) &&
               (!(is.numeric(seed)) || !(trunc(seed) == seed) || !(seed >= 0 )))
              stop("The seed must be a positive integer!")
            
            if (!missing(sampling.mode)) {
              if (!(identical(sampling.mode, "constant") || identical(sampling.mode, "exponential")))
                stop("Please give correct sampling mode (constant or exponential)!")
            }

            if (no.sim < 1)
              stop("The number of iterations for the waiting time simulation must be an integer greater than zero!")           
                                          
            ## Give the node names and edge weights in a list structure. 
            listG <- list()
            for(i in 1:numTrees(model)) {
              listG[[i]] <- list()
              listG[[i]][[1]] <- nodes(Trees(model)[[i]])
              listG[[i]][[2]] <- edgeWeights(Trees(model)[[i]])
            }
            ## Call to the C function for calculating the GPS.  
            res.GPS <- .Call("R_time", eventsNum(model), listG, Weights(model),
                             as.integer(ifelse(identical(sampling.mode, "constant"), 0, 1)),
                             sampling.param, no.sim, seed)
            ## Generate all possible patterns.
            all.data <- new("RtreemixData", Sample = res.GPS[[1]][, -1])

            resp <- WLikelihoods(likelihoods(model = model, data = all.data))
            bind.gps <- sapply(res.GPS[[2]], rbind)
            bind.gps.bin <- apply(!apply(bind.gps, 2, is.na), 2, as.integer)
            filter.resp <- resp * bind.gps.bin
            resp <- filter.resp / apply(filter.resp, 1, sum)

            if(numTrees(model) > 1) {
              resp[which(apply(bind.gps.bin, 1, sum) == 0),] <- c(1, rep(0, numTrees(model) - 1))
              ## Take the mean for the noise.
              if (sampling.mode == "exponential")  
                res.GPS[[2]][[1]][which(is.na(res.GPS[[2]][[1]]))] <- 1.0/sampling.param
              else 
                res.GPS[[2]][[1]][which(is.na(res.GPS[[2]][[1]]))] <- sampling.param
                      
              for(i in 2:numTrees(model)) 
                res.GPS[[2]][[i]][which(is.na(res.GPS[[2]][[i]]))] <- numeric(1)        
            } else {
              for(i in 1:numTrees(model)) 
                res.GPS[[2]][[i]][which(is.na(res.GPS[[2]][[i]]))] <- numeric(1)
            }
            ## Compute GPS for all patterns.
            fin.GPS <- apply(t(resp) *  t(sapply(res.GPS[[2]], rbind)), 2, sum)  

            return(new("RtreemixGPS",
                       Model = model,
                       Data = all.data,
                       SamplingMode = sampling.mode,
                       SamplingParam = sampling.param,
                       GPS = fin.GPS))             
          })

###################################################################################
## When the sampling mode and sampling parameter are specified, function for simulating patterns
## and their waiting and sampling times from a given Rtreemix model.
## When only the mixture model is specified, function for drawing data from the mixture model. 
## The model has to be specified in both cases.
if(!isGeneric("sim"))
  setGeneric("sim", function(model, sampling.mode, sampling.param, ...) standardGeneric("sim"))

## Function for drawing patterns from a given Rtreemix model.
setMethod("sim",
          signature(model = "RtreemixModel", sampling.mode = "missing", sampling.param = "missing"),
          function(model, no.draws = 10, seed = (-1)) {
            ## Setting the true type            
            no.draws <- as.integer(no.draws)
            seed <- as.integer(seed)           
            ## Check if the necessary parameters are provided and have the correct form.    
            if((seed != -1) &&
               (!(is.numeric(seed)) || !(trunc(seed) == seed) || !(seed >= 0 )))
              stop("The seed must be a positive integer!")
            
            if(no.draws < 1)
              stop("The number of draws must be an integer greater than zero!")
            
            ## Give the node names and edge weights in a list structure.
            listG <- list()
            for(i in 1:numTrees(model)) {
              listG[[i]] <- list()
              listG[[i]][[1]] <- nodes(Trees(model)[[i]])
              listG[[i]][[2]] <- edgeWeights(Trees(model)[[i]])
            }
            
            ## Call to the C function
            res.sim <- .Call("R_draw", eventsNum(model), listG, Weights(model),
                             no.draws, seed)
      
            return(new("RtreemixData",
                       Sample = res.sim[,-1]))
            
          })

## Function for simulating patterns with their sampling and waiting times.
setMethod("sim",
          signature(model = "RtreemixModel", sampling.mode = "character", sampling.param = "numeric"),
          function(model, sampling.mode, sampling.param, no.sim = 10, seed = (-1)) {
            ## Setting the true type            
            no.sim <- as.integer(no.sim)
            seed <- as.integer(seed)           
            ## Check if the necessary parameters are provided and have the correct form.    
            if((seed != -1) &&
               (!(is.numeric(seed)) || !(trunc(seed) == seed) || !(seed >= 0 )))
              stop("The seed must be a positive integer!")

            if (!(identical(sampling.mode, "constant") || identical(sampling.mode, "exponential")))
              stop("Please give correct sampling mode (constant or exponential)!")
            
            if(no.sim < 1)
              stop("The number of iterations for the waiting time simulation must be an integer greater than zero!")            
            
            ## Give the node names and edge weights in a list structure.
            listG <- list()
            for(i in 1:numTrees(model)) {
              listG[[i]] <- list()
              listG[[i]][[1]] <- nodes(Trees(model)[[i]])
              listG[[i]][[2]] <- edgeWeights(Trees(model)[[i]])
            }
            
            ## Call to the C function
            res.sim <- .Call("R_simulate", eventsNum(model), listG, Weights(model),
                             as.integer(ifelse(identical(sampling.mode, "constant"), 0, 1)),
                             sampling.param, no.sim,
                             seed)
            
            return(new("RtreemixSim",
                       Model = model,
                       SimPatterns = new("RtreemixData", Sample = res.sim$patterns[, -1]),
                       SamplingMode = sampling.mode,
                       SamplingParam = sampling.param,
                       WaitingTimes = res.sim$wtimes,
                       SamplingTimes = res.sim$stimes))    
            
          })

###################################################################################
## Function for generating a random Rtreemix model.
## The number of tree components and the number of genetic events have to be specified.
if(!isGeneric("generate"))
  setGeneric("generate", function(K, no.events, ...) standardGeneric("generate"))

setMethod("generate",
          signature(K = "numeric", no.events = "numeric"),
          function(K, no.events, noise.tree = TRUE,
                   equal.edgeweights = TRUE, prob = c(0.0, 1.0),
                   seed = (-1)) {
            
            ## Setting the true type
            K <- as.integer(K)
            no.events <- as.integer(no.events)
            noise.tree <- as.integer(noise.tree)
            seed <- as.integer(seed)
            equal.edgeweights <- as.integer(equal.edgeweights)
            ## Check if the necessary parameters are provided and have the correct form.                
            if(K < 1)
              stop("The number of trees must be an integer greater than zero!")
            
            if (no.events < 1)
              stop("The number of events must be an integer greater than zero!")
            
            if((seed != -1) &&
               (!(is.numeric(seed)) || !(trunc(seed) == seed) || !(seed >= 0 )))
              stop("The seed must be a positive integer!")
            
            if(!missing(prob)) {
              if(!is.numeric(prob) || (length(prob) != 2))
                stop("Specify the boundaries of the conditional probabilities as a numeric vector
of length two = c(min, max)!")              
              if(prob[2] < prob[1])
                stop("In the probability vector the lower boundary must be smaller than the
upper boundary!")
            }
            ## Call to the C function.  
            res.sim <- .Call("R_random", K, no.events,
                             noise.tree, equal.edgeweights,
                             prob[1], prob[2], seed)

            for(i in 1:K)
              edgeDataDefaults(res.sim$graphs.mixture[[i]], "weight") <- numeric(1)

            res <- new("RtreemixModel",
                       Weights =  as.numeric(res.sim$alpha),
                       Star = as.logical(noise.tree), 
                       Trees = res.sim$graphs.mixture)
  
            return(res)   
          })

###################################################################################
## Function for generating the (scaled) probablility distribution induced with a given RtreemixModel.
## The RtreemixModel has to be specified.
## The sampling mode and the parameters for the sampling times for the observed
## input and output probabilities are optional.
if(!isGeneric("distribution"))
  setGeneric("distribution", function(model, sampling.mode, sampling.param, output.param) standardGeneric("distribution"))

setMethod("distribution",
          signature(model = "RtreemixModel", sampling.mode = "missing",
                    sampling.param = "missing", output.param = "missing"),             
          function(model) {

            if(eventsNum(model) > 30)
              stop("Invalid for more than 30 events!")

            ## There is no specified sampling mode  
            mode = as.integer(-1)

            ## Give the node names and edge weights in a list structure.
            listG <- list()
            for(i in 1:numTrees(model)) {
              listG[[i]] <- list()
              listG[[i]][[1]] <- nodes(Trees(model)[[i]])
              listG[[i]][[2]] <- edgeWeights(Trees(model)[[i]])
            }
            ## Call to the C function for getting the distribution.
            res.distr <- .Call("R_distr", eventsNum(model), listG, Weights(model),
                               mode, numeric(1), numeric(1))  

            ## All possible patterns
            sample <- as.matrix(res.distr[[1]])
            rownames(sample) <- paste("P", 1:nrow(sample), sep = "")
            ## Build a dataframe of all possible patterns with their corresponding probabilities
            result <- data.frame(sample, res.distr[[2]])
            colnames(result) <- c(paste("E", 0:(ncol(sample) - 1), sep = ""), "probability")
                        
            return(result)  
          })

setMethod("distribution",
          signature(model = "RtreemixModel", sampling.mode = "character",
                    sampling.param = "numeric", output.param = "numeric"),             
          function(model, sampling.mode, sampling.param,
                   output.param) {

            if(eventsNum(model) > 30)
              stop("Invalid for more than 30 events!")
  
            if (!(identical(sampling.mode, "constant") || identical(sampling.mode, "exponential")))
              stop("Please give correct sampling mode (constant or exponential)!")

            mode <- as.integer(ifelse(identical(sampling.mode, "constant"), 0, 1))

            ## Give the node names and edge weights in a list structure.
            listG <- list()
            for(i in 1:numTrees(model)) {
              listG[[i]] <- list()
              listG[[i]][[1]] <- nodes(Trees(model)[[i]])
              listG[[i]][[2]] <- edgeWeights(Trees(model)[[i]])
            }
            ## Call to the C function for getting the distribution.
            res.distr <- .Call("R_distr", eventsNum(model), listG, Weights(model),
                               mode, sampling.param, output.param)
            ## All possible patterns
            sample <- as.matrix(res.distr[[1]])
            rownames(sample) <- paste("P", 1:nrow(sample), sep = "")
            ## Build a dataframe of all possible patterns with their corresponding probabilities
            result <- data.frame(sample, res.distr[[2]])
            colnames(result) <- c(paste("E", 0:(ncol(sample) - 1), sep = ""), "probability")
            ## Print some useful information.
            if (mode == 1) 
              cat("Exponential sampling mode!\n")
            else
              cat("Constant sampling mode!\n")
            
            cat("The parameter for the observed input probabilities:\n")
            print(sampling.param)
            cat("\n The parameter for the observed output probabilities:\n")
            print(output.param)
            print(result)
            
            return(result)  
          })

###################################################################################
## Function for calculating GPS values and their 95% bootstrap 
## confidence intervals for a given dataset.
## the GPS values are calculated with respect to an underlying
## oncogenetic trees mixture model fitted to the given data.
## The number of trees for the model (K) and the dataset have to be specified.

##change to confIntGPS
if(!isGeneric("confIntGPS"))
  setGeneric("confIntGPS", function(data, K, ...) standardGeneric("confIntGPS"))

setMethod("confIntGPS",
          signature(data = "RtreemixData", K = "numeric"),
          function(data, K, sampling.mode = "exponential", sampling.param = 1,
                   no.sim = 10000, B = 1000, equal.star = TRUE) {
            ## Setting the true type
            K <- as.integer(K)
            B <- as.integer(B)
            sampling.param <- as.numeric(sampling.param)            
            no.events <- eventsNum(data)
            no.sim <-  as.integer(no.sim)
            equal.star <- as.logical(equal.star)
            ## Check if the necessary parameters are provided and have the correct form.                
            if(K < 1)
              stop("The number of trees must be an integer greater than zero!")
            
            if(B < 1)
              stop("The number of bootstrap samples integer greater than zero!")

            if (no.sim < 1)
              stop("The number of iterations for the waiting time simulation must be an integer greater than zero!")
            
            if (!missing(sampling.mode)) {
              if (!(identical(sampling.mode, "constant") || identical(sampling.mode, "exponential")))
                stop("Please give correct sampling mode (constant or exponential)!")
            }
  
            ## Fit an Rtreemix fm model to the given data.
            fm <- fit(data = data, K = K, noise = TRUE, equal.edgeweights = equal.star, eps = 0.01)
            ## Compute the GPS values for all possible patterns.
            fit.gps <- GPS(gps(model = fm, no.sim = no.sim))
            ## Find the row indices of the patterns in the given data in the matrix of all possible patterns
            ind <- apply(Sample(data), 1, '%*%', 2^c(0 : (no.events - 2))) + 1
  
            boot.gps <- vector(mode = "list", length = 2 ^ (no.events - 1))  
            for (i in 1:B) {
              ## Draw a bootstrap sample from the data with replacement.
              random.rows <- sample(sampleSize(data), sampleSize(data), replace = TRUE)
              new.data <- new("RtreemixData", Sample = Sample(data)[random.rows, ])
              ## Fit an Rtreemix model to it
              new.fit <- fit(data = new.data, K = K, noise = TRUE, equal.edgeweights = equal.star, eps = 0.01)
              ## Calculate the GPS values of all patterns for the new model
              new.gps <- GPS(gps(model = new.fit, no.sim = no.sim))
              for(j in 1:length(boot.gps))
                boot.gps[[j]] <- c(boot.gps[[j]], new.gps[j])
            }
            sort.boot <- lapply(boot.gps, sort)
            ## Calculate 95% confidence intervals for the GPS values
            boot.intervals <- lapply(sort.boot, function(sort.boot){c(sort.boot[ceiling((B * 2.5) / 100)], sort.boot[as.integer((B * 97.5) / 100)])})
  
            fit.gps.data <- fit.gps[ind]
            result <- matrix(nrow = length(ind), ncol = 2)
  
            for(i in 1:length(ind)) 
              result[i, ] <- boot.intervals[[ind[i]]]
            
            return(new("RtreemixGPS",
                       Model = fm,
                       Data = data,
                       SamplingMode = sampling.mode,
                       SamplingParam = sampling.param,
                       GPS = fit.gps.data,
                       gpsCI = result))
          })
######################################################################          

Try the Rtreemix package in your browser

Any scripts or data that you put into this service are public.

Rtreemix documentation built on Nov. 8, 2020, 5:57 p.m.