mutSignatures_dev: Decipher Mutational Signatures from Somatic Mutational Catalogs

Documented in alexaNMF attachContext attachMutType chihJenNMF countMutTypes extractXvarlinkData filterSNV frequencize getCosmicSignatures importVCFfiles matchSignatures plotMutCount plotMutTypeProfile prelimProcessAssess processVCFdata removeMismatchMut resolveMutSignatures setMutClusterParams simplifySignatures sortByMutations table2df

# mutSignatures
# by Damiano Fantini, Ph.D.
# http://www.mutSignatures.org

###
##### S4 classes and methods
###

## set 'getters' and 'setters' Generics -----------------------------------------------------
setGeneric("getFwkParam", function(x, label) {
  standardGeneric("getFwkParam")
})

setGeneric("getMutationTypes", function(x) {
  standardGeneric("getMutationTypes")
})

setGeneric("getSampleIdentifiers", function(x) {
  standardGeneric("getSampleIdentifiers")
})

setGeneric("getCounts", function(x) {
  standardGeneric("getCounts")
})

setGeneric("getSignatureIdentifiers", function(x) {
  standardGeneric("getSignatureIdentifiers")
})


setGeneric("setFwkParam", function(x, label, value) {
  standardGeneric("setFwkParam")
})

## set Generics for class-to-class Object coercion and other operations -----------------
setGeneric("coerceObj", function(x, to, ...) {
  standardGeneric("coerceObj")
})

setGeneric("as.mutation.counts", function(x, rownames=NULL, colnames=NULL) {
  standardGeneric("as.mutation.counts")
})

setGeneric("as.mutation.signatures", function(x) {
  standardGeneric("as.mutation.signatures")
})

setGeneric("as.mutsign.exposures", function(x, samplesAsCols = TRUE) {
  standardGeneric("as.mutsign.exposures")
})



## mutFrameworkParams class -------------------------------------------------------------
setClass("mutFrameworkParams",
         slots = list(params = "list"))

# Define an initializer
setMethod("initialize", "mutFrameworkParams",
          function(.Object, params) {
            .Object <- callNextMethod(.Object)
            
            # Check args
            if(!is.list(params))
              stop("Bad Input")
            
            if(sum(sapply(params, length) != 1, na.rm = TRUE) > 0)
              stop("Bad input")
            
            requiredNames <- c("num_processesToExtract","num_totIterations","num_parallelCores",
                               "thresh_removeWeakMutTypes","thresh_removeLastPercent","distanceFunction",
                               "num_totReplicates","eps","stopconv",
                               "niter", "guided", "debug", 
                               "approach", "stopRule", "algorithm",
                               "logIterations")
            
            if(sum(names(params) %in% requiredNames) < 16)
              stop("Missing Params")
            
            .Object@params <- params
            .Object
          })

setMethod("show", signature(object = "mutFrameworkParams"),
          function(object) {
            out <- object@params
            cat(" mutFrameworkParams object - mutSignatures \n")
            cat(paste(" num of params included:", length(out), "\n\n"))
            linex <- lapply(1:length(out), function(i) {
              paste("   - ", names(out)[i], ": ", out[[i]], "\n", sep = "")
            })
            for (li in linex) {cat(li)}
          })

setMethod("print", signature(x = "mutFrameworkParams"),
          function(x) {
            show(object = x)
          })

setMethod("getFwkParam", signature(x = "mutFrameworkParams", label = "character"),
          function(x, label) {
            out <- x@params
            out[[label]]})

setMethod("setFwkParam", signature(x="mutFrameworkParams"),
          function(x, label, value) {
            if (length(label) != 1 | length(value) != 1)
              stop("Bad input")
            if(!is.character(label))
              stop("Bad input")
            
            x@params[[ label ]] <- value
            x
          })

setMethod("coerceObj", signature(x = "mutFrameworkParams", to = "character"),
          function(x, to) {
            if (to[1] == "list") {
              x@params
            } else if (to[1] == class(x)[1]){
              x
            } else {
              stop("Object could not be coerced to the desired data type")
            }
          })

setMethod("as.list", signature(x = "mutFrameworkParams"),
          function(x) {
            coerceObj(x, to = "list")
          })


## mutationCounts class ------------------------------------------------------
setClass("mutationCounts",
         slots = list(counts = "data.frame",
                      mutTypes = "data.frame",
                      sampleId = "data.frame"))

# Define an initializer
setMethod("initialize", "mutationCounts",
          function(.Object, x, muts, samples) {
            .Object <- callNextMethod(.Object)
            
            # Check args
            if(!is.data.frame(x) |
               !is.data.frame(muts) |
               !is.data.frame(samples))
              stop("Bad input")
            
            if(sum(duplicated(muts[,1])) > 0)
              stop("Mutation Types cannot be duplicated")
            
            if(sum(duplicated(samples[,1])) > 0)
              stop("Sample Identifiers cannot be duplicated")
            
            if (ncol(x) != nrow(samples) |
                nrow(x) != nrow(muts))
              stop("Data Dimension Issues")
            
            .Object@counts <- x
            rownames(.Object@counts) <- NULL
            colnames(.Object@counts) <- NULL
            
            .Object@mutTypes <- muts
            rownames(.Object@mutTypes) <- NULL
            colnames(.Object@mutTypes)[1] <- "mutTypes"
            
            
            .Object@sampleId <- samples
            rownames(.Object@sampleId) <- NULL
            colnames(.Object@sampleId)[1] <- "ID"
            
            .Object
          })

setMethod("show", "mutationCounts",
          function(object) {
            
            out <- object@counts
            rownames(out) <- object@mutTypes[,1]
            colnames(out) <- object@sampleId[,1]
            
            cat(" Mutation Counts object - mutSignatures")
            cat("\n\n")
            cat(paste(" Total num of MutTypes:", length(object@mutTypes[,1])))
            cat("\n")
            cat(paste(" MutTypes:", paste(head(object@mutTypes[,1], 5), collapse = ", "),
                      ifelse(length(object@mutTypes[,1]) > 5, "...", "")))
            cat("\n\n")
            cat(paste(" Total num of Samples:", length(object@sampleId[,1])))
            cat("\n")
            cat(paste(" Sample Names:", paste(head(object@sampleId[,1], 5), collapse = ", "),
                      ifelse(length(object@sampleId[,1]) > 5, "...", "")))
            
            cat("")
          })

setMethod("print", signature(x="mutationCounts"),
          function(x) {
            show(x)
          })

setMethod("getMutationTypes", "mutationCounts",
          function(x){
            base::as.vector(x@mutTypes[,1])
          })

setMethod("getSampleIdentifiers", "mutationCounts",
          function(x = "mutationCounts"){
            base::as.vector(x@sampleId[,1])
          })

setMethod("getCounts", signature(x="mutationCounts"),
          function(x) {
            
            out <- x@counts
            rownames(out) <- x@mutTypes[,1]
            colnames(out) <- x@sampleId[,1]
            out
          })

setMethod("coerceObj", signature(x = "mutationCounts", to = "character"),
          function(x, to, keepNames = TRUE) {
            if (to[1] == "data.frame") {
              getCounts(x)
            } else if (to[1] == "matrix") {
              out <- getCounts(x)
              out <- base::as.matrix(out)
              if (is.logical(keepNames[1])) {
                if (!keepNames[1]) {
                  dimnames(out) <- NULL
                }
              }
              out
            } else if (to[1] == class(x)[1]){
              x
            } else {
              stop("Object could not be coerced to the desired data type")
            }
          })

setMethod("as.data.frame" , signature(x="mutationCounts"),
          function(x) {
            coerceObj(x, to = "data.frame")
          })

setMethod("as.matrix", signature(x = "mutationCounts"),
          function(x) {
            coerceObj(x, to = "matrix", keepNames = FALSE)
          })

setMethod("as.mutation.counts", signature(x="data.frame"),
          function(x, rownames = NULL, colnames = NULL) {
            
            # Checks
            if (is.null(rownames) & (is.null(rownames(x))))
              stop("Incomplete Mutation Type info")
            
            if (is.null(colnames) & is.null(colnames(x)))
              stop("Incomplete Mutation Type info")
            
            if (!is.null(rownames)){
              if (!is.character(rownames) |
                  length(rownames) != nrow(x) |
                  sum(duplicated(rownames) > 0))
                stop("Unsuitable Mutation Types")
            }
            
            if (!is.null(colnames)){
              if (!is.character(colnames) |
                  length(colnames) != ncol(x) |
                  sum(duplicated(colnames) > 0))
                stop("Unsuitable Sample Identifiers")
            }
            
            if (is.null(rownames))
              rownames <- rownames(x)
            
            if (is.null(colnames))
              colnames <- colnames(x)
            
            fOUT <- new(Class = "mutationCounts", 
                        x = x, 
                        muts = data.frame(mutTypes = rownames, stringsAsFactors = FALSE), 
                        samples = data.frame(ID = colnames, stringsAsFactors = FALSE))            
            
            fOUT
          })


setMethod("[", signature(x="mutationCounts", i="numeric"),
          function(x, i, ...) {
            
            if(nargs() > 2)
              stop("object of type 'S4' is not subsettable")

            new(Class = "mutationCounts", 
                x=base::as.data.frame(x@counts[,i]), 
                muts = x@mutTypes,
                samples=base::data.frame(x@sampleId[i,], 
                                         stringsAsFactors = FALSE,
                                         row.names = NULL))
          })

## mutationSignatures class ------------------------------------------------------
setClass("mutationSignatures",
         slots = list(mutationFreq = "data.frame",
                      mutTypes = "data.frame",
                      signatureId = "data.frame"))

# Define an initializer
setMethod("initialize", "mutationSignatures",
          function(.Object, x, muts, signNames) {
            .Object <- callNextMethod(.Object)
            
            # Check args
            if(!is.data.frame(x) |
               !is.data.frame(muts) |
               !is.data.frame(signNames))
              stop("Bad input")
            
            if(sum(duplicated(muts[,1])) > 0)
              stop("Mutation Types cannot be duplicated")
            
            if(sum(duplicated(signNames[,1])) > 0)
              stop("Sample Identifiers cannot be duplicated")
            
            if (ncol(x) != nrow(signNames) |
                nrow(x) != nrow(muts))
              stop("Data Dimension Issues")
            
            all.Nums <- base::as.numeric(format(apply(x, 2, sum), digits = 2, nsmall = 2))
            if (sum(all.Nums != 1) > 0) 
              stop ("Mutation frequency must always sum up to unity")
            
            .Object@mutationFreq <- x
            rownames(.Object@mutationFreq) <- NULL
            colnames(.Object@mutationFreq) <- NULL
            
            .Object@mutTypes <- muts
            rownames(.Object@mutTypes) <- NULL
            colnames(.Object@mutTypes)[1] <- "mutTypes"
            
            .Object@signatureId <- signNames
            rownames(.Object@signatureId) <- NULL
            colnames(.Object@signatureId)[1] <- "ID"
            
            .Object
          })

setMethod("show", "mutationSignatures",
          function(object) {
            
            out <- object@mutationFreq
            rownames(out) <- object@mutTypes[,1]
            colnames(out) <- object@signatureId[,1]
            
            cat(" Mutation Signatures object - mutSignatures")
            cat("\n\n")
            
            clMax <- ncol(out)
            cat(paste(" Total num of Signatures:", clMax))
            cat("\n")
            
            rowMax <- nrow(out)
            cat(paste(" Total num of MutTypes:", rowMax))
            cat("\n\n")
            
            nuClMax <- ifelse(clMax > 5, 5, clMax)
            colRange <- 1 : nuClMax
            #message(nuClMax)
            nuRwMax <- ifelse(rowMax > 10, 10, rowMax)
            rowRange <- 1 : nuRwMax
            
            # Headings
            cat(paste("    ", paste("Sign.", colRange, sep = "", collapse = "   "), sep = ""))
            cat("\n")
            cat(paste("    ", paste(rep("------", length(colRange)), sep = "", collapse = "   "), sep = ""))
            cat("\n")
            for(jj in rowRange){
              cat(paste("  + ", paste(format(round(out[jj,colRange], digits = 4), nsmall = 4, scientific = FALSE), sep = "", collapse = "   "), sep = ""))
              cat("  +  ")
              cat(rownames(out)[jj])
              cat("\n")
            }
            
            if(rowMax > 10){
              cat(paste("    ", paste(rep("......", length(colRange)), sep = "", collapse = "   "), sep = ""))
            }
            cat("\n")
          })

setMethod("print", signature(x="mutationSignatures"),
          function(x, ...) {
            show(x)
          })

setMethod("getMutationTypes", "mutationSignatures",
          function(x){
            base::as.vector(x@mutTypes[,1])
          })


setMethod("getSignatureIdentifiers", "mutationSignatures",
          function(x){
            base::as.vector(x@signatureId[,1])
          })



setMethod("as.data.frame", signature(x ="mutationSignatures"),
          function(x){
            coerceObj(x, to = "data.frame")
          })



setMethod("as.mutation.signatures", signature(x ="data.frame"),
          function(x){
            
            data <- x
            rw1 <- data.frame(type = rownames(x), stringsAsFactors = FALSE)
            cl1 <- data.frame(ID = colnames(x), stringsAsFactors = FALSE)
            
            out <- new(Class = "mutationSignatures", 
                       x= data, 
                       muts=rw1, 
                       signNames=cl1)
            out
          })

setMethod("[", signature(x="mutationSignatures", i="numeric"),
          function(x, i, ...) {
            
            if(nargs() > 2)
              stop("object of type 'S4' is not subsettable")
            
            new(Class = "mutationSignatures", 
                x=base::as.data.frame(x@mutationFreq[,i]), 
                muts = x@mutTypes,
                signNames=base::data.frame(x@signatureId[i,], 
                                     stringsAsFactors = FALSE,
                                     row.names = NULL))
          })

setMethod("as.list", "mutationSignatures", 
          function(x) {
            coerceObj(x, to = "list")
          })

setMethod("coerceObj", signature(x = "mutationSignatures", to = "character"),
          function(x, to) {
            if (to[1] == "data.frame") {
              out <- x@mutationFreq
              rownames(out) <- x@mutTypes[,1]
              colnames(out) <- x@signatureId[,1]
              out
            } else if (to[1] == "matrix") {
              out <- x@mutationFreq
              rownames(out) <- x@mutTypes[,1]
              colnames(out) <- x@signatureId[,1]
              base::as.matrix(out)
            } else if (to[1] == "list") {
              list(mutationFreq=x@mutationFreq,
                   mutTypes=x@mutTypes,
                   signatureId=x@signatureId)
            } else if (to[1] == class(x)[1]){
              x
            } else {
              stop("Object could not be coerced to the desired data type")
            }         
          })

setMethod("cbind2", signature(x="mutationSignatures", y="mutationSignatures"), 
          function(x, y, ...) {
            xMut <- getMutationTypes(x)
            yMut <- getMutationTypes(y)
            
            if (length(xMut) == length(yMut) &
                sum(!xMut %in% yMut) == 0) {
              
              orderY <- sapply(xMut, function(ix) which(yMut == ix))
              #sortedYmut <- yMut[orderY]
              sortedYexp <- y@mutationFreq[orderY, ]
              
              new(Class = "mutationSignatures", 
                  x=base::as.data.frame(cbind(x@mutationFreq, sortedYexp)), 
                  muts = x@mutTypes,
                  signNames=data.frame(c(x@signatureId[,1], y@signatureId[,1]) , 
                                       stringsAsFactors = FALSE))
              
            } else {
              stop("Incompatible Objects")
            }
          })

## mutSignExposures class ------------------------------------------------------
setClass("mutSignExposures",
         slots = list(exposures = "data.frame",
                      sampleId = "data.frame",
                      signatureId = "data.frame"))

# Define an initializer
setMethod("initialize", "mutSignExposures",
          function(.Object, x, samples, signNames) {
            .Object <- callNextMethod(.Object)
            
            # Check args
            if(!is.data.frame(x) |
               !is.data.frame(samples) |
               !is.data.frame(signNames))
              stop("Bad input")
            
            if(sum(duplicated(samples[,1])) > 0)
              stop("Mutation Types cannot be duplicated")
            
            if(sum(duplicated(signNames[,1])) > 0)
              stop("Sample Identifiers cannot be duplicated")
            
            if (ncol(x) != nrow(samples) |
                nrow(x) != nrow(signNames))
              stop("Data Dimension Issues")
            
            
            .Object@exposures <- x
            rownames(.Object@exposures) <- NULL
            colnames(.Object@exposures) <- NULL
            
            .Object@sampleId <- samples
            rownames(.Object@sampleId) <- NULL
            colnames(.Object@sampleId)[1] <- "ID"
            
            .Object@signatureId <- signNames
            rownames(.Object@signatureId) <- NULL
            colnames(.Object@signatureId)[1] <- "ID"
            
            .Object
          })

setMethod("show", "mutSignExposures",
          function(object) {
            
            out <- t(object@exposures)
            rownames(out) <- object@sampleId[,1]
            colnames(out) <- object@signatureId[,1]
            
            zero.approach <- ifelse (sum(apply(out, 1, sum) > 2) == 0, TRUE, FALSE)
            
            cat(" MutSignature Exposures object - mutSignatures")
            cat("\n\n")
            
            rwMax <- nrow(out)
            cat(paste(" Total num of Samples:", rwMax))
            cat("\n")
            
            clMax <- ncol(out)
            
            nuClMax <- ifelse(clMax > 5, 5, clMax)
            colRange <- 1 : nuClMax
            #message(nuClMax)
            nuRwMax <- ifelse(rwMax > 10, 10, rwMax)
            rowRange <- 1 : nuRwMax
            
            cat(paste(" Total num of Signatures:", clMax, " { first", nuClMax,  "signatures are displayed }"))
            cat("\n")
            cat(paste(" Signature names:  ", 
                      paste(colnames(out)[1:nuClMax], collapse = ", "), 
                      ifelse(clMax > 5, " ...", ""), 
                      sep = "", collapse = " ") )
            
            cat("\n\n")
            
            
            # Headings
            cat(paste("    ", paste("Sign.", colRange, sep = "", collapse = "   "), sep = ""))
            cat("\n")
            cat(paste("    ", paste(rep("------", length(colRange)), sep = "", collapse = "   "), sep = ""))
            cat("\n")
            for(jj in rowRange){
              
              if (zero.approach) {
                prep.nums <- paste(paste(" ", format(round(out[jj,colRange],digits = 2), nsmall = 2), sep = ""), " ", sep = "")
              } else {
                prep.nums <- leadZeros(out[jj,colRange], m = 999999, char = " ", na.value = "999999")
              }
              
              prep.nums[base::as.numeric(prep.nums) > 999] <- "  >999"
              cat(paste("  + ", paste(prep.nums,
                                      sep = "", collapse = "   "), sep = ""))
              cat("  +  ")
              cat(rownames(out)[jj])
              cat("\n")
            }
            
            if(rwMax > 10){
              cat(paste("    ", paste(rep("......", length(colRange)), sep = "", collapse = "   "), sep = ""))
            }
            cat("\n")
          })

setMethod("print", signature(x="mutSignExposures"),
          function(x, ...) {
            show(x)
          })

setMethod("getSampleIdentifiers", "mutSignExposures",
          function(x){
            base::as.vector(x@sampleId[,1])
          })

setMethod("getSignatureIdentifiers", "mutSignExposures",
          function(x){
            base::as.vector(x@signatureId[,1])
          })


setMethod("[", signature(x="mutSignExposures", i="numeric"),
          function(x, i, ...) {
            
            if(nargs() > 2)
              stop ("object of type 'S4' is not subsettable")

            new(Class = "mutSignExposures", 
                x = base::as.data.frame(x@exposures[,i]), 
                signNames = x@signatureId,
                samples=base::data.frame(x@sampleId[i,], 
                                         stringsAsFactors = FALSE, 
                                         row.names = NULL))
          })

setMethod("coerceObj", signature(x = "mutSignExposures", to = "character"),
          function(x, to, transpose = TRUE) {
            if (to[1] == "data.frame" & is.logical(transpose) & transpose[1]) {
              out <- base::data.frame(t(x@exposures))
              rownames(out) <- x@sampleId[,1]
              colnames(out) <- x@signatureId[,1]
              out
            } else if (to[1] == "matrix" & is.logical(transpose) & transpose[1]) {
              out <- t(x@exposures)
              rownames(out) <- x@sampleId[,1]
              colnames(out) <- x@signatureId[,1]
              base::as.matrix(out)
            } else if (to[1] == "data.frame" & is.logical(transpose) & !transpose[1]) {
              out <- x@exposures
              rownames(out) <- x@signatureId[,1]
              colnames(out) <- x@sampleId[,1]
              out
            } else if (to[1] == "matrix" & is.logical(transpose) & !transpose[1]) {
              out <- x@exposures
              rownames(out) <- x@signatureId[,1]
              colnames(out) <- x@sampleId[,1]
              base::as.matrix(out)
            } else if (to[1] == class(x)[1]){
              x
            } else {
              stop("Object could not be coerced to the desired data type")
            }         
          })


setMethod("as.data.frame", signature(x ="mutSignExposures"),
          function(x){
            coerceObj(x, to = "data.frame")
          })

setMethod("as.data.frame", signature(x ="mutSignExposures"),
          function(x, transpose=TRUE){
            if (is.logical(transpose)) {
              coerceObj(x, to = "data.frame", transpose = transpose[1])  
            } else {
              coerceObj(x, to = "data.frame")
            }
          })

setMethod("as.mutsign.exposures", signature(x ="data.frame"),
          function(x, samplesAsCols = TRUE){
            if (is.null(rownames(x)) | is.null(colnames(x))) 
              stop("Bad input")
            
            data <- x
            rw1 <- data.frame(type = rownames(x), stringsAsFactors = FALSE)
            cl1 <- data.frame(ID = colnames(x), stringsAsFactors = FALSE)
            
            if(samplesAsCols) {
              out <- new(Class = "mutSignExposures", 
                         x = data, 
                         samples=cl1, 
                         signNames=rw1)
            } else {
              out <- new(Class = "mutSignExposures", 
                         x = base::as.data.frame(t(data)), 
                         samples = rw1, 
                         signNames = cl1)
              
            }
            out
          })

setMethod("plot", signature(x = "mutationSignatures"), 
          function(x, signature, main = NULL, ...) {
            mutCounts <- x
            objOut <- NULL
            if (is.numeric(signature)) {
              if(length(signature) == 1 & signature[1] <= length(getSignatureIdentifiers(mutCounts))) {
                objOut <- coerceObj(x = mutCounts, to = "data.frame")[,signature]
                label <-  getSignatureIdentifiers(mutCounts)[signature]
              }
            } else if (is.character(signature)) {
              if (length(signature) == 1 & signature[1] %in% getSignatureIdentifiers(mutCounts)) {
                objOut <- coerceObj(x = mutCounts, to = "data.frame")[,signature]
                label <- signature
              }
            } 
            
            if(!is.null(objOut)){
              final.label <- ifelse(!is.null(main), main, label)
              plotMutTypeProfile(mutCounts = objOut, 
                                 mutLabs = getMutationTypes(mutCounts),
                                 main = final.label, ...)
              
            } else {
              stop("Signature not found")
            }
          })


setMethod("plot",signature(x = "mutationCounts"), 
          function(x, sample, main = NULL, ...) {
            mutCounts <- x
            objOut <- NULL
            if (is.numeric(sample)) {
              if(length(sample) == 1 & sample[1] <= length(getSampleIdentifiers(mutCounts))) {
                objOut <- coerceObj(x = mutCounts, to = "data.frame")[,sample]
                label <-  getSampleIdentifiers(mutCounts)[sample]
              }
            } else if (is.character(sample)) {
              if (length(sample) == 1 & sample[1] %in% getSampleIdentifiers(mutCounts)) {
                objOut <- coerceObj(x = mutCounts, to = "data.frame")[,sample]
                label <- sample
              }
            } 
            
            if(!is.null(objOut)){
              final.label <- ifelse(!is.null(main), main, label)
              plotMutTypeProfile(mutCounts = objOut, 
                                 mutLabs = getMutationTypes(mutCounts),
                                 main = final.label,
                                 ...)
              
            } else {
              stop("Sample not found")
            }
          })


setMethod("plot", signature(x = "mutSignExposures"), 
          function(x, top = 100) {
            mutCount <- coerceObj(x = x, to = "data.frame")
            plotMutCount(mutCount, top = top)
          })

setMethod("coerceObj", signature(x = "data.frame", to = "character"),
          function(x, to, ...) {
            if (to[1] == "mutationCounts" & nargs() %in% 2:4) {
              as.mutation.counts(x, ...)
            } else if (to[1] == "mutationSignatures" & nargs() == 2){
              as.mutation.signatures(x)
            } else if (to[1] == "mutSignExposures" & nargs() %in% 2:3) {
              as.mutsign.exposures(x, ...)
            } else if (to[1] == class(x)[1]){
              x
            } else {
              stop("Object could not be coerced to the desired data type")
            }
          })

###
##### Custom Functions - core package
###

addWeak <- function (mutationTypesToAddSet, 
                     processes_I, 
                     processesStd_I, 
                     Wall_I, 
                     genomeErrors_I, 
                     genomesReconstructed_I) 
{
  if (length(mutationTypesToAddSet) > 0 & mutationTypesToAddSet[1] > 0) {
    totalMutTypes <- nrow(Wall_I) + length(mutationTypesToAddSet)
    processes <- matrix(0, nrow = totalMutTypes, ncol = ncol(processes_I))
    processesStd <- matrix(0, nrow = totalMutTypes, ncol = ncol(processesStd_I))
    Wall <- matrix(0, nrow = totalMutTypes, ncol = ncol(Wall_I))
    genomeErrors <- lapply(1:length(genomeErrors_I), (function(i) {
      matrix(0, nrow = totalMutTypes, ncol = ncol(genomeErrors_I[[1]]))
    }))
    genomesReconstructed <- lapply(1:length(genomesReconstructed_I), 
                                   (function(i) {
                                     matrix(0, 
                                            nrow = totalMutTypes, 
                                            ncol = ncol(genomesReconstructed_I[[1]]))
                                   }))
    origArrayIndex <- 1
    for (i in 1:totalMutTypes) {
      #message(i)
      if (!(i %in% mutationTypesToAddSet)) {
        processes[i, ] <- processes_I[origArrayIndex, ]
        processesStd[i, ] <- processesStd_I[origArrayIndex,]
        Wall[i, ] <- Wall_I[origArrayIndex, ]
        for (j in 1:length(genomeErrors_I)) {
          genomeErrors[[j]][i, ] <- genomeErrors_I[[j]][origArrayIndex,  ]
        }
        for (j in 1:length(genomesReconstructed_I)) {
          genomesReconstructed[[j]][i, ] <- genomesReconstructed_I[[j]][origArrayIndex,   ]
        }
        origArrayIndex <- origArrayIndex + 1
      }
    }
  }
  else {
    processes <- processes_I
    processesStd <- processesStd_I
    Wall <- Wall_I
    genomeErrors <- genomeErrors_I
    genomesReconstructed <- genomesReconstructed_I
  }
  weakAdded.list <- list()
  weakAdded.list$processes <- processes
  weakAdded.list$processesStd <- processesStd
  weakAdded.list$Wall <- Wall
  weakAdded.list$mutCountErrors <- genomeErrors
  weakAdded.list$mutCountReconstructed <- genomesReconstructed
  return(weakAdded.list)
}


bootstrapCancerGenomes <- function (genomes) 
{
  genome.col.sums <- apply(genomes, 2, sum)
  norm.genomes <- genomes/matrix(genome.col.sums, 
                                 ncol = ncol(genomes), 
                                 nrow = nrow(genomes), 
                                 byrow = TRUE)
  bootstrapGenomes <- sapply(1:length(genome.col.sums), (function(i) {
    stats::rmultinom(1, size = genome.col.sums[i], prob = norm.genomes[,i])
  }))
  return(bootstrapGenomes)
}


evaluateStability <- function (wall, 
                               hall, 
                               params) 
{
  BIG_NUMBER <- 100
  CONVERG_ITER <- 10
  CONVERG_CUTOFF <- 0.005
  TOTAL_INIT_CONDITIONS <- 5
  num_processesToExtract <- params$num_processesToExtract
  num_totReplicates <- params$num_totReplicates
  distanceFunction <- params$distanceFunction
  minClusterDist <- BIG_NUMBER
  totalIter <- ncol(wall)/num_processesToExtract
  idx = matrix(0, nrow = nrow(hall), ncol = 1)
  clusterCompactness <- matrix(0, 
                               nrow = num_processesToExtract, 
                               ncol = totalIter)
  iStartDataSet = seq(1, ncol(wall), by = num_processesToExtract)
  iStartingDataSet = iStartDataSet[sample(1:totalIter)]
  for (iInitData in 1:min(c(TOTAL_INIT_CONDITIONS, totalIter))) {
    #message(iInitData)
    iStartingData <- iStartingDataSet[iInitData]
    iEnd <- iStartingData + num_processesToExtract - 1
    centroids <- cbind(wall[, iStartingData:iEnd])
    centroidsTest <- sapply(1:ncol(centroids), (function(kk) {
      runif(nrow(centroids))
    }))
    countIRep <- 0
    for (iRep in 1:num_totReplicates) {
      #message(iRep)
      tmp.tab <- t(cbind(centroids, wall))
      tmp.pdist <- base::as.vector(proxy::dist(tmp.tab, distanceFunction))
      if (num_processesToExtract > 1)
        tmp.pdist[tmp.pdist == 1 | tmp.pdist == 0] <- NA
      allDist <- pracma::squareform(tmp.pdist)
      
      cd.colRange <- (ncol(centroids) + 1):ncol(allDist)
      centroidDist = t(allDist[1:ncol(centroids), cd.colRange])
      
      jRange <- sort(1:num_processesToExtract)
      for (jIndex in 1:num_processesToExtract) {
        j <- jRange[jIndex]
        for (i in seq(1, ncol(wall), by = num_processesToExtract)) {
          #message(i)
          iRange = i:(i + num_processesToExtract - 1)
          tmp.min <- min(centroidDist[iRange, j], na.rm = TRUE)
          Ind <- which(centroidDist[iRange, j] == tmp.min)[1]
          centroidDist[iRange[Ind], ] <- BIG_NUMBER
          idx[iRange[Ind], 1] <- j
        }
      }
      maxDistToNewCentroids <- 0
      for (i in 1:ncol(centroids)) {
        tmp.dset <- wall[, base::as.vector(idx == i)]
        centroids[, i] <- apply(tmp.dset, 1, mean)
        tmp.dset <- t(cbind(centroids[, i], 
                            centroidsTest[,i]))
        tmp.pdist <- base::as.vector(proxy::dist(tmp.dset, distanceFunction))
        tmp.pdist[tmp.pdist == 1 | tmp.pdist == 0] <- NA
        maxDistToNewCentroids <- max(maxDistToNewCentroids, 
                                     tmp.pdist, 
                                     na.rm = TRUE)
      }
      if (maxDistToNewCentroids < CONVERG_CUTOFF) {
        countIRep <- countIRep + 1
      }
      else {
        countIRep <- 0
        centroidsTest <- centroids
      }
      if (countIRep == CONVERG_ITER) {
        break
      }
    }
    for (i in 1:ncol(centroids)) {
      tmp.tab <- t(cbind(centroids[, i], 
                         wall[, base::as.vector(idx == i)]))
      tmp.pdist <- base::as.vector(proxy::dist(tmp.tab, distanceFunction))
      tmp.pdist[tmp.pdist == 1 | tmp.pdist == 0] <- NA
      clusterDist <- pracma::squareform(tmp.pdist)
      clusterCompactness[i, ] = clusterDist[1, 2:ncol(clusterDist)]
    }
    dist.test <- apply(clusterCompactness, 2, (function(clm) {
      mean(clm, na.rm = TRUE)
    }))
    if (sum(minClusterDist > dist.test) == length(dist.test)) {
      centroidsFinal <- centroids
      idxFinal <- idx
      clusterCompactnessFinal <- clusterCompactness
    }
  }
  centroids <- t(centroidsFinal)
  idx <- idxFinal
  clusterCompactness <- clusterCompactnessFinal
  centDist <- apply(clusterCompactness, 1, (function(tmprw) {
    mean(tmprw, na.rm = TRUE)
  }))
  centDistInd <- order(centDist, decreasing = FALSE)
  clusterCompactness <- clusterCompactness[centDistInd, ]
  centroids <- centroids[centDistInd, ]
  idxNew <- idx
  for (i in 1:num_processesToExtract) {
    idxNew[base::as.vector(idx == centDistInd[i]), 1] <- i
  }
  idx <- idxNew
  if (num_processesToExtract > 1) {
    processStab <- silhouetteMLB(data = t(wall), fac = idx, 
                                 distanceFunction)
    processStabAvg <- matrix(0, nrow = 1, ncol = num_processesToExtract)
    for (i in 1:num_processesToExtract) {
      processStabAvg[1, i] = mean(processStab[idx == i])
    }
  } else {
    # Adjusted params for a 1-class silhouette!
    tmp.tab <- t(cbind(centroids, wall))
    tmp.pdist <- base::as.vector(proxy::dist(tmp.tab, distanceFunction))
    tmp.pdist[tmp.pdist == 1 | tmp.pdist == 0] <- NA
    allDist <- pracma::squareform(tmp.pdist)
    processStab <- 1 - t(allDist[1, 2:ncol(allDist)])
    
    # Silhouette plot
    xrange <- c(min(processStab), max(processStab))
    xrange[1] <- ifelse(xrange[1] > 0, 0, (-1.15) * abs(xrange[1]))
    xrange[2] <- 1.15
    graphics::barplot(sort(base::as.numeric(processStab), decreasing = TRUE), 
                      col = "gray20",
                      xlim = xrange,
                      horiz = TRUE, xlab = "Silhouette Value", 
                      ylab = "", 
                      main = "Silhouette Plot", 
                      border = "gray20")
    graphics::abline(v=0)
    graphics::title(ylab="Iter. Results (by Group)", line=1, cex.lab=1, font = 2)
    
    processStabAvg <- apply(processStab, 2, (function(clmn) {
      base::mean(clmn, na.rm = TRUE)
    }))
  }
  
  if (num_processesToExtract > 1) {
    centroidStd <- matrix(0, nrow = nrow(centroids), ncol = ncol(centroids))
  } else {
    centroidStd <- matrix(0, ncol = length(centroids), nrow = 1)
  }
  
  for (i in 1:num_processesToExtract) {
    centroidStd[i, ] <- apply(wall[, idx == i], 1, (function(rw) {
      stats::sd(rw, na.rm = TRUE)
    }))
  }
  centroids <- t(cbind(centroids))
  centroidStd <- t(centroidStd)
  idxS <- matrix(0, nrow = length(idx), ncol = 1)
  for (i in seq(1, ncol(wall), by = num_processesToExtract)) {
    iEnd <- i + num_processesToExtract - 1
    idxG <- idx[i:iEnd]
    for (j in 1:num_processesToExtract) {
      idxS[(i + j - 1), ] = which(idxG == j)
    }
  }
  exposure <- matrix(0, nrow = max(idxS), ncol(hall))
  exposureStd <- matrix(0, nrow = max(idxS), ncol(hall))
  for (i in 1:max(idxS)) {
    exposure[i, ] <- apply(hall[idx == i, ], 2, (function(cl) {
      mean(cl, na.rm = TRUE)
    }))
    exposureStd[i, ] <- apply(hall[idx == i, ], 2, (function(cl) {
      sd(cl, na.rm = TRUE)
    }))
  }
  
  # Fix to sign.to.extract.num = 1
  if (num_processesToExtract < 2){
    centroids <- t(centroids)
  }
  
  
  result.list <- list()
  result.list$centroids <- centroids
  result.list$centroidStd <- centroidStd
  result.list$exposure <- exposure
  result.list$exposureStd <- exposureStd
  result.list$idx <- idx
  result.list$idxS <- idxS
  result.list$processStab <- processStab
  result.list$processStabAvg <- processStabAvg
  result.list$clusterCompactness <- clusterCompactness
  return(result.list)
}


filterOutIterations <- function (wall, 
                                 hall, 
                                 cnt_errors, 
                                 cnt_reconstructed, 
                                 params) 
{
  num_processesToExtract <- params$num_processesToExtract
  thresh_removeLastPercent <- params$thresh_removeLastPercent
  num_totIterations <- ncol(wall)/num_processesToExtract
  tot.rm.iterations <- round(thresh_removeLastPercent * num_totIterations)
  if (tot.rm.iterations > 0) {
    closeness.mutCounts <- matrix(0, 
                                  nrow = num_totIterations, 
                                  ncol = 1)
    for (i in 1:num_totIterations) {
      closeness.mutCounts[i, ] <- base::norm(cnt_errors[[i]], "F")
    }
    indexClosenessGenomes <- order(closeness.mutCounts, 
                                   decreasing = TRUE)
    removeIterations <- indexClosenessGenomes[1:tot.rm.iterations]
    removeIterationSets <- matrix(0, 
                                  nrow = (num_processesToExtract * tot.rm.iterations), 
                                  ncol = 1)
    for (i in 1:tot.rm.iterations) {
      iStart <- num_processesToExtract * (removeIterations[i] - 1) + 1
      iEnd <- num_processesToExtract * removeIterations[i]
      tmpRowRange <- (num_processesToExtract * (i - 1) + 1):(num_processesToExtract * i)
      removeIterationSets[tmpRowRange, ] <- iStart:iEnd
    }
    wall <- wall[, -removeIterationSets]
    hall <- hall[-removeIterationSets, ]
    cnt_errors <- cnt_errors[-removeIterations]
    cnt_reconstructed <- cnt_reconstructed[-removeIterations]
  }
  res.list <- list()
  res.list$Wall <- wall
  res.list$Hall <- hall
  res.list$mutCounts.errors <- cnt_errors
  res.list$mutCounts.reconstructed <- cnt_reconstructed
  return(res.list)
}


getTestRunArgs <- function (testN = 1) 
{
  out <- list()
  if (testN == 3) {
    out$v <- cbind(c(142, 133, 1, 24, 53, 55, 4, 4, 100), 
                   c(132, 113, 0, 34, 50, 52, 3, 3, 17), 
                   c(155, 139,10, 14, 53, 45, 2, 5, 13), 
                   c(124, 156, 22, 21,52, 45, 2, 7, 100))
    out$r <- 2
    out$params <- setMutClusterParams(num_processesToExtract = 2)
    out$params$num_parallelCores <- 1
    out$params$stopconv <- 800
    out$params$niter <- 8000
  }
  else if (testN == 4) {
    out$data <- cbind(c(runif(4, 10, 25), 
                        runif(6, 20, 50), 
                        runif(3, 0, 5)), 
                      c(runif(4, 50, 60),
                        runif(6, 45, 55), 
                        runif(3, 30, 40)), 
                      c(runif(4, 12, 15), 
                        runif(6,10, 15), 
                        runif(3, 10, 12)), 
                      c(runif(4, 5, 20), 
                        runif(6, 16, 26), 
                        runif(3, 24, 29)))
    out$fac <- c(rep(1, 4), rep(2, 6), rep(3, 3))
  }
  else if (testN == 5) {
    out$W <- do.call(cbind, lapply(1:20, (function(i) {
      cbind(c(runif(4, 0.05, 0.15), 
              c(1e-15 * runif(1,1, 9)), 
              runif(3, 0.003, 0.007), 
              runif(9, 0.04, 0.09)), 
            c(runif(3, 0.08, 0.18), 
              c(1e-15 * runif(1, 1, 9)), 
              runif(1, 0.03, 0.07), 
              runif(3, 0.004, 0.009), 
              runif(9, 0.04, 0.09)))
    })))
    out$H <- do.call(rbind, lapply(1:40, (function(i) {
      c(runif(1, 0.005, 0.099), 
        runif(2, 50, 800), 
        runif(2, 0.005, 0.099), 
        runif(1, 50, 750), 
        runif(2, 0.005, 0.099), 
        runif(1, 20, 800))
    })))
    out$params$analyticApproach <- "denovo"
  }
  else if (testN == 6) {
    tmut <- runif(10, 150, 1350)
    eff1 <- runif(10, 0.45, 0.89)
    eff2 <- 1 - eff1
    out$exposures <- sapply(1:10, (function(i) {
      c(eff1[i], eff2[i]) * tmut[i]
    }))
  }
  else {
    set.seed(999)
    my.mat <- sapply(1:10, (function(i) {
      c(base::as.integer(runif(3, 80, 150)), 
        base::as.integer(runif(7, 0, 10)), 
        base::as.integer(runif(9, 40, 80)), 
        base::as.integer(runif(1, 0, 3)), 
        base::as.integer(runif(10, 60, 120)))
    }))
    rownames(my.mat) <- c("A[C>A]A", "A[C>A]C", "A[C>A]G", 
                          "A[C>A]T", "A[C>G]A", "A[C>G]C", "A[C>G]G", "A[C>G]T", 
                          "A[C>T]A", "A[C>T]C", "A[C>T]G", "A[C>T]T", "A[T>A]A", 
                          "A[T>A]C", "A[T>A]G", "A[T>A]T", "A[T>C]A", "A[T>C]C", 
                          "A[T>C]G", "A[T>C]T", "A[T>G]A", "A[T>G]C", "A[T>G]G", 
                          "A[T>G]T", "C[C>A]A", "C[C>A]C", "C[C>A]G", "C[C>A]T", 
                          "C[C>G]A", "C[C>G]C")
    #out$mutCount.obj <- setMutCountObject(mutCountMatrix = my.mat)
    #out$params <- setMutClusterParams(num_processesToExtract = 2)
    out$params$num_totIterations <- 1
    out$params$num_parallelCores <- 1
    out$params$stopconv <- 800
    out$params$niter <- 8000
    if (testN == 2) {
      out$params$stopconv <- 2000
      out$params$niter <- 15000
      out$params$num_totIterations <- 3
    }
  }
  return(out)
}


leadZeros <- function (n, m, char = "0", na.value = NA) 
{
  max.zeros <- nchar(base::as.character(round(m)))
  tmp.digits <- nchar(base::as.character(round(n)))
  zeros.toAdd <- max.zeros - tmp.digits
  
  returnVect <- sapply(1:length(n), function(i){
    if (zeros.toAdd[i] >= 0) {
      paste(c(rep(char, zeros.toAdd[i]), base::as.character(round(n[i]))), sep = "", collapse = "")    
    } else {
      na.value
    }
  })
  return(returnVect) 
}


removeWeak <- function (input_mutCounts, 
                        params) 
{
  thresh_removeWeakMutTypes <- params$thresh_removeWeakMutTypes
  sum.counts <- apply(input_mutCounts, 1, sum)
  sum.counts.idx <- order(sum.counts, decreasing = FALSE)
  sorted.sum.counts <- sum.counts[sum.counts.idx]
  tot.mut.counts <- sum(input_mutCounts)
  tot.muttypes.toremove <- sum((sapply(1:length(sorted.sum.counts), 
                                       (function(i) {
                                         sum(sorted.sum.counts[1:i])
                                       }))/tot.mut.counts) < thresh_removeWeakMutTypes)
  return.list <- list()
  if (tot.muttypes.toremove > 0) {
    removed.mutset <- sum.counts.idx[c(1:tot.muttypes.toremove)]
    input_mutCounts <- input_mutCounts[-removed.mutset, ]
    return.list$removed.mutset <- removed.mutset
  }
  else {
    return.list$removed.mutset <- (-1)
  }
  return.list$output.mutCounts <- input_mutCounts
  return(return.list)
}


silhouetteMLB <- function (data, 
                           fac, 
                           method = "cosine", 
                           plot = TRUE) 
{
  if (nrow(data) != length(fac)) 
    stop("Bad input!")
  dist.matrix <- base::as.matrix(proxy::dist(x = data, method = method))
  sil.check <- cluster::silhouette(x = base::as.numeric(base::as.factor(fac)), 
                                   dist = dist.matrix)
  if (plot == TRUE) {
    tmp <- lapply(unique(sil.check[, 1]), (function(clid) {
      part.out <- sil.check[sil.check[, 1] == clid, ]
      part.out[order(part.out[, 3], decreasing = TRUE), 
               ]
    }))
    tmp <- do.call(rbind, tmp)
    xrange <- c(min(tmp[,3]), max(tmp[,3]))
    xrange[1] <- ifelse(xrange[1] > 0, 0, (-1.15) * abs(xrange[1]))
    xrange[2] <- 1.15
    graphics::barplot(tmp[nrow(tmp):1, 3], 
                      col = base::as.factor(tmp[nrow(tmp):1,1]),
                      xlim = xrange,
                      horiz = TRUE, xlab = "Silhouette Value", 
                      ylab = "", 
                      main = "Silhouette Plot", 
                      border = base::as.factor(tmp[nrow(tmp):1,1]))
    graphics::abline(v=0)
    graphics::title(ylab="Iter. Results (by Group)", line=1, cex.lab=1, font = 2)
  }
  return(base::as.vector(sil.check[, 3]))
}


alexaNMF <- function(v, r, params)
{
  # Brunet algorithm, from Alexandrov NMF WTSI code
  # define params (hard-set)
  debug <- params$debug
  chk.step <- 50
  dot.eachSteps <- 2000
  
  # retireve user-defined params
  eps <- params$eps
  num.processes <- r
  stopconv <- params$stopconv
  niter <- params$niter
  err.threshold <- 1e-10
  stopRule <- ifelse(params$stopRule == "LA", "LA", "DF")
  
  # set.seed(231082)
  v <- base::as.matrix(v)
  rownames(v) <- NULL
  colnames(v) <- NULL
  
  # Double check input matrix
  if (min(v) < 0)
    stop("Matrix entries cannot be negative")
  if (min(apply(v, 1, sum)) == 0)
    stop("Entries cannot all be equal to 0")
  
  # Initialize W0 and H0
  W.k <- do.call(cbind, lapply(1:num.processes, (function(i){
    out.o <- runif(n = nrow(v), min = eps, max = 100)
    out.o/sum(out.o)
  })))
  H.k <- matrix((1/num.processes), nrow = num.processes, ncol = ncol(v))
  
  # Initialize looping vars
  itr <- 1
  chk.j <- 1
  stationary.chk <- 0
  force.out <- 1
  
  # Debugging plot
  if (debug)
    graphics::plot(-10, xlim = c(1000, niter),
                   ylim = c( ifelse(stopRule == "DF", (0.1*err.threshold), 0.001) ,
                             ifelse(stopRule == "DF", 10, ncol(H.k))), log = "xy",
                   xlab = "Iteration", ylab = "Variation", main = "Convergence")
  
  # Initialize the objects for comparing dissimilarity
  # DF approach
  cons.old <- base::as.vector(W.k)
  # Alexandrov approach
  consold <- matrix(0, nrow = ncol(H.k), ncol = ncol(H.k))
  
  while (itr < niter){
    
    if (itr %% dot.eachSteps == 0) {
      if (stationary.chk > chk.step) {
        message(":", appendLF = FALSE)
      } else {
        message(".", appendLF = FALSE)
      }
    }
    
    delta.01 <- apply(W.k, 2, sum)
    H.k <- H.k * (t(W.k) %*% (v/(W.k %*% H.k))) / delta.01
    H.k[H.k < eps] <- eps
    
    W.tmp <- W.k * ((v/(W.k %*% H.k)) %*% t(H.k))
    W.k <- do.call(cbind, lapply(1:ncol(W.tmp), (function(ci){
      W.tmp[,ci] / sum(H.k[ci,])
    })))
    W.k[W.k<eps] <- eps
    
    # check convergence every 'chk.step' iterations
    if (itr > stopconv & itr %% chk.step == 0 & stopRule == "DF") {
      chk.j <- chk.j + 1
      H.k[H.k < eps] <- eps
      W.k[W.k < eps] <- eps
      cons <- base::as.vector(W.k)
      
      # compare to consold and reorder
      dist.measure <- proxy::dist(rbind(cons, cons.old), method = "cosine") [1]
      cons.old <- cons
      
      if (debug)
        points(itr, (dist.measure + (err.threshold*0.1)), pch = 19, col = "red2")
      
      # evaluate distance
      if (dist.measure < err.threshold) {
        stationary.chk <- stationary.chk + 1
      } else {
        stationary.chk <- 0
      }
      if (stationary.chk > (stopconv / chk.step)) {
        force.out <- 0
        message("$", appendLF = FALSE)
        break()
      }
    } else if (itr > stopconv & itr %% chk.step == 0 & stopRule == "LA") {
      chk.j <- chk.j + 1
      H.k[H.k < eps] <- eps
      W.k[W.k < eps] <- eps
      y <- apply(H.k, 2, max)
      index <- apply(H.k, 2, (function(dt) {
        which.max(dt)[1]
      }))
      mat1 = t(sapply(1:ncol(H.k), (function(ii) {
        index
      })))
      mat2 = sapply(1:ncol(H.k), (function(ii) {
        index
      }))
      cons <- mat1 == mat2
      if (sum(cons != consold) == 0) {
        stationary.chk <- stationary.chk + 1
      } else {
        stationary.chk <- 0
      }
      #
      consold <- cons
      
      if (debug)
        points(itr, (sum(cons != consold) / 100), pch = 19, col = "red2")
      
      if (stationary.chk > (stopconv/chk.step)) {
        force.out <- 0
        message("$", appendLF = FALSE)
        break()
      }
    }
    itr <- itr + 1
  }
  if (force.out == 1) {
    message("!", appendLF = FALSE)
  }
  output <- list()
  output$w <- W.k
  output$h <- H.k
  return(output)
}


attachMutType <- function(mutData,
                          ref_colName = "reference_allele",
                          var_colName = "variant_allele",
                          var2_colName = NULL,
                          context_colName = "context",
                          format = 1,
                          mutType_dict = "alexa",
                          mutType_colName = "mutType")
{
  # Validate input data and fields in mutData
  if(!((is.data.frame(mutData) | is.matrix(mutData) ) &
       sum(c(ref_colName, var_colName, context_colName) %in% colnames(mutData)) == 3 ))
    stop ("Issue with the input dataset. Make sure to feed in a data.frame or
          a matrix and double check the name of the fields pointing to chromosome
          name, start and end positions")
  if (!(format %in% c(1,2)))
    stop ("Please, specify a valid format number (example: 1)")
  if (! (is.null(var2_colName))) {
    if (!(var2_colName %in% colnames(mutData)))
      stop ("Invalid var2 column")
  }
  
  if (!is.character(mutType_colName) |
      length(mutType_colName) > 1)
    stop("Bad mutType_colName")
  if(mutType_colName %in% colnames(mutData))
    stop ("mutType_colName already exists as column name in the current dataset")
  
  # convert factors to chars
  mutData <- data.frame(mutData, stringsAsFactors = FALSE, row.names = NULL)
  my.key.cols <- c(ref_colName, var_colName, var2_colName, context_colName)
  my.key.cols <- my.key.cols[!is.na(my.key.cols)]
  for (clmn in my.key.cols) {
    mutData[,clmn] <- base::as.character(base::as.vector(mutData[,clmn])) 
  }
  
  message("Assigning mutation types ", appendLF = FALSE)
  
  mutData[,mutType_colName] <- sapply(1:nrow(mutData), (function(i){
    
    if (nrow(mutData) > 1000 & i %in% base::as.integer(seq(1, nrow(mutData),length.out = 20)))
      message(".", appendLF = FALSE)
    
    # first, extract elems and check for middle base to match the reference
    ctx.len <- nchar(mutData[i,context_colName])
    half.ln <- (ctx.len - 1) / 2
    mid.seq <- substr(mutData[i,context_colName], (half.ln + 1), (half.ln + 1))
    pre.seq <- substr(mutData[i,context_colName], 1, half.ln)
    post.seq <- substr(mutData[i,context_colName], (half.ln + 2), ctx.len)
    
    if(mid.seq !=  mutData[i,ref_colName] | 
       (is.null(var2_colName) & mid.seq == mutData[i,var_colName]) |
       (tryCatch({(mid.seq == mutData[i,var_colName] & mid.seq == mutData[i,var2_colName])},
                 error = function(e) {FALSE}))){
      # no match means --> NA
      mut.base <- NA
    } else {
      #mut.variant to use in case var2 is specified
      if (mutData[i,ref_colName] != mutData[i,var_colName]) {
        mut.base <- mutData[i,var_colName]
      } else if (!is.null(var2_colName) ){
        if ( mutData[i,ref_colName] != mutData[i,var2_colName]) {
          mut.base <- mutData[i,var2_colName]
        } else {
          mut.base <- NA  
        }
      } else {
        mut.base <- NA  
      }
      
      # match, format and return according to a standard format (for now)
      if (is.na(mut.base)) {
        NA
      } else {
        paste(mid.seq, ".", mut.base, "[", pre.seq, mid.seq, post.seq, "][", pre.seq, mut.base, post.seq, "]", sep = "", collapse = "")
      }
    }
  }))
  message(". Done!", appendLF = TRUE)
  
  if (sum(is.na(mutData[,mutType_colName])) > 0) {
    message(paste("Removing",sum(is.na(mutData[,mutType_colName])), "positions."))
    mutData <- mutData[!is.na(mutData[,mutType_colName]),]
  }
  
  # Now, apply revCompl transformation
  message("Now applying RevCompl transformation", appendLF = FALSE)
  if (mutType_dict == "alexa") {
    idx <- grep("^((G|A)\\.)", mutData[,mutType_colName])
    mutData[idx,mutType_colName] <- sapply(mutData[idx,mutType_colName], (function(seq){
      
      base.wt  <- revCompl(gsub("\\..+$", "", seq))
      base.mut <- revCompl(gsub("^.+\\.", "", gsub("\\[.+$", "", seq)))
      seq.wt   <- revCompl(gsub("^.+\\[", "", gsub("\\]\\[.+$", "", seq)))
      seq.mut  <- revCompl(gsub("^.+\\]\\[", "", gsub("\\]$", "", seq)))
      paste(base.wt,".",base.mut, "[", seq.wt, "][", seq.mut, "]", sep = "", collapse = "")
    }))
  } else if (mutType_dict == "custom") {
    idx <- grep("^((G|T)\\.)", mutData[,mutType_colName])
    mutData[idx,mutType_colName] <- sapply(mutData[idx,mutType_colName], (function(seq){
      base.wt  <- revCompl(gsub("\\..+$", "", seq))
      base.mut <- revCompl(gsub("^.+\\.", "", gsub("\\[.+$", "", seq)))
      seq.wt   <- revCompl(gsub("^.+\\[", "", gsub("\\]\\[.+$", "", seq)))
      seq.mut  <- revCompl(gsub("^.+\\]\\[", "", gsub("\\]$", "", seq)))
      paste(base.wt,".",base.mut, "[", seq.wt, "][", seq.mut, "]", sep = "", collapse = "")
    }))
  }
  
  message(". Done!", appendLF = TRUE)
  message("Final formatting", appendLF = FALSE)
  #Attach the format of interest
  mutData[,mutType_colName] <- sapply(mutData[,mutType_colName], (function(seq){
    base.wt  <- gsub("\\..+$", "", seq)
    base.mut <- gsub("^.+\\.", "", gsub("\\[.+$", "", seq))
    seq.wt   <- gsub("^.+\\[", "", gsub("\\]\\[.+$", "", seq))
    seq.mut  <- gsub("^.+\\]\\[", "", gsub("\\]$", "", seq))
    half.len <- (nchar(seq.wt) - 1 ) / 2
    pre.seq <- substr(seq.wt, 1, half.len)
    post.seq <-substr(seq.wt, half.len + 2, nchar(seq))
    
    if (format == 1) {
      # --> N[N>M]N
      paste(pre.seq, "[", base.wt, ">", base.mut, "]", post.seq, sep = "", collapse = "")
    } else if (format == 2) {
      # --> NN.N>M
      paste(pre.seq, post.seq, ".", base.wt, ">", base.mut, sep = "", collapse = "")
    }  else {
      # --> N.M[NNN][NMN]
      paste(base.wt,".",base.mut, "[", seq.wt, "][", seq.mut, "]", sep = "", collapse = "")
    }
  }))
  
  message(". Done!", appendLF = TRUE)
  
  return(mutData)
}


chihJenNMF <- function(v, r, params) {
  
  # http://ieeexplore.ieee.org/document/4359171/
  # alternative approach for NMF
  # define params (hard-set)
  debug <- params$debug
  chk.step <- 50
  dot.eachSteps <- 2000
  
  # retireve user-defined params
  eps <- params$eps
  num.processes <- r
  stopconv <- params$stopconv
  niter <- params$niter
  err.threshold <- 1e-10
  
  # set.seed(231082)
  v <- base::as.matrix(v)
  rownames(v) <- NULL
  colnames(v) <- NULL
  
  # Double check input matrix
  if (min(v) < 0)
    stop("Matrix entries cannot be negative")
  if (min(apply(v, 1, sum)) == 0)
    stop("Entries cannot all be equal to 0")
  
  # Debugging plot
  if (debug)
    graphics::plot(-10, xlim = c(1000, niter), ylim = c( (0.1*err.threshold), 10), log = "xy", 
                   xlab = "Iteration", ylab = "Variation", main = "Convergence")
  
  # Initialize W0 and H0
  W.k <- do.call(cbind, lapply(1:num.processes, (function(i){
    out.o <- runif(n = nrow(v), min = eps, max = 100)
    out.o/sum(out.o)
  })))
  H.k <- matrix((1/num.processes), nrow = num.processes, ncol = ncol(v))
  
  # Initialize looping vars
  itr <- 1
  chk.j <- 1
  cons.old <- base::as.vector(W.k)
  stationary.chk <- 0
  force.out <- 1
  while (itr < niter){
    
    if (itr %% dot.eachSteps == 0) {
      if (stationary.chk > chk.step) {
        message(":", appendLF = FALSE)
      } else {
        message(".", appendLF = FALSE)
      }
    }
    
    WtW <- t(W.k) %*% W.k
    gradH <- ((WtW %*% H.k) - (t(W.k) %*% v))
    H.b <- H.k; H.b[H.b<eps] <- eps;
    H.k <- H.k - (H.b / ((WtW %*% H.b) + eps)) * gradH
    HHt <- H.k %*% t(H.k)
    gradW <- (W.k %*% HHt) - (v %*% t(H.k))
    
    W.b <- W.k; W.b[W.b < eps] <- eps
    W.k <- W.k - (W.b / ((W.b %*% HHt) + eps)) * gradW
    S <- apply(W.k, 2, sum)
    
    W.k <- do.call(cbind, lapply(1:ncol(W.k), (function(ci){
      W.k[,ci] / S[ci]
    })))
    H.k <- do.call(rbind, lapply(1:nrow(H.k), (function(ri){
      H.k[ri,] * S[ri]
    })))
    
    # optional ? Keep as is for now
    H.k <- do.call(cbind, lapply(1:ncol(H.k), (function(ci){
      H.k[,ci] / sum(H.k[,ci])
    })))
    
    # Final non-negative check
    H.k[H.k < eps] <- eps
    W.k[W.k < eps] <- eps
    
    # check convergence every 'chk.step' iterations
    if (itr > stopconv & itr %% chk.step == 0) {
      chk.j <- chk.j + 1
      W.k[W.k < eps] <- eps
      cons <- base::as.vector(W.k)
      # compare to consold and reorder
      dist.measure <- proxy::dist(rbind(cons, cons.old), method = "cosine") [1]
      cons.old <- cons
      
      if (debug)
        points(itr, (dist.measure + (err.threshold*0.1)), pch = 19, col = "red2")
      
      # evaluate distance
      if (dist.measure < err.threshold) {
        stationary.chk <- stationary.chk + 1
      } else {
        stationary.chk <- 0
      }
      if (stationary.chk > (stopconv / chk.step)) {
        force.out <- 0
        message("$", appendLF = FALSE)
        break()
      }
    }
    itr <- itr + 1
  }
  if (force.out == 1) {
    message("!", appendLF = FALSE)
  }
  output <- list()
  output$w <- W.k
  output$h <- H.k
  return(output)
}


countMutTypes <- function(mutTable, 
                          mutType_colName = "mutType", #mutTypeLab 
                          sample_colName = NULL)  #sampleLab
{
  
  if (!(is.data.frame(mutTable) | is.matrix(mutTable)))
    stop ("Bad input")
  if (! mutType_colName %in% colnames(mutTable))
    stop ("mutType field not found")
  if (!is.null(sample_colName)){
    if (! sample_colName %in% colnames(mutTable))
      stop ("sample_colName field not found")
  } 
  
  mutType.labels <- c("A[C>A]A", "A[C>A]C", "A[C>A]G", "A[C>A]T", "A[C>G]A", "A[C>G]C",
                      "A[C>G]G", "A[C>G]T", "A[C>T]A", "A[C>T]C", "A[C>T]G", "A[C>T]T",
                      "A[T>A]A", "A[T>A]C", "A[T>A]G", "A[T>A]T", "A[T>C]A", "A[T>C]C",
                      "A[T>C]G", "A[T>C]T", "A[T>G]A", "A[T>G]C", "A[T>G]G", "A[T>G]T",
                      "C[C>A]A", "C[C>A]C", "C[C>A]G", "C[C>A]T", "C[C>G]A", "C[C>G]C",
                      "C[C>G]G", "C[C>G]T", "C[C>T]A", "C[C>T]C", "C[C>T]G", "C[C>T]T",
                      "C[T>A]A", "C[T>A]C", "C[T>A]G", "C[T>A]T", "C[T>C]A", "C[T>C]C",
                      "C[T>C]G", "C[T>C]T", "C[T>G]A", "C[T>G]C", "C[T>G]G", "C[T>G]T",
                      "G[C>A]A", "G[C>A]C", "G[C>A]G", "G[C>A]T", "G[C>G]A", "G[C>G]C",
                      "G[C>G]G", "G[C>G]T", "G[C>T]A", "G[C>T]C", "G[C>T]G", "G[C>T]T",
                      "G[T>A]A", "G[T>A]C", "G[T>A]G", "G[T>A]T", "G[T>C]A", "G[T>C]C",
                      "G[T>C]G", "G[T>C]T", "G[T>G]A", "G[T>G]C", "G[T>G]G", "G[T>G]T",
                      "T[C>A]A", "T[C>A]C", "T[C>A]G", "T[C>A]T", "T[C>G]A", "T[C>G]C",
                      "T[C>G]G", "T[C>G]T", "T[C>T]A", "T[C>T]C", "T[C>T]G", "T[C>T]T",
                      "T[T>A]A", "T[T>A]C", "T[T>A]G", "T[T>A]T", "T[T>C]A", "T[T>C]C",
                      "T[T>C]G", "T[T>C]T", "T[T>G]A", "T[T>G]C", "T[T>G]G", "T[T>G]T")
  custPatt01 <- "^(A|C|G|T)\\[(A|C|G|T)>(A|C|G|T)\\](A|C|G|T)$"
  if (sum(regexpr(custPatt01, mutTable[,mutType_colName]) > 0) != length(mutTable[,mutType_colName]))
    stop ("Problem with the mutation type format... Please use the following format: A[C>G]T")
  
  # Force Alexandrov-style mut types
  idx.to.fix <- which(!mutTable[,mutType_colName] %in% mutType.labels)
  if (length(idx.to.fix) > 0) {
    
    tmp <- mutTable[,mutType_colName][idx.to.fix]
    corrected.mutTypes <- sapply(1:length(tmp), (function(i){
      out <- c(revCompl(substr(tmp[i], 7,7)), "[",
               revCompl(substr(tmp[i], 3,3)), ">",
               revCompl(substr(tmp[i], 5,5)), "]",
               revCompl(substr(tmp[i], 1,1)))
      paste(out, collapse = "", sep = "")
    }))
    mutTable[,mutType_colName][idx.to.fix] <- corrected.mutTypes
  }
  
  if (sum(!mutTable[,mutType_colName] %in% mutType.labels) > 0)
    stop("Problem with the mutType... Please check the input")
  
  if(is.null(sample_colName)) {
    my.mutTypes <- mutTable[,mutType_colName]
    out.1 <- sapply(mutType.labels, (function(mtt){
      sum(my.mutTypes == mtt)
    }))
    out.1 <- data.frame(cbind(sample=out.1))
  } else {
    unique.cases <- unique(mutTable[,sample_colName])
    out.1 <- lapply(unique.cases, (function(csid){
      tmp.df <- mutTable[mutTable[,sample_colName] == csid,]
      out.3 <- sapply(mutType.labels, (function(mtt){
        sum(tmp.df[,mutType_colName] == mtt)
      }))
      out.3 <- data.frame(cbind(out.3))
      colnames(out.3) <- csid
      out.3
    }))
    out.1 <- data.frame(do.call(cbind, out.1), stringsAsFactors = FALSE)
    tryCatch({colnames(out.1) <- unique.cases}, error = function(e) NULL)
  }
  
  # Prepare mutationCounts object
  fOUT <- new(Class = "mutationCounts", 
              x = out.1, 
              muts = data.frame(mutTypes = rownames(out.1), stringsAsFactors = FALSE), 
              samples = data.frame(ID = colnames(out.1), stringsAsFactors = FALSE))
  return(fOUT)
}


decipherMutationalProcesses <- function (input, 
                                         params)
{
  if (class(params) == "mutFrameworkParams" & class(input) == "mutationCounts") {
    paramsList <- coerceObj(params, to = "list")
    inputMAT <- coerceObj(input, to = "matrix", keepNames = FALSE) 
  } else {
    stop("Malformed Input")
  }
  
  if (paramsList$approach != "counts") {
    freq.input <- frequencize(inputMAT)
    inputMAT <- freq.input$freqs
    inputColsums <- freq.input$colSums
  }
  currentWarnings <- options()$warn
  options(warn = -1)
  #requiredElements <- c("cancerType", "mutCounts", "sampleNames", "mutTypes")
  #if (!("input" %in% ls()) | !(is.list(input)) | sum(requiredElements %in%
  #                                                   names(input)) != length(requiredElements))
  #  stop("Malformed Input / Input does not include all the required fields!")
  
  if (is.numeric(paramsList$num_processesToExtract)) {
    paramsList$analyticApproach <- "denovo"
  } else {
    stop("An error occurred!")
  }
  if (paramsList$analyticApproach == "denovo") {
    deconvData <- deconvoluteMutCounts(input_mutCounts = inputMAT,
                                       params = paramsList)
  } else {
    stop("An error occurred!")
  }
  
  # Package results and send out
  mutProcesses <- list()
  
  # Results first
  final.proc <- data.frame(deconvData$processes, stringsAsFactors = FALSE)
  colnames(final.proc) <- paste("Sign.",
                                sapply(1:ncol(final.proc), (function(n){
                                  leadZeros(n, (10*ncol(final.proc)))
                                })),  sep = "")
  rownames(final.proc) <- getMutationTypes(input)
  
  # New signatures
  signResult <- mutSignatures::as.mutation.signatures(final.proc)
  
  
  final.expo <- data.frame(deconvData$exposure, stringsAsFactors = FALSE)
  if (getFwkParam(params, "approach") != "counts"){
    final.expo <- data.frame(sapply(1:ncol(final.expo), function(cjj){
      inputColsums[cjj] * final.expo[,cjj] / sum(final.expo[,cjj])
    }), row.names = NULL, stringsAsFactors = FALSE)  
  }
  tryCatch({colnames(final.expo) <- getSampleIdentifiers(input)}, error = function(e) {NULL})
  rownames(final.expo) <- colnames(final.proc)
  
  # New exposures
  expoResult <- mutSignatures::as.mutsign.exposures(x = final.expo) 
  
  mutProcesses$Results <- list()
  mutProcesses$Results$signatures <- signResult
  mutProcesses$Results$exposures <- expoResult
  
  # Attach input and analysis data
  mutProcesses$RunSpecs <- list()
  mutProcesses$RunSpecs$input <- input
  mutProcesses$RunSpecs$params <- params
  
  # Attach Supplementary and Extra Controls stuff
  if (getFwkParam(params, "logIterations") != "lite") {
    mutProcesses$Supplementary <- list()
    mutProcesses$Supplementary$allProcesses <- deconvData$Wall
    mutProcesses$Supplementary$allExposures <- deconvData$Hall
    mutProcesses$Supplementary$idx <- deconvData$idx
    mutProcesses$Supplementary$mutCountErrors <- deconvData$mutCountErrors
    mutProcesses$Supplementary$mutCountReconstructed <- deconvData$mutCountReconstructed
    mutProcesses$Supplementary$processStab <- deconvData$processStab
    mutProcesses$Supplementary$processStabAvg <- deconvData$processStabAvg
  }
  options(warn = currentWarnings)
  return(mutProcesses)
}


deconvoluteMutCounts <- function (input_mutCounts, 
                                  params)
{
  # avoid NOTEs
  j <- NULL
  
  num_totIterations <- params$num_totIterations
  #perCore.iterations <- params$perCore.iterations
  num_processesToExtract <- params$num_processesToExtract
  distanceFunction <- params$distanceFunction
  thresh_removeWeakMutTypes <- params$thresh_removeWeakMutTypes
  num_parallelCores <- params$num_parallelCores
  guided <- params$guided
  debugStatus <- params$debug
  num_totReplicates <- params$num_totReplicates
  thresh_removeLastPercent <- params$thresh_removeLastPercent
  colnames(input_mutCounts) <- NULL
  rownames(input_mutCounts) <- NULL
  input_mutCounts <- base::as.matrix(input_mutCounts)
  bckgrnd.removed.mutCounts <- removeWeak(input_mutCounts, params)
  bckgrnd.removed.mutset <- bckgrnd.removed.mutCounts$removed.mutset
  bckgrnd.removed.mutCounts <- bckgrnd.removed.mutCounts$output.mutCounts
  total.mutationTypes <- nrow(bckgrnd.removed.mutCounts)
  total.samples <- ncol(bckgrnd.removed.mutCounts)
  
  # Start with a sample NMF iteration to guide the rest
  if (guided) {
    if (!debugStatus) {
      guide.W <- suppressMessages(extractSignatures(mutCountMatrix = bckgrnd.removed.mutCounts,
                                   params = params,
                                   bootStrap = FALSE))
    } else {
      guide.W <- extractSignatures(mutCountMatrix = bckgrnd.removed.mutCounts,
                                   params = params,
                                   bootStrap = FALSE)
    }
    guide.W <- guide.W$Wk
  } else {
    # dummy variable
    guide.W <- 0
  }
  
  if (num_parallelCores < 2) {
    muCounts.checkDF <- tryCatch(lapply(1:num_totIterations, (function(j){
      if (debugStatus) {
        if (j %in% base::as.integer(seq(1, num_totIterations, length.out = 100))) {
          message(paste("(",j,")", sep = ""), appendLF = FALSE)
        }
      }
      if (!debugStatus) {
        tmp.out <- suppressMessages(extractSignatures(mutCountMatrix = bckgrnd.removed.mutCounts,
                                                      bootStrap = TRUE,
                                                      params = params))
        
      } else {
        tmp.out <- extractSignatures(mutCountMatrix = bckgrnd.removed.mutCounts,
                                     bootStrap = TRUE,
                                     params = params)
        
      }
      if(guided){
        re.ORD <- rep(0, num_processesToExtract)
        for (ki in 1:num_processesToExtract) {
          my.i <- order(apply(abs(tmp.out$Wk - guide.W[,ki]), 2, sum))
          if (ki > 1) {
            my.i[re.ORD[1:(ki-1)]] <- max(my.i) + 1
          }
          re.ORD[ki] <- which.min(my.i)
        }
      } else {
        re.ORD <- 1:num_processesToExtract
      }
      # compare to ref and sort!
      if(num_processesToExtract > 1) {
        tmp.out$Wk <- tmp.out$Wk[,re.ORD]
        tmp.out$Hk <- tmp.out$Hk[re.ORD,]
      } else {
        tmp.out$Wk <- tmp.out$Wk
        tmp.out$Hk <- rbind(tmp.out$Hk)
      }
      tmp.out
    })), error = (function(e) {
      print(e)
    }))
    if (debugStatus) {
    message("Done!", appendLF = TRUE)
    }
  } else {
    # Initialize cores
    max.cores <- parallel::detectCores()
    max.cores <- max.cores - 1
    max.cores <- ifelse(max.cores < 1, 1, max.cores)
    use.cores <- ifelse(1 <= num_parallelCores & num_parallelCores <= max.cores,
                        num_parallelCores, max.cores)
    
    if (debugStatus) {
      cl <- suppressMessages(parallel::makeCluster(use.cores, outfile = ""))
    } else {
      cl <- suppressMessages(parallel::makeCluster(use.cores))
    }
    
    print(paste("Extracting", num_processesToExtract, "mutational signatures X",
                num_totIterations, "iterations using", use.cores, "cores"))
    suppressMessages(doParallel::registerDoParallel(cl))
    stuffToExp <- c("alexaNMF", "leadZeros", "extractSignatures", "frequencize",
                    "bootstrapCancerGenomes", 
                    #"guide.W", 
                    #"guided", 
                    #"params",
                    "chihJenNMF")
    
    suppressMessages(parallel::clusterExport(cl, stuffToExp))

    # Also, use guide.W to sort results
    muCounts.checkDF <- tryCatch(foreach::foreach(j = (1:num_totIterations),
                                                  .verbose = TRUE, .packages = "stats") %dopar%
                                                  {
                                                    if (j %in% base::as.integer(seq(1, num_totIterations, length.out = 100))) {
                                                      message(paste("(",j,")", sep = ""), appendLF = FALSE)
                                                    }
                                                    tmp.out <- extractSignatures(mutCountMatrix = bckgrnd.removed.mutCounts,
                                                                                 params = params)
                                                    if(guided){
                                                      re.ORD <- rep(0, num_processesToExtract)
                                                      for (ki in 1:num_processesToExtract) {
                                                        my.i <- order(apply(abs(tmp.out$Wk - guide.W[,ki]), 2, sum))
                                                        if (ki > 1) {
                                                          my.i[re.ORD[1:(ki-1)]] <- max(my.i) + 1
                                                        }
                                                        re.ORD[ki] <- which.min(my.i)
                                                      }
                                                    } else {
                                                      re.ORD <- 1:num_processesToExtract
                                                    }
                                                    # compare to ref and sort!
                                                    if(num_processesToExtract > 1) {
                                                      tmp.out$Wk <- tmp.out$Wk[,re.ORD]
                                                      tmp.out$Hk <- tmp.out$Hk[re.ORD,]
                                                    } else {
                                                      tmp.out$Wk <- tmp.out$Wk
                                                      tmp.out$Hk <- rbind(tmp.out$Hk)
                                                    }
                                                    tmp.out
                                                  }, error = (function(e) {
                                                    print(e)
                                                  }), finally = (function(f) {
                                                    parallel::stopCluster(cl)
                                                  }))
    message("Done!", appendLF = TRUE)
  }
  
  # Run analysis in parallel
  W.all <- do.call(cbind, lapply(muCounts.checkDF, (function(tmp){
    tmp$Wk
  })))
  H.all <- do.call(rbind, lapply(muCounts.checkDF, (function(tmp){
    tmp$Hk
  })))
  errors.all <- lapply(muCounts.checkDF, (function(tmp){
    tmp$mutCounts.errors
  }))
  reconstruct.all <- lapply(muCounts.checkDF, (function(tmp){
    tmp$mutCounts.reconstructed
  }))
  
  # get rid of pre-processed data 
  # muCounts.checkDF <- NULL
  fltr.mutCounts.data <- filterOutIterations(wall = W.all,
                                             hall = H.all,
                                             cnt_errors = errors.all,
                                             cnt_reconstructed = reconstruct.all,
                                             params)
  stability.check <- evaluateStability(wall = fltr.mutCounts.data$Wall,
                                       hall = fltr.mutCounts.data$Hall, params)
  final.mutCounts.data <- addWeak(mutationTypesToAddSet = bckgrnd.removed.mutset,
                                  processes_I = stability.check$centroids, 
                                  processesStd_I = stability.check$centroidStd,
                                  Wall_I = fltr.mutCounts.data$Wall, 
                                  genomeErrors_I = fltr.mutCounts.data$mutCounts.errors,
                                  genomesReconstructed_I = fltr.mutCounts.data$mutCounts.reconstructed)
  
  # in case of 'freq' approach, adjust 
  deconvoluted.results <- list()
  deconvoluted.results$Wall <- final.mutCounts.data$Wall
  deconvoluted.results$Hall <- fltr.mutCounts.data$Hall
  deconvoluted.results$mutCountErrors <- final.mutCounts.data$mutCountErrors
  deconvoluted.results$mutCountReconstructed <- final.mutCounts.data$mutCountReconstructed
  deconvoluted.results$idx <- stability.check$idx
  deconvoluted.results$idxS <- stability.check$idxS
  deconvoluted.results$processes <- final.mutCounts.data$processes
  deconvoluted.results$processesStd <- final.mutCounts.data$processesStd
  deconvoluted.results$exposure <- stability.check$exposure
  deconvoluted.results$exposureStd <- stability.check$exposureStd
  deconvoluted.results$processStab <- stability.check$processStab
  deconvoluted.results$processStabAvg <- stability.check$processStabAvg
  deconvoluted.results$clusterCompactness <- stability.check$clusterCompactness
  #message("completed")
  return(deconvoluted.results)
}


extractXvarlinkData <- function(xvarLink_data) {
  tmpVars <- sub("^.*&var=[[:alnum:]]+(,.*)&.*$", "\\1", xvarLink_data)
  tmpVars[!grepl("^,.+", tmpVars)] <- NA
  tmpVars <- strsplit(tmpVars, ",")
  tmpVars <- do.call(rbind, lapply(1:length(tmpVars), function(i){
    if (tmpVars[[i]][1] == "" & length(tmpVars[[i]]) == 5) {
      tmpVars[[i]][2:5]
    } else {
      c(NA, NA, NA, NA)
    }
  }))
  out <- data.frame(chrXvar = base::as.character(tmpVars[,1]),
                    posXvar = base::as.numeric(tmpVars[,2]),
                    refXvar = base::as.character(tmpVars[,3]),
                    mutXvar = base::as.character(tmpVars[,4]),
                    stringsAsFactors = FALSE)
  return(out)
}


extractSignatures <- function (mutCountMatrix, 
                               params, 
                               bootStrap = TRUE)
{
  num_processesToExtract <- params$num_processesToExtract
  approach <- params$approach
  algorithm <- params$algorithm
  eps <- params$eps
  
  if (bootStrap) {
    bstrpd.result <- bootstrapCancerGenomes(mutCountMatrix) 
  } else {
    bstrpd.result <- mutCountMatrix
  }
  
  #if (approach != "counts") {
  #  frq.bstrpd <- frequencize(bstrpd.result)
  #  bstrpd.result <-  frq.bstrpd$freqs
  #}
  bstrpd.result[bstrpd.result < eps] <- eps
  
  if (algorithm %in% c("brunet", "alexa")) {
    nmf.results <- alexaNMF(v = bstrpd.result,
                            r = num_processesToExtract,
                            params = params)
  } else {
    nmf.results <- chihJenNMF(v =  bstrpd.result,
                              r = num_processesToExtract,
                              params = params)
  } 
  # nmf.results
  tmp.w <- nmf.results$w
  tmp.h <- nmf.results$h
  
  # This step seems useless for the new approach, as it divides or multiplies by 1
  # Keep for consistency and cause we let the user select what approach to use
  for (jj in 1:num_processesToExtract) {
    tmp.tot <- sum(tmp.w[, jj])
    tmp.w[, jj] <- tmp.w[, jj]/tmp.tot
    tmp.h[jj, ] <- tmp.h[jj, ] * tmp.tot
  }
  
  # modify for frequentized approach 
  #if (approach != "counts"){
  #  if (length(frq.bstrpd$colSums) == ncol(tmp.h)) {
  #    tmp.h <- sapply(1:length(frq.bstrpd$colSums), function(ai) {
  #      frq.bstrpd$colSums[ai] * tmp.h[,ai] / sum(tmp.h[,ai], na.rm = TRUE)
  #    })
  #  }  
  #}
  
  mutCountMatrix.reconstructed <- tmp.w %*% tmp.h
  
  result.list <- list()
  result.list$Wk <- tmp.w
  result.list$Hk <- tmp.h
  result.list$mutCounts.reconstructed <- mutCountMatrix.reconstructed
  result.list$mutCounts.errors <- bstrpd.result - mutCountMatrix.reconstructed
  
  if (params$logIterations != "lite") {
    result.list$inputMatrix <- bstrpd.result
    result.list$cosDist <- proxy::dist(rbind(base::as.vector(bstrpd.result),
                                             base::as.vector(mutCountMatrix.reconstructed)),
                                       method = "cosine")[1]
  }
  
  return(result.list)
}


filterSNV <- function(dataSet, 
                      seq_colNames) 
{
  if (!(is.data.frame(dataSet) | is.matrix(dataSet) ) |
      sum(!seq_colNames %in% colnames(dataSet)) > 0 |
      length(seq_colNames) < 2) {
    stop("Bad input or seq_colNames not found")
  }
  check.tab <- sapply(1:length(seq_colNames), (function(i){
    tmp <- gsub("[[:space:]]", "", dataSet[,seq_colNames[i]])
    toupper(tmp) %in% c("A","C","G","T")
  }))
  toKeep <- apply(check.tab, 1, (function(rw){
    sum(rw) == length(rw)
  }))
  out <- dataSet[toKeep,]
  rownames(out) <- NULL
  return(out)
}


frequencize <- function(countMatrix, 
                        permille = TRUE)
{
  out <- list()
  cf <- ifelse(permille, 1000, 1)
  
  out[["colSums"]] <- apply(countMatrix, 2, sum)
  out[["freqs"]] <- cf * apply(countMatrix, 2, (function(clmn){clmn/sum(clmn)}))
  return(out)
}


getCosmicSignatures <- function(forceUseMirror = FALSE, asMutSign = TRUE)
{
  mutType.labels <- c("A[C>A]A", "A[C>A]C", "A[C>A]G", "A[C>A]T", "A[C>G]A", "A[C>G]C",
                      "A[C>G]G", "A[C>G]T", "A[C>T]A", "A[C>T]C", "A[C>T]G", "A[C>T]T",
                      "A[T>A]A", "A[T>A]C", "A[T>A]G", "A[T>A]T", "A[T>C]A", "A[T>C]C",
                      "A[T>C]G", "A[T>C]T", "A[T>G]A", "A[T>G]C", "A[T>G]G", "A[T>G]T",
                      "C[C>A]A", "C[C>A]C", "C[C>A]G", "C[C>A]T", "C[C>G]A", "C[C>G]C",
                      "C[C>G]G", "C[C>G]T", "C[C>T]A", "C[C>T]C", "C[C>T]G", "C[C>T]T",
                      "C[T>A]A", "C[T>A]C", "C[T>A]G", "C[T>A]T", "C[T>C]A", "C[T>C]C",
                      "C[T>C]G", "C[T>C]T", "C[T>G]A", "C[T>G]C", "C[T>G]G", "C[T>G]T",
                      "G[C>A]A", "G[C>A]C", "G[C>A]G", "G[C>A]T", "G[C>G]A", "G[C>G]C",
                      "G[C>G]G", "G[C>G]T", "G[C>T]A", "G[C>T]C", "G[C>T]G", "G[C>T]T",
                      "G[T>A]A", "G[T>A]C", "G[T>A]G", "G[T>A]T", "G[T>C]A", "G[T>C]C",
                      "G[T>C]G", "G[T>C]T", "G[T>G]A", "G[T>G]C", "G[T>G]G", "G[T>G]T",
                      "T[C>A]A", "T[C>A]C", "T[C>A]G", "T[C>A]T", "T[C>G]A", "T[C>G]C",
                      "T[C>G]G", "T[C>G]T", "T[C>T]A", "T[C>T]C", "T[C>T]G", "T[C>T]T",
                      "T[T>A]A", "T[T>A]C", "T[T>A]G", "T[T>A]T", "T[T>C]A", "T[T>C]C",
                      "T[T>C]G", "T[T>C]T", "T[T>G]A", "T[T>G]C", "T[T>G]G", "T[T>G]T")
  cosmic.url <- "http://cancer.sanger.ac.uk/cancergenome/assets/signatures_probabilities.txt"
  
  #initialize variable
  my_fullW <- NULL
  
  if (!forceUseMirror) {
    my_fullW <- tryCatch({TMP <- suppressWarnings(read.delim(cosmic.url, header = TRUE));
    rownames(TMP) <- TMP$Somatic.Mutation.Type;
    TMP <- TMP[,grep("Signature", colnames(TMP))];
    TMP},
    error = function(e) NULL)
  } 
  
  # private mirror
  if (is.null(my_fullW)) {
    private.mirror.url <- "http://www.labwizards.com/rlib/mutSignatures/cosmic.signatures.csv"
    my_fullW <- tryCatch({suppressWarnings(read.csv(private.mirror.url, 
                                                    header = TRUE, as.is = TRUE, row.names = 1))},
                         error = function(e2) { NULL })
  }
  if (is.null(my_fullW)) {
    message("An error occurred!")
    return(NULL)
  } else {
    if (sum(mutType.labels %in% rownames(my_fullW)) == length(mutType.labels)) {
      obj2rt <- my_fullW[mutType.labels,]
      if(asMutSign)
        obj2rt <- mutSignatures::as.mutation.signatures(obj2rt)
      
      return(obj2rt)
    } else {
      message("An error occurred!")
      return(NULL)
    }   
  }
}


importVCFfiles <- function(vcfFiles, sampleNames = NULL){
  
  my.colnames <- c("CHROM", "POS", "ID", "REF", "ALT", "QUAL", 
                   "FILTER", "INFO", "FORMAT", "XTR1", "XTR2", "XTR3")
  
  if (is.null(sampleNames) | length(sampleNames) != length(vcfFiles)){
    
    bypassNames <- paste("sample", 1:length(vcfFiles), sep = ".")
    
    sampleNames <- vcfFiles
    sampleNames <- sub("\\.vcf$", "", sub("^.*(\\\\|/)", "", tolower(sampleNames)))
    
    sampleNames[sampleNames == ""] <- bypassNames[sampleNames == ""]
    
  }
  out <- sapply(1:length(vcfFiles), (function(j){
    x <- vcfFiles[j]
    if (!file.exists(x)) {
      NULL
    } else {
      tmpVCF <- read.delim(x, comment.char = '#', header = F, stringsAsFactors = F)
      
      for (i in 1:ncol(tmpVCF)) {
        colnames(tmpVCF)[i] <- my.colnames[i]      
      }
      tmpVCF$SAMPLEID <- sampleNames[j]
      tmpVCF
    }
  }), simplify = FALSE, USE.NAMES = TRUE)
  out <- do.call(rbind, out)
  #names(out) <- sub("\\.vcf$", "", names(out))
  return(out)
}


plotMutCount <- function(mutCount, top = 50) {
  # avoid NOTEs
  count <- NULL
  feature <- NULL
  
  sampleLabs <- rownames(mutCount)
  rownames(mutCount) <- NULL
  
  nuSmplOrder <- order(apply(mutCount, 1, sum), decreasing = TRUE)
  mutCount <- mutCount[nuSmplOrder,]
  if (!is.null(sampleLabs))
    sampleLabs <- sampleLabs[nuSmplOrder]
  
  rownames(mutCount) <- NULL
  
  if (is.null(top)) {
    mutDF <- table2df(dataMatrix = mutCount)
  } else if(is.na(base::as.numeric(top[1]))){
    mutDF <- table2df(dataMatrix = mutCount)
  } else if (top[1] > nrow(mutCount) | top[1] < 2) {
    mutDF <- table2df(dataMatrix = mutCount)
  } else {
    mutDF <- table2df(dataMatrix = mutCount[1:top,])
    if (!is.null(sampleLabs))
      sampleLabs <- sampleLabs[1:top]
  }
  
  mutDF$sample <- 100000 + base::as.numeric(base::as.character(mutDF$sample))
  mutDF$sample <- base::as.character(mutDF$sample)
  
  tryCatch({mutDF$inputLabel <- do.call(c, lapply(sampleLabs, 
                                                  rep, times = ncol(mutCount)))}, 
           error = function(e) {
             message("damn")})
  
  mutDF$feature <- factor(mutDF$feature, levels = rev(colnames(mutCount)))
  bp <- ggplot(data=mutDF, aes(x=sample, y=count, fill=feature)) +
    geom_bar(stat="identity")
  bp <- bp + theme_minimal() + 
    theme(axis.ticks.x = element_blank(), 
          axis.text.x = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank()) +
    theme(axis.line.y = element_line(colour = "black", size = 0.75),
          axis.line.x = element_line(colour = "black", size = 0.75),
          axis.ticks.y = element_line(colour = "black", size = 1),
          axis.ticks.length = unit(x = 6, "points"),
          plot.title = element_text(hjust = 0.5)) +
    scale_y_continuous(expand=c(0,0),
                       limits = c(0, 1.2 * max(apply(mutCount, 1, function(rx) sum(rx, na.rm = TRUE)))))

  return(bp)
}


plotMutTypeProfile <- function(mutCounts,
                               mutLabs,
                               freq = TRUE,
                               ylim = "auto",
                               ylab = "Fraction of Variants",
                               xlab = "Sequence Motifs",
                               xaxis_cex = 0.475,
                               cols = c("#4eb3d3", "#040404", "#b30000", "#bdbdbd", "#41ab5d", "#dd3497"),
                               main = "MutType Profile", 
                               ...)
{
  
  mutLabs <- toupper(mutLabs)
  if (!sum(regexpr("^(A|C|G|T)(\\[)(A|C|G|T)(>)(A|C|G|T)(\\])(A|C|G|T)$", toupper(mutLabs)) > 0) == length(mutLabs)) {
    message("mutTypes are in a non-standard format (ie, not Sanger 'A[G>A]T' style)")
    altPlot <- 1
  } else {
    altPlot <- 0
  }

  
  if (length(mutCounts) != length(mutLabs))
    stop("Mutation Type Labels and Mutation Counts do not match!") 
  
  mutCounts[is.na(mutCounts)] <- 0
  
  if (freq == TRUE) {
    if (ylim[1] == "auto")
      ylim <- c(0,0.2)
    freqs <- mutCounts/sum(mutCounts, na.rm = TRUE)
  } else {
    if (ylim[1] == "auto")
      ylim = c(0,(1.05*max(mutCounts)))
    freqs <- mutCounts
  }
  
  tinyMut <- gsub("\\](.*)$", "",
                  gsub("^(.*)\\[", "", toupper(mutLabs)))
  
  # Order the frequencies
  first.out <- lapply(sort(unique(tinyMut)), (function(mt){
    idxKeep <- which(tinyMut == mt)
    tmp.order <- order(mutLabs[idxKeep])
    out <- freqs[idxKeep][tmp.order]
    names(out) <- mutLabs[idxKeep][tmp.order]
    out
  }))
  names.first.out <- sort(unique(tinyMut))
  
  # Add a spacer
  second.out <- lapply(first.out, (function(vct){
    c(vct, 0)
  }))
  
  # Wrap together
  third.out <- do.call(c, second.out)
  third.out <- third.out[-length(third.out)]
  
  # Define color scheme
  col.out <- lapply(1:length(first.out), (function(ii){
    rep(cols[ii], (length(first.out[[ii]]) + 1))
  }))
  col.out <- do.call(c, col.out)
  col.out <- col.out[-length(col.out)]
  
  third.out[third.out>max(ylim)] <- max(ylim)
  third.shortLab <- gsub("\\[([[:alpha:]]>[[:alpha:]])\\]","-", names(third.out))
  
  xpos <- barplot(third.out,
                  col = col.out,
                  ylim = ylim,
                  axes = FALSE,
                  names.arg = FALSE,
                  ylab = ylab,
                  border = NA,
                  main = main)
  
  xpos.out <- lapply(1:length(first.out), (function(ii){
    c(rep(ii, length(first.out[[ii]])),0)
  }))
  xpos.out <- do.call(c, xpos.out)
  xpos.out <- xpos.out[-length(xpos.out)]
  
  #(max(ylim)*0.0075*0.2)
  box()
  axis(side = 1, tick = FALSE, hadj = 1, cex.axis = xaxis_cex, pos = (max(ylim) * 0.035), family = "mono",
       at = xpos,
       labels = third.shortLab,
       las = 2)
  
  my.y <- seq(min(ylim), max(ylim), length.out = 4)
  if (max(ylim) > 1.75) {
    my.y.labs <- format(round(my.y, digits = 0))
  } else {
    my.y.labs <- format(round(my.y, digits = 2))
  }
  
  axis(side = 2, tick = -0.005, at = my.y, labels = my.y.labs, las = 1, cex.axis = 0.75)
  
  zz <- unique(xpos.out)
  zz <- zz[zz>0]
  lab.xx <- sapply(zz, (function(i){
    mean(xpos[,1][xpos.out == i]) 
  }))
  mtext(names.first.out, side = 1, line = 1.5, at = lab.xx, cex = 0.8, col = cols)
  
  mtext(xlab, side = 1, line = 3.0, at = mean(xpos[,1]), cex = 1)
  # done! No return, just a plot!
}


prelimProcessAssess <- function(input,
                                maxProcess = 6,
                                approach = "counts",
                                plot = TRUE,
                                verbose = TRUE) 
{
  tmpParams <- setMutClusterParams(num_processesToExtract = 1, 
                                   approach = approach,
                                   stopconv = 10000,
                                   niter = 100000,
                                   thresh_removeWeakMutTypes = 0.01,
                                   thresh_removeLastPercent =0.025,
                                   num_totIterations = 10,
                                   num_parallelCores = 1,
                                   debug = FALSE,
                                   logIterations = "full")
  
  tmpParams <- coerceObj(x = tmpParams, to = "list")
  input_mutCounts <- coerceObj(x = input, to = "matrix")
  
  if (tmpParams$approach != "counts") {
    tmpFRQ.input <- frequencize(input_mutCounts)
    tmpParams$approach <- "counts"
    input_mutCounts <- tmpFRQ.input$freqs
  }
  bckgrnd.removed.mutCounts <- removeWeak(input_mutCounts,
                                          tmpParams)
  bckgrnd.removed.mutset <- bckgrnd.removed.mutCounts$removed.mutset
  bckgrnd.removed.mutCounts <- bckgrnd.removed.mutCounts$output.mutCounts
  total.mutationTypes <- nrow(bckgrnd.removed.mutCounts)
  total.samples <- ncol(bckgrnd.removed.mutCounts)
  tmpParams$num_totIterations <- 1
  
  # Calc initial (max) error
  medianMaxErr <- sum(t(sapply(1:nrow(bckgrnd.removed.mutCounts), (function(i){
    ((median(bckgrnd.removed.mutCounts[i,]) - bckgrnd.removed.mutCounts[i,]))^2
  }))))
  
  # Message
  if(verbose)
    message("Preliminary Mutational Process Assessment: ", appendLF = FALSE)
  
  outRes <- lapply(1:maxProcess, (function(i){
    tmpParams$num_processesToExtract <- i
    tmpRes <- suppressWarnings(suppressMessages(
      extractSignatures(mutCountMatrix = bckgrnd.removed.mutCounts,
                        params = tmpParams,
                        bootStrap = FALSE)))
    
    #apply(tmpRes$mutCounts.reconstructed, 2, sum)
    #mutCounts.reconstructed
    #tmpErr <- sum((tmpRes$mutCounts.errors)^2)
    tmpErr <- sum((bckgrnd.removed.mutCounts - tmpRes$mutCounts.reconstructed) ^ 2)
    
    if(verbose)
      message(".", appendLF = FALSE)
    tmpErr
  }))
  if(verbose)
    message("", appendLF = TRUE)
  
  err.points <- c(medianMaxErr, do.call(c, outRes))
  err.points <- (-1) * (err.points - max(err.points))
  err.points <- err.points / max(err.points, na.rm = TRUE)
  
  if (plot) {
    plot(err.points ~ c(0:(length(err.points) - 1)), type = "n", las = 1, axes = FALSE,
         ylab = "", xlab = "", main = "Preliminary Mutational Process Assessment")
    axis(side = 1, at = NULL, cex = 0.65, font = 3)
    axis(side = 2, at = seq(0, 1, by = 0.2), cex = 0.65, font = 3, 
         labels = format(seq(1, 0, by = -0.2), digits = 2, nsmall = 2), las = 1)
    lines(c(0:(length(err.points) - 1)), err.points, lwd = 1.5, col = "red2")
    points(c(0:(length(err.points) - 1)), err.points, pch = 19, col = "gray30")
    title(xlab="Num of Signatures", line=2.2, cex.lab=1.2, font = 2)
    title(ylab="Error (% vs. Median)", line=3.1, cex.lab=1.2, font = 2)
    box()
  }
  
  return(data.frame(numProcess = c(0:(length(err.points) - 1)),
                    percentErr = (1 - err.points),
                    stringsAsFactors = TRUE))
}


processVCFdata <- function(vcfData, 
                           BSGenomeDb,
                           chr_colName = "CHROM",
                           pos_colName = "POS",
                           ref_colName = "REF",
                           alt_colName = "ALT",
                           sample_colName = NULL,
                           nucl_contextN = 3,
                           verbose = TRUE)
{
  # make sure
  if (verbose)
    message("Processing VCF data: ", appendLF = FALSE)
  
  #Initialize
  reprepInput <- NULL
  
  # Fill reprepInput
  if (!is.null(sample_colName)){
    if (length(sample_colName) == 1 & sample_colName[1] %in% colnames(vcfData)){
      
      # compute how many unique samples
      unique.SID <- unique(vcfData[,sample_colName])
      reprepInput <- lapply(unique.SID, function(iID) {
        TMP <- vcfData[vcfData[,sample_colName] == iID, ]
        rownames(TMP)
        TMP
      })
    } else {
      stop("Bad input")
    }
  } else {
    #initialize input as list
    reprepInput <- list(vcfData)
  }
  
  # Triple check
  if(is.null(reprepInput))
    stop("Bad input")
  
  # Now loop
  bigOUT <- lapply(1:length(reprepInput), (function(jj){
    finalOut <- tryCatch({
      if(verbose)
        message(paste("[", jj, "/", length(reprepInput), "]", sep = ""), appendLF = F)
      
      tmpVCF <- suppressMessages({filterSNV(dataSet = reprepInput[[jj]],
                                            seq_colNames = c(ref_colName, alt_colName))})
      if (verbose)
        message(".", appendLF = FALSE)
      
      tmpVCF <- suppressMessages({attachContext(mutData = tmpVCF,
                                                chr_colName = chr_colName,
                                                start_colName = pos_colName,
                                                end_colName = pos_colName,
                                                nucl_contextN = 3,
                                                BSGenomeDb = BSGenomeDb)})
      if (verbose)
        message(".", appendLF = F)
      
      tmpVCF <- suppressMessages({removeMismatchMut(mutData = tmpVCF,
                                                    refMut_colName = ref_colName,
                                                    context_colName = "context",
                                                    refMut_format = "N")})
      if (verbose)
        message(".", appendLF = F)
      
      tmpVCF <- suppressMessages({attachMutType(mutData = tmpVCF,
                                                ref_colName = ref_colName,
                                                var_colName = alt_colName,
                                                var2_colName = alt_colName, 
                                                context_colName = "context")})
      if (verbose)
        message(".", appendLF = F)
      
      tmpVCF
    }, error = function(e) NA)
  }))
  
  bigOUT <- do.call(rbind, bigOUT)
  
  if(verbose) {
    message("", appendLF = T)
    message("Done!", appendLF = T)
  }
  return(bigOUT)
}


removeMismatchMut <- function(mutData,
                              refMut_colName = "mutation",
                              context_colName = "context",
                              refMut_format = "N>N")
{
  # Validate input data and fields in mutData
  if(!((is.data.frame(mutData) | is.matrix(mutData) ) &
       sum(c(refMut_colName, context_colName) %in% colnames(mutData)) == 2))
    stop ("Issue with the input dataset. Make sure to feed in a data.frame or
          a matrix and double check the name of the fields pointing to chromosome
          name, start and end positions")
  if (! refMut_format %in% c("N>N", "N"))
    stop("Unrecognized refMut_format")
  
  output <- data.frame(mutData, stringsAsFactors = FALSE, row.names = NULL)
  colnames(output) <- colnames(mutData)
  if (refMut_format == "N>N") {
    output$mutSite.dnaSeq.Ref <- substr(mutData[,refMut_colName], 1, 1)
    output$mutSite.dnaSeq.Mut <- substr(mutData[,refMut_colName], 3, 3)
    refMut_colName <- "mutSite.dnaSeq.Ref"
  }
  
  pos.to.extr <- (0.5 * (nchar(output[,context_colName]) - 1)) + 1
  keep.id <- output[,refMut_colName] == substr(output[,context_colName], pos.to.extr, pos.to.extr)
  if (sum( keep.id == FALSE) > 0)
    message(paste("Removing", sum( keep.id == FALSE), "rows because of mismatch"))
  
  output <- output[keep.id, ]
  rownames(output) <- NULL
  return(output)
}


resolveMutSignatures <- function(mutCountData, 
                                 signFreqData, 
                                 byFreq = TRUE )
{
  # process expected objects
  if (class(mutCountData) == "mutationCounts")
    mutCountData <- coerceObj(x = mutCountData, to = "data.frame")
  
  if (class(signFreqData) == "mutationSignatures")
    signFreqData <- coerceObj(x = signFreqData, to = "data.frame")
  
  if (!(sum(!rownames(mutCountData) %in% rownames(signFreqData)) == 0 &
        sum(! rownames(signFreqData) %in% rownames(mutCountData)) == 0)) {
    stop (paste("There is an issue with the mutTypes.",
                "MutTypes in the mutType Count Matrix",
                "have to match those in the signature",
                "Matrix... check rownames()"))
  }
  mutCountData <- mutCountData[rownames(signFreqData),]
  
  if (byFreq) {
    full.Y <- apply(mutCountData, 2, (function(clmn){
      1000 * clmn / sum(clmn)
    }))
  } else {
    full.Y <- base::as.matrix(mutCountData)    
  }
  
  
  # record sum counts
  mutSums <- apply(mutCountData, 2, sum)
  
  # make sure we have frequencies
  my.signatures <- apply(signFreqData, 2, (function(clmn){
    clmn / sum(clmn)
  }))
  X <- base::as.matrix(my.signatures)
  
  # initialize results collector
  out <- list()
  
  res <- NMF::fcnnls(x = X, y = full.Y)
  
  beta.hat <- data.frame(t(res$x / ifelse (byFreq, 1000, 1)), stringsAsFactors = FALSE)
  
  colnames(beta.hat) <- colnames(signFreqData)
  rownames(beta.hat) <- colnames(mutCountData)
  out$results <- list()
  out$coeffs <- list()
  out$coeffs$beta.hat <- beta.hat
  out$coeffs$unexplained.mutNum <- round((1 - apply(beta.hat, 1, sum)) * mutSums, digits = 0)
  out$coeffs$unexplained.mutFrac <- out$coeffs$unexplained.mutNum  / mutSums
  
  if (byFreq) {
    for (i in 1:nrow(beta.hat)) {
      beta.hat[i,] <- beta.hat[i,] * mutSums[i]
    }
  }
  
  out$results$count.result <- mutSignatures::as.mutsign.exposures(beta.hat, samplesAsCols = FALSE)
  out$results$freq.result <- mutSignatures::as.mutsign.exposures(do.call(rbind, lapply(1:nrow(beta.hat), (function(jjj){
    beta.hat[jjj,] / sum(beta.hat[jjj,] )
  }))), samplesAsCols = FALSE)
  
  out$results$fitted <- res$fitted
  out$results$residuals <- res$residuals
  
  return(out)
}


revCompl <- function (DNAseq, 
                      rev = TRUE, 
                      compl = TRUE)
{
  if (is.character(DNAseq)) {
    resultVect <- sapply(DNAseq, (function(seqString) {
      mySeq <- toupper(seqString)
      if (rev == TRUE) {
        mySeq <- paste(sapply(1:nchar(mySeq), (function(i) {
          pos <- nchar(mySeq) + 1 - i
          substr(mySeq, pos, pos)
        })), collapse = "")
      }
      if (compl == TRUE) {
        mySeq <- paste(sapply(1:nchar(mySeq), (function(i) {
          aBase <- substr(mySeq, i, i)
          complBase <- c("A", "T", "C", "G", "N", "A")
          names(complBase) <- c("T", "A", "G", "C", "N",
                                "U")
          returnBase <- complBase[aBase]
          if (is.na(returnBase)) {
            stop("Bad input")
          }
          returnBase
        })), collapse = "")
      }
      return(mySeq)
    }))
    names(resultVect) <- NULL
    return(resultVect)
  }
  else {
    warning("Bad input")
  }
}


setMutClusterParams <- function(num_processesToExtract = 2, # number of de-novo signatures to extract
                                num_totIterations = 10,
                                num_parallelCores = 1,
                                thresh_removeWeakMutTypes = 0.01,
                                thresh_removeLastPercent = 0.07,
                                distanceFunction = "cosine",
                                num_totReplicates = 100,
                                eps = 2.2204e-16,
                                stopconv = 20000,
                                niter = 1000000,
                                guided = TRUE,
                                debug = FALSE,
                                approach = "freq",
                                stopRule = "DF",
                                algorithm = "brunet",
                                logIterations = "lite")
{
  #
  # Step-by-step Parameter Validation and preparation
  paramList <- list()
  #
  if (!((is.numeric(num_processesToExtract[1]) & num_processesToExtract[1] > 0) ))
    stop("Provide a reasonable number of signatures/processes to extract")
  paramList$num_processesToExtract <- round(num_processesToExtract[1])
  #
  if (!(is.numeric(num_totIterations[1]) & num_totIterations[1] > 0))
    stop("Provide a reasonable number of iterations to run (Bootstrapping)")
  paramList$num_totIterations <- round(num_totIterations[1])
  #
  if (!(is.numeric(num_parallelCores[1]) & num_parallelCores[1] > 0))
    stop("Provide a reasonable number of CPU cores to use for the analysis")
  paramList$num_parallelCores <- round(num_parallelCores[1])
  #
  #paramList$perCore.iterations <- as.integer(paramList$num_totIterations / paramList$num_parallelCores)
  #paramList$perCore.iterations <- ifelse(paramList$perCore.iterations < 1, 1, paramList$perCore.iterations)
  #
  if (!(is.numeric(thresh_removeWeakMutTypes[1]) & thresh_removeWeakMutTypes[1] >= 0 & thresh_removeWeakMutTypes[1] < 1))
    stop("Provide a reasonable (0.00-0.99) number of low-occurring mutation types to remove from the input before starting the analysis")
  paramList$thresh_removeWeakMutTypes <- thresh_removeWeakMutTypes[1]
  #
  if (!(is.numeric(thresh_removeLastPercent[1]) & thresh_removeLastPercent[1] >= 0 & thresh_removeLastPercent[1] < 1))
    stop("Provide a reasonable (0.00-0.99) number for filtering out poor iterations")
  paramList$thresh_removeLastPercent <- thresh_removeLastPercent[1]
  #
  allowed.dist.methods <- c("Braun-Blanquet", "Chi-squared", "correlation", "cosine", "Cramer", "Dice",
                            "eDice", "eJaccard", "Fager", "Faith", "Gower", "Hamman", "Jaccard",
                            "Kulczynski1", "Kulczynski2", "Michael", "Mountford", "Mozley", "Ochiai",
                            "Pearson", "Phi", "Phi-squared", "Russel", "simple matching", "Simpson",
                            "Stiles", "Tanimoto", "Tschuprow", "Yule", "Yule2", "Bhjattacharyya",
                            "Bray", "Canberra", "Chord", "divergence", "Euclidean", "fJaccard",
                            "Geodesic", "Hellinger", "Kullback", "Levenshtein", "Mahalanobis",
                            "Manhattan", "Minkowski", "Podani", "Soergel","supremum", "Wave", "Whittaker")
  if (!(is.character(distanceFunction[1]) & distanceFunction[1] %in% allowed.dist.methods))
    stop("Unknown method for calculating distances. For options, run: <<summary(proxy::pr_DB)>>")
  paramList$distanceFunction <- distanceFunction[1]
  #
  if (!(is.numeric(num_totReplicates[1]) & num_totReplicates[1] > 99))
    stop("Provide a reasonable number of replicates for stability evaluation of the results")
  paramList$num_totReplicates <- round(num_totReplicates[1])
  #
  if (!(is.numeric(eps[1]) & eps[1] > 0 & eps[1] < 0.0001))
    stop("Provide a reasonably small number (0 < n < 0.0001) for data overflow prevention")
  paramList$eps <- eps[1]
  #
  if (!(is.numeric(stopconv[1]) & stopconv[1] >= 500))
    stop("Provide a reasonable large number: number of 'conn-matrix-stable' iterations before stopping NMF")
  paramList$stopconv <- round(stopconv[1])
  #
  if (!(is.numeric(niter[1]) & niter[1] >= 20000))
    stop("Provide a reasonable large number: total NMF iterations")
  paramList$niter <- round(niter[1])
  #
  paramList$guided <- ifelse(guided, TRUE, FALSE)
  paramList$debug <- ifelse(debug, TRUE, FALSE)
  paramList$approach <- ifelse (approach == "counts", "counts", "freq")
  paramList$stopRule <- ifelse (stopRule == "LA", "LA", "DF")
  paramList$algorithm <- ifelse (tolower(algorithm) %in% c("brunet", "alexa"), "brunet", "chihjen")
  paramList$logIterations <- ifelse(tolower(logIterations) %in% c("lite", "light", "li"), "lite", "full")
  #
  return(new(Class = "mutFrameworkParams", params = paramList))
}


table2df <- function(dataMatrix, 
                     rowLab = "sample", 
                     colLab = "feature", 
                     valueLab = "count") 
{
  #
  if(!is.null(colnames(dataMatrix))) {
    names.X <- colnames(dataMatrix)
  } else {
    names.X <- 1:ncol(dataMatrix)
  }
  if(!is.null(rownames(dataMatrix))) {
    names.Y <- rownames(dataMatrix)
  } else {
    names.Y <- 1:nrow(dataMatrix)
  }
  #
  tmp <- do.call(rbind, lapply(1:nrow(dataMatrix), (function(i){
    t(sapply(1:ncol(dataMatrix), (function(j){
      c(names.Y[i], names.X[j], dataMatrix[i,j])
    })))
  })))
  tmp <- data.frame(tmp, stringsAsFactors = FALSE)
  if (is.numeric(dataMatrix[1,1]))
    tmp[,3] <- base::as.numeric(base::as.character(tmp[,3]))
  rownames(tmp) <- NULL
  colnames(tmp) <- c(rowLab, colLab, valueLab)
  return(tmp)
}


attachContext <- function(mutData,
                          BSGenomeDb,
                          chr_colName = "chr",
                          start_colName = "start_position",
                          end_colName = "end_position",
                          nucl_contextN = 3,
                          context_colName = "context")
{
  # Make sure about the right dependencies, or kill the process smoothly (no error)
  if (sum(grepl("^GenomicRanges", rownames(installed.packages()))) == 0 &
      sum(grepl("^BSgenome", rownames(installed.packages()))) == 0 ) {
    message("The `attachContext` function depends on the following bioconductor repositories:")
    message(" >  GenomicRanges")
    message(" >  a BSgenome data package, such as BSgenome.Hsapiens.UCSC.hg19")
    message("")
    message("Please install these packages via bioconductor and try again!")
    return(NULL)
  }
  
  # Validate input data and fields in mutData
  if(!((is.data.frame(mutData) | is.matrix(mutData) ) &
       sum(c(chr_colName, start_colName, end_colName) %in% colnames(mutData)) == 3 ))
    stop ("Issue with the input dataset. Make sure to feed in a data.frame or
          a matrix and double check the name of the fields pointing to chromosome
          name, start and end positions")
  if (!(is.numeric(nucl_contextN[1]) &
        nucl_contextN[1] > 2 & ((nucl_contextN[1] - 1) %% 2 == 0)))
    stop ("Please, specify a odd number > 2 as nucl_contextN")
  if (class(BSGenomeDb) != "BSgenome" | 
      sum(c("pkgname", "organism") %in% slotNames(BSGenomeDb)) != 2)
    stop ("Please, provide a valid 'BSgenome'-class object")
    
  # retrieve data, check quality, prepare
  output.data <- data.frame(mutData, stringsAsFactors = FALSE, row.names = NULL)
  rws.toExcl <- which(is.na(output.data[,chr_colName]) | 
                        is.na(output.data[,start_colName]) | 
                        is.na(output.data[,end_colName]))
  if (length(rws.toExcl) != 0) {
    output.data <- output.data[-rws.toExcl,]
  }
  chrPatt <- "(^chr([[:digit:]]{1,2})$)|(^chr(X|Y|M)$)|(^(X|Y|M)$)|(^[[:digit:]]{1,2}$)"
  rws.toKeep <- grep(chrPatt, output.data[,chr_colName])
  output.data <- output.data[rws.toKeep,]
  rws.toKeep <- grep("(^[[:digit:]]+$)", output.data[,start_colName])
  output.data <- output.data[rws.toKeep,]
  rws.toKeep <- grep("(^[[:digit:]]+$)", output.data[,end_colName])
  output.data <- output.data[rws.toKeep,]
  message(paste( (nrow(mutData) - nrow(output.data)), "rows were excluded from analysis"))
  
  tmp.ranges <- data.frame(cbind(base::as.character(gsub("^(C|c)hr", "", base::as.character(base::as.vector(output.data[,chr_colName])))),
                                 base::as.character(base::as.vector(output.data[,start_colName])),
                                 base::as.character(base::as.vector(output.data[,end_colName]))), 
                           stringsAsFactors = FALSE)
  colnames(tmp.ranges) <- c("chr", "start", "end")
  tmp.ranges$chr <- paste("chr", tmp.ranges$chr, sep = "")
  tmp.ranges$start <- base::as.numeric(tmp.ranges$start)
  tmp.ranges$end <- base::as.numeric(tmp.ranges$end)
  
  # Now let's check which are not point mutations
  non.point <- which(abs(tmp.ranges$end - tmp.ranges$start) > 1)
  if (length(non.point) > 0 ) {
    message(paste(length(non.point), "rows removed cause those are not point mutations"))
    tmp.ranges <- tmp.ranges[-non.point,]
    output.data <- output.data[-non.point,]
  }
  
  # Adding this line to fix an issue with human chromosomes X and Y, sometimes referred to as
  # chromosome 23 and 24
  if (BSGenomeDb@organism == "Homo sapiens"){
    tmp.ranges$chr[tmp.ranges$chr == "chr23"] <- "chrX"
    tmp.ranges$chr[tmp.ranges$chr == "chr24"] <- "chrY"
  } else if (BSGenomeDb@organism == "Mus musculus"){
    tmp.ranges$chr[tmp.ranges$chr == "chr20"] <- "chrX"
    tmp.ranges$chr[tmp.ranges$chr == "chr21"] <- "chrY"
  }
  
  # define a GRanges object
  half.context <- round((nucl_contextN[1] - 1) / 2, digits = 0)
  half.context <- ifelse(half.context <1, 1, half.context)
  tmp.granges <- GenomicRanges::GRanges(tmp.ranges$chr,
                                        IRanges::IRanges(start = (tmp.ranges$start - half.context),
                                                         end =  (tmp.ranges$start + half.context)),
                                        strand = "*")

  # make sure to remove seqs out-of-bounds
  message("Removing out-of-bounds positions...  ", appendLF = FALSE)
  tmp.seqNm <- BSgenome::seqnames(BSGenomeDb)
  tmp.seqNm <- tmp.seqNm[tmp.seqNm %in% unique(base::as.vector(BSgenome::seqnames(tmp.granges)))]
  chr.lens <- sapply(tmp.seqNm, (function(sqnm){
    length(BSGenomeDb[[sqnm]])
  }))
  keepId <- sapply(1:nrow(tmp.ranges), (function(jj){
    if (! tmp.ranges[jj, "chr"] %in% tmp.seqNm) {
      FALSE
    } else if ((tmp.ranges[jj, "end"] + half.context) > chr.lens[tmp.ranges[jj, "chr"]] |
               (tmp.ranges[jj, "start"] - 1) < 1) {
      FALSE
    } else {
      TRUE
    }
  }))
  message(sum(!keepId), " records were removed.", appendLF = TRUE)
  
  tmp.ranges <- tmp.ranges[keepId,]
  tmp.granges <- tmp.granges[keepId]
  output.data <- output.data[keepId,]
  
  all.contexts <- BSgenome::getSeq(BSGenomeDb, tmp.granges)
  all.contexts.seq <- sapply(1:length(all.contexts), (function(i){toString(all.contexts[[i]])}))
  
  # Attach sequences and wrap-up
  output.data[,context_colName] <- all.contexts.seq
  rownames(output.data) <- NULL
  message("Done!")
  return(output.data)
}


countMutTypes <- function (mutTable, mutType_colName = "mutType", sample_colName = NULL) 
{
  if (!(is.data.frame(mutTable) | is.matrix(mutTable))) 
    stop("Bad input")
  if (!mutType_colName %in% colnames(mutTable)) 
    stop("mutType field not found")
  if (!is.null(sample_colName)) {
    if (!sample_colName %in% colnames(mutTable)) 
      stop("sample_colName field not found")
  }
  mutType.labels <- c("A[C>A]A", "A[C>A]C", "A[C>A]G", "A[C>A]T", 
                      "A[C>G]A", "A[C>G]C", "A[C>G]G", "A[C>G]T", "A[C>T]A", 
                      "A[C>T]C", "A[C>T]G", "A[C>T]T", "A[T>A]A", "A[T>A]C", 
                      "A[T>A]G", "A[T>A]T", "A[T>C]A", "A[T>C]C", "A[T>C]G", 
                      "A[T>C]T", "A[T>G]A", "A[T>G]C", "A[T>G]G", "A[T>G]T", 
                      "C[C>A]A", "C[C>A]C", "C[C>A]G", "C[C>A]T", "C[C>G]A", 
                      "C[C>G]C", "C[C>G]G", "C[C>G]T", "C[C>T]A", "C[C>T]C", 
                      "C[C>T]G", "C[C>T]T", "C[T>A]A", "C[T>A]C", "C[T>A]G", 
                      "C[T>A]T", "C[T>C]A", "C[T>C]C", "C[T>C]G", "C[T>C]T", 
                      "C[T>G]A", "C[T>G]C", "C[T>G]G", "C[T>G]T", "G[C>A]A", 
                      "G[C>A]C", "G[C>A]G", "G[C>A]T", "G[C>G]A", "G[C>G]C", 
                      "G[C>G]G", "G[C>G]T", "G[C>T]A", "G[C>T]C", "G[C>T]G", 
                      "G[C>T]T", "G[T>A]A", "G[T>A]C", "G[T>A]G", "G[T>A]T", 
                      "G[T>C]A", "G[T>C]C", "G[T>C]G", "G[T>C]T", "G[T>G]A", 
                      "G[T>G]C", "G[T>G]G", "G[T>G]T", "T[C>A]A", "T[C>A]C", 
                      "T[C>A]G", "T[C>A]T", "T[C>G]A", "T[C>G]C", "T[C>G]G", 
                      "T[C>G]T", "T[C>T]A", "T[C>T]C", "T[C>T]G", "T[C>T]T", 
                      "T[T>A]A", "T[T>A]C", "T[T>A]G", "T[T>A]T", "T[T>C]A", 
                      "T[T>C]C", "T[T>C]G", "T[T>C]T", "T[T>G]A", "T[T>G]C", 
                      "T[T>G]G", "T[T>G]T")
  custPatt01 <- "^(A|C|G|T)\\[(A|C|G|T)>(A|C|G|T)\\](A|C|G|T)$"
  if (sum(regexpr(custPatt01, mutTable[, mutType_colName]) > 
          0) != length(mutTable[, mutType_colName])){
    message("Non-standard format... MutTypes will be inferred.")
    
    mutType.labels <- sort(unique(mutTable[, mutType_colName]))
    
  } else {
    # fix reverse-complement if needed
    # only works for the standard three-nucletide system
    idx.to.fix <- which(!mutTable[, mutType_colName] %in% mutType.labels)
    if (length(idx.to.fix) > 0) {
      tmp <- mutTable[, mutType_colName][idx.to.fix]
      corrected.mutTypes <- sapply(1:length(tmp), (function(i) {
        out <- c(revCompl(substr(tmp[i], 7, 7)), "[", revCompl(substr(tmp[i], 
                                                                      3, 3)), ">", revCompl(substr(tmp[i], 5, 5)), 
                 "]", revCompl(substr(tmp[i], 1, 1)))
        paste(out, collapse = "", sep = "")
      }))
      mutTable[, mutType_colName][idx.to.fix] <- corrected.mutTypes
    }
  }
  
  if (sum(!mutTable[, mutType_colName] %in% mutType.labels) > 0) 
    stop("Problem with the mutType... Please check the input")
  if (is.null(sample_colName)) {
    my.mutTypes <- mutTable[, mutType_colName]
    out.1 <- sapply(mutType.labels, (function(mtt) {
      sum(my.mutTypes == mtt)
    }))
    out.1 <- data.frame(cbind(sample = out.1))
  } else {
    unique.cases <- unique(mutTable[, sample_colName])
    out.1 <- lapply(unique.cases, (function(csid) {
      tmp.df <- mutTable[mutTable[, sample_colName] == 
                           csid, ]
      out.3 <- sapply(mutType.labels, (function(mtt) {
        sum(tmp.df[, mutType_colName] == mtt)
      }))
      out.3 <- data.frame(cbind(out.3))
      colnames(out.3) <- csid
      out.3
    }))
    out.1 <- data.frame(do.call(cbind, out.1), stringsAsFactors = FALSE)
    tryCatch({
      colnames(out.1) <- unique.cases
    }, error = function(e) NULL)
  }
  fOUT <- new(Class = "mutationCounts", 
              x = out.1, 
              muts = data.frame(mutTypes = rownames(out.1),
                                stringsAsFactors = FALSE), 
              samples = data.frame(ID = colnames(out.1),
                                   stringsAsFactors = FALSE))
  return(fOUT)
}


sortByMutations <- function(x) {
  
  if (class(x) == "mutationSignatures") {
    outClass <- "mutationSignatures"
    x1 <- coerceObj(x = x, to = "data.frame")
  } else if (class(x) == "mutationCounts") {
    outClass <- "mutationCounts"
    x1 <- coerceObj(x = x, to = "data.frame")
  } else if (class(x) %in% c("matrix","data.frame")){
    outClass <- "data.frame"
    x1 <- x
  } else {
    stop ("Bad input")
  }
  
  # Get rownames == mutation names
  if (is.null(rownames(x1)))
    stop("Bad input")
  
  aRN <- rownames(x1)
  aRN <- sapply(sort(unique(sub("^.*\\[(.*)\\].*$", "\\1", aRN))), 
                function(mt) {
                  sort(aRN[grepl(mt, aRN)])
                }, USE.NAMES = FALSE, simplify = FALSE) 
  aRN <- do.call(c, aRN)
  ox <- x1[aRN,]
  
  # Format and return
  if (outClass == "data.frame") {
    return(ox)
  } else if (outClass == "mutationSignatures") {
    return(mutSignatures::as.mutation.signatures(ox))
  } else if (outClass == "mutationCounts") {
    return(mutSignatures::as.mutation.counts(ox))
  }
}


simplifySignatures <- function(x, asMutationSignatures = TRUE) {
  if(class(x) != "mutationSignatures")
    stop("Bad input")
  curMT <- getMutationTypes(x)
  if(sum(!grepl("[ACGT]\\[[ACGT]>[ACGT]\\][ACGT]", curMT)) > 0)
    stop("Mutation Types are not compatible with simplifySignatures")
  
  # Start by defined tri-nucleotides
  mutType.labels <- c("A[C>A]A", "A[C>A]C", "A[C>A]G", "A[C>A]T", "A[C>G]A", "A[C>G]C",
                      "A[C>G]G", "A[C>G]T", "A[C>T]A", "A[C>T]C", "A[C>T]G", "A[C>T]T",
                      "A[T>A]A", "A[T>A]C", "A[T>A]G", "A[T>A]T", "A[T>C]A", "A[T>C]C",
                      "A[T>C]G", "A[T>C]T", "A[T>G]A", "A[T>G]C", "A[T>G]G", "A[T>G]T",
                      "C[C>A]A", "C[C>A]C", "C[C>A]G", "C[C>A]T", "C[C>G]A", "C[C>G]C",
                      "C[C>G]G", "C[C>G]T", "C[C>T]A", "C[C>T]C", "C[C>T]G", "C[C>T]T",
                      "C[T>A]A", "C[T>A]C", "C[T>A]G", "C[T>A]T", "C[T>C]A", "C[T>C]C",
                      "C[T>C]G", "C[T>C]T", "C[T>G]A", "C[T>G]C", "C[T>G]G", "C[T>G]T",
                      "G[C>A]A", "G[C>A]C", "G[C>A]G", "G[C>A]T", "G[C>G]A", "G[C>G]C",
                      "G[C>G]G", "G[C>G]T", "G[C>T]A", "G[C>T]C", "G[C>T]G", "G[C>T]T",
                      "G[T>A]A", "G[T>A]C", "G[T>A]G", "G[T>A]T", "G[T>C]A", "G[T>C]C",
                      "G[T>C]G", "G[T>C]T", "G[T>G]A", "G[T>G]C", "G[T>G]G", "G[T>G]T",
                      "T[C>A]A", "T[C>A]C", "T[C>A]G", "T[C>A]T", "T[C>G]A", "T[C>G]C",
                      "T[C>G]G", "T[C>G]T", "T[C>T]A", "T[C>T]C", "T[C>T]G", "T[C>T]T",
                      "T[T>A]A", "T[T>A]C", "T[T>A]G", "T[T>A]T", "T[T>C]A", "T[T>C]C",
                      "T[T>C]G", "T[T>C]T", "T[T>G]A", "T[T>G]C", "T[T>G]G", "T[T>G]T")
  
  # Needs sorting
  input <- coerceObj(x = x, to = "data.frame")
  sigNames <- getSignatureIdentifiers(x)
  
  EXPLD <- sapply(mutType.labels, (function(pat){
    tpat <- sub("\\[", "\\\\[", 
                sub("\\]", "\\\\]", pat))
    input[grepl(tpat, rownames(input)),]
  }), simplify = FALSE, USE.NAMES = TRUE)
  
  OUT <- base::as.data.frame(t(sapply(EXPLD, function(xi) {
    base::as.numeric(apply(xi, 2, sum, na.rm = TRUE))
  })), row.names = mutType.labels)
  colnames(OUT) <- sigNames
  
  if (asMutationSignatures){
    return(mutSignatures::as.mutation.signatures(OUT))
  }
  
  altOut <- lapply(1:ncol(OUT), (function(j){
    curSGN <- base::as.numeric(OUT[,j])
    iSig <- data.frame(curSGN, row.names = mutType.labels)
    colnames(iSig) <- sigNames[j]
    
    AttDF <- do.call(rbind, sapply(mutType.labels, function(mmd) {
      zi <- EXPLD[[mmd]]
      nuNames <- gsub("[ACGT]\\[[ACGT]>[ACGT]\\][ACGT]", "...", rownames(zi))
      nuNums <- base::as.data.frame(rbind(zi[,j]))
      names(nuNums) <- nuNames
      nuNums
    }, simplify = FALSE, USE.NAMES = TRUE))
    
    # out
    cbind(iSig, AttDF)
  }))
  names(altOut) <- sigNames
  return(altOut)
}


matchSignatures <- function(mutSign, 
                            reference = NULL, 
                            method = 'cosine',
                            threshold = 0.5,
                            plot = "TRUE")
{
  # avoid NOTEs
  newSign <- NULL
  refSign <- NULL
  
  if(is.null(reference)){
    my.ref <- getCosmicSignatures()
  } else if (class(reference) == "mutationSignatures") {
    my.ref <- reference
  } else {
    stop("reference: wrong data type. Provide NULL or a 'mutationSignatures'-class object")
  }
  if(class(mutSign) != "mutationSignatures") {
    stop("mutSign: wrong data type. Provide a 'mutationSignatures'-class object")
  }
  if (sum(!getMutationTypes(mutSign) %in% getMutationTypes(my.ref)) != 0 |
      sum(!getMutationTypes(my.ref) %in% getMutationTypes(mutSign)) != 0)
    stop("non-compatible mutation types")
  
  # Coerce to tabular objects
  my.ref2 <- my.ref@mutationFreq
  dimnames(my.ref2) <- list(my.ref@mutTypes[,1], 
                           my.ref@signatureId[,1])

  mutSign2 <- mutSign@mutationFreq
  dimnames(mutSign2) <- list(mutSign@mutTypes[,1], 
                             mutSign@signatureId[,1])
  
  my.ref <- my.ref2
  mutSign <- mutSign2[rownames(my.ref), ]
  
  #Debug
  #message("Debug - proceeded! :-)")
  
  distMat <- sapply(1:ncol(mutSign), function(i){
    TMP <- cbind(tmpSig=mutSign[,i], my.ref)
    TMPdist <- proxy::dist(TMP, method = method, by_rows = FALSE)
    TMPdist[1:ncol(my.ref)]
  })
  
  dimnames(distMat) <- list(colnames(my.ref), colnames(mutSign))
  p <- NULL
  nuDF <- NULL
  
  if(plot) {
    
    nuDF <- do.call(rbind, lapply(1:nrow(distMat), function(i){
      do.call(rbind, lapply(1:ncol(distMat), function(j){
        data.frame(newSign = colnames(distMat)[j],
                   refSign = rownames(distMat)[i],
                   dist = distMat[i,j],
                   stringsAsFactors = FALSE)
      }))
    }))
    
    fillLims <- c(round(min(nuDF$dist)), round(max(nuDF$dist)))
    nuDF$dist[nuDF$dist >= fillLims[2]] <- fillLims[2]
    nuDF$dist[nuDF$dist <= fillLims[1]] <- fillLims[1]
    
    p <- ggplot(nuDF, aes(y=newSign, x=refSign))
    p <- p + geom_tile(aes(fill=dist), width=.875, height=.875)
    p <- p + scale_y_discrete(limits=rev(colnames(distMat)))
    p <- p + scale_x_discrete(limits=rownames(distMat))
    
    p <- p + theme_minimal(base_size = 11) + labs(x = "", y = "")
    p <- p + labs(title = "Signature Comparison")
    #p <- p + scale_fill_gradient2(low = "#f40000", mid = "#ffe9e9", high = "white",
    #                              midpoint = as.numeric(quantile(distMat, probs = 0.1, na.rm = TRUE)),
    #                              guide = "colourbar", 
    #                              name = paste(method, "\ndistance", sep = ""))
    #p <- p + scale_fill_gradient(low = "red2", high = "white",
    #                             guide = "colourbar", 
    #                             name = paste(method, "\ndistance", sep = ""))
    
    nuMid <- base::as.numeric(quantile(distMat, probs = 0.2, na.rm = TRUE))
    if (!is.null(threshold)) {
      if (!is.na(base::as.numeric(threshold[1]))){
        nuMid <- base::as.numeric(threshold[1])
      }  
    } 
    p <- p + scale_fill_gradient2(low = "#f40000", mid = "#ffe9e9", high = "white",
                                  midpoint = nuMid,
                                  guide = "colourbar", 
                                  name = paste(method, "\ndistance", sep = ""), 
                                  limits = fillLims)
    p <- p + theme(legend.position = c('right'), # position the legend in the upper left
                   legend.justification = 0, # anchor point for legend.position.
                   legend.text = element_text(size = 9, color = 'gray10'),
                   legend.title = element_text(size = 11),
                   plot.title = element_text(size = 13, face = 'bold', color = 'gray10', hjust = 0.5),
                   axis.text = element_text(face = 'bold'),
                   panel.grid.major.y = element_blank(),
                   panel.grid.major.x = element_blank(),
                   axis.text.x=element_text(angle = 90, hjust = 1 , vjust = 0.5,
                                            margin=margin(-5,0,-15,0)),
                   axis.text.y=element_text(hjust = 1, vjust = 0.5,
                                            margin = margin(0,3,0,0)))
    #print(p)
  }
  out <- list()
  out[["distanceMatrix"]] <- distMat
  out[["distanceDataFrame"]] <- nuDF
  out[["plot"]] <- p
  
  return(out)
  
}




#package.skeleton(name = "mutSignatures", code_files = "core_mutSignatures_scr_5.R")
dami82/mutSignatures_dev documentation built on May 17, 2019, 7:02 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
dami82/mutSignatures_dev
Decipher Mutational Signatures from Somatic Mutational Catalogs

R/core_mutSignatures_scr_5.R
In dami82/mutSignatures_dev: Decipher Mutational Signatures from Somatic Mutational Catalogs

R Package Documentation

Browse R Packages

We want your feedback!

dami82/mutSignatures_dev Decipher Mutational Signatures from Somatic Mutational Catalogs

R/core_mutSignatures_scr_5.R In dami82/mutSignatures_dev: Decipher Mutational Signatures from Somatic Mutational Catalogs

R Package Documentation

Browse R Packages

We want your feedback!

dami82/mutSignatures_dev
Decipher Mutational Signatures from Somatic Mutational Catalogs

R/core_mutSignatures_scr_5.R
In dami82/mutSignatures_dev: Decipher Mutational Signatures from Somatic Mutational Catalogs