R/s03_all_functions.R

Defines functions simplifySignatures sortByMutations table2df resolveMutSignatures removeMismatchMut processVCFdata matchSignatures prelimProcessAssess plotMutTypeProfile plotSignExposures importVCFfiles getCosmicSignatures frequencize filterSNV extractXvarlinkData attachMutType attachContext setMutClusterParams deconvoluteMutCounts chihJenNMF alexaNMF

Documented in alexaNMF attachContext attachMutType chihJenNMF deconvoluteMutCounts extractXvarlinkData filterSNV frequencize getCosmicSignatures importVCFfiles matchSignatures plotMutTypeProfile plotSignExposures prelimProcessAssess processVCFdata removeMismatchMut resolveMutSignatures setMutClusterParams simplifySignatures sortByMutations table2df

#
##  ~~~~~~~~~~~~~~~~~~~~~~~~~
### ~~~~~ MutSignatures ~~~~~
##  ...all functions...
#   ~~~~~~~~~~~~~~~~~~~~~~~~~

# Damiano Fantini, Ph.D.
# 2020-Mar-05

###
##### Custom Functions - core package
###

#' Add Weak Mutation TYpes
#' 
#' Restore Mutation Types that were initially excluded because a low number of total counts.
#' 
#' 
#' @param mutationTypesToAddSet Set of mutations to restore
#' @param processes_I Set of Mutational Processes
#' @param processesStd_I Set of standard deviations of all Mutational Processes
#' @param Wall_I Set of all W matrices previously extracted
#' @param genomeErrors_I Set of all residuals
#' @param genomesReconstructed_I Fitted Values according to the most likely Model
#'
#' @return Output is the final result of the deconvolution process
#' 
#' @details This is one of the core functions included in the original mutSignatures R library, 
#' and in the WTSI MATLAB framework. This is an internal function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'   \item WTSI framework: \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3588146/}
#'  }
#' 
#' @keywords internal
addWeak <- function (mutationTypesToAddSet, 
                     processes_I, 
                     processesStd_I, 
                     Wall_I, 
                     genomeErrors_I, 
                     genomesReconstructed_I) 
{
  if (length(mutationTypesToAddSet) > 0 & mutationTypesToAddSet[1] > 0) {
    totalMutTypes <- nrow(Wall_I) + length(mutationTypesToAddSet)
    processes <- matrix(0, nrow = totalMutTypes, ncol = ncol(processes_I))
    processesStd <- matrix(0, nrow = totalMutTypes, ncol = ncol(processesStd_I))
    Wall <- matrix(0, nrow = totalMutTypes, ncol = ncol(Wall_I))
    genomeErrors <- lapply(1:length(genomeErrors_I), (function(i) {
      matrix(0, nrow = totalMutTypes, ncol = ncol(genomeErrors_I[[1]]))
    }))
    genomesReconstructed <- lapply(1:length(genomesReconstructed_I), 
                                   (function(i) {
                                     matrix(0, 
                                            nrow = totalMutTypes, 
                                            ncol = ncol(genomesReconstructed_I[[1]]))
                                   }))
    origArrayIndex <- 1
    for (i in 1:totalMutTypes) {
      #message(i)
      if (!(i %in% mutationTypesToAddSet)) {
        processes[i, ] <- processes_I[origArrayIndex, ]
        processesStd[i, ] <- processesStd_I[origArrayIndex,]
        Wall[i, ] <- Wall_I[origArrayIndex, ]
        for (j in 1:length(genomeErrors_I)) {
          genomeErrors[[j]][i, ] <- genomeErrors_I[[j]][origArrayIndex,  ]
        }
        for (j in 1:length(genomesReconstructed_I)) {
          genomesReconstructed[[j]][i, ] <- genomesReconstructed_I[[j]][origArrayIndex,   ]
        }
        origArrayIndex <- origArrayIndex + 1
      }
    }
  }
  else {
    processes <- processes_I
    processesStd <- processesStd_I
    Wall <- Wall_I
    genomeErrors <- genomeErrors_I
    genomesReconstructed <- genomesReconstructed_I
  }
  weakAdded.list <- list()
  weakAdded.list$processes <- processes
  weakAdded.list$processesStd <- processesStd
  weakAdded.list$Wall <- Wall
  weakAdded.list$mutCountErrors <- genomeErrors
  weakAdded.list$mutCountReconstructed <- genomesReconstructed
  return(weakAdded.list)
}

#' Bootstrap a Mutation Count Matrix.
#' 
#' Rearrange a Mutation count Matrix using the multivariate normal distribution. 
#' The function returns a bootstrapped Mutation Count matrix whose dimensions 
#' are identical to the input matrix.
#' 
#' @param genomes a numeric matrix of Mutation Counts.
#' Rows correspond to Mutation Types, columns to different samples.
#' @param seed integer, set a seed to obtain reproducible results. Defaulted to NULL
#' 
#' @return a numeric matrix of bootstrapped Mutation Counts.
#' Rows correspond to Mutation Types, columns to different samples.
#'
#' @details This is one of the core functions included in the original mutSignatures R library, 
#' and in the WTSI MATLAB framework. This is an internal function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'   \item WTSI framework: \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3588146/}
#'  }
#' 
#' @examples 
#' x <- cbind(c(10, 100, 20, 200, 30, 5), 
#'            c(100, 90, 80, 100, 11, 9))
#' mutSignatures:::bootstrapCancerGenomes(x)                
#'
#' 
#' @importFrom stats rmultinom
#' 
#' @keywords internal
bootstrapCancerGenomes <- function (genomes, seed = NULL)
{
  if (!is.null(seed))
    try(set.seed(seed), silent = TRUE)
  
  genome.col.sums <- suppressWarnings(apply(genomes, 2, sum))
  my.denom <- matrix(genome.col.sums, 
                     ncol = ncol(genomes), 
                     nrow = nrow(genomes), 
                     byrow = TRUE)
  norm.genomes <- suppressWarnings(genomes/my.denom)
  
  bootstrapGenomes <- suppressWarnings(
    lapply(1:ncol(genomes), (function(i) {
      tmp <- norm.genomes[,i]
      tmp <- stats::rmultinom(1, size = genome.col.sums[i], prob = tmp)
      as.numeric(tmp[,1])
    })))
  bootstrapGenomes <- suppressWarnings(do.call(cbind, bootstrapGenomes))
  return(bootstrapGenomes)
}

#' Evaluate Results Stability.
#' 
#' Perform a final Stability check comparing the results from all iterations of the analysis.
#' 
#' @param wall numeric matrix including the w results from all the iterations of the analysis
#' @param hall numeric matrix including the h results from all the iterations of the analysis
#' @param params list including all the parameters required for running tha analysis
#'
#' @return list including all results from the stability checks.
#' This includes the most likely signatures (cen-troids) and exposures. 
#' All information for plotting the silhoueette plot will also be returned.
#'  
#' @details The function evaluates the results from all iterations by performing 
#' a silhouette check. A silhouette plot will also be plotted.
#' This is one of the core functions included in the original mutSignatures R library, 
#' and in the WTSI MATLAB framework. This is an internal function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'   \item WTSI framework: \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3588146/}
#'  }
#' 
#' @examples 
#' # Obtain sample data
#' TMP <- mutSignatures:::getTestRunArgs("evaluateStability")
#' Y <- mutSignatures:::evaluateStability(wall = TMP$W, 
#'                                        hall = TMP$H, 
#'                                        params = TMP$params)
#' 
#'
#' @importFrom stats runif sd
#' @importFrom proxy dist
#' @importFrom pracma squareform
#' @importFrom graphics barplot abline title
#'  
#' @keywords internal
evaluateStability <- function (wall, 
                               hall, 
                               params) 
{
  BIG_NUMBER <- 100
  CONVERG_ITER <- 10
  CONVERG_CUTOFF <- 0.005
  TOTAL_INIT_CONDITIONS <- 5
  num_processesToExtract <- params$num_processesToExtract
  num_totReplicates <- params$num_totReplicates
  distanceFunction <- params$distanceFunction
  minClusterDist <- BIG_NUMBER
  totalIter <- ncol(wall)/num_processesToExtract
  idx = matrix(0, nrow = nrow(hall), ncol = 1)
  clusterCompactness <- matrix(0, 
                               nrow = num_processesToExtract, 
                               ncol = totalIter)
  iStartDataSet = seq(1, ncol(wall), by = num_processesToExtract)
  iStartingDataSet = iStartDataSet[sample(1:totalIter)]
  for (iInitData in 1:min(c(TOTAL_INIT_CONDITIONS, totalIter))) {
    #message(iInitData)
    iStartingData <- iStartingDataSet[iInitData]
    iEnd <- iStartingData + num_processesToExtract - 1
    centroids <- cbind(wall[, iStartingData:iEnd])
    centroidsTest <- sapply(1:ncol(centroids), (function(kk) {
      stats::runif(nrow(centroids))
    }))
    countIRep <- 0
    for (iRep in 1:num_totReplicates) {
      #message(iRep)
      tmp.tab <- base::t(cbind(centroids, wall))
      tmp.pdist <- base::as.vector(proxy::dist(tmp.tab, distanceFunction))
      if (num_processesToExtract > 1)
        tmp.pdist[tmp.pdist == 1 | tmp.pdist == 0] <- NA
      allDist <- pracma::squareform(tmp.pdist)
      
      cd.colRange <- (ncol(centroids) + 1):ncol(allDist)
      centroidDist = t(allDist[1:ncol(centroids), cd.colRange])
      
      jRange <- sort(1:num_processesToExtract)
      for (jIndex in 1:num_processesToExtract) {
        j <- jRange[jIndex]
        for (i in seq(1, ncol(wall), by = num_processesToExtract)) {
          #message(i)
          iRange = i:(i + num_processesToExtract - 1)
          tmp.min <- min(centroidDist[iRange, j], na.rm = TRUE)
          Ind <- which(centroidDist[iRange, j] == tmp.min)[1]
          centroidDist[iRange[Ind], ] <- BIG_NUMBER
          idx[iRange[Ind], 1] <- j
        }
      }
      maxDistToNewCentroids <- 0
      for (i in 1:ncol(centroids)) {
        tmp.dset <- wall[, base::as.vector(idx == i)]
        centroids[, i] <- apply(tmp.dset, 1, mean)
        tmp.dset <- base::t(cbind(centroids[, i], 
                            centroidsTest[,i]))
        tmp.pdist <- base::as.vector(proxy::dist(tmp.dset, distanceFunction))
        tmp.pdist[tmp.pdist == 1 | tmp.pdist == 0] <- NA
        maxDistToNewCentroids <- max(maxDistToNewCentroids, 
                                     tmp.pdist, 
                                     na.rm = TRUE)
      }
      if (maxDistToNewCentroids < CONVERG_CUTOFF) {
        countIRep <- countIRep + 1
        
      } else {
        countIRep <- 0
        centroidsTest <- centroids
      }
      
      if (countIRep == CONVERG_ITER) {
        break
      }
    }
    for (i in 1:ncol(centroids)) {
      tmp.tab <- base::t(cbind(centroids[, i], 
                         wall[, base::as.vector(idx == i)]))
      tmp.pdist <- base::as.vector(proxy::dist(tmp.tab, distanceFunction))
      tmp.pdist[tmp.pdist == 1 | tmp.pdist == 0] <- NA
      clusterDist <- pracma::squareform(tmp.pdist)
      clusterCompactness[i, ] = clusterDist[1, 2:ncol(clusterDist)]
    }
    dist.test <- apply(clusterCompactness, 2, (function(clm) {
      base::mean(clm, na.rm = TRUE)
    }))
    if (sum(minClusterDist > dist.test) == length(dist.test)) {
      centroidsFinal <- centroids
      idxFinal <- idx
      clusterCompactnessFinal <- clusterCompactness
    }
  }
  centroids <- base::t(centroidsFinal)
  idx <- idxFinal
  clusterCompactness <- clusterCompactnessFinal
  centDist <- apply(clusterCompactness, 1, (function(tmprw) {
    base::mean(tmprw, na.rm = TRUE)
  }))
  centDistInd <- base::order(centDist, decreasing = FALSE)
  clusterCompactness <- clusterCompactness[centDistInd, ]
  centroids <- centroids[centDistInd, ]
  idxNew <- idx
  for (i in 1:num_processesToExtract) {
    idxNew[base::as.vector(idx == centDistInd[i]), 1] <- i
  }
  idx <- idxNew
  if (num_processesToExtract > 1) {
    processStab <- silhouetteMLB(data = t(wall), fac = idx, 
                                 distanceFunction)
    processStabAvg <- matrix(0, nrow = 1, ncol = num_processesToExtract)
    for (i in 1:num_processesToExtract) {
      processStabAvg[1, i] = base::mean(processStab[idx == i])
    }
  } else {
    # Adjusted params for a 1-class silhouette!
    tmp.tab <- base::t(cbind(centroids, wall))
    tmp.pdist <- base::as.vector(proxy::dist(tmp.tab, distanceFunction))
    tmp.pdist[tmp.pdist == 1 | tmp.pdist == 0] <- NA
    allDist <- pracma::squareform(tmp.pdist)
    processStab <- 1 - base::t(allDist[1, 2:ncol(allDist)])
    
    # Silhouette plot
    xrange <- c(min(processStab), max(processStab))
    xrange[1] <- ifelse(xrange[1] > 0, 0, (-1.15) * abs(xrange[1]))
    xrange[2] <- 1.15
    graphics::barplot(base::sort(base::as.numeric(processStab), decreasing = TRUE), 
                      col = "gray20",
                      xlim = xrange,
                      horiz = TRUE, xlab = "Silhouette Value", 
                      ylab = "", 
                      main = "Silhouette Plot", 
                      border = "gray20")
    graphics::abline(v=0)
    graphics::title(ylab="Iter. Results (by Group)", line=1, cex.lab=1, font = 2)
    
    processStabAvg <- apply(processStab, 2, (function(clmn) {
      base::mean(clmn, na.rm = TRUE)
    }))
  }
  
  if (num_processesToExtract > 1) {
    centroidStd <- matrix(0, nrow = nrow(centroids), ncol = ncol(centroids))
  } else {
    centroidStd <- matrix(0, ncol = length(centroids), nrow = 1)
  }
  
  for (i in 1:num_processesToExtract) {
    centroidStd[i, ] <- apply(wall[, idx == i], 1, (function(rw) {
      stats::sd(rw, na.rm = TRUE)
    }))
  }
  centroids <- base::t(cbind(centroids))
  centroidStd <- base::t(centroidStd)
  idxS <- matrix(0, nrow = length(idx), ncol = 1)
  for (i in seq(1, ncol(wall), by = num_processesToExtract)) {
    iEnd <- i + num_processesToExtract - 1
    idxG <- idx[i:iEnd]
    for (j in 1:num_processesToExtract) {
      idxS[(i + j - 1), ] = which(idxG == j)
    }
  }
  exposure <- matrix(0, nrow = max(idxS), ncol(hall))
  exposureStd <- matrix(0, nrow = max(idxS), ncol(hall))
  for (i in 1:max(idxS)) {
    exposure[i, ] <- apply(hall[idx == i, ], 2, (function(cl) {
      base::mean(cl, na.rm = TRUE)
    }))
    exposureStd[i, ] <- apply(hall[idx == i, ], 2, (function(cl) {
      sd(cl, na.rm = TRUE)
    }))
  }
  
  # Fix to sign.to.extract.num = 1
  if (num_processesToExtract < 2){
    centroids <- base::t(centroids)
  }
  
  
  result.list <- list()
  result.list$centroids <- centroids
  result.list$centroidStd <- centroidStd
  result.list$exposure <- exposure
  result.list$exposureStd <- exposureStd
  result.list$idx <- idx
  result.list$idxS <- idxS
  result.list$processStab <- processStab
  result.list$processStabAvg <- processStabAvg
  result.list$clusterCompactness <- clusterCompactness
  return(result.list)
}

#' Remove Iterations that Generated Outlier Results.
#' 
#' Internal function from the WTSI framework, ported to R. This is a core 
#' function called from within a deconvoluteMutCounts() call. This
#' function removes iterations that generated outlier results.
#' 
#' @param wall numeric matrix combining w results from all iterations
#' @param hall numeric matrix combining h results from all iterations
#' @param cnt_errors numeric matrix combining all residuals from all iterations
#' @param cnt_reconstructed numeric matrix combining fitted values from all iterations
#' @param params list including alll parameters for running the analysis
#'
#' @return list including all data required for running the subsequent stability check
#'  
#' @details This is one of the core functions included in the original mutSignatures R library, 
#' and in the WTSI MATLAB framework. This is an internal function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'   \item WTSI framework: \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3588146/}
#'  }
#' 
#'   
#' @keywords internal
filterOutIterations <- function (wall, 
                                 hall, 
                                 cnt_errors, 
                                 cnt_reconstructed, 
                                 params) 
{
  num_processesToExtract <- params$num_processesToExtract
  thresh_removeLastPercent <- params$thresh_removeLastPercent
  num_totIterations <- ncol(wall)/num_processesToExtract
  tot.rm.iterations <- round(thresh_removeLastPercent * num_totIterations)
  if (tot.rm.iterations > 0) {
    closeness.mutCounts <- matrix(0, 
                                  nrow = num_totIterations, 
                                  ncol = 1)
    for (i in 1:num_totIterations) {
      closeness.mutCounts[i, ] <- base::norm(cnt_errors[[i]], "F")
    }
    indexClosenessGenomes <- order(closeness.mutCounts, 
                                   decreasing = TRUE)
    removeIterations <- indexClosenessGenomes[1:tot.rm.iterations]
    removeIterationSets <- matrix(0, 
                                  nrow = (num_processesToExtract * tot.rm.iterations), 
                                  ncol = 1)
    for (i in 1:tot.rm.iterations) {
      iStart <- num_processesToExtract * (removeIterations[i] - 1) + 1
      iEnd <- num_processesToExtract * removeIterations[i]
      tmpRowRange <- (num_processesToExtract * (i - 1) + 1):(num_processesToExtract * i)
      removeIterationSets[tmpRowRange, ] <- iStart:iEnd
    }
    wall <- wall[, -removeIterationSets]
    hall <- hall[-removeIterationSets, ]
    cnt_errors <- cnt_errors[-removeIterations]
    cnt_reconstructed <- cnt_reconstructed[-removeIterations]
  }
  res.list <- list()
  res.list$Wall <- wall
  res.list$Hall <- hall
  res.list$mutCounts.errors <- cnt_errors
  res.list$mutCounts.reconstructed <- cnt_reconstructed
  return(res.list)
}

#' Generate Arguments for Running Examples and Mock Runs.
#' 
#' This function generates objects that can be used for 
#' running the examples included in the package documentation files, 
#' as well as some simple mutSignature analyses. Note that his function is not exported.
#' 
#' @param testN string, name of the function that we want to test
#'
#' @return an object (typically, a list) including sample data to run
#' analyses or to test mutSignatures functions.
#'  
#' @details This is an internal function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'   \item WTSI framework: \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3588146/}
#'  }
#' 
#' @keywords internal
getTestRunArgs <- function (testN = "evaluateStability") 
{
  out <- list()
  
  if (testN == "evaluateStability") {
    out$W <- do.call(cbind, lapply(1:20, (function(i) {
      cbind(c(runif(4, 0.05, 0.15), 
              c(1e-15 * runif(1,1, 9)), 
              runif(3, 0.003, 0.007), 
              runif(9, 0.04, 0.09)), 
            c(runif(3, 0.08, 0.18), 
              c(1e-15 * runif(1, 1, 9)), 
              runif(1, 0.03, 0.07), 
              runif(3, 0.004, 0.009), 
              runif(9, 0.04, 0.09)))
    })))
    out$H <- do.call(rbind, lapply(1:40, (function(i) {
      c(runif(1, 0.005, 0.099), 
        runif(2, 50, 800), 
        runif(2, 0.005, 0.099), 
        runif(1, 50, 750), 
        runif(2, 0.005, 0.099), 
        runif(1, 20, 800))
    })))
    out$params$num_processesToExtract <- 2
    out$params$num_totReplicates <- 20
    out$params$distanceFunction <- "cosine"
    out$params$analyticApproach <- "denovo"
    
  } else if (testN == "removeWeak") {
    
    out <- list()
    out$data <- sapply(1:12, function(i) {sample(50:99, size = 50, replace = TRUE) })
    out$data[6, ] <- sample(0:5, size = 12, replace = TRUE)
    out$params <- as.list(setMutClusterParams())
    
  } else if (testN == "silhouetteMLB") {
    
    out <- list()
    out$data <- rbind(sapply(1:10, function(i) {sample(x = 0:15, size = 10)}),
                      sapply(1:10, function(i) {sample(x = 11:29, size = 10)}))
    out$fac <- factor(c(rep(1, 10), rep(2, 10)))
    
    
  } else if (testN %in% c("alexaNMF", "chihJenNMF")) {
    
    out <- list()
    
    zz1 <- sapply(1:4, function(i) {base::sample(30:58, replace = TRUE, size = 20)})
    zz2 <- sapply(1:2, function(i) {base::sample(52:80, replace = TRUE, size = 20)})
    zz3 <- sapply(1:4, function(i) {base::sample(60:120, replace = TRUE, size = 15)})
    zz4 <- sapply(1:2, function(i) {base::sample(15:40, replace = TRUE, size = 15)})
    
    out$v <- rbind(
      cbind(zz1, zz2),
      cbind(zz3, zz4))
    
    out$r <- 2
    out$params <- as.list(setMutClusterParams())
    out$params$eps <- 20000
    out$params$stopconv <- 500
    
  } else if (testN %in% c("decipherMutationalProcesses", "deconvoluteMutCounts", 
                          "extractSignatures")) {
    
    out <- list()
    
    muts <- cbind(
      sapply(1:12, function(i) {c(sample(5:15, size = 20, replace = TRUE),
                                  sample(10:25, size = 10))}),
      sapply(1:8, function(i) {c(sample(10:25, size = 20, replace = TRUE),
                                 sample(5:15, size = 10))}))
    rownames(muts) <- c("A[A>C]A", "C[A>C]A", "G[A>C]A", "T[A>C]A", "T[A>C]C", "T[A>C]G", 
                        "A[A>G]A", "C[A>G]A", "G[A>G]A", "T[A>G]A", "T[A>G]C", "T[A>G]G", 
                        "A[A>T]A", "C[A>T]A", "G[A>T]A", "T[A>T]A", "T[A>T]C", "T[A>T]G", 
                        "A[G>C]A", "C[G>C]A", "G[G>C]A", "T[G>C]A", "T[G>C]C", "T[G>C]G", 
                        "A[G>T]A", "C[G>T]A", "G[G>T]A", "T[G>T]A", "T[G>T]C", "T[G>T]G")
    colnames(muts) <- paste0("SMPL.", 1001:1020)
    out$muts <- mutSignatures::as.mutation.counts(as.data.frame(muts))
    out$params <- setMutClusterParams(2)
    out$params@params$num_totIterations <- 10
    out$params@params$niter <- 1000
    
  } else if (testN == "custom_fcnnls") {
    
    out <- list()
    out$muts <- cbind(A = c(10, 15, 23, 15, 23,  5, 2, 6, 8), 
                      B = c(12, 13, 22, 14, 18,  1, 5, 2, 7), 
                      C = c(12,  5, 10,  5, 13, 22, 1, 1, 9))
    out$signs <- cbind(S1 = c(0.16, 0.06, 0.13, 0.06, 0.18, 0.28, 0.01, 0.01, 0.11), 
                       S2 = c(0.09, 0.14, 0.22, 0.15, 0.21, 0.04, 0.02, 0.06, 0.07))
    
  } else if (testN == "custom_cssls") {
    
    CtC <- cbind(c(0.1728, 0.1179), c(0.1179, 0.1532))
    CtA <- cbind(c(10.76, 14.37), c(13.33, 9.05))
    Pset <- cbind(c(FALSE, TRUE), c(TRUE, FALSE))
    
    out <- list(CtC = CtC, 
                CtA = CtA, 
                Pset = Pset)
    
  } else if (testN == "extractXvarlinkData") {
    
    out <- c("getma.org/?cm=var&var=hg19,9,107576738,C,T&fts=all",
             "",
             "getma.org/?cm=var&var=hg19,9,107594970,G,A&fts=all",
             "getma.org/?cm=var&var=hg19,9,107599275,T,A&fts=all",
             "getma.org/?cm=var&var=hg19,9,107591257,C,T&fts=all",
             "",
             "getma.org/?cm=var&var=hg19,16,2336768,G,A&fts=all",
             "getma.org/?cm=var&var=hg19,16,2347484,G,C&fts=all",
             "getma.org/?cm=var&var=hg19,16,2339472,A,G&fts=all",
             "getma.org/?cm=var&var=hg19,16,2358451,C,T&fts=all",
             "getma.org/?cm=var&var=hg19,1,94522197,G,T&fts=all",
             "getma.org/?cm=var&var=hg19,1,94548924,G,A&fts=all") 
    
  } else if (testN == "removeMismatchMut") {
    
    out <- data.frame(CHROM = c("chr1", "chr1", "chr2", "chr3", "chr3", "chr5"),
                      POS = c(12144, 155464, 4232, 35222, 35663, 244425),
                      REF = c("A", "T", "G", "G", "T", "G"),
                      ALT = c("G", "G", "T", "A", "C", "A"),
                      INFO = c(".", ".", ".", "ref133121", ".", "."),
                      context = c("TAA", "ATA", "CGG", "CCG", "ATA", "AAT"), 
                      stringsAsFactors = FALSE)
    
  } else if (testN == "resolveMutSignatures") {
    out <- list()
    
    muts <- cbind(
      sapply(1:12, function(i) {c(sample(5:15, size = 20, replace = TRUE),
                                  sample(10:25, size = 10))}),
      sapply(1:8, function(i) {c(sample(10:25, size = 20, replace = TRUE),
                                 sample(5:15, size = 10))}))
    rownames(muts) <- c("A[A>C]A", "C[A>C]A", "G[A>C]A", "T[A>C]A", "T[A>C]C", "T[A>C]G", 
                        "A[A>G]A", "C[A>G]A", "G[A>G]A", "T[A>G]A", "T[A>G]C", "T[A>G]G", 
                        "A[A>T]A", "C[A>T]A", "G[A>T]A", "T[A>T]A", "T[A>T]C", "T[A>T]G", 
                        "A[G>C]A", "C[G>C]A", "G[G>C]A", "T[G>C]A", "T[G>C]C", "T[G>C]G", 
                        "A[G>T]A", "C[G>T]A", "G[G>T]A", "T[G>T]A", "T[G>T]C", "T[G>T]G")
    colnames(muts) <- paste0("SMPL.", 1001:1020)
    
    sigs <- cbind(
      SIG.1 = c(sample(5:15, size = 20, replace = TRUE), sample(10:25, size = 10)),
      SIG.2 = c(sample(10:25, size = 20, replace = TRUE),sample(5:15, size = 10)))
    for ( i in 1:ncol(sigs)) {
      sigs[, i] <- sigs[, i] / sum(sigs[, i])  
    }
    
    rownames(sigs) <- c("A[A>C]A", "C[A>C]A", "G[A>C]A", "T[A>C]A", "T[A>C]C", "T[A>C]G", 
                        "A[A>G]A", "C[A>G]A", "G[A>G]A", "T[A>G]A", "T[A>G]C", "T[A>G]G", 
                        "A[A>T]A", "C[A>T]A", "G[A>T]A", "T[A>T]A", "T[A>T]C", "T[A>T]G", 
                        "A[G>C]A", "C[G>C]A", "G[G>C]A", "T[G>C]A", "T[G>C]C", "T[G>C]G", 
                        "A[G>T]A", "C[G>T]A", "G[G>T]A", "T[G>T]A", "T[G>T]C", "T[G>T]G")
    
    out$muts <- mutSignatures::as.mutation.counts(as.data.frame(muts))
    out$sigs <- mutSignatures::as.mutation.signatures(as.data.frame(sigs))
    
    
  } else if (testN == "countMutTypes") {
    
    out <- data.frame(CHROM = "chr1", 
                      mutation = c("A[C>A]A", "A[C>A]C", "A[C>A]G", "A[C>A]T", 
                                   "A[C>G]A", "A[C>G]C", "A[C>G]G", "A[C>G]T", 
                                   "A[C>T]A", "A[C>T]C", "A[C>T]G", "A[C>T]T", 
                                   "A[T>A]A", "A[T>A]C", "A[T>A]G", "A[T>A]T", 
                                   "A[T>C]A", "A[T>C]C", "A[T>C]G", "A[T>C]T"),
                      sample = paste0("S.", c(rep(1, 10), rep(2, 10))))
    
    
  } else if (testN == "filterSNV") {
    
    out <- data.frame(CHR = "chr1", 
                      POS = c(972116, 1647600, 11529902, 16624394, 21617326,
                              26923997, 30718807, 36309634, 44805336, 99288016, 
                              108181979, 146846852, 150067867, 152843497, 154054711,
                              158292407, 160895402, 161042822, 165667287, 166620791), 
                      REF = c("G","C", "T", "G", "G", "A", "C", "G", "C", "G", 
                              "C", "G", "T", "C", "A", "A", "G", "C", "C", "T"), 
                      ALT = c("A","G", "C", "GC", "A", "T", "G", "T", "A", "T", 
                              "A", "C", "G", "G", "G", "TT", "T", "G", "G", "A"), 
                      SAMPLEID = paste0("smpl.", c(rep(1, 10), rep(2, 10))), 
                      stringsAsFactors = FALSE)
    
  } else  {
    out <- NULL    
  }
  return(out)
}

#' Add Leading Zeros to Numbers.
#' 
#' Internal function to convert a numeric vector into a character vector, where all elements
#' have the same number of characters (nchar). This is obtained by pasting a series of leading zeros
#' (or other character) to each number in the input vector.
#' 
#' @param n numeric vector whose numbers are to be transformed
#' @param m maximum number that will be used to define how many leading zeros to attach
#' @param char string (typically, a single character). This character is used to fill the leading space.
#' Defaults to 0.
#' @param na.value value used to fill mising values. Defaults to NA
#'
#' @return numeric vector of length equal to length(n), where all numbers are converted 
#' to character and modified by attaching the required number of leading zeros (characters).
#' 
#' @details This is one of the core functions included in the original mutSignatures R library, 
#' and in the WTSI MATLAB framework. This is an internal function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'   \item WTSI framework: \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3588146/}
#'  }
#'
#' @examples 
#' n = c(5:12, NA, 9:11)
#' m = 111
#' mutSignatures:::leadZeros(n=n, m=m)    
#' 
#' @keywords internal
leadZeros <- function (n, m, char = "0", na.value = NA) 
{
  max.zeros <- nchar(base::as.character(round(m)))
  tmp.digits <- nchar(base::as.character(round(n)))
  zeros.toAdd <- max.zeros - tmp.digits
  
  returnVect <- sapply(1:length(n), function(i){
    if (is.na(zeros.toAdd[i])) {
      na.value
    } else if (zeros.toAdd[i] >= 0) {
      paste(c(rep(char, zeros.toAdd[i]), base::as.character(round(n[i]))), sep = "", collapse = "")    
    } else {
      na.value
    }
  })
  return(returnVect) 
}


#' Remove Mutation Types Not Meeting the Threshold.
#' 
#' Remove mutation types that account for a total number of mutations below a defined threshold.
#' 
#' @param input_mutCounts numeric matrix of Mutation Counts
#' @param params object (list) including all parameters required for running the analysis
#'
#' @return List including two elements:
#' \enumerate{
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'  }
#' 
#' @details This is one of the core functions included in the original mutSignatures R library, 
#' and in the WTSI MATLAB framework. This is an internal function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'   \item WTSI framework: \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3588146/}
#'  }
#' 
#' 
#' @examples 
#' x <- mutSignatures:::getTestRunArgs("removeWeak")
#' nrow(x$data)
#' y <- mutSignatures:::removeWeak(input_mutCounts = x$data, params = x$params)
#' nrow(y)
#'   
#' @keywords internal
removeWeak <- function (input_mutCounts, 
                        params) 
{
  thresh_removeWeakMutTypes <- params$thresh_removeWeakMutTypes
  sum.counts <- apply(input_mutCounts, 1, sum)
  sum.counts.idx <- order(sum.counts, decreasing = FALSE)
  sorted.sum.counts <- sum.counts[sum.counts.idx]
  tot.mut.counts <- sum(input_mutCounts)
  tot.muttypes.toremove <- sum((sapply(1:length(sorted.sum.counts), 
                                       (function(i) {
                                         sum(sorted.sum.counts[1:i])
                                       }))/tot.mut.counts) < thresh_removeWeakMutTypes)
  return.list <- list()
  if (tot.muttypes.toremove > 0) {
    removed.mutset <- sum.counts.idx[c(1:tot.muttypes.toremove)]
    input_mutCounts <- input_mutCounts[-removed.mutset, ]
    return.list$removed.mutset <- removed.mutset
  }
  else {
    return.list$removed.mutset <- (-1)
  }
  return.list$output.mutCounts <- input_mutCounts
  return(return.list)
}

#' Silhouette Analysis.
#' 
#' Analyze the clustering quality and generate a Silhouette Plot.
#' 
#' @param data numeric matrix
#' @param fac clustering factor
#' @param method method to be used as distance function. Defaults to c("cosine")
#' @param plot logical, shall a barplot showing the cluster silhouettes be printed 
#' 
#' @return numeric vector including the silhouette values of the data poointts in the input matrix
#'
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'   \item \bold{Silhouette analysis in R}: \url{http://www.biotechworld.it/bioinf/2017/01/20/translating-matlabs-silhouette-function-to-r/}
#'  }
#'      
#' @importFrom cluster silhouette
#' @importFrom proxy dist
#' @importFrom graphics barplot abline title
#' 
#' @examples 
#' library(mutSignatures)
#' x <- mutSignatures:::getTestRunArgs("silhouetteMLB")
#' y <- silhouetteMLB(data = x$data, fac = x$fac)
#' y
#' 
#' @export
silhouetteMLB <- function (data, 
                           fac, 
                           method = "cosine", 
                           plot = TRUE) 
{
  if (nrow(data) != length(fac)) 
    stop("Bad input!")
  dist.matrix <- base::as.matrix(proxy::dist(x = data, method = method))
  sil.check <- cluster::silhouette(x = base::as.numeric(base::as.factor(fac)), 
                                   dist = dist.matrix)
  if (plot == TRUE) {
    tmp <- lapply(unique(sil.check[, 1]), (function(clid) {
      part.out <- sil.check[sil.check[, 1] == clid, ]
      part.out[order(part.out[, 3], decreasing = TRUE), 
               ]
    }))
    tmp <- do.call(rbind, tmp)
    xrange <- c(min(tmp[,3]), max(tmp[,3]))
    xrange[1] <- ifelse(xrange[1] > 0, 0, (-1.15) * abs(xrange[1]))
    xrange[2] <- 1.15
    graphics::barplot(tmp[nrow(tmp):1, 3], 
                      col = base::as.factor(tmp[nrow(tmp):1,1]),
                      xlim = xrange,
                      horiz = TRUE, xlab = "Silhouette Value", 
                      ylab = "", 
                      main = "Silhouette Plot", 
                      border = base::as.factor(tmp[nrow(tmp):1,1]))
    graphics::abline(v=0)
    graphics::title(ylab="Iter. Results (by Group)", line=1, cex.lab=1, font = 2)
  }
  return(base::as.vector(sil.check[, 3]))
}

#' Perform Non-negative Matrix Factorization using Brunet's Algotithm.
#' 
#' Perform Non-negative Matrix Factorization.
#' 
#' @param v numeric matrix of Mutation Type Counts
#' @param r numeric, number of signatures to extract
#' @param params list including all paramaters for running the analysis
#' 
#' @return list including all paramaters for running the analysis:
#' \enumerate{
#'   \item \bold{W} extracted signatures
#'   \item \bold{H} contribution of each signature in all the samples of the input mut count matrix
#' }
#'  
#' @details This is one of the core functions included in the original mutSignatures R library, 
#' and in the WTSI MATLAB framework. This is an internal function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'   \item WTSI framework: \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3588146/}
#'  }
#' 
#'  
#' @importFrom graphics plot points
#' @importFrom proxy dist
#' 
#' @examples 
#' x <- mutSignatures:::getTestRunArgs("alexaNMF")
#' y <- mutSignatures:::alexaNMF(v = x$v, r = x$r, params = x$params)
#' y$w[1:5, ]
#' 
#' @keywords internal
alexaNMF <- function(v, r, params)
{
  # Brunet algorithm, from Alexandrov NMF WTSI code
  # define params (hard-set)
  debug <- params$debug
  chk.step <- 50
  dot.eachSteps <- 2000
  
  # retireve user-defined params
  eps <- params$eps
  num.processes <- r
  stopconv <- params$stopconv
  niter <- params$niter
  err.threshold <- 1e-10
  stopRule <- ifelse(params$stopRule == "LA", "LA", "DF")
  
  # set.seed(231082)
  v <- base::as.matrix(v)
  rownames(v) <- NULL
  colnames(v) <- NULL
  
  # Double check input matrix
  if (min(v) < 0)
    stop("Matrix entries cannot be negative")
  if (min(apply(v, 1, sum)) == 0)
    stop("Entries cannot all be equal to 0")
  
  # Initialize W0 and H0
  W.k <- do.call(cbind, lapply(1:num.processes, (function(i){
    out.o <- runif(n = nrow(v), min = 0, max = 100)
    out.o[out.o < eps] <- eps
    out.o/sum(out.o)
  })))
  H.k <- matrix((1/num.processes), nrow = num.processes, ncol = ncol(v))
  
  # Initialize looping vars
  itr <- 1
  chk.j <- 1
  stationary.chk <- 0
  force.out <- 1
  
  # Debugging plot
  if (debug)
    graphics::plot(-10, xlim = c(1000, niter),
                   ylim = c( ifelse(stopRule == "DF", (0.1*err.threshold), 0.001) ,
                             ifelse(stopRule == "DF", 10, ncol(H.k))), log = "xy",
                   xlab = "Iteration", ylab = "Variation", main = "Convergence")
  
  # Initialize the objects for comparing dissimilarity
  # DF approach
  cons.old <- base::as.vector(W.k)
  # Alexandrov approach
  consold <- matrix(0, nrow = ncol(H.k), ncol = ncol(H.k))
  
  while (itr < niter){
    
    if (itr %% dot.eachSteps == 0) {
      if (stationary.chk > chk.step) {
        message(":", appendLF = FALSE)
      } else {
        message(".", appendLF = FALSE)
      }
    }
    
    delta.01 <- apply(W.k, 2, sum)
    H.k <- H.k * (base::t(W.k) %*% (v/(W.k %*% H.k))) / delta.01
    H.k[H.k < eps] <- eps
    
    W.tmp <- W.k * ((v/(W.k %*% H.k)) %*% base::t(H.k))
    W.k <- do.call(cbind, lapply(1:ncol(W.tmp), (function(ci){
      W.tmp[,ci] / sum(H.k[ci,])
    })))
    W.k[W.k<eps] <- eps
    
    # check convergence every 'chk.step' iterations
    if (itr > stopconv & itr %% chk.step == 0 & stopRule == "DF") {
      chk.j <- chk.j + 1
      H.k[H.k < eps] <- eps
      W.k[W.k < eps] <- eps
      cons <- base::as.vector(W.k)
      
      # compare to consold and reorder
      dist.measure <- proxy::dist(rbind(cons, cons.old), method = "cosine") [1]
      cons.old <- cons
      
      if (debug)
        points(itr, (dist.measure + (err.threshold*0.1)), pch = 19, col = "red2")
      
      # evaluate distance
      if (dist.measure < err.threshold) {
        stationary.chk <- stationary.chk + 1
      } else {
        stationary.chk <- 0
      }
      if (stationary.chk > (stopconv / chk.step)) {
        force.out <- 0
        message("$", appendLF = FALSE)
        break()
      }
    } else if (itr > stopconv & itr %% chk.step == 0 & stopRule == "LA") {
      chk.j <- chk.j + 1
      H.k[H.k < eps] <- eps
      W.k[W.k < eps] <- eps
      y <- apply(H.k, 2, max)
      index <- apply(H.k, 2, (function(dt) {
        which.max(dt)[1]
      }))
      mat1 = base::t(sapply(1:ncol(H.k), (function(ii) {
        index
      })))
      mat2 = sapply(1:ncol(H.k), (function(ii) {
        index
      }))
      cons <- mat1 == mat2
      if (sum(cons != consold) == 0) {
        stationary.chk <- stationary.chk + 1
      } else {
        stationary.chk <- 0
      }
      #
      consold <- cons
      
      if (debug)
        points(itr, (sum(cons != consold) / 100), pch = 19, col = "red2")
      
      if (stationary.chk > (stopconv/chk.step)) {
        force.out <- 0
        message("$", appendLF = FALSE)
        break()
      }
    }
    itr <- itr + 1
  }
  if (force.out == 1) {
    message("!", appendLF = FALSE)
  }
  output <- list()
  output$w <- W.k
  output$h <- H.k
  return(output)
}


#' Perform Non-negative Matrix Factorization using Chih-Jen Lin's Algotithm.
#' 
#' Perform Non-negative Matrix Factorization (alternative approach).
#' 
#' @param v numeric matrix of Mutation Type Counts
#' @param r numeric, number of signatures to extract
#' @param params list including all paramaters for running the analysis
#' 
#' @return list including all paramaters for running the analysis:
#' \enumerate{
#'   \item \bold{W} extracted signatures
#'   \item \bold{H} contribution of each signature in all the samples of the input mut count matrix
#' }
#'  
#' @details This is a core internal function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'   \item \bold{Chih-Jen Lin original paper}: \url{https://ieeexplore.ieee.org/document/4359171/}
#'  }
#' 
#'  
#' @importFrom graphics plot points
#' @importFrom proxy dist
#' @importFrom stats runif
#' 
#' @examples 
#' x <- mutSignatures:::getTestRunArgs("chihJenNMF")
#' y <- mutSignatures:::chihJenNMF(v = x$v, r = x$r, params = x$params)
#' y$w[1:10, ]
#' 
#' @keywords internal
chihJenNMF <- function(v, r, params) {
  
  # http://ieeexplore.ieee.org/document/4359171/
  # alternative approach for NMF
  # define params (hard-set)
  debug <- params$debug
  chk.step <- 50
  dot.eachSteps <- 2000
  
  # retireve user-defined params
  eps <- params$eps
  num.processes <- r
  stopconv <- params$stopconv
  niter <- params$niter
  err.threshold <- 1e-10
  
  # set.seed(231082)
  v <- base::as.matrix(v)
  rownames(v) <- NULL
  colnames(v) <- NULL
  
  # Double check input matrix
  if (min(v) < 0)
    stop("Matrix entries cannot be negative")
  if (min(apply(v, 1, sum)) == 0)
    stop("Entries cannot all be equal to 0")
  
  # Debugging plot
  if (debug)
    graphics::plot(-10, xlim = c(1000, niter), ylim = c( (0.1*err.threshold), 10), log = "xy", 
                   xlab = "Iteration", ylab = "Variation", main = "Convergence")
  
  # Initialize W0 and H0
  W.k <- do.call(cbind, lapply(1:num.processes, (function(i){
    out.o <- stats::runif(n = nrow(v), min = 0, max = 100)
    out.o[out.o < eps] <- eps
    out.o/sum(out.o)
  })))
  H.k <- matrix((1/num.processes), nrow = num.processes, ncol = ncol(v))
  
  # Initialize looping vars
  itr <- 1
  chk.j <- 1
  cons.old <- base::as.vector(W.k)
  stationary.chk <- 0
  force.out <- 1
  while (itr < niter){
    
    if (itr %% dot.eachSteps == 0) {
      if (stationary.chk > chk.step) {
        message(":", appendLF = FALSE)
      } else {
        message(".", appendLF = FALSE)
      }
    }
    
    WtW <- base::t(W.k) %*% W.k
    gradH <- ((WtW %*% H.k) - (base::t(W.k) %*% v))
    H.b <- H.k; H.b[H.b<eps] <- eps;
    H.k <- H.k - (H.b / ((WtW %*% H.b) + eps)) * gradH
    HHt <- H.k %*% base::t(H.k)
    gradW <- (W.k %*% HHt) - (v %*% base::t(H.k))
    
    W.b <- W.k; W.b[W.b < eps] <- eps
    W.k <- W.k - (W.b / ((W.b %*% HHt) + eps)) * gradW
    S <- apply(W.k, 2, sum)
    
    W.k <- do.call(cbind, lapply(1:ncol(W.k), (function(ci){
      W.k[,ci] / S[ci]
    })))
    H.k <- do.call(rbind, lapply(1:nrow(H.k), (function(ri){
      H.k[ri,] * S[ri]
    })))
    
    # optional ? Keep as is for now
    H.k <- do.call(cbind, lapply(1:ncol(H.k), (function(ci){
      H.k[,ci] / sum(H.k[,ci])
    })))
    
    # Final non-negative check
    H.k[H.k < eps] <- eps
    W.k[W.k < eps] <- eps
    
    # check convergence every 'chk.step' iterations
    if (itr > stopconv & itr %% chk.step == 0) {
      chk.j <- chk.j + 1
      W.k[W.k < eps] <- eps
      cons <- base::as.vector(W.k)
      # compare to consold and reorder
      dist.measure <- proxy::dist(rbind(cons, cons.old), method = "cosine") [1]
      cons.old <- cons
      
      if (debug)
        points(itr, (dist.measure + (err.threshold*0.1)), pch = 19, col = "red2")
      
      # evaluate distance
      if (dist.measure < err.threshold) {
        stationary.chk <- stationary.chk + 1
      } else {
        stationary.chk <- 0
      }
      if (stationary.chk > (stopconv / chk.step)) {
        force.out <- 0
        message("$", appendLF = FALSE)
        break()
      }
    }
    itr <- itr + 1
  }
  if (force.out == 1) {
    message("!", appendLF = FALSE)
  }
  output <- list()
  output$w <- W.k
  output$h <- H.k
  return(output)
}

#' Decipher Mutational Processes Contributing to a Collection of Genomic Mutations.
#' 
#' Decipher Mutational ProCancer cells accumulate DNA mutations as result of DNA 
#' damage and DNA repair processes. Thiscomputational framework allows to decipher 
#' mutational processes from cancer-derived somatic mutational catalogs.
#' 
#' @param input a mutationCounts-class object, including a mutation counts data. 
#' @param params a mutFrameworkParams-class object including all the parameters required for 
#' running the mutational signature analysis. 
#'
#' @return list including all results of the analysis.
#' The extracted signatures (processes) are included in the "processes" element of the list.
#' The relative contribution of each signature in each sample is summarized in the 
#' "exposures" element.
#'  
#' @details This is one of the core functions included in the original mutSignatures R library, 
#' and in the WTSI MATLAB framework. This is the main user interface for the mutSignatures analysis.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'   \item WTSI framework: \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3588146/}
#'  }
#' 
#' @examples 
#' library(mutSignatures)
#' x <- mutSignatures:::getTestRunArgs("decipherMutationalProcesses")
#' x$muts
#' y <- mutSignatures::decipherMutationalProcesses(input = x$muts, 
#'                                                 params = x$params)
#' y$Results$signatures
#' 
#' @export
decipherMutationalProcesses <- function (input, 
                                         params)
{
  if (class(params) == "mutFrameworkParams" & class(input) == "mutationCounts") {
    paramsList <- coerceObj(params, to = "list")
    inputMAT <- coerceObj(input, to = "matrix", keepNames = FALSE)
  } else {
    stop("Malformed Input")
  }
  if (paramsList$approach != "counts") {
    freq.input <- frequencize(inputMAT)
    inputMAT <- freq.input$freqs
    inputColsums <- freq.input$colSums
  }
  currentWarnings <- options()$warn
  options(warn = -1)
  if (is.numeric(paramsList$num_processesToExtract)) {
    paramsList$analyticApproach <- "denovo"
  } else {
    stop("An error occurred!")
  }
  
  # Make sure that we have at least 2 samples 
  if (length(mutSignatures::getSampleIdentifiers(input)) < 2) {
    
    stop("mutSignatures does not allow to extract mutational signatures from a single sample. 
         Please, add more samples and run again!")
  }
  
  # Make sure that we are extracting at least 2 signatures 
  paramsList$num_processesToExtract <- as.integer(paramsList$num_processesToExtract)
  if (paramsList$num_processesToExtract < 1) {
    
    stop("mutSignatures does not allow to extract mutational signatures from a single sample. 
         Please, add more samples and run again!")
  } else if (paramsList$num_processesToExtract == 1 && 
             paramsList$analyticApproach == "denovo") {
    
    zz <- apply(inputMAT, 1, sum, na.rm = TRUE)
    tot.zz <- sum(zz, na.rm = TRUE)
  
    zz2 <- data.frame(Sign.1 = (zz / tot.zz))
    zz2 <- data.frame(Sign.1 = (zz / tot.zz))
    zzr <- getMutationTypes(input)
    rownames(zz2) <- zzr
    
    out.sig <- as.mutation.signatures(zz2)
    
    message("Since the user requested to extract a single mutational signature, 
            bootstrapping was not performed. A normalized average signature 
            is returned instead!")
    
    return(out.sig)
  } 
  
  if (paramsList$analyticApproach == "denovo") {
    deconvData <- deconvoluteMutCounts(input_mutCounts = inputMAT, 
                                       params = paramsList)
  } else {
    stop("An error occurred!")
  }
  
  mutProcesses <- list()
  final.proc <- data.frame(deconvData$processes, stringsAsFactors = FALSE)
  colnames(final.proc) <- paste("Sign.", sapply(1:ncol(final.proc), 
                                                (function(n) {
                                                  leadZeros(n, (10 * ncol(final.proc)))
                                                })), sep = "")
  rownames(final.proc) <- getMutationTypes(input)
  signResult <- mutSignatures::as.mutation.signatures(final.proc)
  final.expo <- data.frame(deconvData$exposure, stringsAsFactors = FALSE)
  if (getFwkParam(params, "approach") != "counts") {
    final.expo <- data.frame(sapply(1:ncol(final.expo), function(cjj) {
      inputColsums[cjj] * final.expo[, cjj]/sum(final.expo[, cjj])
    }), row.names = NULL, stringsAsFactors = FALSE)
  }
  tryCatch({
    colnames(final.expo) <- getSampleIdentifiers(input)[deconvData$includedSampleId]
  }, error = function(e) {
    NULL
  })
  rownames(final.expo) <- colnames(final.proc)
  expoResult <- mutSignatures::as.mutsign.exposures(x = final.expo, samplesAsCols = TRUE) #added TRUE
  mutProcesses$Results <- list()
  mutProcesses$Results$signatures <- signResult
  mutProcesses$Results$exposures <- expoResult
  mutProcesses$RunSpecs <- list()
  mutProcesses$RunSpecs$input <- input
  mutProcesses$RunSpecs$params <- params
  if (getFwkParam(params, "logIterations") != "lite") {
    mutProcesses$Supplementary <- list()
    mutProcesses$Supplementary$allProcesses <- deconvData$Wall
    mutProcesses$Supplementary$allExposures <- deconvData$Hall
    mutProcesses$Supplementary$idx <- deconvData$idx
    mutProcesses$Supplementary$mutCountErrors <- deconvData$mutCountErrors
    mutProcesses$Supplementary$mutCountReconstructed <- deconvData$mutCountReconstructed
    mutProcesses$Supplementary$processStab <- deconvData$processStab
    mutProcesses$Supplementary$processStabAvg <- deconvData$processStabAvg
  }
  options(warn = currentWarnings)
  return(mutProcesses)
}


#' Deconvolute Mutation Counts.
#' 
#' Characterize mutational signatures from cancer-derived somatic mutational catalogs.
#' 
#' @param input_mutCounts numeric matrix of Mutation Type Counts
#' @param params list including all parameters required for running the analysis
#'
#' @return list including all the results from the deconvolution analysis. 
#' This function is called within thedecipherMutationalProcesses() function 
#' after parameters and input data have been validated
#' 
#' @details This is one of the core functions included in the original mutSignatures R library, 
#' and in the WTSI MATLAB framework. This is an internal function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'   \item WTSI framework: \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3588146/}
#'  }
#'
#' @examples  
#' x <- mutSignatures:::getTestRunArgs("deconvoluteMutCounts")
#' y <- mutSignatures:::deconvoluteMutCounts(input_mutCounts = as.matrix(x$muts),
#'                                           params = as.list(x$params))
#' y$processes[1:10,]
#' 
#' 
#' @import foreach
#' @importFrom parallel detectCores makeCluster clusterExport stopCluster
#' @importFrom doParallel registerDoParallel
#'  
#' @keywords internal
deconvoluteMutCounts <- function(input_mutCounts, params) 
{
  j <- NULL
  num_totIterations <- params$num_totIterations
  num_processesToExtract <- params$num_processesToExtract
  distanceFunction <- params$distanceFunction
  thresh_removeWeakMutTypes <- params$thresh_removeWeakMutTypes
  num_parallelCores <- params$num_parallelCores
  guided <- params$guided
  debugStatus <- params$debug
  num_totReplicates <- params$num_totReplicates
  thresh_removeLastPercent <- params$thresh_removeLastPercent
  colnames(input_mutCounts) <- NULL
  rownames(input_mutCounts) <- NULL
  input_mutCounts <- base::as.matrix(input_mutCounts)
  bckgrnd.removed.mutCounts <- removeWeak(input_mutCounts, params)
  
  # Make sure we have enough counts
  # else, remove samples
  # Carry sample identifiers forard as well
  includedSampleId <- 1:ncol(input_mutCounts)
  if (sum(apply(bckgrnd.removed.mutCounts$output.mutCounts, 2, sum) < 1) > 0) {
    
    # Make a working copy of the matrix, 
    # do this iteratively, just cause
    tmp_input_counts <- input_mutCounts
    tmp_includedSampleId <- includedSampleId
    RMV <- which(apply(bckgrnd.removed.mutCounts$output.mutCounts, 2, sum) < 1)
    while(length(RMV) > 0) {
      tmp_input_counts <- tmp_input_counts[, -RMV]
      tmp_includedSampleId <- tmp_includedSampleId[-RMV]
      bckgrnd.removed.mutCounts <- removeWeak(tmp_input_counts, params)
      RMV <- which(apply(bckgrnd.removed.mutCounts$output.mutCounts, 2, sum) < 1)
    }
    
    # Inform about removed samples
    message(paste("Some of the samples (n=", 
                  (ncol(input_mutCounts) - ncol(tmp_input_counts)), 
                  ") were ineligible for analysis and were removed.", sep = ""))
    input_mutCounts <- tmp_input_counts
    includedSampleId <- tmp_includedSampleId
  }
  
  # Back to analysis
  bckgrnd.removed.mutset <- bckgrnd.removed.mutCounts$removed.mutset
  bckgrnd.removed.mutCounts <- bckgrnd.removed.mutCounts$output.mutCounts
  total.mutationTypes <- nrow(bckgrnd.removed.mutCounts)
  total.samples <- ncol(bckgrnd.removed.mutCounts)
  if (guided) {
    if (!debugStatus) {
      guide.W <- suppressMessages(
        extractSignatures(mutCountMatrix = bckgrnd.removed.mutCounts,
                          params = params, bootStrap = FALSE))
    } else {
      guide.W <- extractSignatures(mutCountMatrix = bckgrnd.removed.mutCounts, 
                                   params = params, bootStrap = FALSE)
    }
    guide.W <- guide.W$Wk
  } else {
    guide.W <- 0
  }
  if (num_parallelCores < 2) {
    muCounts.checkDF <- tryCatch(
      lapply(1:num_totIterations, (function(j) {
        if (debugStatus) {
          if (j %in% base::as.integer(seq(1, num_totIterations, length.out = 100))) {
            message(paste("(", j, ")", sep = ""), appendLF = FALSE)
          }
        }
        if (!debugStatus) {
          tmp.out <- suppressMessages(
            extractSignatures(mutCountMatrix = bckgrnd.removed.mutCounts,
                              bootStrap = TRUE, params = params))
          
        } else {
          
          tmp.out <- extractSignatures(mutCountMatrix = bckgrnd.removed.mutCounts, 
                                       bootStrap = TRUE, params = params)
        }
        
        if (guided) {
          re.ORD <- rep(0, num_processesToExtract)
          
          for (ki in 1:num_processesToExtract) {
            
            my.i <- order(apply(abs(tmp.out$Wk - guide.W[, ki]), 2, sum))
            
            if (ki > 1) {
              my.i[re.ORD[1:(ki - 1)]] <- max(my.i) + 1
            }
            re.ORD[ki] <- which.min(my.i)
          }
          
        } else {
          re.ORD <- 1:num_processesToExtract
        }
        if (num_processesToExtract > 1) {
          tmp.out$Wk <- tmp.out$Wk[, re.ORD]
          tmp.out$Hk <- tmp.out$Hk[re.ORD, ]
        } else {
          tmp.out$Wk <- tmp.out$Wk
          tmp.out$Hk <- rbind(tmp.out$Hk)
        }
        tmp.out
      })), error = (function(e) {
        print(e)
      }))
    if (debugStatus) {
      message("Done!", appendLF = TRUE)
    }
  } else {
    max.cores <- parallel::detectCores()
    max.cores <- max.cores - 1
    max.cores <- ifelse(max.cores < 1, 1, max.cores)
    use.cores <- ifelse(1 <= num_parallelCores & num_parallelCores <= max.cores, 
                        num_parallelCores, max.cores)
    if (debugStatus) {
      cl <- suppressMessages(
        parallel::makeCluster(use.cores, outfile = ""))
    } else {
      cl <- suppressMessages(parallel::makeCluster(use.cores))
    }
    print(paste("Extracting", num_processesToExtract, "mutational signatures X", 
                num_totIterations, "iterations using", use.cores, 
                "cores"))
    suppressMessages(doParallel::registerDoParallel(cl))
    
    # Prep before exporting
    alexaNMF <- alexaNMF
    leadZeros <- leadZeros
    extractSignatures <- extractSignatures
    frequencize <- frequencize
    bootstrapCancerGenomes <- bootstrapCancerGenomes
    chihJenNMF <- chihJenNMF
    
    stuffToExp <- c()
    #stuffToExp <- c("alexaNMF", "leadZeros", "extractSignatures", 
    #                "frequencize", "bootstrapCancerGenomes", "chihJenNMF")
    suppressMessages(parallel::clusterExport(cl, stuffToExp))
    muCounts.checkDF <- tryCatch(
      foreach::foreach(j = (1:num_totIterations),
                       .verbose = TRUE, 
                       .packages = c("stats", "mutSignatures")) %dopar% {
                         
                         if (j %in% base::as.integer(seq(1, num_totIterations, length.out = 100))) {
                           message(paste("(", j, ")", sep = ""), appendLF = FALSE)
                         }
                         tmp.out <- extractSignatures(mutCountMatrix = bckgrnd.removed.mutCounts, 
                                                      params = params)
                         if (guided) {
                           re.ORD <- rep(0, num_processesToExtract)
                           for (ki in 1:num_processesToExtract) {
                             my.i <- order(apply(abs(tmp.out$Wk - guide.W[, ki]), 2, sum))
                             if (ki > 1) {
                               my.i[re.ORD[1:(ki - 1)]] <- max(my.i) + 1
                             }
                             re.ORD[ki] <- which.min(my.i)
                           }
                         } else {
                           re.ORD <- 1:num_processesToExtract
                         }
                         if (num_processesToExtract > 1) {
                           tmp.out$Wk <- tmp.out$Wk[, re.ORD]
                           tmp.out$Hk <- tmp.out$Hk[re.ORD, ]
                         } else {
                           tmp.out$Wk <- tmp.out$Wk
                           tmp.out$Hk <- rbind(tmp.out$Hk)
                         }
                         tmp.out
                       }, error = (function(e) {
                         print(e)
                       }), finally = (function(f) {
                         parallel::stopCluster(cl)
                       }))
    message("Parallel Processing Complete...", appendLF = TRUE)
    message("Starting downstream processing. Please, wait...", appendLF = TRUE)
  }
  W.all <- do.call(cbind, lapply(muCounts.checkDF, (function(tmp) {
    tmp$Wk
  })))
  H.all <- do.call(rbind, lapply(muCounts.checkDF, (function(tmp) {
    tmp$Hk
  })))
  errors.all <- lapply(muCounts.checkDF, (function(tmp) {
    tmp$mutCounts.errors
  }))
  reconstruct.all <- lapply(muCounts.checkDF, (function(tmp) {
    tmp$mutCounts.reconstructed
  }))
  fltr.mutCounts.data <- filterOutIterations(wall = W.all, 
                                             hall = H.all, 
                                             cnt_errors = errors.all, 
                                             cnt_reconstructed = reconstruct.all, 
                                             params)
  stability.check <- evaluateStability(wall = fltr.mutCounts.data$Wall, 
                                       hall = fltr.mutCounts.data$Hall, params)
  final.mutCounts.data <- addWeak(mutationTypesToAddSet = bckgrnd.removed.mutset, 
                                  processes_I = stability.check$centroids, 
                                  processesStd_I = stability.check$centroidStd, 
                                  Wall_I = fltr.mutCounts.data$Wall, 
                                  genomeErrors_I = fltr.mutCounts.data$mutCounts.errors, 
                                  genomesReconstructed_I = fltr.mutCounts.data$mutCounts.reconstructed)
  deconvoluted.results <- list()
  deconvoluted.results$Wall <- final.mutCounts.data$Wall
  deconvoluted.results$Hall <- fltr.mutCounts.data$Hall
  deconvoluted.results$mutCountErrors <- final.mutCounts.data$mutCountErrors
  deconvoluted.results$mutCountReconstructed <- final.mutCounts.data$mutCountReconstructed
  deconvoluted.results$idx <- stability.check$idx
  deconvoluted.results$idxS <- stability.check$idxS
  deconvoluted.results$processes <- final.mutCounts.data$processes
  deconvoluted.results$processesStd <- final.mutCounts.data$processesStd
  deconvoluted.results$exposure <- stability.check$exposure
  deconvoluted.results$exposureStd <- stability.check$exposureStd
  deconvoluted.results$processStab <- stability.check$processStab
  deconvoluted.results$processStabAvg <- stability.check$processStabAvg
  deconvoluted.results$clusterCompactness <- stability.check$clusterCompactness
  deconvoluted.results$includedSampleId <- includedSampleId
  
  if (debugStatus) {
    message("Done...", appendLF = TRUE)
  }
  return(deconvoluted.results)
}

#' Extract Signatures from Genomic Mutational Catalogs.
#' 
#' Extract mutational signatures after the input Data and the 
#' input parameters have been checked andvalidated.
#' 
#' @param mutCountMatrix numeric matrix of mutation counts
#' @param params list including all parameters for performing the analysis
#' @param bootStrap logical, shall bootstrapping be performed
#' 
#' @return list including the following elements
#' \enumerate{
#'    \item \bold{Wall}:  all extracted signatures
#'    \item \bold{Hall}: all extracted exposures
#'    \item \bold{mutCounts.reconstructed}: fitted values
#'    \item \bold{mutCounts.errors}: residuals
#'  }
#' 
#' @details This is one of the core functions included in the original mutSignatures R library, 
#' and in the WTSI MATLAB framework. This is an internal function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'   \item WTSI framework: \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3588146/}
#'  }
#' 
#' @examples 
#' x <- mutSignatures:::getTestRunArgs("extractSignatures")
#' y <- mutSignatures:::extractSignatures(mutCountMatrix = as.matrix(x$muts), 
#'                                        params = as.list(x$params), bootStrap = TRUE)
#' y$Wk[1:10,]
#' 
#' 
#' @importFrom proxy dist
#' 
#' @keywords internal
extractSignatures <- function (mutCountMatrix, 
                               params, 
                               bootStrap = TRUE)
{
  num_processesToExtract <- params$num_processesToExtract
  approach <- params$approach
  algorithm <- params$algorithm
  eps <- params$eps
  
  if (bootStrap) {
    bstrpd.result <- bootstrapCancerGenomes(mutCountMatrix) 
  } else {
    bstrpd.result <- mutCountMatrix
  }
  
  #if (approach != "counts") {
  #  frq.bstrpd <- frequencize(bstrpd.result)
  #  bstrpd.result <-  frq.bstrpd$freqs
  #}
  bstrpd.result[bstrpd.result < eps] <- eps
  
  if (algorithm %in% c("brunet", "alexa")) {
    nmf.results <- alexaNMF(v = bstrpd.result,
                            r = num_processesToExtract,
                            params = params)
  } else {
    nmf.results <- chihJenNMF(v =  bstrpd.result,
                              r = num_processesToExtract,
                              params = params)
  } 
  # nmf.results
  tmp.w <- nmf.results$w
  tmp.h <- nmf.results$h
  
  # This step seems useless for the new approach, as it divides or multiplies by 1
  # Keep for consistency and cause we let the user select what approach to use
  for (jj in 1:num_processesToExtract) {
    tmp.tot <- sum(tmp.w[, jj])
    tmp.w[, jj] <- tmp.w[, jj]/tmp.tot
    tmp.h[jj, ] <- tmp.h[jj, ] * tmp.tot
  }
  
  # modify for frequentized approach 
  #if (approach != "counts"){
  #  if (length(frq.bstrpd$colSums) == ncol(tmp.h)) {
  #    tmp.h <- sapply(1:length(frq.bstrpd$colSums), function(ai) {
  #      frq.bstrpd$colSums[ai] * tmp.h[,ai] / sum(tmp.h[,ai], na.rm = TRUE)
  #    })
  #  }  
  #}
  
  mutCountMatrix.reconstructed <- tmp.w %*% tmp.h
  
  result.list <- list()
  result.list$Wk <- tmp.w
  result.list$Hk <- tmp.h
  result.list$mutCounts.reconstructed <- mutCountMatrix.reconstructed
  result.list$mutCounts.errors <- bstrpd.result - mutCountMatrix.reconstructed
  
  if (params$logIterations != "lite") {
    result.list$inputMatrix <- bstrpd.result
    result.list$cosDist <- proxy::dist(rbind(base::as.vector(bstrpd.result),
                                             base::as.vector(mutCountMatrix.reconstructed)),
                                       method = "cosine")[1]
  }
  
  return(result.list)
}


#' Set Parameters for Extracting Mutational Signatures.
#' 
#' Create an object including all parameters required for running the mutSignatures framework.
#' 
#' @param num_processesToExtract integer, number of mutational signatures to extract
#' @param num_totIterations integer, total number of iterations (bootstrapping)
#' @param num_parallelCores integer, number of cores to use for the analysis
#' @param thresh_removeWeakMutTypes numeric, threshold for filtering out under-represented mutation types
#' @param thresh_removeLastPercent numeric, threshold for removing outlier iteration results
#' @param distanceFunction string, method for calculating distances. Default method is "cosine"
#' @param num_totReplicates integer, number of replicates while checking stability
#' @param eps numeric, close-to-zero positive numeric value for replacing zeros and 
#' preventing negative values to appear in the matrix during NMF
#' @param stopconv integer, max number of stable iterations before termination. Defaults to 20000.
#' @param niter integer, max number of iterations to run. Defaults to 1000000
#' @param guided logical, shall clustering be guided to improve aggregation upon bootstrapping
#' @param debug logical, shall the analysis be run in DEBUG mode
#' @param approach string, indicating whether to model absolute counts ("counts") or 
#' per_mille frequency ("freq"). Defaults to "freq".
#' @param stopRule = string, use the sub-optimal termination rule ("AL") from the WTSI package 
#' (actually, iterations won't terminate, so niter will most certainly reached) or our 
#' efficient termination rule ("DF"). Defaults to "DF". The "AL" option is implemented for 
#' compatibility reasons, but not recommended. 
#' @param algorithm string, algorithm to be used for NMF. Set to "brunet", or "alexa" for using the standard algorithm (Brunet's), 
#' otherwise the alternative "chihjen" algorithm will be used.
#' @param logIterations string indicating if storing and returining all intermediates, 
#' or only final results. Defaults to "lite", i.e. returns full output and limited intermediates.
#' Alternatively, set to "full". 
#' @param seed integer, seed to set for reproducibility
#' 
#' @return Object including all parameters for running the analysis
#'
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'   \item WTSI framework: \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3588146/}
#'  }
#'      
#' @examples
#' library(mutSignatures) 
#' # defaults params
#' A <- setMutClusterParams()
#' A
#' # A second example, set num_processes
#' B <- setMutClusterParams(num_processesToExtract = 5)
#' B
#' 
#'     
#' @export
setMutClusterParams <- function(num_processesToExtract = 2, 
                                num_totIterations = 10,
                                num_parallelCores = 1,
                                thresh_removeWeakMutTypes = 0.01,
                                thresh_removeLastPercent = 0.07,
                                distanceFunction = "cosine",
                                num_totReplicates = 100,
                                eps = 2.2204e-16,
                                stopconv = 20000,
                                niter = 1000000,
                                guided = TRUE,
                                debug = FALSE,
                                approach = "freq",
                                stopRule = "DF",
                                algorithm = "brunet",
                                logIterations = "lite", 
                                seed = 12345)
{
  #
  # Step-by-step Parameter Validation and preparation
  paramList <- list()
  #
  if (!((is.numeric(num_processesToExtract[1]) & num_processesToExtract[1] > 0) ))
    stop("Provide a reasonable number of signatures/processes to extract")
  paramList$num_processesToExtract <- round(num_processesToExtract[1])
  #
  if (!(is.numeric(num_totIterations[1]) & num_totIterations[1] > 0))
    stop("Provide a reasonable number of iterations to run (Bootstrapping)")
  paramList$num_totIterations <- round(num_totIterations[1])
  #
  if (!(is.numeric(num_parallelCores[1]) & num_parallelCores[1] > 0))
    stop("Provide a reasonable number of CPU cores to use for the analysis")
  paramList$num_parallelCores <- round(num_parallelCores[1])
  #
  #paramList$perCore.iterations <- as.integer(paramList$num_totIterations / paramList$num_parallelCores)
  #paramList$perCore.iterations <- ifelse(paramList$perCore.iterations < 1, 1, paramList$perCore.iterations)
  #
  if (!(is.numeric(thresh_removeWeakMutTypes[1]) & thresh_removeWeakMutTypes[1] >= 0 & thresh_removeWeakMutTypes[1] < 1))
    stop("Provide a reasonable (0.00-0.99) number of low-occurring mutation types to remove from the input before starting the analysis")
  paramList$thresh_removeWeakMutTypes <- thresh_removeWeakMutTypes[1]
  #
  if (!(is.numeric(thresh_removeLastPercent[1]) & thresh_removeLastPercent[1] >= 0 & thresh_removeLastPercent[1] < 1))
    stop("Provide a reasonable (0.00-0.99) number for filtering out poor iterations")
  paramList$thresh_removeLastPercent <- thresh_removeLastPercent[1]
  #
  allowed.dist.methods <- c("Braun-Blanquet", "Chi-squared", "correlation", "cosine", "Cramer", "Dice",
                            "eDice", "eJaccard", "Fager", "Faith", "Gower", "Hamman", "Jaccard",
                            "Kulczynski1", "Kulczynski2", "Michael", "Mountford", "Mozley", "Ochiai",
                            "Pearson", "Phi", "Phi-squared", "Russel", "simple matching", "Simpson",
                            "Stiles", "Tanimoto", "Tschuprow", "Yule", "Yule2", "Bhjattacharyya",
                            "Bray", "Canberra", "Chord", "divergence", "Euclidean", "fJaccard",
                            "Geodesic", "Hellinger", "Kullback", "Levenshtein", "Mahalanobis",
                            "Manhattan", "Minkowski", "Podani", "Soergel","supremum", "Wave", "Whittaker")
  if (!(is.character(distanceFunction[1]) & distanceFunction[1] %in% allowed.dist.methods))
    stop("Unknown method for calculating distances. For options, run: <<summary(proxy::pr_DB)>>")
  paramList$distanceFunction <- distanceFunction[1]
  #
  if (!(is.numeric(num_totReplicates[1]) & num_totReplicates[1] > 99))
    stop("Provide a reasonable number of replicates for stability evaluation of the results")
  paramList$num_totReplicates <- round(num_totReplicates[1])
  #
  if (!(is.numeric(eps[1]) & eps[1] > 0 & eps[1] < 0.0001))
    stop("Provide a reasonably small number (0 < n < 0.0001) for data overflow prevention")
  paramList$eps <- eps[1]
  #
  if (!(is.numeric(stopconv[1]) & stopconv[1] >= 500))
    stop("Provide a reasonable large number: number of 'conn-matrix-stable' iterations before stopping NMF")
  paramList$stopconv <- round(stopconv[1])
  #
  if (!(is.numeric(niter[1]) & niter[1] >= 20000))
    stop("Provide a reasonable large number: total NMF iterations")
  paramList$niter <- round(niter[1])
  #
  paramList$guided <- ifelse(guided, TRUE, FALSE)
  paramList$debug <- ifelse(debug, TRUE, FALSE)
  paramList$approach <- ifelse (approach == "counts", "counts", "freq")
  paramList$stopRule <- ifelse (stopRule == "LA", "LA", "DF")
  paramList$algorithm <- ifelse (tolower(algorithm) %in% c("brunet", "alexa"), "brunet", "chihjen")
  paramList$logIterations <- ifelse(tolower(logIterations) %in% c("lite", "light", "li"), "lite", "full")
  #
  return(new(Class = "mutFrameworkParams", params = paramList))
}

## ~~~~~~~~~~~~~~~~~~~~~

#' Custom Fast Combinatorial Nonnegative Least-Square.
#' 
#' This function contributes to solve a least square linear 
#' problem using the fast combinatorial strategy from Van 
#' Benthem et al. (2004). This implementation is similar to that included in the
#' NMF R package by Renaud Gaujoux and Cathal Seoighe, 
#' and it is tailored to the data used in the mutational signature analysis. 
#' For more info, see: \url{https://CRAN.R-project.org/package=NMF}
#' 
#' @param mutCounts numeric matrix including mutation counts
#' @param signatures numeric matrix including mutation signatures
#'
#' @return list, including: (K) a numeric matrix of estimated exposures; and (Pset) a Pset numeric matrix
#'  
#' @examples
#' x <- mutSignatures:::getTestRunArgs(testN = "custom_fcnnls")
#' y <- mutSignatures:::custom_fcnnls(mutCounts = x$muts, signatures = x$signs)
#' y$coef
#'  
#' @keywords internal
custom_fcnnls <- function (mutCounts, signatures) 
{
  
  # Hardcode eps
  eps <-  0 
  
  # Initial checks
  if (sum(c(dim(signatures), dim(mutCounts)) < 1) > 0) {
    stop("Bad input!")
  }
  if (nrow(signatures) != nrow(mutCounts)) {
    stop("Bad input: matrices have imcompatible size")
  }
  
  # Define W, and comute matrix cross-prods
  W <- matrix(0, ncol(signatures), ncol(mutCounts))
  maxiter = 5 * ncol(signatures)
  xxp <- base::crossprod(signatures) 
  xyp <- base::crossprod(signatures, mutCounts)
  
  # Solve, aka compute K
  K <- base::solve(xxp, xyp)
  
  # Prep for looping
  Ps <- K > 0
  K[!Ps] <- 0
  D <- K
  Fset <- which(apply(Ps, 2, sum) != ncol(signatures))
  oitr <- 0
  iter <- 0
  
  while (length(Fset) > 0) {
    oitr <- oitr + 1
    K[, Fset] <- custom_cssls(xxp, 
                              xyp[, Fset, drop = FALSE], 
                              Ps[, Fset, drop = FALSE])
    
    keep <- apply(K[, Fset, drop = FALSE] < eps, 2, sum) > 0
    Hset <- Fset[keep]
    if (length(Hset) > 0) {
      nHset <- length(Hset)
      alpha <- matrix(0, ncol(signatures), nHset)
      while (nHset > 0 && (iter < maxiter)) {
        iter <- iter + 1
        alpha[, 1:nHset] <- Inf
        ij <- which(Ps[, Hset, drop = FALSE] & (K[, Hset, drop = FALSE] < eps), arr.ind = TRUE)
        i <- ij[, 1]
        j <- ij[, 2]
        if (length(i) == 0) 
          break
        hIdx <- (j - 1) * ncol(signatures) + i
        negIdx <- (Hset[j] - 1) * ncol(signatures) + i
        alpha[hIdx] <- D[negIdx]/(D[negIdx] - K[negIdx])
        alpha.inf <- alpha[, 1:nHset, drop = FALSE]
        minIdx <- max.col(-t(alpha.inf))
        alphaMin <- alpha.inf[minIdx + (0:(nHset - 1) * ncol(signatures))]
        alpha[, 1:nHset] <- matrix(alphaMin, ncol(signatures), nHset, byrow = TRUE)
        D[, Hset] <- D[, Hset, drop = FALSE] - alpha[, 1:nHset, drop = FALSE] * 
          (D[, Hset, drop = FALSE] - K[, Hset, drop = FALSE])
        idx2zero <- (Hset - 1) * ncol(signatures) + minIdx
        D[idx2zero] <- 0
        Ps[idx2zero] <- FALSE
        
        K[, Hset] <- custom_cssls(xxp, 
                                  xyp[, Hset, drop = FALSE], 
                                  Ps[, Hset, drop = FALSE])
        
        Hset <- which(apply(K < eps, 2, sum) > 0)
        nHset <- length(Hset)
      }
    }
    W[, Fset] <- xyp[, Fset, drop = FALSE] - xxp %*% K[, Fset, drop = FALSE]
    
    tmp_coeff <- ifelse(!(Ps[, Fset, drop = FALSE]), 1, 0)
    Jset <- which(apply((tmp_coeff *  W[, Fset, drop = FALSE]) > eps, 2, sum) == 0)
    
    Fset <- base::setdiff(Fset, Fset[Jset])
    if (length(Fset) > 0) {
      tmp_coeff <- ifelse(!Ps[, Fset, drop = FALSE], 1, 0)
      mxidx <- max.col(t(tmp_coeff * W[, Fset, drop = FALSE]))
      Ps[(Fset - 1) * ncol(signatures) + mxidx] <- TRUE
      D[, Fset] <- K[, Fset, drop = FALSE]
    }
  }
  return(list(coef = K, Pset = Ps))
}

#' Custom CSSLS.
#' 
#' This function contributes to solving a nonnegative least square linear 
#' problem using normal equations and the fast combinatorial strategy from Van 
#' Benthem et al. (2004). This implementation is similar to that included in the
#' NMF R package by Renaud Gaujoux and Cathal Seoighe, 
#' and it is tailored to the data used in the mutational signature analysis. 
#' For more info, see: \url{https://CRAN.R-project.org/package=NMF}
#' 
#' @param CtC numeric matrix
#' @param CtA numeric matrix
#' @param Pset nueric matrix
#'
#' @return a numeric matrix
#'  
#' @examples
#' x <- mutSignatures:::getTestRunArgs(testN = "custom_cssls")
#' y <- mutSignatures:::custom_cssls(CtC = x$CtC, CtA = x$CtA, Pset = x$Pset)
#' y
#'  
#' @keywords internal
custom_cssls <- function (CtC, CtA, Pset) 
{
  K = matrix(0, nrow(CtA), ncol(CtA))
  
  lVar <- nrow(Pset)
  pRHS <- ncol(Pset)
  codedPset <- as.numeric(2^(seq(lVar - 1, 0, -1)) %*% Pset)
  sortedPset <- sort(codedPset)
  sortedEset <- order(codedPset)
  breaks <- diff(sortedPset)
  breakIdx <- c(0, which(breaks > 0), pRHS)
  for (k in seq(1, length(breakIdx) - 1)) {
    cols2solve <- sortedEset[seq(breakIdx[k] + 1, breakIdx[k + 1])]
    vars <- Pset[, sortedEset[breakIdx[k] + 1]]
    K[vars, cols2solve] <- 
      solve(CtC[vars, vars, drop = FALSE]) %*%  CtA[vars, cols2solve, drop = FALSE]
  }
  K
}


## ~~~~~~~~~~~~~~~~~~~~~

###
##### Exported functions - user interface
###



#' Attach Nucleotide Context.
#' 
#' Retrieve the nucleotide context around each DNA variant based on the
#' genomic coordinates of the variant and a reference BSGenome database.
#' 
#' @param mutData data.frame storing mutation data
#' @param BSGenomeDb a BSGenomeDb-class object, storing info about the genome of interest
#' @param chr_colName string, name of the column storing seqNames. Defaults to "chr"
#' @param start_colName string, name of the column storing start positions. Defaults to "start_position"
#' @param end_colName string, name of the column storing end positions. Defaults to "end_position"
#' @param nucl_contextN integer, the span of nucleotides to be retrieved around the variant. Defaults to 3
#' @param context_colName string, name of the column that will be storing the 
#' nucleotide context. Defaults to "context"
#'
#' @return a modified data.frame including the nucleotide context in a new column
#'
#' @details This function is part of the user-interface set of tools included in mutSignatures. This is an exported function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'  }
#' 
#' 
#' @importFrom methods slotNames
#' @importFrom utils installed.packages data
#' 
#' @export
attachContext <- function(mutData,
                          BSGenomeDb,
                          chr_colName = "chr",
                          start_colName = "start_position",
                          end_colName = "end_position",
                          nucl_contextN = 3,
                          context_colName = "context")
{
  # Init Exec
  exec <- TRUE
  
  # Check if dependencies from BIOC are installed
  BiocX <- c("GenomicRanges", "IRanges", "BSgenome", "GenomeInfoDb")
  all.packs <- rownames(utils::installed.packages())
  if (sum(BiocX %in% all.packs) != length(BiocX)) {
    exec <- FALSE
  }

  # Check if BSGenomeDb dependencies from BIOC are installed
  if (!"BSgenome" %in% class(BSGenomeDb)) {
    stop("BSGenomeDb is not a BSgenome-class object")
  }

  if (exec) {
    
    attachContext.addON <- mutSignatures::mutSigData$.addON$attachContext.addON
    
    YY <- tryCatch({
      attachContext.addON(mutData = mutData, 
                          BSGenomeDb = BSGenomeDb, 
                          chr_colName = chr_colName, 
                          start_colName = start_colName, 
                          end_colName = end_colName, 
                          nucl_contextN = nucl_contextN, 
                          context_colName = context_colName)}, 
      
      error = function(e) {
        message("An error has occurred!")
        NULL})
     
     return(YY)
     
  } else {
    message("Sorry, this operation could not be executed!")
    message("In order to run the `attachContext()` function, the following libraries have to be installed from Bioconductor:")
    message("")
    for (xbi in BiocX) {
      message(paste0("  --> ", xbi))  
    }
    message("")
    message("Missing Bioconductor libs:")
    message("  --> ", appendLF = FALSE)
    bxx <- paste(BiocX[!BiocX %in% all.packs], collapse = ", ")
    message(bxx, appendLF = FALSE)
    message("")
    
    # no return
  }
}  



#' Attach Mutation Types.
#' 
#' Modify a data.frame carrying information about DNA mutation, and add a new column that 
#' stores formatted multi-nucleotide types.
#' 
#' @param mutData data.frame including information about DNA mutations
#' @param ref_colName string, pointing to the column with information about the sequence of the "reference_allele"
#' @param var_colName string, pointing to the column with information about the sequence of the "variant_allele"
#' @param var2_colName string (optional), pointing to the column with information about the 
#' sequence of a second "variant_allele". Can be NULL
#' @param context_colName string, pointing to the column with information about the nucleotidic "context"
#' @param format integer, indicates the desired mutation type format: (1) N[R>V]N; (2) NN.R>V; (3) R.V[NRN][NVN]
#' @param mutType_dict string, indicates the dictionary to be used for simplifying reverse-complement identical mutation types.
#' It is recommended to use the standard dictionary from COSMIC, by selecting the default value, i.e. "alexa".
#' @param mutType_colName string, column name of the new column added to the data.frame where mutTypes are stored.
#' 
#' @return a data.frame including a new column with mutation Types.
#'
#' @details This function is part of the user-interface set of tools included in mutSignatures. This is an exported function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'  }
#' 
#' @examples  
#' A <- data.frame(REF = c("A", "T", "G"), 
#'                 VAR = c("G", "C", "C"), 
#'                 CTX = c("TAG", "GTG", "CGA"), 
#'                 stringsAsFactors = FALSE)
#' mutSignatures::attachMutType(mutData = A, ref_colName = "REF", 
#'                              var_colName = "VAR", context_colName = "CTX")
#'       
#' @export
attachMutType <- function(mutData,
                          ref_colName = "reference_allele",
                          var_colName = "variant_allele",
                          var2_colName = NULL,
                          context_colName = "context",
                          format = 1,
                          mutType_dict = "alexa",
                          mutType_colName = "mutType")
{
  # Validate input data and fields in mutData
  if(!((is.data.frame(mutData) | is.matrix(mutData) ) &
       sum(c(ref_colName, var_colName, context_colName) %in% colnames(mutData)) == 3 ))
    stop ("Issue with the input dataset. Make sure to feed in a data.frame or
          a matrix and double check the name of the fields pointing to chromosome
          name, start and end positions")
  if (!(format %in% c(1,2)))
    stop ("Please, specify a valid format number (example: 1)")
  if (! (is.null(var2_colName))) {
    if (!(var2_colName %in% colnames(mutData)))
      stop ("Invalid var2 column")
  }
  
  if (!is.character(mutType_colName) |
      length(mutType_colName) > 1)
    stop("Bad mutType_colName")
  if(mutType_colName %in% colnames(mutData))
    stop ("mutType_colName already exists as column name in the current dataset")
  
  # convert factors to chars
  mutData <- data.frame(mutData, stringsAsFactors = FALSE, row.names = NULL)
  my.key.cols <- c(ref_colName, var_colName, var2_colName, context_colName)
  my.key.cols <- my.key.cols[!is.na(my.key.cols)]
  for (clmn in my.key.cols) {
    mutData[,clmn] <- base::as.character(base::as.vector(mutData[,clmn])) 
  }
  
  message("Assigning mutation types ", appendLF = FALSE)
  
  mutData[,mutType_colName] <- sapply(1:nrow(mutData), (function(i){
    
    if (nrow(mutData) > 1000 & i %in% base::as.integer(seq(1, nrow(mutData),length.out = 20)))
      message(".", appendLF = FALSE)
    
    # first, extract elems and check for middle base to match the reference
    ctx.len <- nchar(mutData[i,context_colName])
    half.ln <- (ctx.len - 1) / 2
    mid.seq <- substr(mutData[i,context_colName], (half.ln + 1), (half.ln + 1))
    pre.seq <- substr(mutData[i,context_colName], 1, half.ln)
    post.seq <- substr(mutData[i,context_colName], (half.ln + 2), ctx.len)
    
    if((mid.seq !=  mutData[i,ref_colName]) || 
       (is.null(var2_colName) & mid.seq == mutData[i,var_colName]) ||
       (tryCatch({
         ChK0 <- (mid.seq == mutData[i,var_colName] & mid.seq == mutData[i,var2_colName])
         ifelse(length(ChK0) == 1, ChK0, FALSE) 
         }, error = function(e) {FALSE}))) {
      # no match means --> NA
      mut.base <- NA
    } else {
      #mut.variant to use in case var2 is specified
      if (mutData[i,ref_colName] != mutData[i,var_colName]) {
        mut.base <- mutData[i,var_colName]
      } else if (!is.null(var2_colName) ){
        if ( mutData[i,ref_colName] != mutData[i,var2_colName]) {
          mut.base <- mutData[i,var2_colName]
        } else {
          mut.base <- NA  
        }
      } else {
        mut.base <- NA  
      }
      
      # match, format and return according to a standard format (for now)
      if (is.na(mut.base)) {
        NA
      } else {
        paste(mid.seq, ".", mut.base, "[", pre.seq, mid.seq, post.seq, "][", pre.seq, mut.base, post.seq, "]", sep = "", collapse = "")
      }
    }
  }))
  message(". Done!", appendLF = TRUE)
  
  if (sum(is.na(mutData[,mutType_colName])) > 0) {
    message(paste("Removing",sum(is.na(mutData[,mutType_colName])), "positions."))
    mutData <- mutData[!is.na(mutData[,mutType_colName]),]
  }
  
  # Now, apply revCompl transformation
  message("Now applying RevCompl transformation", appendLF = FALSE)
  if (mutType_dict == "alexa") {
    idx <- grep("^((G|A)\\.)", mutData[,mutType_colName])
    mutData[idx,mutType_colName] <- sapply(mutData[idx,mutType_colName], (function(seq){
      
      base.wt  <- revCompl(gsub("\\..+$", "", seq))
      base.mut <- revCompl(gsub("^.+\\.", "", gsub("\\[.+$", "", seq)))
      seq.wt   <- revCompl(gsub("^.+\\[", "", gsub("\\]\\[.+$", "", seq)))
      seq.mut  <- revCompl(gsub("^.+\\]\\[", "", gsub("\\]$", "", seq)))
      paste(base.wt,".",base.mut, "[", seq.wt, "][", seq.mut, "]", sep = "", collapse = "")
    }))
    
  } else if (mutType_dict == "custom") {
    idx <- grep("^((G|T)\\.)", mutData[,mutType_colName])
    mutData[idx,mutType_colName] <- sapply(mutData[idx,mutType_colName], (function(seq){
      base.wt  <- revCompl(gsub("\\..+$", "", seq))
      base.mut <- revCompl(gsub("^.+\\.", "", gsub("\\[.+$", "", seq)))
      seq.wt   <- revCompl(gsub("^.+\\[", "", gsub("\\]\\[.+$", "", seq)))
      seq.mut  <- revCompl(gsub("^.+\\]\\[", "", gsub("\\]$", "", seq)))
      paste(base.wt,".",base.mut, "[", seq.wt, "][", seq.mut, "]", sep = "", collapse = "")
    }))
  }
  
  message(". Done!", appendLF = TRUE)
  message("Final formatting", appendLF = FALSE)
  #Attach the format of interest
  mutData[,mutType_colName] <- sapply(mutData[,mutType_colName], (function(seq){
    base.wt  <- gsub("\\..+$", "", seq)
    base.mut <- gsub("^.+\\.", "", gsub("\\[.+$", "", seq))
    seq.wt   <- gsub("^.+\\[", "", gsub("\\]\\[.+$", "", seq))
    seq.mut  <- gsub("^.+\\]\\[", "", gsub("\\]$", "", seq))
    half.len <- (nchar(seq.wt) - 1 ) / 2
    pre.seq <- substr(seq.wt, 1, half.len)
    post.seq <-substr(seq.wt, half.len + 2, nchar(seq))
    
    if (format == 1) {
      # --> N[N>M]N
      paste(pre.seq, "[", base.wt, ">", base.mut, "]", post.seq, sep = "", collapse = "")
    } else if (format == 2) {
      # --> NN.N>M
      paste(pre.seq, post.seq, ".", base.wt, ">", base.mut, sep = "", collapse = "")
    }  else {
      # --> N.M[NNN][NMN]
      paste(base.wt,".",base.mut, "[", seq.wt, "][", seq.mut, "]", sep = "", collapse = "")
    }
  }))
  
  message(". Done!", appendLF = TRUE)
  
  return(mutData)
}


#' Extract Variants from XvarlinkData.
#' 
#' Extract Variants from data stored as XvarlinkData.
#' 
#' @param xvarLink_data character vector, including mutation data embedded in XvarlinkData
#'
#' @return a data.frame including mutations as well as corresponding reference nucleotides.
#'
#' @details This function is part of the user-interface set of tools included in mutSignatures. This is an exported function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'  }
#'
#' @examples 
#' x <- mutSignatures:::getTestRunArgs("extractXvarlinkData")
#' y <- mutSignatures:::extractXvarlinkData(xvarLink_data = x)
#' y
#' 
#'             
#' @export
extractXvarlinkData <- function(xvarLink_data) {
  tmpVars <- sub("^.*&var=[[:alnum:]]+(,.*)&.*$", "\\1", xvarLink_data)
  tmpVars[!grepl("^,.+", tmpVars)] <- NA
  tmpVars <- strsplit(tmpVars, ",")
  tmpVars <- do.call(rbind, lapply(1:length(tmpVars), function(i){
    if (tmpVars[[i]][1] == "" & length(tmpVars[[i]]) == 5) {
      tmpVars[[i]][2:5]
    } else {
      c(NA, NA, NA, NA)
    }
  }))
  out <- data.frame(chrXvar = base::as.character(tmpVars[,1]),
                    posXvar = base::as.numeric(tmpVars[,2]),
                    refXvar = base::as.character(tmpVars[,3]),
                    mutXvar = base::as.character(tmpVars[,4]),
                    stringsAsFactors = FALSE)
  return(out)
}

#' Filter Single Nucleotide Variants.
#' 
#' Remove entries corresponding to non-SNV, such as insertions and deletions.
#' 
#' @param dataSet data.frame including variant information
#' @param seq_colNames character vector with the names of the columns storing variant data
#' 
#' @return a filtered data.frame only including SNVs
#'
#' @details This function is part of the user-interface set of tools included in mutSignatures. This is an exported function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'  }
#'  
#' @examples 
#' x <- mutSignatures:::getTestRunArgs("filterSNV")
#' nrow(x)
#' y <- mutSignatures::filterSNV(dataSet = x, 
#'                               seq_colNames = c("REF", "ALT"))
#' nrow(y)
#' 
#'                     
#' @export
filterSNV <- function(dataSet, 
                      seq_colNames) 
{
  if (!(is.data.frame(dataSet) | is.matrix(dataSet) ) |
      sum(!seq_colNames %in% colnames(dataSet)) > 0 |
      length(seq_colNames) < 2) {
    stop("Bad input or seq_colNames not found")
  }
  check.tab <- sapply(1:length(seq_colNames), (function(i){
    tmp <- gsub("[[:space:]]", "", dataSet[,seq_colNames[i]])
    toupper(tmp) %in% c("A","C","G","T")
  }))
  toKeep <- apply(check.tab, 1, (function(rw){
    sum(rw) == length(rw)
  }))
  out <- dataSet[toKeep,]
  rownames(out) <- NULL
  return(out)
}

#' Convert Mutation COunts to PerMille Frequencies.
#' 
#' Convert Mutation COunts to frequencies. Typically, a permille frequence is returned.
#' In other words, the resulting number indicates the expected mutation count if the genome hat a 
#' total of 1000 mutations. This way, the MutSignatures analysis will be
#' less biased toward the hyper-mutator samples.
#' 
#' @param countMatrix numeric matrix of mutation counts
#' @param permille ligucal, shall the permille conversion be used instead of the standard frequency
#'
#' @return list including colSums (mutation burden of each sample) and freqs (matrix of frequencies)
#'  
#' @details This function is part of the user-interface set of tools included in mutSignatures. This is an exported function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'  }
#' 
#' @examples 
#' A <- cbind(c(7, 100, 90, 1000), c(1, 3, 5, 9))
#' fA <- mutSignatures::frequencize(A)
#' fA$freqs
#'  
#' @export
frequencize <- function(countMatrix, 
                        permille = TRUE)
{
  out <- list()
  cf <- ifelse(permille, 1000, 1)
  
  out[["colSums"]] <- apply(countMatrix, 2, sum)
  out[["freqs"]] <- cf * apply(countMatrix, 2, (function(clmn){clmn/sum(clmn)}))
  return(out)
}

#' Obtain COSMIC mutational Signatures.
#' 
#' Obtain latest mutational Signature definitions from COSMIC. FOr more info, please visit: \url{https://cancer.sanger.ac.uk/cosmic/}
#' 
#' @param forceUseMirror logical, shall signatures be downloaded from a mirror. Set to TRUE if the COSMIC
#' server goes down.
#' @param asMutSign logical, shall data be returned as a mutSignatures-class object. Defaults to TRUE
#'
#' @return an object storing COSMIC mutational signature data
#'
#' @details This function is part of the user-interface set of tools included in mutSignatures. This is an exported function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'  }
#' 
#' @importFrom utils read.delim read.csv
#' @export
getCosmicSignatures <- function(forceUseMirror = FALSE, asMutSign = TRUE)
{
  mutType.labels <- c("A[C>A]A", "A[C>A]C", "A[C>A]G", "A[C>A]T", "A[C>G]A", "A[C>G]C",
                      "A[C>G]G", "A[C>G]T", "A[C>T]A", "A[C>T]C", "A[C>T]G", "A[C>T]T",
                      "A[T>A]A", "A[T>A]C", "A[T>A]G", "A[T>A]T", "A[T>C]A", "A[T>C]C",
                      "A[T>C]G", "A[T>C]T", "A[T>G]A", "A[T>G]C", "A[T>G]G", "A[T>G]T",
                      "C[C>A]A", "C[C>A]C", "C[C>A]G", "C[C>A]T", "C[C>G]A", "C[C>G]C",
                      "C[C>G]G", "C[C>G]T", "C[C>T]A", "C[C>T]C", "C[C>T]G", "C[C>T]T",
                      "C[T>A]A", "C[T>A]C", "C[T>A]G", "C[T>A]T", "C[T>C]A", "C[T>C]C",
                      "C[T>C]G", "C[T>C]T", "C[T>G]A", "C[T>G]C", "C[T>G]G", "C[T>G]T",
                      "G[C>A]A", "G[C>A]C", "G[C>A]G", "G[C>A]T", "G[C>G]A", "G[C>G]C",
                      "G[C>G]G", "G[C>G]T", "G[C>T]A", "G[C>T]C", "G[C>T]G", "G[C>T]T",
                      "G[T>A]A", "G[T>A]C", "G[T>A]G", "G[T>A]T", "G[T>C]A", "G[T>C]C",
                      "G[T>C]G", "G[T>C]T", "G[T>G]A", "G[T>G]C", "G[T>G]G", "G[T>G]T",
                      "T[C>A]A", "T[C>A]C", "T[C>A]G", "T[C>A]T", "T[C>G]A", "T[C>G]C",
                      "T[C>G]G", "T[C>G]T", "T[C>T]A", "T[C>T]C", "T[C>T]G", "T[C>T]T",
                      "T[T>A]A", "T[T>A]C", "T[T>A]G", "T[T>A]T", "T[T>C]A", "T[T>C]C",
                      "T[T>C]G", "T[T>C]T", "T[T>G]A", "T[T>G]C", "T[T>G]G", "T[T>G]T")
  cosmic.url <- "http://cancer.sanger.ac.uk/cancergenome/assets/signatures_probabilities.txt"
  
  #initialize variable
  my_fullW <- NULL
  
  if (!forceUseMirror) {
    my_fullW <- tryCatch({TMP <- suppressWarnings(read.delim(cosmic.url, header = TRUE));
    rownames(TMP) <- TMP$Somatic.Mutation.Type;
    TMP <- TMP[,grep("Signature", colnames(TMP))];
    TMP},
    error = function(e) NULL)
  } 
  
  # private mirror
  if (is.null(my_fullW)) {
    private.mirror.url <- "http://www.labwizards.com/rlib/mutSignatures/cosmic.signatures.csv"
    my_fullW <- tryCatch({suppressWarnings(read.csv(private.mirror.url, 
                                                    header = TRUE, as.is = TRUE, row.names = 1))},
                         error = function(e2) { NULL })
  }
  if (is.null(my_fullW)) {
    message("An error occurred!")
    return(NULL)
  } else {
    if (sum(mutType.labels %in% rownames(my_fullW)) == length(mutType.labels)) {
      obj2rt <- my_fullW[mutType.labels,]
      colnames(obj2rt) <- sub("Signature", "COSMIC", colnames(obj2rt))
      if(asMutSign)
        obj2rt <- mutSignatures::as.mutation.signatures(obj2rt)
      
      return(obj2rt)
    } else {
      message("An error occurred!")
      return(NULL)
    }   
  }
}

#' Import Mutation data from VCF files.
#' 
#' Import Mutation data from VCF files. The columns are expected in the following order:
#' c("CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT"). Optional columns 
#' can be present to inform about sample ID or other info.
#' 
#' @param vcfFiles character vector, includes the names of the VCF files to be analyzed
#' @param sampleNames character vector with alternative sample names (otherwise, 
#' VCF file names will be ised to identify each sample).
#'
#' @return a concatenated data.frame with all variants found in the input VCF files. Sample
#' ID is stored in the "SAMPLEID" column. 
#'
#' @details This function is part of the user-interface set of tools included in mutSignatures. This is an exported function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'  }
#'
#' @importFrom utils read.delim  
#' @export
importVCFfiles <- function(vcfFiles, sampleNames = NULL){
  
  my.colnames <- c("CHROM", "POS", "ID", "REF", "ALT", "QUAL", 
                   "FILTER", "INFO", "FORMAT", "XTR1", "XTR2", "XTR3")
  
  if (is.null(sampleNames) | length(sampleNames) != length(vcfFiles)){
    
    bypassNames <- paste("sample", 1:length(vcfFiles), sep = ".")
    
    sampleNames <- vcfFiles
    sampleNames <- sub("\\.vcf$", "", sub("^.*(\\\\|/)", "", tolower(sampleNames)))
    
    sampleNames[sampleNames == ""] <- bypassNames[sampleNames == ""]
    
  }
  out <- sapply(1:length(vcfFiles), (function(j){
    x <- vcfFiles[j]
    if (!file.exists(x)) {
      NULL
    } else {
      tmpVCF <- read.delim(x, comment.char = '#', header = F, stringsAsFactors = F)
      
      for (i in 1:ncol(tmpVCF)) {
        colnames(tmpVCF)[i] <- my.colnames[i]      
      }
      tmpVCF$SAMPLEID <- sampleNames[j]
      tmpVCF
    }
  }), simplify = FALSE, USE.NAMES = TRUE)
  out <- do.call(rbind, out)
  #names(out) <- sub("\\.vcf$", "", names(out))
  return(out)
}





##
### Plotting
##

#' Plot Signature Exposure Profiles.
#' 
#' Build a barplot to visualize exposures to mutation signatures.
#' 
#' @param mutCount a data.frame including mutation Counts
#' @param top integer, max number of samples to include in the plot
#' 
#' @return a plot (ggplot2 object)
#'
#' @details This function is part of the user-interface set of tools included in mutSignatures. This is an exported function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'  }
#'  
#' @importFrom ggplot2 ggplot aes geom_bar theme theme_minimal element_blank element_line unit element_text scale_y_continuous   
#' @export 
plotSignExposures <- function(mutCount, top = 50) {
  # avoid NOTEs
  count <- NULL
  feature <- NULL
  Signature <- NULL
  
  sampleLabs <- rownames(mutCount)
  rownames(mutCount) <- NULL
  
  nuSmplOrder <- order(apply(mutCount, 1, sum), decreasing = TRUE)
  mutCount <- mutCount[nuSmplOrder,]
  if (!is.null(sampleLabs))
    sampleLabs <- sampleLabs[nuSmplOrder]
  
  rownames(mutCount) <- NULL
  
  if (is.null(top)) {
    mutDF <- table2df(dataMatrix = mutCount)
  } else if(is.na(base::as.numeric(top[1]))){
    mutDF <- table2df(dataMatrix = mutCount)
  } else if (top[1] > nrow(mutCount) | top[1] < 2) {
    mutDF <- table2df(dataMatrix = mutCount)
  } else {
    mutDF <- table2df(dataMatrix = mutCount[1:top,])
    if (!is.null(sampleLabs))
      sampleLabs <- sampleLabs[1:top]
  }
  
  mutDF$sample <- 100000 + base::as.numeric(base::as.character(mutDF$sample))
  mutDF$sample <- base::as.character(mutDF$sample)
  
  tryCatch({mutDF$inputLabel <- do.call(c, lapply(sampleLabs, 
                                                  rep, times = ncol(mutCount)))}, 
           error = function(e) {
             message("damn")})
  
  mutDF$Signature <- factor(mutDF$feature, levels = rev(colnames(mutCount)))
  bp <- ggplot2::ggplot(data=mutDF, ggplot2::aes(x=sample, y=count, fill=Signature)) +
    ggplot2::geom_bar(stat="identity")
  bp <- bp + ggplot2::theme_minimal() + 
    ggplot2::theme(axis.ticks.x = ggplot2::element_blank(), 
          axis.text.x = ggplot2::element_blank(),
          panel.grid.major = ggplot2::element_blank(),
          panel.grid.minor = ggplot2::element_blank()) +
    ggplot2::theme(axis.line.y = ggplot2::element_line(colour = "black", size = 0.75),
          axis.line.x = ggplot2::element_line(colour = "black", size = 0.75),
          axis.ticks.y = ggplot2::element_line(colour = "black", size = 1),
          axis.ticks.length = ggplot2::unit(x = 6, "points"),
          plot.title = ggplot2::element_text(hjust = 0.5)) +
    ggplot2::scale_y_continuous(expand=c(0,0),
                       limits = c(0, 1.2 * max(apply(mutCount, 1, function(rx) sum(rx, na.rm = TRUE)))))
  
  return(bp)
}

#' Plot Mutation Signature Profiles.
#' 
#' Build a barplot to visualize the relative abundance of mutation counts in a mutational
#' signature or biological sample of interest.
#' 
#' @param mutCounts data.frame including mutation types counts or frequencies, such as a
#' data.frame of mutation counts from samples, or mutation type frequencies from a mutational signature.
#' @param mutLabs character vector, labels to be used for the mutation types
#' @param freq logical, shall frequency be plotted rather than counts. Defaults to TRUE
#' @param ylim values used for ylim. Defaults to "auto" (ylim automatically set)
#' @param ylab string, used as y-axis title. Defaults to "Fraction of Variants"
#' @param xlab string, used as x-axis title. Defaults to "Sequence Motifs"
#' @param xaxis_cex numeric, cex value for the xaxis
#' @param cols character vector, indicates the colors to be used for the bars. It typically requires 6 colors.
#' @param main string, tutle of the plot. Defaults to "MutType Profile"
#' 
#' @return NULL. A plot is printed to the active device.
#' 
#' @details This function is part of the user-interface set of tools included in mutSignatures. This is an exported function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'  }
#' 
#' @importFrom graphics barplot box axis mtext
#' @export 
plotMutTypeProfile <- function(mutCounts,
                               mutLabs,
                               freq = TRUE,
                               ylim = "auto",
                               ylab = "Fraction of Variants",
                               xlab = "Sequence Motifs",
                               xaxis_cex = 0.475,
                               cols = c("#4eb3d3", "#040404", "#b30000", "#bdbdbd", "#41ab5d", "#dd3497"),
                               main = "MutType Profile")
{
  
  # As of 2020-11-01, this is not working, no solution
  # switch to default family
  font.family <- "mono"
  font.family <- ""
  
  mutLabs <- toupper(mutLabs)
  if (!sum(regexpr("^(A|C|G|T)(\\[)(A|C|G|T)(>)(A|C|G|T)(\\])(A|C|G|T)$", toupper(mutLabs)) > 0) == length(mutLabs)) {
    message("mutTypes are in a non-standard format (ie, not Sanger 'A[G>A]T' style)")
    altPlot <- 1
  } else {
    altPlot <- 0
  }
  
  
  if (length(mutCounts) != length(mutLabs))
    stop("Mutation Type Labels and Mutation Counts do not match!") 
  
  mutCounts[is.na(mutCounts)] <- 0
  
  if (freq == TRUE) {
    if (ylim[1] == "auto")
      ylim <- c(0,0.2)
    freqs <- mutCounts/sum(mutCounts, na.rm = TRUE)
  } else {
    if (ylim[1] == "auto")
      ylim = c(0,(1.05*max(mutCounts)))
    freqs <- mutCounts
  }
  
  tinyMut <- gsub("\\](.*)$", "",
                  gsub("^(.*)\\[", "", toupper(mutLabs)))
  
  # Order the frequencies
  first.out <- lapply(sort(unique(tinyMut)), (function(mt){
    idxKeep <- which(tinyMut == mt)
    tmp.order <- order(mutLabs[idxKeep])
    out <- freqs[idxKeep][tmp.order]
    names(out) <- mutLabs[idxKeep][tmp.order]
    out
  }))
  names.first.out <- sort(unique(tinyMut))
  
  # Add a spacer
  second.out <- lapply(first.out, (function(vct){
    c(vct, 0)
  }))
  
  # Wrap together
  third.out <- do.call(c, second.out)
  third.out <- third.out[-length(third.out)]
  
  # Define color scheme
  col.out <- lapply(1:length(first.out), (function(ii){
    rep(cols[ii], (length(first.out[[ii]]) + 1))
  }))
  col.out <- do.call(c, col.out)
  col.out <- col.out[-length(col.out)]
  
  third.out[third.out>max(ylim)] <- max(ylim)
  third.shortLab <- gsub("\\[([[:alpha:]]>[[:alpha:]])\\]","-", names(third.out))
  
  xpos <- graphics::barplot(third.out,
                            col = col.out,
                            ylim = ylim,
                            axes = FALSE,
                            names.arg = FALSE,
                            ylab = ylab,
                            border = NA,
                            main = main)
  
  xpos.out <- lapply(1:length(first.out), (function(ii){
    c(rep(ii, length(first.out[[ii]])),0)
  }))
  xpos.out <- do.call(c, xpos.out)
  xpos.out <- xpos.out[-length(xpos.out)]
  
  #(max(ylim)*0.0075*0.2)
  graphics::box()
  graphics::axis(side = 1, tick = FALSE, 
                 hadj = 1, cex.axis = xaxis_cex, 
                 pos = (max(ylim) * 0.035), family = font.family,
                 at = xpos,
                 labels = third.shortLab,
                 las = 2)
  
  my.y <- seq(min(ylim), max(ylim), length.out = 4)
  if (max(ylim) > 1.75) {
    my.y.labs <- format(round(my.y, digits = 0))
  } else {
    my.y.labs <- format(round(my.y, digits = 2))
  }
  
  graphics::axis(side = 2, tick = -0.005, 
                 at = my.y, labels = my.y.labs, 
                 las = 1, cex.axis = 0.75)
  
  zz <- unique(xpos.out)
  zz <- zz[zz>0]
  lab.xx <- sapply(zz, (function(i){
    base::mean(xpos[,1][xpos.out == i]) 
  }))
  graphics::mtext(names.first.out, side = 1, line = 1.5, at = lab.xx, cex = 0.8, col = cols)
  
  graphics::mtext(xlab, side = 1, line = 3.0, at = base::mean(xpos[,1]), cex = 1)
  # done! No return, just a plot!
}

#' Run a Preliminary Process Assess analysis.
#' 
#' This function is an attempt to analyze the relationship between error and k. In other words, 
#' the goal of prelimProcessAssess is to visualize the reduction in the error/residuals 
#' 
#' @param input a mutationCounts-class object
#' @param maxProcess integer, maximum k to test  
#' @param approach sting, "counts" or "freq"
#' @param plot logical, shall a plot be printed to the active device
#' @param verbose logical, info about the ongoing analysis be messaged/printed to console
#' 
#' @return a data.frame showing the estimated total error with respect to the range of k values
#'  
#' @details This function is part of the user-interface set of tools included in mutSignatures. This is an exported function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'  }
#' 
#' 
#' @importFrom stats median
#' @importFrom graphics plot axis lines points title box 
#'   
#' @export
prelimProcessAssess <- function(input,
                                maxProcess = 6,
                                approach = "counts",
                                plot = TRUE,
                                verbose = TRUE) 
{
  tmpParams <- setMutClusterParams(num_processesToExtract = 1, 
                                   approach = approach,
                                   stopconv = 10000,
                                   niter = 100000,
                                   thresh_removeWeakMutTypes = 0.01,
                                   thresh_removeLastPercent =0.025,
                                   num_totIterations = 10,
                                   num_parallelCores = 1,
                                   debug = FALSE,
                                   logIterations = "full")
  
  tmpParams <- coerceObj(x = tmpParams, to = "list")
  input_mutCounts <- coerceObj(x = input, to = "matrix")
  
  if (ncol(input@counts) < 2) {
    stop("It is not possible to analyze mtational signatures in a single sample. 
         Please, include two or more samples before running this analysis.")    
  }
  
  if (tmpParams$approach != "counts") {
    tmpFRQ.input <- frequencize(input_mutCounts)
    tmpParams$approach <- "counts"
    input_mutCounts <- tmpFRQ.input$freqs
  }
  bckgrnd.removed.mutCounts <- removeWeak(input_mutCounts,
                                          tmpParams)
  bckgrnd.removed.mutset <- bckgrnd.removed.mutCounts$removed.mutset
  bckgrnd.removed.mutCounts <- bckgrnd.removed.mutCounts$output.mutCounts
  total.mutationTypes <- nrow(bckgrnd.removed.mutCounts)
  total.samples <- ncol(bckgrnd.removed.mutCounts)
  tmpParams$num_totIterations <- 1
  
  # Calc initial (max) error
  medianMaxErr <- sum(base::t(sapply(1:nrow(bckgrnd.removed.mutCounts), (function(i){
    ((stats::median(bckgrnd.removed.mutCounts[i,]) - bckgrnd.removed.mutCounts[i,]))^2
  }))))
  
  # Message
  if(verbose)
    message("Preliminary Mutational Process Assessment: ", appendLF = FALSE)
  
  outRes <- lapply(1:maxProcess, (function(i){
    tmpParams$num_processesToExtract <- i
    tmpRes <- suppressWarnings(suppressMessages(
      extractSignatures(mutCountMatrix = bckgrnd.removed.mutCounts,
                        params = tmpParams,
                        bootStrap = FALSE)))
    
    #apply(tmpRes$mutCounts.reconstructed, 2, sum)
    #mutCounts.reconstructed
    #tmpErr <- sum((tmpRes$mutCounts.errors)^2)
    tmpErr <- sum((bckgrnd.removed.mutCounts - tmpRes$mutCounts.reconstructed) ^ 2)
    
    if(verbose)
      message(".", appendLF = FALSE)
    tmpErr
  }))
  if(verbose)
    message("", appendLF = TRUE)
  
  err.points <- c(medianMaxErr, do.call(c, outRes))
  err.points <- (-1) * (err.points - max(err.points))
  err.points <- err.points / max(err.points, na.rm = TRUE)
  
  if (plot) {
    graphics::plot(err.points ~ c(0:(length(err.points) - 1)), 
                   type = "n", las = 1, axes = FALSE,
                   ylab = "", xlab = "", main = "Preliminary Mutational Process Assessment")
    graphics::axis(side = 1, at = NULL, cex = 0.65, font = 3)
    graphics::axis(side = 2, at = seq(0, 1, by = 0.2), cex = 0.65, font = 3, 
                   labels = format(seq(1, 0, by = -0.2), digits = 2, nsmall = 2), las = 1)
    graphics::lines(c(0:(length(err.points) - 1)), err.points, lwd = 1.5, col = "red2")
    graphics::points(c(0:(length(err.points) - 1)), err.points, pch = 19, col = "gray30")
    graphics::title(xlab="Num of Signatures", line=2.2, cex.lab=1.2, font = 2)
    graphics::title(ylab="Error (% vs. Median)", line=3.1, cex.lab=1.2, font = 2)
    graphics::box()
  }
  
  return(data.frame(numProcess = c(0:(length(err.points) - 1)),
                    percentErr = (1 - err.points),
                    stringsAsFactors = TRUE))
}


#' Match Mutational Signatures.
#' 
#' Analyze the similarity between mutational signatures from different analyses/runs.
#' THis function can be helpful to match de novo extracted signatures with previously
#' described signatures (such as COSMIC), or to reveal signatures that can be 
#' identified with alternative NMF algorithms, or that may be due to an algorithm bias.
#' 
#' @param mutSign a mutationSignatures object
#' @param reference a mutationSignatures object. If NULL, COSMIC signatures will be retrieved
#' @param method distance method used to compute similarity (1 - distance)
#' @param threshold signal (similarity) upper threshold for maxing the signal
#' @param plot logical, shall a heatmap be plotted
#' 
#' @return list, including distance matrix and a heatmap plot
#'    
#' @details This function is part of the user-interface set of tools included in mutSignatures. This is an exported function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'  }
#'    
#' @importFrom proxy dist
#' @importFrom ggplot2 ggplot aes geom_tile scale_y_discrete scale_x_discrete theme_minimal labs scale_fill_gradient2 theme element_text element_blank margin 
#' @importFrom stats quantile
#'   
#' @export
matchSignatures <- function(mutSign, 
                            reference = NULL, 
                            method = 'cosine',
                            threshold = 0.5,
                            plot = "TRUE")
{
  # avoid NOTEs
  newSign <- NULL
  refSign <- NULL
  
  if(is.null(reference)){
    my.ref <- getCosmicSignatures()
  } else if (class(reference) == "mutationSignatures") {
    my.ref <- reference
  } else {
    stop("reference: wrong data type. Provide NULL or a 'mutationSignatures'-class object")
  }
  if(class(mutSign) != "mutationSignatures") {
    stop("mutSign: wrong data type. Provide a 'mutationSignatures'-class object")
  }
  if (sum(!getMutationTypes(mutSign) %in% getMutationTypes(my.ref)) != 0 |
      sum(!getMutationTypes(my.ref) %in% getMutationTypes(mutSign)) != 0)
    stop("non-compatible mutation types")
  
  # Coerce to tabular objects
  my.ref2 <- my.ref@mutationFreq
  if (ncol(my.ref2) < 2) { stop("You need to provide at least to reference signatures")}
  dimnames(my.ref2) <- list(my.ref@mutTypes[,1], 
                            my.ref@signatureId[,1])
  
  mutSign2 <- mutSign@mutationFreq
  if (ncol(mutSign2) < 2) { stop("You need to provide at least two signatures to compare to the references")}
  dimnames(mutSign2) <- list(mutSign@mutTypes[,1], 
                             mutSign@signatureId[,1])
  
  my.ref <- my.ref2
  mutSign <- mutSign2[rownames(my.ref), ]
  
  #Debug
  #message("Debug - proceeded! :-)")
  
  distMat <- sapply(1:ncol(mutSign), function(i){
    TMP <- cbind(tmpSig=mutSign[,i], my.ref)
    TMPdist <- proxy::dist(TMP, method = method, by_rows = FALSE)
    TMPdist[1:ncol(my.ref)]
  })
  
  dimnames(distMat) <- list(colnames(my.ref), colnames(mutSign))
  p <- NULL
  nuDF <- NULL
  
  if(plot) {
    
    nuDF <- do.call(rbind, lapply(1:nrow(distMat), function(i){
      do.call(rbind, lapply(1:ncol(distMat), function(j){
        data.frame(newSign = colnames(distMat)[j],
                   refSign = rownames(distMat)[i],
                   dist = distMat[i,j],
                   stringsAsFactors = FALSE)
      }))
    }))
    
    fillLims <- c(round(min(nuDF$dist)), round(max(nuDF$dist)))
    if (fillLims[1] != fillLims[2]) {
      nuDF$dist[nuDF$dist >= fillLims[2]] <- fillLims[2]
      nuDF$dist[nuDF$dist <= fillLims[1]] <- fillLims[1]
    } else if (fillLims[1] == 0 && fillLims[2] == 0) {
      fillLims <- c(0, 1)
    } else {
      msg00 <- paste0("Ouch! An error has occurred, this is embarassing! \n", 
                      "Please, try selecting more signatures to compare, and run the function again. \n", 
                      "If the error persists, please email the maintainer ", 
                      "with a reproducible example of what has happened. Thank you.")
      
      stop(msg00)
    }
    
    # Quick cosine fix
    nuDF$dist[nuDF$dist < fillLims[1]] <- fillLims[1]
    nuDF$dist[nuDF$dist > fillLims[2]] <- fillLims[2]
    
    
    p <- ggplot2::ggplot(nuDF, ggplot2::aes(y=newSign, x=refSign))
    p <- p + ggplot2::geom_tile(ggplot2::aes(fill=dist), width=.875, height=.875)
    p <- p + ggplot2::scale_y_discrete(limits=base::rev(colnames(distMat)))
    p <- p + ggplot2::scale_x_discrete(limits=rownames(distMat))
    
    p <- p + ggplot2::theme_minimal(base_size = 11) + ggplot2::labs(x = "", y = "")
    p <- p + ggplot2::labs(title = "Signature Comparison")
    #p <- p + scale_fill_gradient2(low = "#f40000", mid = "#ffe9e9", high = "white",
    #                              midpoint = as.numeric(quantile(distMat, probs = 0.1, na.rm = TRUE)),
    #                              guide = "colourbar", 
    #                              name = paste(method, "\ndistance", sep = ""))
    #p <- p + scale_fill_gradient(low = "red2", high = "white",
    #                             guide = "colourbar", 
    #                             name = paste(method, "\ndistance", sep = ""))
    
    nuMid <- base::as.numeric(stats::quantile(distMat, probs = 0.2, na.rm = TRUE))
    #nuMid2 <- base::as.numeric(mean(distMat, na.rm = TRUE))
    nuMax <- fillLims
    # Try adjust
    #nuMid <- max(c(nuMid, 0.5), na.rm = TRUE)
    
    if (!is.null(threshold)) {
      if (!is.na(base::as.numeric(threshold[1]))){
        nuMid <- base::as.numeric(threshold[1])
      }  
    } else if (fillLims[1] == 0 && fillLims[2] == 1) {
      if (nuMid < 0.425) { nuMid <- 0.425}
    }
    
    p <- p + ggplot2::scale_fill_gradient2(low = "#f40000", mid = "#ffe9e9", high = "white",
                                           midpoint = nuMid,
                                           guide = "colourbar", 
                                           name = paste(method, "\ndistance", sep = ""), 
                                           limits = fillLims)
    p <- p + ggplot2::theme(legend.position = c('right'), # position the legend in the upper left
                            legend.justification = 0, # anchor point for legend.position.
                            legend.text = ggplot2::element_text(size = 9, color = 'gray10'),
                            legend.title = ggplot2::element_text(size = 11),
                            plot.title = ggplot2::element_text(size = 13, face = 'bold', color = 'gray10', hjust = 0.5),
                            axis.text = ggplot2::element_text(face = 'bold'),
                            panel.grid.major.y = ggplot2::element_blank(),
                            panel.grid.major.x = ggplot2::element_blank(),
                            axis.text.x=ggplot2::element_text(angle = 90, hjust = 1 , vjust = 0.5,
                                                              margin=ggplot2::margin(-5,0,-15,0)),
                            axis.text.y=ggplot2::element_text(hjust = 1, vjust = 0.5,
                                                              margin = ggplot2::margin(0,3,0,0)))
    #print(p)
  }
  out <- list()
  out[["distanceMatrix"]] <- distMat
  out[["distanceDataFrame"]] <- nuDF
  out[["plot"]] <- p
  
  return(out)
}


## -----------------------

#' Process VCF data.
#' 
#' Check, annotate, and process variants imported from a list of VCF files, so that it can be used
#' to run a mutational signature analysis
#' 
#' 
#' @param vcfData data.frame, includes mutation data from 2 or more samples 
#' @param BSGenomeDb a BSGenomeDb-class object storing the genomic sequences and coordinates
#' @param chr_colName string, name of the column including the chromosome (seq) name. Defaults to "CHROM"
#' @param pos_colName string, name of the column including the genomic coordinates/position. Defaults to "POS"
#' @param ref_colName string, name of the column including the reference nucleotide. Defaults to "REF"
#' @param alt_colName string, name of the column including the variant nucleotide. Defaults to "ALT"
#' @param sample_colName string, name of the column including the sample ID. Can be NULL
#' @param nucl_contextN integer, span (in nucelotides) of the context around the variants. Defaults to 3
#' @param verbose logical, shall information about the ongoing analysis be printed to console
#'
#' @return a data.frame including processed variants from VCF files
#' 
#' @details This function is part of the user-interface set of tools included in mutSignatures. This is an exported function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'  }
#'      
#' @export
processVCFdata <- function(vcfData, 
                           BSGenomeDb,
                           chr_colName = "CHROM",
                           pos_colName = "POS",
                           ref_colName = "REF",
                           alt_colName = "ALT",
                           sample_colName = NULL,
                           nucl_contextN = 3,
                           verbose = TRUE)
{
  # make sure
  if (verbose)
    message("Processing VCF data: ", appendLF = FALSE)
  
  #Initialize
  reprepInput <- NULL
  
  # Fill reprepInput
  if (!is.null(sample_colName)){
    if (length(sample_colName) == 1 & sample_colName[1] %in% colnames(vcfData)){
      
      # compute how many unique samples
      unique.SID <- unique(vcfData[,sample_colName])
      reprepInput <- lapply(unique.SID, function(iID) {
        TMP <- vcfData[vcfData[,sample_colName] == iID, ]
        rownames(TMP)
        TMP
      })
    } else {
      stop("Bad input")
    }
  } else {
    #initialize input as list
    reprepInput <- list(vcfData)
  }
  
  # Triple check
  if(is.null(reprepInput))
    stop("Bad input")
  
  # Now loop
  bigOUT <- lapply(1:length(reprepInput), (function(jj){
    finalOut <- tryCatch({
      if(verbose)
        message(paste("[", jj, "/", length(reprepInput), "]", sep = ""), appendLF = F)
      
      tmpVCF <- suppressMessages({filterSNV(dataSet = reprepInput[[jj]],
                                            seq_colNames = c(ref_colName, alt_colName))})
      if (verbose)
        message(".", appendLF = FALSE)
      
      tmpVCF <- suppressMessages({attachContext(mutData = tmpVCF,
                                                chr_colName = chr_colName,
                                                start_colName = pos_colName,
                                                end_colName = pos_colName,
                                                nucl_contextN = 3,
                                                BSGenomeDb = BSGenomeDb)})
      if (verbose)
        message(".", appendLF = F)
      
      tmpVCF <- suppressMessages({removeMismatchMut(mutData = tmpVCF,
                                                    refMut_colName = ref_colName,
                                                    context_colName = "context",
                                                    refMut_format = "N")})
      if (verbose)
        message(".", appendLF = F)
      
      tmpVCF <- suppressMessages({attachMutType(mutData = tmpVCF,
                                                ref_colName = ref_colName,
                                                var_colName = alt_colName,
                                                var2_colName = alt_colName, 
                                                context_colName = "context")})
      if (verbose)
        message(".", appendLF = F)
      
      tmpVCF
    }, error = function(e) NA)
  }))
  
  bigOUT <- do.call(rbind, bigOUT)
  
  if(verbose) {
    message("", appendLF = T)
    message("Done!", appendLF = T)
  }
  return(bigOUT)
}

#' Remove Mismatched Mutations.
#' 
#' Remove mutation types that do not match the expected nucleotidic context.
#' 
#' @param mutData data.frame including mutation data, as well as the nucleotide
#' context around the mutated position
#' @param refMut_colName string, name of the column storing REF and VAR data. Defaults to "N>N"
#' @param context_colName string, name of the column storing nucleotide context around the variant.
#' @param refMut_format string, format of mutation types. Defaults to "N>N"
#' 
#' @return filtered data.frame
#' 
#' @details This function is part of the user-interface set of tools included in mutSignatures. This is an exported function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'  }
#' 
#' @examples 
#' x <- mutSignatures:::getTestRunArgs("removeMismatchMut")
#' y <- mutSignatures:::removeMismatchMut(x, 
#'                                        refMut_colName = "REF", 
#'                                        context_colName = "context", 
#'                                        refMut_format = "N")
#' y
#' 
#' @export
removeMismatchMut <- function(mutData,
                              refMut_colName = "mutation",
                              context_colName = "context",
                              refMut_format = "N>N")
{
  # Validate input data and fields in mutData
  if(!((is.data.frame(mutData) | is.matrix(mutData) ) &
       sum(c(refMut_colName, context_colName) %in% colnames(mutData)) == 2))
    stop ("Issue with the input dataset. Make sure to feed in a data.frame or
          a matrix and double check the name of the fields pointing to chromosome
          name, start and end positions")
  if (! refMut_format %in% c("N>N", "N"))
    stop("Unrecognized refMut_format")
  
  output <- data.frame(mutData, stringsAsFactors = FALSE, row.names = NULL)
  colnames(output) <- colnames(mutData)
  if (refMut_format == "N>N") {
    output$mutSite.dnaSeq.Ref <- substr(mutData[,refMut_colName], 1, 1)
    output$mutSite.dnaSeq.Mut <- substr(mutData[,refMut_colName], 3, 3)
    refMut_colName <- "mutSite.dnaSeq.Ref"
  }
  
  pos.to.extr <- (0.5 * (nchar(output[,context_colName]) - 1)) + 1
  keep.id <- output[,refMut_colName] == substr(output[,context_colName], pos.to.extr, pos.to.extr)
  if (sum( keep.id == FALSE) > 0)
    message(paste("Removing", sum( keep.id == FALSE), "rows because of mismatch"))
  
  output <- output[keep.id, ]
  rownames(output) <- NULL
  return(output)
}

#' Resolve Mutation Signatures.
#' 
#' If Mutation signatures are known (such as COSMIC signatures), we can
#' estimate the contribution of each signature in different samples.
#' This functions used a matrix of mutation counts and a matrix of mutation
#' signatures, and estimates Exposures to Mutational Signature of each sample.
#' 
#' @param mutCountData object storing mutation counts
#' @param signFreqData object storing mutation signatures
#' @param byFreq logical, shall exposures be estimated on per_mille normalized counts
#'
#' @return a list of objects including data about exposures to mutational signatures
#'  
#' @details This function is part of the user-interface set of tools included in mutSignatures. This is an exported function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'  }
#' 
#' 
#' @examples 
#'     x <- mutSignatures:::getTestRunArgs("resolveMutSignatures")
#'     y <- mutSignatures::resolveMutSignatures(mutCountData = x$muts, signFreqData = x$sigs)
#'     y
#' 
#' 
#' @export
resolveMutSignatures <- function(mutCountData, 
                                 signFreqData, 
                                 byFreq = TRUE )
{
  # process expected objects
  if (class(mutCountData) == "mutationCounts")
    mutCountData <- coerceObj(x = mutCountData, to = "data.frame")
  
  if (class(signFreqData) == "mutationSignatures")
    signFreqData <- coerceObj(x = signFreqData, to = "data.frame")
  
  if (!(sum(!rownames(mutCountData) %in% rownames(signFreqData)) == 0 &
        sum(! rownames(signFreqData) %in% rownames(mutCountData)) == 0)) {
    stop (paste("There is an issue with the mutTypes.",
                "MutTypes in the mutType Count Matrix",
                "have to match those in the signature",
                "Matrix... check rownames()"))
  }
  mutCountData0 <- mutCountData[rownames(signFreqData),]
  
  # Fix 1 sample only
  if (!is.data.frame(resolveMutSignatures)) {
    TMP <- data.frame(mutCountData0)
    rownames(TMP) <- rownames(signFreqData)
    colnames(TMP) <- colnames(mutCountData)
    mutCountData <- TMP
  } else {
    mutCountData <- mutCountData0
  }
  
  if (byFreq) {
    full.Y <- apply(mutCountData, 2, (function(clmn){
      1000 * clmn / sum(clmn)
    }))
  } else {
    full.Y <- base::as.matrix(mutCountData)    
  }
  
  
  # record sum counts
  mutSums <- apply(mutCountData, 2, sum)
  
  # make sure we have frequencies
  my.signatures <- apply(signFreqData, 2, (function(clmn){
    clmn / sum(clmn)
  }))
  X <- base::as.matrix(my.signatures)
  
  # initialize results collector
  out <- list()
  
  res <- custom_fcnnls(mutCounts = full.Y, signatures = X)

  beta.hat <- data.frame(base::t(res$coef / ifelse (byFreq, 1000, 1)), stringsAsFactors = FALSE)
  
  colnames(beta.hat) <- colnames(signFreqData)
  rownames(beta.hat) <- colnames(mutCountData)
  out$results <- list()
  out$coeffs <- list()
  out$coeffs$beta.hat <- beta.hat
  out$coeffs$unexplained.mutNum <- round((1 - apply(beta.hat, 1, sum)) * mutSums, digits = 0)
  out$coeffs$unexplained.mutFrac <- out$coeffs$unexplained.mutNum  / mutSums
  
  if (byFreq) {
    for (i in 1:nrow(beta.hat)) {
      beta.hat[i,] <- beta.hat[i,] * mutSums[i]
    }
  }
  
  out$results$count.result <- mutSignatures::as.mutsign.exposures(x = beta.hat, samplesAsCols = FALSE)
  out$results$freq.result <- mutSignatures::as.mutsign.exposures(do.call(rbind, lapply(1:nrow(beta.hat), (function(jjj){
    beta.hat[jjj,] / sum(beta.hat[jjj,] )
  }))), samplesAsCols = FALSE)
  
  out$results$fitted <- res$fitted
  out$results$residuals <- res$residuals
  
  return(out)
}

#' Compute Reverse Complement sequences.
#' 
#' Transform a DNA sequence into its reverse-complement sequence.
#' ALternatively, only the reverse sequence (or only the complement) can be returned.
#' 
#' @param DNAseq character vector of DNA sequences
#' @param rev logical, shall the reverse sequence be computed
#' @param compl logical, shall the complementary sequence be computed
#'
#' @return a character vector including transformed DNA sequences
#'
#' @details This function is part of the user-interface set of tools included in mutSignatures. This is an exported function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'  }
#' 
#' @examples 
#' A <- c("TAACCG", "CTCGA", "CNNA")
#' mutSignatures::revCompl(A)
#' 
#' @export
revCompl <- function (DNAseq, 
                      rev = TRUE, 
                      compl = TRUE)
{
  if (is.character(DNAseq)) {
    resultVect <- sapply(DNAseq, (function(seqString) {
      mySeq <- toupper(seqString)
      if (rev == TRUE) {
        mySeq <- paste(sapply(1:nchar(mySeq), (function(i) {
          pos <- nchar(mySeq) + 1 - i
          substr(mySeq, pos, pos)
        })), collapse = "")
      }
      if (compl == TRUE) {
        mySeq <- paste(sapply(1:nchar(mySeq), (function(i) {
          aBase <- substr(mySeq, i, i)
          complBase <- c("A", "T", "C", "G", "N", "A")
          names(complBase) <- c("T", "A", "G", "C", "N",
                                "U")
          returnBase <- complBase[aBase]
          if (is.na(returnBase)) {
            stop("Bad input")
          }
          returnBase
        })), collapse = "")
      }
      return(mySeq)
    }))
    names(resultVect) <- NULL
    return(resultVect)
  }
  else {
    warning("Bad input")
  }
}

#' Table Mutation Types by Sample.
#' 
#' Prepare a molten data.frame starting from a mutation count matrix.
#' Mutation types (rows) are countes for each sample (cols). The results are returned in a
#' 3-column data.frame.
#' 
#' @param dataMatrix a numeric matrix including mutation counts
#' @param rowLab string, name for the column that will be storing row IDs, typically sample IDs
#' @param colLab string, name for the column that will be storing column IDs, typically sample IDs
#' @param valueLab string, name for the column that will be storing mutation count values
#' 
#' @return data.frame storing mutation counts by sample
#'
#' @details This function is part of the user-interface set of tools included in mutSignatures. This is an exported function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'  }
#' 
#' @examples 
#' A <- cbind(`A>G`=c(5,10),`A>T`=c(3,20),`A>C`=c(15,0))
#' rownames(A) = c("Smpl1", "Smpl2")
#' mutSignatures::table2df(A)
#' 
#' @export
table2df <- function(dataMatrix, 
                     rowLab = "sample", 
                     colLab = "feature", 
                     valueLab = "count") 
{
  #
  if(!is.null(colnames(dataMatrix))) {
    names.X <- colnames(dataMatrix)
  } else {
    names.X <- 1:ncol(dataMatrix)
  }
  if(!is.null(rownames(dataMatrix))) {
    names.Y <- rownames(dataMatrix)
  } else {
    names.Y <- 1:nrow(dataMatrix)
  }
  #
  tmp <- do.call(rbind, lapply(1:nrow(dataMatrix), (function(i){
    t(sapply(1:ncol(dataMatrix), (function(j){
      c(names.Y[i], names.X[j], dataMatrix[i,j])
    })))
  })))
  tmp <- data.frame(tmp, stringsAsFactors = FALSE)
  if (is.numeric(dataMatrix[1,1]))
    tmp[,3] <- base::as.numeric(base::as.character(tmp[,3]))
  rownames(tmp) <- NULL
  colnames(tmp) <- c(rowLab, colLab, valueLab)
  return(tmp)
}


#' Count Mutation Types.
#' 
#' Analyze a table (data.frame) including mutation counts. Count and aggregate Count Mutation Types. 
#' If multiple samples are included in the same table, results are aggregated by samples.
#' 
#' @param mutTable data.frame including mutation types and an optional sample ID column
#' @param mutType_colName string, name of the column storing mutTypes 
#' @param sample_colName string, name of the column storing sample identifiers. Can be NULL
#'
#' @return a mutationCounts-class object
#'  
#' @details This function is part of the user-interface set of tools included in mutSignatures. This is an exported function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'  }
#' 
#' @examples 
#' x <- mutSignatures:::getTestRunArgs("countMutTypes")
#' x
#' y <- mutSignatures::countMutTypes(mutTable = x, 
#'                                   mutType_colName = "mutation", 
#'                                   sample_colName = "sample")
#' y
#' 
#' 
#' @export
countMutTypes <- function (mutTable, mutType_colName = "mutType", sample_colName = NULL) 
{
  if (!(is.data.frame(mutTable) | is.matrix(mutTable))) 
    stop("Bad input")
  if (!mutType_colName %in% colnames(mutTable)) 
    stop("mutType field not found")
  if (!is.null(sample_colName)) {
    if (!sample_colName %in% colnames(mutTable)) 
      stop("sample_colName field not found")
  }
  mutType.labels <- c("A[C>A]A", "A[C>A]C", "A[C>A]G", "A[C>A]T", 
                      "A[C>G]A", "A[C>G]C", "A[C>G]G", "A[C>G]T", "A[C>T]A", 
                      "A[C>T]C", "A[C>T]G", "A[C>T]T", "A[T>A]A", "A[T>A]C", 
                      "A[T>A]G", "A[T>A]T", "A[T>C]A", "A[T>C]C", "A[T>C]G", 
                      "A[T>C]T", "A[T>G]A", "A[T>G]C", "A[T>G]G", "A[T>G]T", 
                      "C[C>A]A", "C[C>A]C", "C[C>A]G", "C[C>A]T", "C[C>G]A", 
                      "C[C>G]C", "C[C>G]G", "C[C>G]T", "C[C>T]A", "C[C>T]C", 
                      "C[C>T]G", "C[C>T]T", "C[T>A]A", "C[T>A]C", "C[T>A]G", 
                      "C[T>A]T", "C[T>C]A", "C[T>C]C", "C[T>C]G", "C[T>C]T", 
                      "C[T>G]A", "C[T>G]C", "C[T>G]G", "C[T>G]T", "G[C>A]A", 
                      "G[C>A]C", "G[C>A]G", "G[C>A]T", "G[C>G]A", "G[C>G]C", 
                      "G[C>G]G", "G[C>G]T", "G[C>T]A", "G[C>T]C", "G[C>T]G", 
                      "G[C>T]T", "G[T>A]A", "G[T>A]C", "G[T>A]G", "G[T>A]T", 
                      "G[T>C]A", "G[T>C]C", "G[T>C]G", "G[T>C]T", "G[T>G]A", 
                      "G[T>G]C", "G[T>G]G", "G[T>G]T", "T[C>A]A", "T[C>A]C", 
                      "T[C>A]G", "T[C>A]T", "T[C>G]A", "T[C>G]C", "T[C>G]G", 
                      "T[C>G]T", "T[C>T]A", "T[C>T]C", "T[C>T]G", "T[C>T]T", 
                      "T[T>A]A", "T[T>A]C", "T[T>A]G", "T[T>A]T", "T[T>C]A", 
                      "T[T>C]C", "T[T>C]G", "T[T>C]T", "T[T>G]A", "T[T>G]C", 
                      "T[T>G]G", "T[T>G]T")
  custPatt01 <- "^(A|C|G|T)\\[(A|C|G|T)>(A|C|G|T)\\](A|C|G|T)$"
  if (sum(regexpr(custPatt01, mutTable[, mutType_colName]) > 
          0) != length(mutTable[, mutType_colName])){
    message("Non-standard format... MutTypes will be inferred.")
    
    mutType.labels <- sort(unique(mutTable[, mutType_colName]))
    
  } else {
    # fix reverse-complement if needed
    # only works for the standard three-nucletide system
    idx.to.fix <- which(!mutTable[, mutType_colName] %in% mutType.labels)
    if (length(idx.to.fix) > 0) {
      tmp <- mutTable[, mutType_colName][idx.to.fix]
      corrected.mutTypes <- sapply(1:length(tmp), (function(i) {
        out <- c(revCompl(substr(tmp[i], 7, 7)), "[", revCompl(substr(tmp[i], 
                                                                      3, 3)), ">", revCompl(substr(tmp[i], 5, 5)), 
                 "]", revCompl(substr(tmp[i], 1, 1)))
        paste(out, collapse = "", sep = "")
      }))
      mutTable[, mutType_colName][idx.to.fix] <- corrected.mutTypes
    }
  }
  
  if (sum(!mutTable[, mutType_colName] %in% mutType.labels) > 0) 
    stop("Problem with the mutType... Please check the input")
  if (is.null(sample_colName)) {
    my.mutTypes <- mutTable[, mutType_colName]
    out.1 <- sapply(mutType.labels, (function(mtt) {
      sum(my.mutTypes == mtt)
    }))
    out.1 <- data.frame(cbind(sample = out.1))
  } else {
    unique.cases <- unique(mutTable[, sample_colName])
    out.1 <- lapply(unique.cases, (function(csid) {
      tmp.df <- mutTable[mutTable[, sample_colName] == 
                           csid, ]
      out.3 <- sapply(mutType.labels, (function(mtt) {
        sum(tmp.df[, mutType_colName] == mtt)
      }))
      out.3 <- data.frame(cbind(out.3))
      colnames(out.3) <- csid
      out.3
    }))
    out.1 <- data.frame(do.call(cbind, out.1), stringsAsFactors = FALSE)
    tryCatch({
      colnames(out.1) <- unique.cases
    }, error = function(e) NULL)
  }
  fOUT <- new(Class = "mutationCounts", 
              x = out.1, 
              muts = data.frame(mutTypes = rownames(out.1),
                                stringsAsFactors = FALSE), 
              samples = data.frame(ID = colnames(out.1),
                                   stringsAsFactors = FALSE))
  return(fOUT)
}

#' Sort Data by Mutation Type.
#' 
#' Reorder a mutationSignatures, mutationCounts, data.frame, or matrix object by
#' sorting entries by mutation type.
#' 
#' @param x an object storing mutation count data
#'
#' @return an object of the same class as x, with entries sorted according to mutation types.
#'  
#' @details This function is part of the user-interface set of tools included in mutSignatures. This is an exported function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'  }
#' 
#' 
#' @examples 
#' A <- data.frame(S1=1:5, S2=5:1, S3=1:5)
#' rownames(A) <- c("A[A>T]G", "A[C>G]G", "T[A>T]G", "T[C>G]T", "T[C>G]G")
#' mutSignatures::sortByMutations(A)
#' 
#' @export
sortByMutations <- function(x) {
  
  if (class(x) == "mutationSignatures") {
    outClass <- "mutationSignatures"
    x1 <- coerceObj(x = x, to = "data.frame")
  } else if (class(x) == "mutationCounts") {
    outClass <- "mutationCounts"
    x1 <- coerceObj(x = x, to = "data.frame")
  } else if (class(x) %in% c("matrix","data.frame")){
    outClass <- "data.frame"
    x1 <- x
  } else {
    stop ("Bad input")
  }
  
  # Get rownames == mutation names
  if (is.null(rownames(x1)))
    stop("Bad input")
  
  aRN <- rownames(x1)
  aRN <- sapply(sort(unique(sub("^.*\\[(.*)\\].*$", "\\1", aRN))), 
                function(mt) {
                  sort(aRN[grepl(mt, aRN)])
                }, USE.NAMES = FALSE, simplify = FALSE) 
  aRN <- do.call(c, aRN)
  ox <- x1[aRN,]
  
  # Format and return
  if (outClass == "data.frame") {
    return(ox)
  } else if (outClass == "mutationSignatures") {
    return(mutSignatures::as.mutation.signatures(ox))
  } else if (outClass == "mutationCounts") {
    return(mutSignatures::as.mutation.counts(ox))
  }
}

#' Simplify Mutational Signatures.
#' 
#' This function is useufl when working with non-standard muation types, such as
#' tetra-nnucleotide mutation types or mutation types with long/complex context.
#' THe goal of this function is to aggregated together mutations that can be simplified 
#' because of a common mutation core.
#' For example, mutation types AA[A>T]A, TA[A>T]A, CA[A>T]A, and GA[A>T]A can be simplified to the
#' core tri-nucleotide mutation A[A>T]A. THis function identifies mergeable mutation types, 
#' and aggregates the corresponding counts/freqs.
#' 
#' @param x a mutationSignatures-class object
#' @param asMutationSignatures logical, shall the results be returned as a mutationSignatures-class object
#'
#' @return  object including simplified mutational signatures data
#'  
#' @details This function is part of the user-interface set of tools included in mutSignatures. This is an exported function.
#' 
#' @author Damiano Fantini, \email{damiano.fantini@@gmail.com}
#' 
#' @references 
#' More information and examples about mutational signature analysis can be found here:
#' \enumerate{
#'   \item \bold{GitHub Repo}: \url{https://github.com/dami82/mutSignatures/}
#'   \item \bold{More info and examples} about the mutSignatures R library: \url{https://www.data-pulse.com/dev_site/mutsignatures/}
#'   \item \bold{Sci Rep paper}, introducing mutS: \url{https://www.nature.com/articles/s41598-020-75062-0/}
#'   \item \bold{Oncogene paper}, Mutational Signatures Operative in Bladder Cancer: \url{https://www.nature.com/articles/s41388-017-0099-6}
#'  }
#' 
#' @examples 
#' A <- data.frame(Sig1=1:5, Sig2=5:1, Sig3=1:5)
#' A <- A/apply(A, 2, sum)
#' rownames(A) <- c("AA[C>A]A", "CA[C>A]A", "TA[C>A]A", "TA[C>G]A", "A[C>G]AT")
#' A <- mutSignatures::as.mutation.signatures(A)
#' mutSignatures::simplifySignatures(x = A)
#' 
#' 
#' @export
simplifySignatures <- function(x, asMutationSignatures = TRUE) {
  if(class(x) != "mutationSignatures")
    stop("Bad input")
  curMT <- getMutationTypes(x)
  if(sum(!grepl("[ACGT]\\[[ACGT]>[ACGT]\\][ACGT]", curMT)) > 0)
    stop("Mutation Types are not compatible with simplifySignatures")
  
  # Start by defined tri-nucleotides
  mutType.labels <- c("A[C>A]A", "A[C>A]C", "A[C>A]G", "A[C>A]T", "A[C>G]A", "A[C>G]C",
                      "A[C>G]G", "A[C>G]T", "A[C>T]A", "A[C>T]C", "A[C>T]G", "A[C>T]T",
                      "A[T>A]A", "A[T>A]C", "A[T>A]G", "A[T>A]T", "A[T>C]A", "A[T>C]C",
                      "A[T>C]G", "A[T>C]T", "A[T>G]A", "A[T>G]C", "A[T>G]G", "A[T>G]T",
                      "C[C>A]A", "C[C>A]C", "C[C>A]G", "C[C>A]T", "C[C>G]A", "C[C>G]C",
                      "C[C>G]G", "C[C>G]T", "C[C>T]A", "C[C>T]C", "C[C>T]G", "C[C>T]T",
                      "C[T>A]A", "C[T>A]C", "C[T>A]G", "C[T>A]T", "C[T>C]A", "C[T>C]C",
                      "C[T>C]G", "C[T>C]T", "C[T>G]A", "C[T>G]C", "C[T>G]G", "C[T>G]T",
                      "G[C>A]A", "G[C>A]C", "G[C>A]G", "G[C>A]T", "G[C>G]A", "G[C>G]C",
                      "G[C>G]G", "G[C>G]T", "G[C>T]A", "G[C>T]C", "G[C>T]G", "G[C>T]T",
                      "G[T>A]A", "G[T>A]C", "G[T>A]G", "G[T>A]T", "G[T>C]A", "G[T>C]C",
                      "G[T>C]G", "G[T>C]T", "G[T>G]A", "G[T>G]C", "G[T>G]G", "G[T>G]T",
                      "T[C>A]A", "T[C>A]C", "T[C>A]G", "T[C>A]T", "T[C>G]A", "T[C>G]C",
                      "T[C>G]G", "T[C>G]T", "T[C>T]A", "T[C>T]C", "T[C>T]G", "T[C>T]T",
                      "T[T>A]A", "T[T>A]C", "T[T>A]G", "T[T>A]T", "T[T>C]A", "T[T>C]C",
                      "T[T>C]G", "T[T>C]T", "T[T>G]A", "T[T>G]C", "T[T>G]G", "T[T>G]T")
  
  # Needs sorting
  input <- coerceObj(x = x, to = "data.frame")
  
  # sigNames <- x@signatureId$ID
  sigNames <- getSignatureIdentifiers(x)
  
  EXPLD <- sapply(mutType.labels, (function(pat){
    tpat <- sub("\\[", "\\\\[", 
                sub("\\]", "\\\\]", pat))
    input[grepl(tpat, rownames(input)),]
  }), simplify = FALSE, USE.NAMES = TRUE)
  
  OUT <- base::as.data.frame(base::t(sapply(EXPLD, function(xi) {
    base::as.numeric(apply(xi, 2, sum, na.rm = TRUE))
  })), row.names = mutType.labels)
  colnames(OUT) <- sigNames
  
  if (asMutationSignatures){
    return(mutSignatures::as.mutation.signatures(OUT))
  }
  
  altOut <- lapply(1:ncol(OUT), (function(j){
    curSGN <- base::as.numeric(OUT[,j])
    iSig <- data.frame(curSGN, row.names = mutType.labels)
    colnames(iSig) <- sigNames[j]
    
    AttDF <- do.call(rbind, sapply(mutType.labels, function(mmd) {
      zi <- EXPLD[[mmd]]
      nuNames <- gsub("[ACGT]\\[[ACGT]>[ACGT]\\][ACGT]", "...", rownames(zi))
      nuNums <- base::as.data.frame(rbind(zi[,j]))
      names(nuNums) <- nuNames
      nuNums
    }, simplify = FALSE, USE.NAMES = TRUE))
    
    # out
    cbind(iSig, AttDF)
  }))
  names(altOut) <- sigNames
  return(altOut)
}

Try the mutSignatures package in your browser

Any scripts or data that you put into this service are public.

mutSignatures documentation built on Nov. 9, 2020, 9:06 a.m.