R/CIMseqSwarm.R

Defines functions .processSwarm

#'@include All-classes.R
NULL

#' CIMseqSwarm
#'
#' Multiplet deconvolution.
#'
#'
#' @name CIMseqSwarm
#' @rdname CIMseqSwarm
#' @param singlets CIMseqSinglets; A CIMseqSinglets object.
#' @param multiplets CIMseqMultiplets; A CIMseqMultiplets object.
#' @param maxiter integer; The maximum optimization iterations.
#' @param swarmsize integer; The number of swarm particals.
#' @param nSyntheticMultiplets Numeric; Value indicating the number of synthetic
#'  multiplets to generate during deconvolution.
#' @param seed integer; Seed to set before running.
#' @param norm logical; Indicates if the sum of fractions should equal 1.
#' @param report logical; Indicates if additional reporting from the
#'   optimization should be included.
#' @param reportRate integer; If report is TRUE, the iteration interval for 
#' which a report should be generated.
#' @param permute logical; indicates if genes should be permuted before
#'  deconvolution. For use with permutation testing.
#'  @param singletIdx list; Singlet indexes to be used to choose singlets and
#'  synthesize synthetic multiplets. Facilitates using the same synthetic set 
#'  e.g. with repeated runs or permutation. 
#' @param fractions matrix; The deconvolution results.
#' @param costs numeric; The costs after optimization.
#' @param convergence character; The convergence output from \link[pso]{psoptim}.
#' One value per multiplet.
#' @param stats tbl_df; The stats output from \link[pso]{psoptim}
#' @param arguments list; Arguments passed to the CIMseqSwarm function.
#' @param singletIdx list; Indexes indicating singlets that were subset to 
#' synthesize synthetic multiplets. Facilitates recreation of the synthetic 
#' multiplets downstream.
#' @param swarmInit matrix; Initiation positions for the swarm.
#' @param psoControl list; Additional arguments to pso.2.0 (psoptim) function.
#' @param x CIMseqSwarm; A CIMseqSwarm object.
#' @param object CIMseqSwarm; A CIMseqSwarm to show.
#' @param n character; Data to extract from CIMseqSwarm object.
#' @param .Object Internal object.
#' @param ... additional arguments to pass on.
#' @return CIMseqSwarm output.
#' @author Jason T. Serviss
#' @examples
#'
#' #use demo data
#'
NULL

#' @rdname CIMseqSwarm
#' @export

setGeneric("CIMseqSwarm", function(
  singlets, multiplets, ...
){
  standardGeneric("CIMseqSwarm")
})

#' @importFrom future.apply future_lapply
#' @importFrom pso psoptim
#' @importFrom matrixStats rowSums2 rowMeans2
#' @importFrom dplyr "%>%" bind_rows mutate
#' @importFrom purrr map map_dbl
#' @importFrom tibble tibble as_tibble add_column
#' @importFrom tidyr unnest
#' @rdname CIMseqSwarm
#' @export

setMethod("CIMseqSwarm", c("CIMseqSinglets", "CIMseqMultiplets"), function(
  singlets, multiplets,
  maxiter = 10, swarmsize = 150, nSyntheticMultiplets = 200, seed = 11, 
  norm = TRUE, report = FALSE, reportRate = NA,
  permute = FALSE, singletIdx = NULL, swarmInit = NULL, psoControl = list(), ...
){
    
  #put a check here to make sure all slots in the spUnsupervised object are
  #filled. This should actually be regulated by the class definition BUT you
  #should probably double check that it works as expected via unit tests.
  
  #check for same genes in singlets counts and multiplets counts
  
  #input and input checks
  sngCPM <- getData(singlets, "counts.cpm")
  mulCPM <- getData(multiplets, "counts.cpm")
    
  #calculate fractions
  classes <- getData(singlets, "classification")
  fractions <- rep(NA, length(unique(classes)))
    
  #subset top genes for use with optimization
  #sholud also check user input selectInd
  selectInd <- getData(multiplets, "features")
  
  mul <- matrix(
    mulCPM[selectInd, ],
    ncol = ncol(mulCPM),
    dimnames = list(rownames(mulCPM)[selectInd], colnames(mulCPM))
  )
  sng <- matrix(
    sngCPM[selectInd, ],
    ncol = ncol(sngCPM),
    dimnames = list(rownames(sngCPM)[selectInd], colnames(sngCPM))
  )
  
  #setup args for optimization
  control <- list(maxit = maxiter, s = swarmsize)
  control <- c(control, psoControl)
  if(report) {
    control <- c(control, list(
      trace = 1, REPORT = reportRate, trace.stats = TRUE
    ))
  }
  
  #run optimization
  to <- if(ncol(mul) == 1) {to <- 1} else {to <- dim(mul)[2]}
  
  #setup singlets for synthetic multiplets synthesis
  set.seed(seed)
  if(permute) {sng <- .permuteGenes(sng)}
  
  if(is.null(singletIdx)) {
    singletIdx <- purrr::map(1:nSyntheticMultiplets, ~sampleSinglets(classes))
  }
  
  singletSubset <- appropriateSinglets(singlets, singletIdx, selectInd)
  t.singletSubset <- t(singletSubset)
  
  #deconvolution
  opt.out <- future_lapply(
    X = 1:to, FUN = function(i) {
      .optim.fun(
        i, fractions = fractions, multiplets = mul,
        singletSubset = t.singletSubset, n = nSyntheticMultiplets,
        control = control, swarmInit = swarmInit, ...
      )
  })
  
  #process and return results
  cn <- sort(unique(classes))
  rn <- colnames(mul)
  
  new(
    "CIMseqSwarm",
    fractions = .processSwarm(opt.out, cn, rn, norm),
    costs = setNames(map_dbl(opt.out, 2), colnames(mul)),
    convergence = setNames(.processConvergence(opt.out), colnames(mul)),
    stats = if(report) {.processStats(opt.out, cn, rn)} else {tibble()},
    singletIdx = map(singletIdx, as.integer),
    arguments = tibble(
      maxiter = maxiter, swarmsize = swarmsize,
      nSyntheticMultiplets = nSyntheticMultiplets, seed = seed, norm = norm,
      report = report, reportRate = reportRate, features = list(selectInd),
      permute = permute, psoControl = list(psoControl)
    )
  )
})

.processSwarm <- function(opt.out, cn, rn, norm) {
  par <- map(opt.out, 1) %>%
    do.call("rbind", .)
  
  if(norm) {par <- par * 1 / rowSums(par)}
  colnames(par) <- sort(cn)
  rownames(par) <- rn
  par
}

.processConvergence <- function(opt.out) {
  convergence <- map_dbl(opt.out, 4)
  convergenceKey <- c(
  "Maximal number of function evaluations reached.",
  "Maximal number of iterations reached.",
  "Maximal number of restarts reached.",
  "Maximal number of iterations without improvement reached."
  )
  convergenceKey[convergence]
}

.processStats <- function(opt.out, cn, rn) {
  position <- NULL
  stats <- map(opt.out, 6)

  tibble(
    sample = rn,
    iteration = map(stats, function(x) x$it),
    error = map(stats, function(x) x$error),
    fitness = map(stats, function(x) x$f),
    position = map(stats, function(x) {
      map(x$x, function(y) t(y) * 1/colSums(y))
    })
  ) %>%
    unnest() %>%
    mutate(position = map(position, function(x) {
      x %>%
      as.data.frame() %>%
      setNames(cn) %>%
      as_tibble() %>%
      add_column(swarmMemberID = 1:nrow(.), .before = 1)
    }))
}

.optim.fun <- function(
  i, fractions, multiplets, singletSubset,
  n, control, swarmInit, ...
){
  oneMultiplet <- round(multiplets[, i])
  pso.2.0(
    par = fractions, fn = calculateCost, oneMultiplet = oneMultiplet,
    singletSubset = singletSubset, n = n, lower = 0, upper = 1,
    control = control, swarmInit = swarmInit, ...
  )
}

.permuteGenes <- function(counts){
  t(apply(counts, 1, sample))
}

.costCalculationR <- function(oneMultiplet, syntheticMultiplets) {
  dpois <- NULL
  dpois(round(oneMultiplet), lambda = syntheticMultiplets) %>%
  matrixStats::rowMeans2() %>%
  log10() %>%
  ifelse(is.infinite(.) & . < 0, -323.0052, .) %>%
  sum() %>%
  `-` (.)
}

#' appropriateSinglets
#'
#' Sets up the singlets in long form for C++ calculation.
#'
#'
#' @name appropriateSinglets
#' @rdname appropriateSinglets
#' @param singlets A CIMseqSinglets object.
#' @param idx numeric; Singlet indices to subset. Generated with the 
#' \code{\link{sampleSinglets}} function. THIS IS ZERO BASED since upstream 
#' calculations are done in C++.
#' @param features numeric; Indices of selected features used for deconvolution.
#'  If null, all genes are used.
#' @param ... additional arguments to pass on
#' @return Appropriated singlets.
#' @author Jason T. Serviss
#' @examples
#'
#' classes <- getData(CIMseqSinglets_test, "classification")
#' idx <- purrr::map(1:10, ~sampleSinglets(classes))
#' out <- singletSubset <- appropriateSinglets(CIMseqSinglets_test, idx, 1:20)
#'
NULL

#' @rdname appropriateSinglets
#' @importFrom purrr map
#' @importFrom dplyr "%>%"
#' @importFrom stringr str_replace
#' @importFrom readr parse_factor
#' @export

appropriateSinglets <- function(
  singlets, idx, features = NULL
){
  classes <- getData(singlets, "classification")
  sngCPM <- getData(singlets, "counts.cpm")
  if(is.null(features)) features <- 1:nrow(sngCPM)
  singlets <- matrix(
    sngCPM[features, ],
    ncol = ncol(sngCPM),
    dimnames = list(rownames(sngCPM)[features], colnames(sngCPM))
  )
  
  sub <- idx %>%
    purrr::map(., ~subsetSinglets(singlets, .x)) %>%
    purrr::map(., function(x) {rownames(x) <- 1:nrow(x); x}) %>%
    do.call("rbind", .) %>%
    .[order(as.numeric(rownames(.))), ]
  
  rownames(sub) <- paste(
    rep(rownames(singlets), each = length(idx)), 
    1:length(idx), sep = "."
  )
  colnames(sub) <- sort(unique(classes))
  sub
}

.backTransform <- function(singletSubset, n) {
  cn <- paste(rep(colnames(singletSubset), each = n), 1:n, sep = "_")
  genes <- str_replace(rownames(singletSubset), "(.*)\\..*", "\\1")
  rn <- parse_factor(
    genes,
    levels = unique(genes)
  )
  
  out <- split(singletSubset, rn) %>%
    map(~matrix(.x, nrow = 1, dimnames = list(NULL, cn))) %>%
    do.call("rbind", .)
    
  rownames(out) <- unique(genes)
  out
}

#' adjustFractions
#'
#' Transforms continious fractions into a binary martix where 1 indicates a
#' connection between classes.
#'
#' @name adjustFractions
#' @rdname adjustFractions
#' @param singlets CIMseqSinglets; A CIMseqSinglets object.
#' @param multiplets CIMseqMultiplets; A CIMseqMultiplets object.
#' @param swarm CIMseqSwarm or matrix; A CIMseqSwarm object or a matrix of 
#' fractions.
#' @param binary logical; Indicates if adjusted fractions should be returned as
#' binary values.
#' @param theoretical.max integer; See \code{\link{estimateCells}}.
#' @param ... additional arguments to pass on
#' @return Adjusted fractions matrix.
#' @author Jason T. Serviss
#' @examples
#'
#' #use demo data
#'
#'
NULL

#' @rdname adjustFractions
#' @importFrom tibble tibble
#' @importFrom dplyr "%>%" filter full_join group_by summarize pull
#' @importFrom stats median setNames
#' @importFrom rlang .data
#' @export

adjustFractions <- function(
  singlets, multiplets, swarm, binary = TRUE, theoretical.max = Inf, ...
){
  medianCellNumber <- sampleType <- estimatedCellNumber <- NULL
  if(!is.matrix(swarm)) {
    fractions <- getData(swarm, "fractions")
  } else {
    fractions <- swarm
  }
  
  #calculate median cell number per singlet class
  cnc <- cellNumberPerClass(singlets, multiplets) %>%
    {setNames(pull(., medianCellNumber), pull(., class))}
  
  cnc <- cnc[match(colnames(fractions), names(cnc))]
  if(!identical(names(cnc), colnames(fractions))) stop("cnc name mismatch")
  
  #calculate cell number per multiplet
  cnm <- estimateCells(singlets, multiplets, theoretical.max) %>%
    filter(sampleType == "Multiplet") %>%
    {setNames(pull(., estimatedCellNumber), pull(., sample))}
  
  cnm <- cnm[match(rownames(fractions), names(cnm))]
  if(!identical(names(cnm), rownames(fractions))) stop("cnm name mismatch")
  
  #adjust fractions
  frac.renorm <- t(t(fractions) / cnc)
  adjusted <- round(frac.renorm * cnm)
  if(binary) adjusted[adjusted > 0] <- 1
  return(adjusted)
}

#' calculateEdgeStats
#'
#' Calculates the score (obs / expected) and p-value for all possible 
#' connections.
#'
#' @name calculateEdgeStats
#' @rdname calculateEdgeStats
#' @param swarm A CIMseqSwarm object.
#' @param singlets A CIMseqSinglets object.
#' @param multiplets A CIMseqMultiplets object.
#' @param theoretical.max integer; See \code{\link{estimateCells}}.
#' @param ... additional arguments to pass on
#' @return CIMseqSwarm connection weights and p-values.
#' @author Jason T. Serviss
#' @examples
#'
#' output <- calculateEdgeStats(
#' CIMseqSwarm_test, CIMseqSinglets_test, CIMseqMultiplets_test
#' )
#'
NULL

#' @rdname calculateEdgeStats
#' @importFrom stats ppois
#' @importFrom dplyr filter mutate
#' @importFrom purrr map_int map2_dbl map2_int
#' @importFrom matrixStats rowSums2 colSums2
#' @importFrom rlang .data
#' @importFrom stats p.adjust
#' @export

calculateEdgeStats <- function(
  swarm, singlets, multiplets, theoretical.max = Inf, ...
){
  adj <- adjustFractions(
    singlets, multiplets, swarm, binary = TRUE, 
    theoretical.max = theoretical.max
  )

  #calcluate weight
  edges <- .calculateWeight(adj)

  #calculate p-value
  out <- .calculateP(edges, adj)

  return(out)
}

.calculateWeight <- function(adj, ...) {
  from <- to <- NULL
  
  expand.grid(
    from = colnames(adj), to = colnames(adj),
    stringsAsFactors = FALSE
  ) %>%
    filter(from != to) %>% #doesn't calculate self edges
    mutate(weight = map2_int(from, to, function(f, t) {
      sub <- adj[, colnames(adj) %in% c(f, t)]
      length(which(rowSums2(sub) == 2))
    }))
}

.calculateP <- function(
  edges, mat, ...
){
  data <- to.freq <- p.adjust <- pval <- NULL
  from <- to <- weight <- expected.edges <- NULL
  
  #calculate expected edges
  class.freq <- colSums2(mat) #multiplet estimated cell type frequency
  names(class.freq) <- colnames(mat)
  
  edges <- edges %>%
    nest(data = -from) %>%
    mutate(to.freq = map2(from, data, function(f, d) {
      freq <- class.freq[names(class.freq) != f]
      rel <- freq / sum(freq)
      rel[pull(d, to)]
    })) %>%
    mutate(expected.edges = map2(to.freq, data, ~sum(pull(.y, weight)) * .x)) %>%
    unnest(cols = c("data", "to.freq", "expected.edges"))
    
  
  #calculate score = observed / expected
  edges <- mutate(edges, score = weight / expected.edges)
  
  #calculate p-value
  edges %>%
    mutate(pval = map_dbl(1:nrow(.),
      ~phyper(
        q = edges$weight[.x], 
        m = class.freq[edges$to[.x]], 
        n = sum(class.freq) - class.freq[edges$to[.x]], 
        k = sum(edges$weight[edges$from == edges$from[.x]]), 
        lower.tail = FALSE
      )
    )) %>% 
    mutate(pval = p.adjust(pval, 'fdr'))
}

.calculateP.poisson <- function(
  edges, mat, ...
){
  from <- to <- jp <- weight <- expected.edges <- NULL
  #calculate total number of edges
  total.edges <- sum(edges[, "weight"])
  
  #calculate expected edges
  class.freq <- colSums2(mat) #multiplet estimated cell type frequency
  names(class.freq) <- colnames(mat)
  
  allProbs <- expand.grid(
    from = names(class.freq), to = names(class.freq), 
    stringsAsFactors = FALSE
  ) %>%
    filter(from != to) %>%
    mutate(edges = map2_dbl(from, to, function(f, t) {
      abs <- class.freq[names(class.freq) != t]
      rel <- abs / sum(abs)
      as.numeric(rel[f]) * class.freq[t]
    }))
  
  edges <- mutate(edges, expected.edges = map2_dbl(from, to, function(f, t){
    allProbs[allProbs$from == f & allProbs$to == t, "expected"]
  }))
  
  #calculate p-value based on observed (weight) vs. expected (expected.edges)
  edges <- mutate(edges, pval = ppois(
    q = weight, lambda = expected.edges, lower.tail = FALSE
  ))
  
  #calculate score = observed / expected
  edges <- mutate(edges, score = weight / expected.edges)
  edges
}

#' calcResiduals
#'
#' Calculates the residuals for each gene and multiplet after deconvolution
#' based on the CIMseqSwarm results.
#'
#' @name calcResiduals
#' @rdname calcResiduals
#' @param singlets A CIMseqSinglets object.
#' @param multiplets A CIMseqMultiplets object.
#' @param swarm A CIMseqSwarm object.
#' @param include character;  If residuals should only be calculated for a 
#' subset of the multiplets, include their names here. Default is to calculate 
#' for all multiplets.
#' @param fractions matrix; A matrix of fractions. By default the fractions in 
#' the CIMseqSwarm object are used.
#' @param ... additional arguments to pass on
#' @return Residuals (add more description).
#' @author Jason T. Serviss
#'
NULL

#' @rdname calcResiduals
#' @export
#' @importFrom purrr reduce set_names
#' @importFrom future.apply future_lapply
#' @importFrom dplyr bind_cols
#' @importFrom tibble add_column
#' @importFrom tidyr gather

calcResiduals <- function(
  singlets, multiplets, swarm, include = NULL, fractions = NULL, ...
){
  residual <- gene <- NULL
  if(is.null(fractions)) {
    frac <- getData(swarm, "fractions") 
  } else {
    frac <- fractions
  }
  selectInd <- getData(multiplets, "features")
  n <- getData(swarm, "arguments")[['nSyntheticMultiplets']]
  idx <- getData(swarm, "singletIdx")
  sm <- appropriateSinglets(singlets, idx, selectInd)
  
  mulCPM <- getData(multiplets, "counts.cpm")
  if(!is.null(include) & length(include) > 1) mulCPM <- mulCPM[, include]
  if(!is.null(include) & length(include) == 1) {
    mulCPM <- matrix(
      mulCPM[, include], 
      nrow = nrow(mulCPM), 
      dimnames = list(rownames(mulCPM), include))
  }
  
  multiplets <- matrix(
    mulCPM[selectInd, ],
    ncol = ncol(mulCPM),
    dimnames = list(rownames(mulCPM)[selectInd], colnames(mulCPM))
  )
  
  future_lapply(
    X = 1:ncol(multiplets), FUN = function(i) {
      as.numeric(frac[rownames(frac) == colnames(multiplets)[i], ]) %>%
        adjustAccordingToFractions(., sm) %>%
        multipletSums() %>%
        vecToMat(nrow(multiplets), n) %>% #double check that this is happening as expected
        calculateCostDensity(round(multiplets[, i]), .) %>%
        calculateLogRowMeans() %>%
        fixNegInf() %>%
        multiply_by(-1) %>%
        matrix_to_tibble(drop = TRUE)
  }) %>%
    reduce(., bind_cols) %>%
    set_names(colnames(multiplets)) %>%
    add_column(gene = rownames(multiplets), .before = 1) %>%
    gather(sample, residual, -gene)
}

#' getMultipletsForEdge
#'
#' Returns the names of the multiplets that are associated with an edge.
#'
#'
#' @name getMultipletsForEdge
#' @rdname getMultipletsForEdge
#' @param swarm CIMseqSwarm; A CIMseqSwarm object.
#' @param singlets CIMseqSinglets; A CIMseqSinglets object.
#' @param multiplets CIMseqMultiplets; A CIMseqMultiplets object.
#' @param edges data.frame; Edges of interest. Edges are indicated by the nodes
#' they connect with one node in column one and the other node in column 2.
#' @param theoretical.max integer; See \code{\link{estimateCells}}.
#' @param ... additional arguments to pass on
#' @return If the edges argument only includes one row, a vector of multiplet
#'    names is returned. If several edges are interogated a list is returned
#'    with one element per edge containing the names of the multiplets.
#' @author Jason T. Serviss
#' @examples
#'
#' output <- getMultipletsForEdge(
#' CIMseqSwarm_test, 
#' CIMseqSinglets_test, 
#' CIMseqMultiplets_test, 
#' data.frame("A375", "HOS")
#' )
#'
NULL

#' @rdname getMultipletsForEdge
#' @export

setGeneric("getMultipletsForEdge", function(
  swarm, ...
){
  standardGeneric("getMultipletsForEdge")
})

#' @rdname getMultipletsForEdge
#' @importFrom rlang .data
#' @importFrom dplyr mutate_if
#' @importFrom purrr map_dfr
#' @importFrom matrixStats rowSums2
#' @importFrom tibble tibble
#' @export

setMethod("getMultipletsForEdge", "CIMseqSwarm", function(
  swarm, singlets, multiplets, edges, theoretical.max = Inf, ...
){
  
  edges <- mutate_if(edges, is.factor, as.character)
  fractions <- adjustFractions(
    singlets, multiplets, swarm, theoretical.max = theoretical.max
  )
  
  map_dfr(1:nrow(edges), function(i) {
    e <- as.character(edges[i, ])
    sub <- fractions[, e]
    rs <- matrixStats::rowSums2(sub)
    multiplets <- rownames(sub)[rs == 2]
    tibble(
      sample = multiplets,
      from = rep(e[1], length(multiplets)),
      to = rep(e[2], length(multiplets))
    )
  })
})

#' getEdgesForMultiplet
#'
#' Returns the edges detected in a multiplet.
#'
#' @name getEdgesForMultiplet
#' @rdname getEdgesForMultiplet
#' @aliases getEdgesForMultiplet
#' @param swarm A CIMseqSwarm object.
#' @param singlets A CIMseqSinglets object.
#' @param multiplets A CIMseqMultiplets object.
#' @param multipletName character; The name of the multiplet of interest.
#' @param theoretical.max integer; See \code{\link{estimateCells}}.
#' @param drop logical; Remove self connections?
#' @param ... additional arguments to pass on
#' @return Edge names. Note that multiplets that contain no connections are not 
#'  included in the output and neither are self connections.
#' @author Jason T. Serviss
#' @examples
#'
#' output <- getEdgesForMultiplet(
#' CIMseqSwarm_test, CIMseqSinglets_test, CIMseqMultiplets_test,
#' "m.NJB00204.G04"
#' )
#'
NULL

#' @rdname getEdgesForMultiplet
#' @export
#' @importFrom rlang .data
#' @importFrom dplyr filter everything mutate select
#' @importFrom purrr map2
#' @importFrom tibble as_tibble tibble
#' @importFrom tidyr unnest

setGeneric("getEdgesForMultiplet", function(
  swarm, ...
){
  standardGeneric("getEdgesForMultiplet")
})

#' @rdname getEdgesForMultiplet
#' @export

setMethod("getEdgesForMultiplet", "CIMseqSwarm", function(
  swarm, singlets, multiplets, multipletName = NULL, theoretical.max = Inf, 
  drop = TRUE, ...
){
  from <- to <- NULL
  if(is.null(multipletName)) multipletName <- rownames(getData(swarm, "fractions"))
  
  s <- calculateEdgeStats(
    swarm, singlets, multiplets, theoretical.max = theoretical.max
  )
  mat <- adjustFractions(
    singlets, multiplets, swarm, binary = TRUE, 
    theoretical.max = theoretical.max
  )
  
  edges <- expand.grid(
    from = colnames(mat), to = colnames(mat),
    stringsAsFactors = FALSE
  ) %>%
    as_tibble() 
  
  if(drop) edges <- filter(edges, from != to)
  if(!drop) rs <- rowSums2(mat)
  
  data <- edges %>%
    mutate(sample = map2(from, to, function(f, t) {
      if(f == t) {
        rownames(mat)[mat[, colnames(mat) == f] == 1 & rs == 1]
      } else {
        sub <- mat[, colnames(mat) %in% c(f, t)]
        rownames(mat)[which(rowSums2(sub) == 2)]
      }
    })) %>%
    unnest(cols = c(.data$sample)) %>%
    filter(sample %in% multipletName) %>%
    select(sample, everything())
  
  if(nrow(data) == 0) return(tibble(sample = multipletName, from = NA, to = NA))

  return(data)
})

#' getCellsForMultiplet
#'
#' Returns the names of the cell types detected in a multiplet.
#'
#'
#' @name getCellsForMultiplet
#' @rdname getCellsForMultiplet
#' @aliases getCellsForMultiplet
#' @param swarm A CIMseqSwarm object.
#' @param singlets A CIMseqSinglets object.
#' @param multiplets A CIMseqMultiplets object.
#' @param multipletName character; The name of the multiplet of interest.
#' @param drop logical; Remove self connections?
#' @param ... additional arguments to pass on
#' @return Edge names.
#' @author Jason T. Serviss
#' @examples
#'
#' output <- getCellsForMultiplet(
#' CIMseqSwarm_test, CIMseqSinglets_test, CIMseqMultiplets_test,
#' "m.NJB00204.G04"
#' )
#'
NULL

#' @rdname getCellsForMultiplet
#' @export
#' @importFrom rlang .data
#' @importFrom purrr map2
#' @importFrom dplyr mutate select distinct pull bind_rows
#' @importFrom tidyr unnest
#' @importFrom tibble tibble

setGeneric("getCellsForMultiplet", function(
  swarm, ...
){
  standardGeneric("getCellsForMultiplet")
})

#' @rdname getCellsForMultiplet
#' @export

setMethod("getCellsForMultiplet", "CIMseqSwarm", function(
  swarm, singlets, multiplets, multipletName = NULL, drop = TRUE, ...
){
  if(is.null(multipletName)) multipletName <- colnames(getData(multiplets, "counts"))
  
  getEdgesForMultiplet(
    swarm, singlets, multiplets, multipletName, drop = drop
  ) %>%
    mutate(cells = map2(.data$from, .data$to, ~c(.x, .y))) %>%
    select(-.data$from, -.data$to) %>%
    unnest(cols = c(.data$cells)) %>%
    distinct()
})

#' calculateCosts
#'
#' Calculates cost outside of pso using R instead of C++. Convience function.
#'
#' @name calculateCosts
#' @rdname calculateCosts
#' @aliases calculateCosts
#' @param singlets A CIMseqSinglets object.
#' @param multiplets A CIMseqMultiplets object.
#' @param swarm A CIMseqSwarm object.
#' @param fractions WILL PROBABLY BE REMOVED
#' @param ... additional arguments to pass on
#' @return Costs
#' @author Jason T. Serviss
#' @keywords calculateCosts
#' @examples
#'
#' #
#'
NULL

#' @rdname calculateCosts
#' @export

setGeneric("calculateCosts", function(
  singlets, multiplets, swarm, ...
){
  standardGeneric("calculateCosts")
})

#' @rdname calculateCosts
#' @export

setMethod(
  "calculateCosts", c("CIMseqSinglets", "CIMseqMultiplets", "CIMseqSwarm"), 
  function(
    singlets, multiplets, swarm, fractions = NULL, ...
){
  if(is.null(fractions)) fractions <- getData(swarm, "fractions")
  if(is.null(dim(fractions))) fractions <- matrix(fractions, ncol = length(fractions))
  mulCPM <- getData(multiplets, "counts.cpm")
  selectInd <- getData(swarm, "arguments")$features[[1]]
  
  multiplets <- matrix(
    mulCPM[selectInd, ],
    ncol = ncol(mulCPM),
    dimnames = list(NULL, colnames(mulCPM))
  )
  
  #run optimization
  to <- if(ncol(multiplets) == 1) {to <- 1} else {to <- dim(multiplets)[2]}
  
  #setup synthetic multiplets
  sngIdx <- getData(swarm, "singletIdx")
  sngSubset <- appropriateSinglets(singlets, sngIdx, selectInd)
  nSynthMul <- getData(swarm, "arguments")$nSyntheticMultiplets[[1]]
  
  #calculate costs
  opt.out <- future_lapply(
    X = 1:to, FUN = function(i) {
      oneMultiplet <- ceiling(multiplets[, i])
      calculateCost(oneMultiplet, sngSubset, as.numeric(fractions[i, ]), nSynthMul)
  })
  names(opt.out) <- colnames(multiplets)
  opt.out
})
jasonserviss/CIMseq documentation built on Jan. 11, 2020, 4:42 a.m.