R/run.MultiSEPhyR.R

Defines functions run.MultiSEPhyR

#' run.MultiSEPhyR
#'
#' `run.MultiSEPhyR` uses the multi-sample pcf algorithm to segment the LogR ratios across multiple samples.
#'
#' @param cluster_mean The table showing the mean logR ratios after clustering
#' @return Returns the CCF tables for each cluster
#' @importFrom magrittr %>%
#'
#' @export


run.MultiSEPhyR <- function(cluster_mean, cn_max = 8, Gamma = 0.55) {

  frequency_steps <- 1:100 / 100
  ### if cn_max > 0 ?
  possible_CNs <- 0:cn_max
  CN_normal <- 2




  res <- tidyr::expand_grid(cn = colnames(cluster_mean),
                            cn_state = possible_CNs,
                            region = rownames(cluster_mean),
                            frequency = frequency_steps,
                            # inferred_cn = -1,
                            square_diff = -1)
  i <- 1
  for (this_cn_name in colnames(cluster_mean)) {
    print(this_cn_name)
    for (this_cn_state in as.numeric(possible_CNs)) {
      print(paste(this_cn_name, this_cn_state))

      for (this_region_name in rownames(cluster_mean)) {
        for (this_frequency in frequency_steps) {
          expected_LogR <- Gamma * log2((this_frequency * this_cn_state + (1-this_frequency) * CN_normal)/ CN_normal)
          this_square_diff <- abs(cluster_mean[this_region_name, this_cn_name] - expected_LogR)
          res[i, c("frequency", "expected_LogR", "square_diff")] <- list(this_frequency, expected_LogR , this_square_diff)
          i <- i + 1
        }
      }
    }
  }

###left_join  cluster_mean and cn_max .> mutate > expected_logR and sqr diff
  temp <-tibble()
  for (i in colnames(cluster_mean)) {
    temp <- rbind(temp, res %>% group_by(cn, cn_state, region) %>% filter(square_diff == min(square_diff)) %>% group_by(cn, cn_state) %>% mutate(SoS = sum(square_diff)) %>% filter(cn == i) %>% ungroup() %>% filter(SoS == min(SoS)))
  }

  # ## if there are multiple cn states solutions, the lower cn_state will be preferred. ?
  ## if there are multiple cn states solutions, the lower cn_state will be preferred. ?
  for (i in unique(temp$cn)) {
    if (length(which(temp$cn == i)) > nrow(cluster_mean)) {
      print(i)
      temp <- temp[-which(temp$cn == i & abs(2-temp$cn_state) != min(abs(2-temp[temp$cn == i,]$cn_state))),]
    }
  }

  clonal_cn <- temp %>% group_by(cn) %>% summarise(sum(frequency)) %>% data.frame()
  clonal_cn <- clonal_cn[which.max(clonal_cn$sum.frequency.),][,1]


  purity <- as.character(unlist(temp[which(temp$cn == clonal_cn),][,4]))
  names(purity) <- rownames(cluster_mean)

  CCF_region <- list()
  for (i in rownames(cluster_mean)) {
    CCF_region[[i]] <- temp[which(temp$region == i),]
    CCF_region[[i]][,"purity"] <- purity[i]
    CCF_region[[i]][,"clonality"] <- round(as.numeric(CCF_region[[i]]$frequency) / as.numeric(CCF_region[[i]]$purity),3)
  }

  CCF <- CCF_region[[1]]$cn
  for (i in rownames(cluster_mean)) {
    CCF <- cbind(CCF, CCF_region[[i]]$clonality)
  }
  colnames(CCF) <- c("cn", paste0(rownames(cluster_mean)))
  return(CCF)
}
chulingding/MultiSEPhyR documentation built on Dec. 19, 2021, 4:06 p.m.