R/readCurationFile.R

Defines functions .findClosestSolution readCurationFile

Documented in readCurationFile

#' Read curation file
#'
#' Function that can be used to read the curated output of the
#' \code{\link{runAbsoluteCN}} function.
#'
#'
#' @param file.rds Output of the \code{\link{runAbsoluteCN}} function,
#' serialized with \code{saveRDS}.
#' @param file.curation Filename of a curation file that points to the correct
#' tumor purity and ploidy solution.
#' @param remove.failed Do not return solutions that failed.
#' @param report.best.only Only return correct/best solution (useful on low
#' memory machines when lots of samples are loaded).
#' @param min.ploidy Minimum ploidy to be considered. If \code{NULL}, all. Can
#' be used to automatically ignore unlikely solutions.
#' @param max.ploidy Maximum ploidy to be considered. If \code{NULL}, all. Can
#' be used to automatically ignore unlikely solutions.
#' @return The return value of the corresponding \code{\link{runAbsoluteCN}}
#' call, but with the results array manipulated according the curation CSV file
#' and arguments of this function.
#' @author Markus Riester
#' @seealso \code{\link{runAbsoluteCN} \link{createCurationFile}}
#' @examples
#'
#' data(purecn.example.output)
#' file.rds <- "Sample1_PureCN.rds"
#' createCurationFile(file.rds)
#' # User can change the maximum likelihood solution manually in the generated
#' # CSV file. The correct solution is then loaded with readCurationFile.
#' purecn.curated.example.output <-readCurationFile(file.rds)
#'
#' @export readCurationFile
#' @importFrom utils read.csv
readCurationFile <- function(file.rds,
file.curation = gsub(".rds$", ".csv", file.rds),
remove.failed = FALSE, report.best.only = FALSE, min.ploidy = NULL,
max.ploidy = NULL) {
    flog.info("Reading %s...", file.rds)
    res <- readRDS(file.rds)
    if (!file.exists(file.curation)) {
        flog.warn("Curation file %s does not exist, creating one.", file.curation)
        output <- try(createCurationFile(file.rds))
        if (is(output, "try-error")) {
            flog.warn("Failed to write %s: %s", file.curation, output)
            return(res)
        }
    }
    curation <- read.csv(file.curation, as.is=TRUE, nrows=1)
    .checkLogical <- function(field) {
        if (!is.logical(curation[[field]])) {
            .stopUserError("'", field, "' column in ", file.curation, 
                " not logical(1).")
        }
    }
    .checkLogical("Failed")
    .checkLogical("Curated")
    .checkLogical("Flagged")

    ## Mark all solutions as failed if sample is curated as failed
    if (curation$Failed) {
        if (remove.failed) return(NA)
        for (i in seq_along(res$results)) res$results[[i]]$failed <- TRUE
    } else {
        for (i in seq_along(res$results)) res$results[[i]]$failed <- FALSE
    }
    
    # Make sure purity and ploidy are numeric. Stop if not, not warn.
    curation$Purity <- suppressWarnings(as.numeric(curation$Purity))
    curation$Ploidy <- suppressWarnings(as.numeric(curation$Ploidy))
    
    if (is.na(curation$Purity) || is.na(curation$Ploidy) ||
        curation$Purity < 0 || curation$Purity > 1 ||
        curation$Ploidy < 0 || curation$Ploidy > 8) {
        .stopUserError("Purity or Ploidy not numeric or in expected range.")
    }
    res$results <- .findClosestSolution(res$results, curation$Purity,
        curation$Ploidy)
    
    ## Filter by ploidy if necessary
    ploidy <- sapply(res$results, function(x) x$ploidy)
    if (is.null(min.ploidy)) min.ploidy <- min(ploidy)
    if (is.null(max.ploidy)) max.ploidy <- max(ploidy)
    idxPloidyOk <- which(ploidy>=min.ploidy & ploidy <= max.ploidy)
    res$results <- res$results[idxPloidyOk]
     
    if (report.best.only) {
        res$results <- res$results[1]
    }
    res
}    

.findClosestSolution <- function(results, purity, ploidy, ploidy.div = 6) {
    # Find purity/ploidy solution most similar to curation
    diffCurated <- vapply(results, function(x) {
        abs(x$purity - purity) + (abs(x$ploidy - ploidy) / ploidy.div)
    }, double(1))
    idxCurated <- which.min(diffCurated)
    if (idxCurated != 1) {
        results[c(1, idxCurated)] <-  results[c(idxCurated, 1)]
    }
    results
}    
lima1/PureCN documentation built on April 3, 2024, 7:56 a.m.