R/helpers_STORM.R

Defines functions hlpr_confusMatOutcome check_whichRNAmods hlpr_add_REScols getGeneSeqsfromGenome libComplexReport mkGTF bisGenome rmTmpDir mkTmpDir

Documented in bisGenome libComplexReport rmTmpDir

# Magrittr Pipe Operator
`%>%` <- magrittr::`%>%`

# Make Temporary directory for STORMseq processing
mkTmpDir <- function(){
    if(!dir.exists("./STORMtmp_dir")){dir.create("./STORMtmp_dir")}
}

#' Remove STORM temporary directory
#'
#' Removes temporary directory "./STORMtmp_dir" created for alignment steps.
#'
#' @return
#' @export
#'
#' @examples
rmTmpDir <- function(){
    if(dir.exists("./STORMtmp_dir")){unlink("./STORMtmp_dir", recursive = TRUE)}
}


#' Make bisulphite genome
#'
#' @param fastaGenome
#'
#' @return
#' @export
#'
#' @examples
bisGenome <- function(fastaGenome){
    mkTmpDir()
    outName <- paste0(strsplit(fastaGenome, split = "/") %>% unlist %>% utils::tail(1),
                      ".bisulp")
    out_name <- file.path(getwd(), "STORMtmp_dir", outName)
    gen <- Biostrings::readDNAStringSet(fastaGenome)
    gen_r <- lapply(gen, function(x){
        tmp <- stringr::str_replace_all(x, pattern = "C", replacement = "T")
    })
    seqinr::write.fasta(sequences = gen_r, names = names(gen_r), file.out = out_name, as.string = T)
    out_name
}

# Make GTF from BED
mkGTF <- function(bedAnnotation, outName = NULL, source = "user"){
    mkTmpDir()
    tmpDir <- file.path(getwd(), "STORMtmp_dir")
    if(is.null(outName)){outName <- file.path(tmpDir, "tmp_geneAnnot.gtf")}
    com <- paste0("module load kentUtils/v377 && ",
                  "/apps/RH7U2/general/kentUtils/v377/bin/bedToGenePred ",
                  bedAnnotation, " /dev/stdout | /apps/RH7U2/general/",
                  "kentUtils/v377/bin/genePredToGtf file /dev/stdin ", outName)
    system(com)
    tmp <- utils::read.delim(outName, header = FALSE)
    tmp[,2] <- source
    utils::write.table(tmp, file = outName, quote = FALSE, sep = "\t", col.names = FALSE, row.names = FALSE)
}

#' Library complexity report
#'
#' @param META
#' @param maxExtrapolation
#' @param steps
#' @param verbose
#'
#' @return
#' @export
#'
#' @examples
libComplexReport <- function(META, maxExtrapolation = 2.01e6, steps = 1e5, verbose = FALSE){
    if(all(file.exists(META$BAM))){
        for(file in META$BAM){
            com <- paste0("module load libgsl/2.3 && ",
                          "module load preseq/2.0.1 && ",
                          "/apps/RH7U2/gnu/preseq/2.0.1/preseq lc_extrap -P -B ",
                          "-e ", maxExtrapolation, " -s ", steps, " ", file, " -o ",
                          gsub(pattern = ".bam$", replacement = ".lce.txt", x = file),
                          " &")
            system(com)
        }
        Sys.sleep(time = 20) #TODO: change for a while until some of the files exist.
        lastBam <- gsub(pattern = ".bam$", replacement = ".lce.txt", x = META$BAM)
        while(min(difftime(Sys.time(), file.info(lastBam)$mtime, units = "secs"), na.rm = TRUE) < 60){
            Sys.sleep(time = 2)
        }
    }else{
        stop("Files ", paste(META$BAM[!file.exists(META$BAM)], collapse = " "),
             " do not exist")
    }
    if(verbose){cat("DONE: Library complexity reports.")}
}

# Extract gene sequences from genome and geneAnnotation
getGeneSeqsfromGenome <- function(geneAnnot, genome, nCores = 1){
    txtools:::check_mc_windows(nCores)
    txtools:::check_GA_genome_chrCompat(geneAnnot = geneAnnot, genome = genome)
    parallel::mclapply(mc.cores = nCores, seq_along(geneAnnot), function(i){
        selGene <- geneAnnot[i]
        iGene <- as.character(selGene$name)
        iChr <- as.character(GenomicRanges::seqnames(selGene))
        iStr <- as.character(selGene@strand)
        iGA <- selGene
        iBlocks <- S4Vectors::mcols(iGA)$blocks %>% txtools:::if_IRangesList_Unlist() %>%
            IRanges::shift(IRanges::start(iGA) - 1)
        SEQ <- stringr::str_sub(genome[[iChr]], start = IRanges::start(iBlocks),
                                end = IRanges::end(iBlocks)) %>% paste(collapse = "") %>%
            Biostrings::DNAString()
        if(iStr == "-") {
            SEQ <- Biostrings::reverseComplement(SEQ)
        }
        SEQ
    }) %>% Biostrings::DNAStringSet()
}

# Add results to a STORM object. Remove scores if metric is already present.
hlpr_add_REScols <- function(STORM_RES, REScols){
    iMetric <- unique(REScols[,"metric"]) %>% as.character()
    # remove results for identical metric
    if("metric" %in% names(STORM_RES)){
        STORM_RES <- STORM_RES[STORM_RES$metric != iMetric,]
    }
    STORM_RES <- rbind(STORM_RES, REScols)
    STORM_RES
}

# Check which RNAmods are possible to predict based on RNAmod metrics
check_whichRNAmods <- function(STORM){
    if("RES" %in% names(STORM)){
        tmp <- lapply(metricsList, function(x) any(unique(STORM$RES$metric) %in% x)) %>% unlist()
        return(names(tmp[tmp]))
    }else{stop("STORM object does not include 'RES' element")}
}

# Confusion matrix helper
hlpr_confusMatOutcome <- function(x){
    x$outcome <- paste0(ifelse(x$truth == x$pred, "T", "F"), ifelse(as.logical(x$pred), "P", "N"))
    x
}
SchwartzLab/brainSTORM documentation built on May 14, 2022, 5:14 p.m.