R/runMethods.R

Defines functions runMethods

Documented in runMethods

#' Multi-scenario analysis of single-cell epigenomic data
#'
#' Add description here
#'
#' @param sc simulated data
#' @param sim integer
#' @param scenario.input scenario parameters
#' @param path directory
#' @param maxtime maximum allowed time
#'
#' @return Add return here
#'
#' @details Add details here
#'
#' @author Pedro L. Baldoni, \email{pedrobaldoni@gmail.com}
#'
#' @export
#'
runMethods <- function(sc,sim,scenario.input,path = '.',maxtime = 21600){

    Counts = N = SumN = rl = NULL

    # Removing islands of zeros
    # For some reason, the scChIPseq data from Grosselin et al. (2019) when mapped onto hg38 exhibit
    # Several chunks of zeros

    Rleagg <- data.table(Counts = Matrix::rowSums(SummarizedExperiment::assay(sc)))[,rl := rleid(Counts)][,N := 1:.N]
    Rleagg <- merge(Rleagg,Rleagg[,list(SumN = .N),by = 'rl'],by = 'rl',all.x = TRUE)
    idx <- Rleagg[,which(Counts==0 & SumN>2)]
    if(length(idx)>0){sc <- sc[-idx,]}
    rm(Rleagg)

    Cusanovich2018 = cisTopic = SnapATAC = Scasat = NULL

    methods <- c('Cusanovich2018','cisTopic','SnapATAC','Scasat')
    label <- file.path(path,paste0('Simulation',sim,'_',methods,'.RData'))
    names(label) <- methods

    toiter <- unlist(lapply(seq_len(length(label)),FUN = function(x){
        tryCatch({
            suppressWarnings({
                load(label[x])
                if(is.na(get(methods[x])$umap)){stop('error')}
                rm(methods[x])
            })
        },error = function(cond){return(x)})
    }))

    message('The following methods the to be run: ',paste(methods[toiter],collapse = ', '))

    for(i in toiter){
        message('Running ',methods[i])

        if(methods[i]=='Cusanovich2018'){
            tryCatch({R.utils::withTimeout({assign(methods[i],runCusanovich2018(sc, peaks = TRUE, returnSc = TRUE),inherits = TRUE)},timeout = maxtime,onTimeout = 'error') },
                     error = function(cond){assign(methods[i],errorCatch(cond,true = SummarizedExperiment::colData(sc)$Cluster,peaks = sc$peaks,method = methods[i]),inherits = TRUE)})

            save(Cusanovich2018,file = label['Cusanovich2018'],compress = 'xz')
            rm(Cusanovich2018)
        } else{
            load(label['Cusanovich2018'])
            if(methods[i]=='cisTopic'){
                tryCatch({R.utils::withTimeout({assign(methods[i],runCisTopic(sc = Cusanovich2018$sc),inherits = TRUE)},timeout = maxtime,onTimeout = 'error') },
                         error = function(cond){assign(methods[i],errorCatch(cond,true = SummarizedExperiment::colData(sc$sc)$Cluster,peaks = sc$peaks,method = methods[i]),inherits = TRUE)})
                save(cisTopic,file = label['cisTopic'],compress = 'xz')
                rm(cisTopic)
            }
            if(methods[i]=='SnapATAC'){
                tryCatch({R.utils::withTimeout({assign(methods[i],runSnapATAC(sc = Cusanovich2018$sc),inherits = TRUE)},timeout = maxtime,onTimeout = 'error') },
                         error = function(cond){assign(methods[i],errorCatch(cond,true = SummarizedExperiment::colData(sc$sc)$Cluster,peaks = sc$peaks,method = methods[i]),inherits = TRUE)})
                save(SnapATAC,file = label['SnapATAC'],compress = 'xz')
                rm(SnapATAC)
            }
            if(methods[i]=='Scasat'){
                tryCatch({R.utils::withTimeout({assign(methods[i],runScasat(sc = Cusanovich2018$sc),inherits = TRUE)},timeout = maxtime,onTimeout = 'error') },
                         error = function(cond){assign(methods[i],errorCatch(cond,true = SummarizedExperiment::colData(sc$sc)$Cluster,peaks = sc$peaks,method = methods[i]),inherits = TRUE)})
                save(Scasat,file = label['Scasat'],compress = 'xz')
                rm(Scasat)
            }
        }
    }

    if(all(file.exists(label))){
        message('All methods ran (or ran with errors/timeout). Now loading the results')
        for(i in label){
            load(i)
        }

        dt <- list()

        dt[['clustering']] <- rbindlist(list(
            cisTopic$clustering$metrics,
            Cusanovich2018$clustering$metrics,
            SnapATAC$clustering$metrics,
            Scasat$clustering$metrics))
        dt[['clustering']] <- cbind(scenario.input,dt[['clustering']])

        dt[['time']] <- rbindlist(list(
            data.table(Method = 'cisTopic', Time = cisTopic$time),
            data.table(Method = 'Cusanovich2018', Time = Cusanovich2018$time),
            data.table(Method = 'SnapATAC', Time = SnapATAC$time),
            data.table(Method = 'Scasat', Time = Scasat$time)))
        dt[['time']] <- cbind(scenario.input,dt[['time']])

        dt[['umap']] <- list(
            'cisTopic' = cisTopic$umap,
            'Cusanovich2018' = Cusanovich2018$umap,
            'SnapATAC' = SnapATAC$umap,
            'Scasat' = Scasat$umap)

        message('Saving the final output...')
        save(dt,file = file.path(path,paste0('Simulation',sim,'.RData')),compress = 'xz')

        message('Removing extra files')
        for(i in label){
            system2('rm',i)
        }
        system2('rm',file.path(path,paste0('Input',sim,'.RData')))
    }
    message('Done!')
}
plbaldoni/scChIPseqsim documentation built on June 11, 2020, 7:41 p.m.