R/helpers.R

Defines functions fetchATAC downloadATAC

# Download a single linked file for one dataset and read it into memory.
#
# Arguments:
#   df     - metadata table that holds the download links.
#   row    - row of `df` selecting the dataset.
#   column - column of `df` whose cell contains the link to fetch.
#   bfc    - cache object handed to bfcadd() as its first argument.
#
# Returns the object that readRDS() reads from the path returned by
# bfcadd(), or NULL when the selected cell is empty ("") or missing (NA).
#' @importFrom BiocFileCache BiocFileCache bfcadd
downloadATAC <- function(df, row, column, bfc) {
    link <- df[row, column]
    # A missing link must be checked first: `NA == ""` is NA, which would
    # abort the `if` with an error instead of meaning "nothing to download".
    if (is.na(link) || link == "") {
        return(NULL)
    }
    filename <- bfcadd(bfc, "TestWeb", fpath = link)
    readRDS(filename)
}

# Download the dataset described by one metadata row and wrap it in a
# SingleCellExperiment.
#
# Arguments:
#   df     - metadata table. The columns read here are 'dense_matrix_link',
#            'sparse_matrix_link', 'cell_annotation_link', 'Accession',
#            'PMID', 'Author', 'Sequencing_Technology', 'Score_Type',
#            'Organism', 'Genome_Build', 'Broad_Cell_Categories_Present',
#            'Tissue_Cell_Type', 'Disease', 'Data_Summary' and
#            'Matrix_Names'.
#   row    - row of `df` selecting the dataset.
#   sparse - TRUE to fetch the file under 'sparse_matrix_link', FALSE to
#            fetch the one under 'dense_matrix_link'.
#
# Returns a SingleCellExperiment whose `counts` assay is the downloaded
# matrix and whose metadata is a list built from the row of `df`. When a
# cell annotation file is available, its 'cluster' and 'cell_label' columns
# are stored in colData.
#
# Raises an error when `sparse` is not a single TRUE/FALSE value, when the
# requested matrix link is empty, or when any download/read step fails.
fetchATAC <- function(df, row, sparse) {
    use_sparse <- as.logical(sparse)
    if (length(use_sparse) != 1L || is.na(use_sparse)) {
        stop("'sparse' must be a single TRUE or FALSE value", call. = FALSE)
    }
    link_column <- if (use_sparse) "sparse_matrix_link" else "dense_matrix_link"

    # Fail fast, before any cache is created, when the requested flavour of
    # the matrix does not exist for this dataset.
    matrix_link <- df[row, link_column]
    if (is.na(matrix_link) || matrix_link == "") {
        if (use_sparse) {
            stop(df[row, "Accession"],
                " has no sparse matrix, use sparse=FALSE to download it.",
                call. = FALSE)
        }
        stop(df[row, "Accession"],
            " has no dense matrix, use sparse=TRUE to download it.",
            call. = FALSE)
    }

    # download the data into a fresh cache located in a temporary path
    bfc <- BiocFileCache(tempfile(), ask = FALSE)

    # The locals are deliberately not called `expression` / `labels`: those
    # names also exist as base R functions, so a failed assignment would
    # silently resolve to the function instead of raising an error.
    downloaded <- tryCatch({
        counts <- downloadATAC(df, row, link_column, bfc)
        if (is.null(counts)) {
            stop("error downloading counts matrix for dataset")
        }
        annotation <- downloadATAC(df, row, "cell_annotation_link", bfc)
        if (!is.null(annotation) &&
            length(annotation$cell) != length(colnames(counts))) {
            keep <- which(colnames(counts) %in% annotation$cell)
            if (length(keep) == 0L && length(annotation$cell) > 0L) {
                stop("no annotated cell matches a column of the matrix")
            }
            # drop = FALSE keeps a matrix even when a single cell remains
            counts <- counts[, keep, drop = FALSE]
            # keep one annotation row per remaining column, in column order
            annotation <- annotation[
                match(colnames(counts), annotation$cell), , drop = FALSE]
        }
        list(counts = counts, annotation = annotation)
    },
        error = function(e) {
            # Re-raise: continuing without the data cannot succeed and only
            # produces an unrelated error further down.
            stop("error occurred when retrieving dataset: ",
                conditionMessage(e), call. = FALSE)
        }
    )
    counts <- downloaded$counts
    annotation <- downloaded$annotation

    dataset_data_meta <- list(pmid = df[row, "PMID"],
                        author = df[row, "Author"],
                        technology = df[row, "Sequencing_Technology"],
                        score_type = df[row, "Score_Type"],
                        organism = df[row, "Organism"],
                        genome_build = df[row, "Genome_Build"],
                        cell_categories = df[row,
                            "Broad_Cell_Categories_Present"],
                        tissue_type = df[row, "Tissue_Cell_Type"],
                        disease = df[row, "Disease"],
                        summary = df[row, "Data_Summary"],
                        cells = colnames(counts),
                        matrix_name = df[row, "Matrix_Names"],
                        # identifiers for peak matrix
                        regions = row.names(counts),
                        accession = df[row, "Accession"])

    if (is.null(annotation)) {
        dataset <- SingleCellExperiment(list(counts = counts),
                                        metadata = dataset_data_meta)
    } else {
        dataset <- SingleCellExperiment(list(counts = counts),
            colData = data.frame(
                label = annotation[, c("cluster", "cell_label")]),
            metadata = dataset_data_meta)
    }

    dataset
}
shooshtarilab/scATAC.Explorer documentation built on Oct. 20, 2024, 8:20 p.m.