R/GMQLtoGRanges.R

Defines functions .importGMQL.gdm .importGMQL.gtf import_gmql

Documented in import_gmql

#' Create GRangesList from GMQL dataset
#'
#' It creates a GRangesList from GMQL samples in dataset. 
#' It reads sample files in GTF or GDM/tab-delimited format.
#'
#' @importFrom rtracklayer import
#' @importClassesFrom GenomicRanges GRangesList
#' @importFrom S4Vectors metadata
#' @importFrom utils read.delim
#' @import xml2
#'
#' @param dataset_path string with GMQL dataset folder path
#' @param is_gtf logical value indicating if dataset samples are in GTF format;
#' if TRUE and dataset does not contain GTF samples, an error occurs 
#' 
#' @return GRangesList containing all GMQL samples in dataset
#'
#' @seealso \code{\link{export_gmql}}
#'
#' @examples
#' 
#' ## This statement defines the path to the subdirectory "example" of the 
#' ## package "RGMQL" and imports as GRangesList the contained GMQL dataset
#' 
#' test_path <- system.file("example", "DATASET", package = "RGMQL")
#' grl = import_gmql(test_path, TRUE)
#'
#'
#' @export
#'
import_gmql <- function(dataset_path, is_gtf)
{
    if(is_gtf)
        .importGMQL.gtf(dataset_path)
    else
        .importGMQL.gdm(dataset_path)
}

.importGMQL.gtf <- function(datasetName)
{
    datasetName <- sub("/*[/]$","",datasetName)
    if(basename(datasetName) !="files")
        datasetName <- file.path(datasetName,"files")
    
    if(!dir.exists(datasetName))
        stop("Directory does not exists")

    if(!length(list.files(datasetName)))
        stop("no samples present in this dataset")

    regions <- list.files(datasetName, pattern = "*.gtf$",full.names = TRUE)
    if(length(regions))
    {
        name_samples <- lapply(regions, function(x){
            gsub("*.gtf", "", basename(x))})
        sampleList <- lapply(regions, function(x){
            rtracklayer::import(con = x, format = "gtf")} )
        names(sampleList) <- name_samples
        gRange_list <- GenomicRanges::GRangesList(sampleList)
    }
    else
        stop("No GTF files present")

    meta <- list.files(datasetName, pattern = "*.gtf.meta$",full.names = TRUE)
    if(length(meta))
    {
        meta_list <- lapply(meta, .add_metadata)
        names(meta_list) <- name_samples
    }
    else
        stop("No meta GTF files present")

    S4Vectors::metadata(gRange_list) <- meta_list
    return(gRange_list)
}

.importGMQL.gdm <- function(datasetName)
{
    datasetName <- sub("/*[/]$","",datasetName)
    if(basename(datasetName) !="files")
        datasetName <- file.path(datasetName,"files")
    
    if(!dir.exists(datasetName))
        stop("Directory does not exists")

    if(!length(list.files(datasetName)))
        stop("no samples present in this dataset")

    regions <- list.files(datasetName, pattern = "*.gdm$",full.names = TRUE)
    if(length(regions))
    {
        name_samples <- lapply(regions, function(x){
            gsub("*.gdm", "",basename(x))})
        vector_field <- .schema_header(datasetName)
        type_and_coord <- .schema_type_coordinate(datasetName)
        names(vector_field) <- NULL
        if(type_and_coord$coordinate_system %in% c("1-based"))
        {
            sampleList <- lapply(regions,function(x){
                df <- read.delim(x,col.names = vector_field,header = FALSE)
                g <- GenomicRanges::makeGRangesFromDataFrame(df,
                        keep.extra.columns = TRUE,
                        start.field = "left",
                        end.field = "right")
            })
        }
        else
        {
            sampleList <- lapply(regions,function(x){
                df <- read.delim(x,col.names = vector_field,header = FALSE)
                df$left = df$left +1
                g <- GenomicRanges::makeGRangesFromDataFrame(df,
                        keep.extra.columns = TRUE,
                        start.field = "left",
                        end.field = "right")
            })
        }
       
        names(sampleList) <- name_samples
        gRange_list <- GenomicRanges::GRangesList(sampleList)
    }
    else
        stop("No GDM files present")

    meta <- list.files(datasetName, pattern = "*.gdm.meta$",full.names = TRUE)
    if(length(meta))
    {
        meta_list <- lapply(meta, .add_metadata)
        names(meta_list) <- name_samples
    }
    else
        stop("No meta GDM files present")

    S4Vectors::metadata(gRange_list) <- meta_list
    return(gRange_list)
}

Try the RGMQL package in your browser

Any scripts or data that you put into this service are public.

RGMQL documentation built on Nov. 8, 2020, 5:59 p.m.