R/gmql_project.R

Defines functions .trasform_update gmql_project select.GMQLDataset

# Implementation behind the select() method for GMQLDataset objects.
# The two *_update arguments are never evaluated: their expressions are 
# captured with substitute(), deparsed to text and rewritten by 
# .trasform_update() before being handed to gmql_project().
select.GMQLDataset <- function(.data, metadata = NULL, metadata_update = NULL, 
                            all_but_meta = FALSE, regions = NULL, 
                            regions_update = NULL, all_but_reg = FALSE) 
{
    # Converts one captured update expression into a single string; 
    # a NULL expression (argument not supplied) becomes a Java null String.
    as_update_string <- function(captured)
    {
        if(is.null(captured))
            return(.jnull("java/lang/String"))
        
        # deparse() may return several lines: rewrite each, then glue them
        pieces <- .trasform_update(deparse(captured))
        paste(pieces, collapse = "")
    }
    
    input_data <- value(.data)
    
    regions_expr <- substitute(regions_update)
    regions_rule <- as_update_string(regions_expr)
    
    metadata_expr <- substitute(metadata_update)
    metadata_rule <- as_update_string(metadata_expr)
    
    gmql_project(input_data, metadata, metadata_rule, all_but_meta, regions, 
                    regions_rule, all_but_reg)
}

#' Method select
#' 
#' @description Wrapper to GMQL PROJECT operator 
#' 
#' @description It creates, from an existing dataset, a new dataset with all 
#' the samples from the input dataset, but keeping for each sample in the 
#' input dataset only those metadata and/or region attributes specified.
#' Region coordinates and values of the remaining metadata and/or region 
#' attributes remain equal to those in the input dataset. It allows you to:
#' \itemize{
#' \item{Remove existing metadata and/or region attributes from a dataset}
#' \item{Update or set new metadata and/or region attributes in the result}
#' }
#' 
#' @importFrom rJava J .jnull .jarray
#' @importFrom dplyr select
#' 
#' @param .data GMQLDataset class object
#' 
#' @param metadata vector of strings made up by metadata attributes
#' @param regions vector of strings made up by region attributes
#' @param all_but_reg logical value indicating which region attributes
#' you want to exclude; if FALSE, only the region attributes specified in 
#' the \emph{regions} argument are kept in the output of the operation; 
#' if TRUE, all the region attributes are kept except those in the 
#' \emph{regions} argument.
#' If \emph{regions} is not defined, \emph{all_but_reg} is not considered.
#' @param all_but_meta logical value indicating which metadata 
#' you want to exclude; if FALSE, only the metadata attributes specified in 
#' the \emph{metadata} argument are kept in the output of the operation; 
#' if TRUE, all the metadata are kept except those in the \emph{metadata} 
#' argument.
#' If the \emph{metadata} input parameter is not defined, 
#' \emph{all_but_meta} is not considered.
#' @param regions_update list of updating rules in the form of 
#' key = value generating new genomic region attributes and/or values.
#' The argument is not evaluated: the expression is captured as written 
#' and converted to a GMQL string.
#' The following options are available:
#' \itemize{
#' \item{All aggregation functions already defined by AGGREGATES object}
#' \item{All basic mathematical operations (+, -, *, /), including parenthesis}
#' \item{SQRT, META, NIL constructor objects defined by OPERATOR object}
#' }
#' @param metadata_update list of updating rules in the form of 
#' key = value generating new metadata attributes and/or attribute values.
#' The argument is not evaluated: the expression is captured as written 
#' and converted to a GMQL string.
#' The following options are available:
#' \itemize{
#' \item{All aggregation functions already defined by AGGREGATES object}
#' \item{All basic mathematical operations (+, -, *, /), including parenthesis}
#' \item{SQRT constructor object defined by OPERATOR object}
#' }
#'  
#' @return GMQLDataset object. It contains the value to use as input 
#' for the subsequent GMQLDataset method
#'
#' @examples
#' ## This statement initializes and runs the GMQL server for local execution 
#' ## and creation of results on disk. Then, with system.file() it defines 
#' ## the path to the folder "DATASET" in the subdirectory "example" 
#' ## of the package "RGMQL" and opens such folder as a GMQL dataset 
#' ## named "data"
#' 
#' init_gmql()
#' test_path <- system.file("example", "DATASET", package = "RGMQL")
#' data = read_gmql(test_path)
#' 
#' ## This statement creates a new dataset called CTCF_NORM_SCORE by preserving 
#' ## all region attributes apart from score, and creating a new region 
#' ## attribute called new_score by dividing the existing score value of each 
#' ## region by 1000.0 and incrementing it by 100.
#' ## It also generates, for each sample of the new dataset, 
#' ## a new metadata attribute called normalized with value 1, 
#' ## which can be used in future selections.
#' 
#' 
#' CTCF_NORM_SCORE = select(data, metadata_update = list(normalized = 1), 
#'     regions_update = list(new_score = (score / 1000.0) + 100), 
#'     regions = c("score"), all_but_reg = TRUE)
#' 
#'
#' 
#' @name select
#' @rdname select
#' @aliases select,GMQLDataset-method
#' @aliases select-method
#' @export
setMethod("select", "GMQLDataset",select.GMQLDataset)

# Validates the PROJECT arguments, converts them to Java values and calls 
# project() on the it/polimi/genomics/r/Wrapper Java class.
#
# input_data       value forwarded unchanged as last argument of project()
# metadata         NULL or character vector of metadata attribute names
# metadata_update  value forwarded unchanged to project()
# all_but_meta     logical; only the first element is used
# regions          NULL or character vector of region attribute names
# regions_update   value forwarded unchanged to project()
# all_but_reg      logical; only the first element is used
#
# Empty strings and duplicates are removed from metadata and regions.
# Stops if metadata/regions is neither NULL nor character, or if the 
# wrapper reports an error (first element of the response is non-zero); 
# otherwise returns GMQLDataset() built on the second response element.
gmql_project <-function(input_data, metadata, metadata_update, all_but_meta, 
                            regions, regions_update, all_but_reg)
{
    if(!is.null(metadata))
    {
        if(!is.character(metadata))
            stop("metadata: no valid input")
        
        metadata <- metadata[!metadata %in% ""]
        metadata <- metadata[!duplicated(metadata)]
        
        # Nothing left after cleaning: send a Java null, exactly as when the 
        # argument is omitted, instead of passing that null through .jarray()
        if(!length(metadata))
            metadata <- .jnull("java/lang/String")
        else
            metadata <- .jarray(metadata)
    }
    else
        metadata <- .jnull("java/lang/String")
    
    if(!is.null(regions))
    {
        if(!is.character(regions))
            stop("regions: no valid input")
        
        regions <- regions[!regions %in% ""]
        regions <- regions[!duplicated(regions)]
        
        # Same rule as for metadata: an empty selection becomes a Java null
        if(!length(regions))
            regions <- .jnull("java/lang/String")
        else
            regions <- .jarray(regions)
    }
    else
        regions <- .jnull("java/lang/String")
    

    if(length(all_but_meta)>1)
        warning("all_but_meta: no multiple values")
    
    if(length(all_but_reg)>1)
        warning("all_but_reg: no multiple values")
    
    # Only the first flag value is forwarded to the wrapper
    all_but_reg <- all_but_reg[1]
    all_but_meta <- all_but_meta[1]
    
    WrappeR <- J("it/polimi/genomics/r/Wrapper")
    response <- WrappeR$project(metadata, metadata_update, all_but_meta, 
                    regions, regions_update, all_but_reg, input_data)
    
    # response[1] is an integer status as text, response[2] the payload:
    # an error message when the status is non-zero
    error <- strtoi(response[1])
    val <- response[2]
    if(error)
        stop(val)
    else
        GMQLDataset(val)
}

# Rewrites the deparsed text of an update list into GMQL-style text.
# The substitutions are applied in order to every element of `predicate`:
# drop every "list(", drop one closing parenthesis at the end of the string, 
# turn "=" into "AS", turn "NIL" into "NULL", and remove double quotes.
# The rewritten character vector is returned invisibly.
.trasform_update <- function(predicate)
{
    # each rule is c(regular expression, replacement); order matters
    rewrite_rules <- list(
        c("list\\(", ""),
        c("\\)$", ""),
        c("=", "AS"),
        c("NIL", "NULL"),
        c("\"", "")
    )
    
    for(rule in rewrite_rules)
        predicate <- gsub(rule[1], rule[2], predicate)
    
    invisible(predicate)
}

Try the RGMQL package in your browser

Any scripts or data that you put into this service are public.

RGMQL documentation built on Nov. 8, 2020, 5:59 p.m.