R/query_fcts.R

Defines functions queryAnnotDB

Documented in queryAnnotDB

#' Annotation Queries with ChEMBL IDs
#'
#' This function can be used to query compound annotations from the default
#' resources as well as the custom resources stored in the SQLite annotation
#' database. The default annotation resources are DrugAge, DrugBank, CMAP02 and
#' LINCS. Detailed description of the SQLite database is available at the
#' corresponding \code{\link[compoundCollectionData]{compoundCollectionData}}
#' package. The customized compound annotations could be added/deleted by the
#' \code{\link{customAnnot}} utilities.
#'
#' The input of this query function is a set of ChEMBL IDs, it returns a
#' data.frame storing annotations of the input compounds from selected
#' annotation resources defined by the \code{annot} argument.
#'
#' @param chembl_id character vector of ChEMBL IDs.
#' @param annot character vector of annotation resources, such as
#' \code{DrugAge}, \code{DrugBank}, \code{CMAP02}, \code{LINCS} or names of
#' the annotation tables added by users
#' @return data.frame of annotation result
#' @import RSQLite
#' @examples
#' query_id <- c("CHEMBL1000309", "CHEMBL100014", "CHEMBL100109",
#'                "CHEMBL100", "CHEMBL1000", "CHEMBL10")
#' annot_res <- queryAnnotDB(query_id, annot=c("DrugAge", "LINCS"))
#'
#' # Add a custom compound annotation table
#' chembl_id <- c("CHEMBL1000309", "CHEMBL100014", "CHEMBL10",
#'                "CHEMBL100", "CHEMBL1000", NA)
#' annot_tb <- data.frame(compound_name=paste0("name", 1:6),
#'         chembl_id=chembl_id,
#'         feature1=paste0("f", 1:6),
#'         feature2=rnorm(6))
#' addCustomAnnot(annot_tb, annot_name="mycustom")
#'
#' # query custom annotation
#' annot_res2 <- queryAnnotDB(query_id, annot=c("LINCS", "mycustom"))
#' @export
queryAnnotDB <- function(chembl_id,
                        annot=c("DrugAge", "DrugBank", "CMAP02", "LINCS")){
    chembl_id <- paste0("(\"", paste(chembl_id, collapse="\", \""), "\")")
    ah <- AnnotationHub()
    dbpath <- ah[["AH79563"]]
    conn <- dbConnect(SQLite(), dbpath)
    query <- dbSendQuery(conn, paste("SELECT a.chembl_id, a.drugbank_id,
            a.lincs_id, b.species, b.strain, b.dosage, b.avg_lifespan_change,
            b.max_lifespan_change, b.gender, b.significance,
            c.name, c.'cas-number', c.unii, c.state, c.groups, c.'fda-label',
            d.instance_id, d.'concentration..M.', d.'duration..h.',
            d.cell2, d.array3, d.perturbation_scan_id, d.vehicle_scan_id4,
            d.scanner, d.catalog_number,
            e.pert_iname, e.is_touchstone, e.inchi_key, e.pubchem_cid
            FROM id_mapping AS a
            LEFT JOIN drugAgeAnnot AS b ON a.drugage_id = b.drugage_id
            LEFT JOIN DrugBankAnnot AS c ON a.drugbank_id = c.drugbank_id
            LEFT JOIN cmapAnnot AS d ON a.cmap_id = d.cmap_id
            LEFT JOIN lincsAnnot AS e ON a.lincs_id = e.lincs_id
            WHERE a.chembl_id IN", chembl_id,
            "GROUP BY a.chembl_id
            ORDER BY a.chembl_id"))
    assays <- dbFetch(query)
    dbClearResult(query)
    result <- data.frame(assays)
    da_cols <- c("species", "strain", "dosage", "avg_lifespan_change",
                 "max_lifespan_change", "gender", "significance")
    db_cols <- c("drugbank_id", "name", "cas.number", "unii", "state",
                 "groups", "fda.label")
    cmap_cols <- c("instance_id", "concentration..M.", "duration..h.", "cell2",
                   "array3", "perturbation_scan_id", "vehicle_scan_id4",
                   "scanner", "catalog_number")
    lincs_cols <- c("lincs_id", "pert_iname", "is_touchstone", "inchi_key",
                    "pubchem_cid")
    res <- result[, "chembl_id", drop=FALSE]
    for(x in annot){
        if(x == "DrugAge") res <- cbind(res, result[,da_cols])
        if(x == "DrugBank") res <- cbind(res, result[,db_cols])
        if(x == "CMAP02") res <- cbind(res, result[,cmap_cols])
        if(x == "LINCS") res <- cbind(res, result[,lincs_cols])
        if(! x %in% c("DrugAge", "DrugBank", "CMAP02", "LINCS")){
            idcol <- paste0(x, "_id")
            cust_annot <- dbGetQuery(conn, paste0("SELECT a.chembl_id, a.",
                idcol, ", b.* FROM id_mapping AS a LEFT JOIN ", x,
                " AS b ON a.", idcol, " = b.", idcol,
                " WHERE a.chembl_id IN", chembl_id,
                " GROUP BY a.chembl_id
                  ORDER BY a.chembl_id"))
            cust_annot <- cust_annot[, !colnames(cust_annot) %in%
                                         c("chembl_id", idcol)]
            res <- cbind(res, cust_annot)
        }
    }
    dbDisconnect(conn)
    return(res)
}
yduan004/compoundCollection documentation built on Sept. 20, 2020, 5:59 a.m.