Nothing
#' Build DrugAge Annotation Database
#'
#' This function builds the DrugAge annotation SQLite database from the
#' 'drugage_id_mapping' table stored in the 'inst/extdata' directory of this
#' package. The 'drugage_id_mapping.tsv' table contains the DrugAge compounds
#' annotation information (such as species, avg_lifespan_change etc) as well
#' as the compound name to ChEMBL id and PubChem id mappings.
#'
#' Part of the id mappings in the 'drugage_id_mapping.tsv' table is generated
#' by the \code{processDrugage} function for compound names that have ChEMBL
#' ids from the ChEMBL database (version 24). The missing IDs were added
#' manually. A semi-manual approach was to use this web service:
#' \url{https://cts.fiehnlab.ucdavis.edu/batch}. After the semi-manual process,
#' the left ones were manually mapped to ChEMBL, PubChem and DrugBank ids.
#' The mixed items were commented.
#' @param da_path character(1), file path to the tabular file generated from
#' \code{processDrugage} function and manually edited the missing IDs.
#' @param dest_path character(1), destination path of the result DrugAge
#' annotation SQLite database
#' @return DrugAge annotation SQLite database
#' @examples
#' buildDrugAgeDB(dest_path=tempfile(fileext="_drugage.db"))
#' @export
#'
buildDrugAgeDB <- function(da_path=system.file("extdata/drugage_id_mapping.tsv",
package="customCMPdb"), dest_path){
## Read DrugAge Mapping from inst/extdata
# require(gsheet)
# url<-"https://bit.ly/3dCWKWo"
# drugAge_mapping <- suppressWarnings(gsheet2tbl(url))
# ## Add columns names
# names(drugAge_mapping) <- as.character(unlist(drugAge_mapping[1,]))
# drugAge_mapping <- drugAge_mapping[-1,]
drugAge_mapping <- read.delim(da_path)
## Create internal DrugAge_id named ida000xxx
drugAge_ids <- paste0("ida",sprintf("%05d",1:nrow(drugAge_mapping)))
drugAge_mapping2 <- data.frame(drugage_id=drugAge_ids, drugAge_mapping)
## Create DrugAge Database
da <- dbConnect(SQLite(), dest_path)
## write id_mapping table.
id_maptb <- na.omit(data.frame(drugage_id=drugAge_mapping2$drugage_id,
chembl_id=drugAge_mapping2$chembl_id,
stringsAsFactors=FALSE))
dbWriteTable(da,'id_mapping', id_maptb)
## write DrugAge Annotation table
drugAgeAnnot <- data.frame(drugAge_mapping2[1:13], drugAge_mapping2[15:16])
dbWriteTable(da, 'drugAgeAnnot', drugAgeAnnot)
dbDisconnect(da)
}
#' Process Source DrugAge Dataset
#'
#' This function processes the source DrugAge datasets by adding the ChEMBL,
#' PubChem and DrugBank id mapping information to the source DrugAge table
#' which only has compound names without id mapping information. Source file of
#' DrugAge is linked here: \url{http://genomics.senescence.info/drugs/dataset.zip}
#'
#' This function only annotates compound names that have ChEMBL
#' ids from the ChEMBL database (version 24). The missing IDs were added
#' manually. A semi-manual approach was to use this web service:
#' \url{https://cts.fiehnlab.ucdavis.edu/batch}. After the semi-manual process,
#' the left ones were manually mapped to ChEMBL, PubChem and DrugBank ids.
#' The mixed items were commented.
#' @param dest_file character(1), file path to the generated DrugAge annotation
#' tabular file with id mappings. The default will write the file named as
#' "drugage_id_mapping.tsv" to user's current working directory.
#' @param verbose logical(1), If descriptive message and list of issues should be
#' included as output
#' @return write the default 'drugage_id_mapping.tsv' file to user's current working
#' directory or the file path defined by users to the \code{dest_dafile} argument.
#' @importFrom rappdirs user_cache_dir
#' @importFrom utils untar
#' @import BiocFileCache
#' @examples
#' library(ChemmineR)
#' \dontrun{
#' processDrugage(dest_file="drugage_id_mapping.tsv")
#' # Now the missing IDs need to be added manually. A semi-manual approach is to
#' # use this web service: https://cts.fiehnlab.ucdavis.edu/batch
#' }
#' @export
#'
processDrugage <- function(dest_file="drugage_id_mapping.tsv", verbose=TRUE) {
# download DrugAge dataset to Cache directory
dazip <- download_data("http://genomics.senescence.info/drugs/dataset.zip",
"DrugAge_dataset_zip", verbose=verbose)
## read tsv file in the zip by temporally extracting zip to tempfile
tmpdir <- tempdir()
unzip(dazip, exdir=tmpdir)
drugage <- read.csv(paste0(tmpdir, "/drugage.csv"))
drugage <- drugage[!duplicated(drugage$compound_name),]
## Get mol_dict table from chembl_db
chembldb <- download_data("ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/releases/chembl_24/archived/chembl_24_sqlite.tar.gz",
"ChEMBL24", verbose=verbose, ftpUpdate=FALSE)
bfc <- BiocFileCache(cache=rappdirs::user_cache_dir(appname="customCMPdb"), ask=TRUE)
cache_dir <- bfccache(bfc)
if(!dir.exists(paste0(cache_dir, "/chembl_24")))
untar(chembldb, exdir = cache_dir)
mydb <- dbConnect(SQLite(), paste0(cache_dir, "/chembl_24/chembl_24_sqlite/chembl_24.db"))
mol_dict <- dbGetQuery(mydb, 'SELECT * FROM molecule_dictionary')
mol_dict_sub <- mol_dict[!is.na(mol_dict$pref_name),] # mol_dict from below
prefname <- as.character(mol_dict_sub$pref_name)
chemblid <- as.character(mol_dict_sub$chembl_id)
fact <- tapply(chemblid, factor(prefname), paste, collapse=", ")
# Load ChEMBL to PubChem CID and DrugBank ID mappings
## UniChem CMP ID mappings from here:
## https://www.ebi.ac.uk/unichem/ucquery/listSources
## download ChEMBL to DrugBank mapping from UniChem in src1src2.txt.gz:
chembl2db <- download_data("ftp://ftp.ebi.ac.uk/pub/databases/chembl/UniChem/data/wholeSourceMapping/src_id1/src1src2.txt.gz",
"chembl2drugbank", verbose = verbose)
## ChEMBL to PubChem CID mapping in src1src22.txt.gz
chembl2pc <- download_data("ftp://ftp.ebi.ac.uk/pub/databases/chembl/UniChem/data/wholeSourceMapping/src_id1/src1src22.txt.gz",
"chembl2pubchem", verbose = verbose)
chembl2pubchem <- read.delim(gzfile(chembl2pc))
chembl2pubchem_vec <- tapply(as.character(chembl2pubchem[,2]),
factor(chembl2pubchem[,1]), paste, collapse=", ")
chembl2drugbank <- read.delim(gzfile(chembl2db))
chembl2drugbank_vec <- tapply(as.character(chembl2drugbank[,2]),
factor(chembl2drugbank[,1]), paste, collapse=", ")
## Assemble results
drugage <- cbind(drugage, pref_name=names(fact[toupper(drugage$compound_name)]),
chembl_id=fact[toupper(drugage$compound_name)])
drugage <- cbind(drugage, pubchem_cid=chembl2pubchem_vec[as.character(drugage$chembl_id)],
DrugBank_id=chembl2drugbank_vec[as.character(drugage$chembl_id)])
write.table(drugage, dest_file, row.names=FALSE, quote=FALSE, sep="\t")
}
download_data <- function(url, rname, verbose=TRUE, ftpUpdate=TRUE){
bfc <- BiocFileCache(cache=rappdirs::user_cache_dir(appname="customCMPdb"), ask=TRUE)
rid <- bfcquery(bfc, url)$rid
if(!length(rid)){
if(verbose) message(paste("Downloading", rname, "from", url))
rid <- names(bfcadd(bfc, rname, url))
}
# check to see if the resource needs to be updated
if (isTRUE(bfcneedsupdate(bfc, rid)) | ftpUpdate)
bfcdownload(bfc, rid, ask=TRUE)
bfcrpath(bfc, rids = rid)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.