#'Convert ids
#'@description Map input entities to internal neo4j ids and grinn ids.
#'@usage convertId(x, nodetype, searchby, exactmatch, returnas)
#'@param x a character vector (or list) or a data frame of input entities, see details.
#'The entities can be values of any supported node property key, e.g. \code{x} = c('pubchemId1', 'pubchemId2'), see also \code{searchby}.
#'@param nodetype a string specifying the type of the query node. One of compound, protein, gene, pathway, rna, dna, phenotype.
#'@param searchby a string specifying the node property key used for the query. One of neo4jid, grinnid, name, synonyms, xref (default).
#'@param exactmatch a logical value. If TRUE (default), match exactly, case-sensitive and data type-sensitive.
#'Note: the default method accepts this argument but does not currently use it.
#'@param returnas a string specifying the output type. One of dataframe, list, json. Default is dataframe.
#'@details If \code{x} is a character vector, the result contains the input entities, neo4j ids and grinn ids.
#'
#'\code{x} can also be a data frame carrying additional information, e.g. stat values. The first column must hold the input entities to map.
#'The result then contains the input entities, neo4j ids, grinn ids and the remaining input columns.
#'
#'The original input is returned for unmapped entities.
#'
#'The database uses two id systems. The neo4j id is a numeric, internal id generated automatically by the database system.
#'The grinn id (gid) is the id system of the Grinn database and reuses the main ids of standard resources,
#'i.e. ENSEMBL for genes (e.g. ENSG00000139618), UniProt for proteins (e.g. P0C9J6), PubChem CID for compounds (e.g. 5793), KEGG for pathways (e.g. hsa00010).
#'@return The input entities with the matched neo4j ids and grinn ids. An empty list or data frame is returned on error.
#'@author Kwanjeera W \email{kwanich@@ucdavis.edu}
#'@examples
#'# Convert compound names
#'#kw <- list('1-Methylhistidine','D-Lactic acid')
#'#result <- convertId(x=kw, nodetype="compound", searchby="name")
#'# Query compounds by KEGG ids, keeping an extra column of p-values
#'#kw <- data.frame(kegg=c('C01152','C00256','C00345'),pval=c(0.01,0.05,0.3))
#'#result <- convertId(x=kw, nodetype="compound", searchby="xref")
#'@export
convertId <- function(x, nodetype, searchby="xref", exactmatch=TRUE, returnas="dataframe") {
  # S3 generic: dispatch on the class of `x`.
  UseMethod("convertId")
}
#'@export
convertId.default <- function(x, nodetype, searchby="xref", exactmatch=TRUE, returnas="dataframe"){
  # Default method: look up every distinct input entity with formatNode.LIST()
  # and return the input next to its neo4j id and grinn id.
  #
  # x          character vector / list of entities, or a data frame whose first
  #            column holds the entities and whose other columns are carried
  #            through to the result.
  # nodetype   node type; partial, case-insensitive match against the allowed set.
  # searchby   node property key used for the lookup.
  # exactmatch accepted for interface compatibility; not used by this method.
  # returnas   "dataframe", "list" or "json".
  #
  # Errors are never propagated: they are reported on stderr/stdout and an
  # empty value of the requested type is returned instead.

  # Empty value returned on error. Falls back to an empty data frame when
  # 'returnas' itself is unusable, so the caller never receives NULL.
  emptyResult <- function() {
    if (!is.character(returnas) || length(returnas) != 1L) {
      return(data.frame())
    }
    switch(returnas,
      dataframe = data.frame(),
      list = list(),
      json = list(),
      data.frame())
  }

  out <- tryCatch(
    {
      nodeTypes <- c("compound","protein","gene","pathway","rna","dna","phenotype")
      searchKeys <- c("xref","name","synonyms","grinnid","neo4jid")
      returnTypes <- c("dataframe","list","json")

      tmparg <- try(nodetype <- match.arg(tolower(nodetype), nodeTypes, several.ok = FALSE), silent = TRUE)
      if (inherits(tmparg, "try-error")) {
        stop("argument 'nodetype' is not valid, choose one from the list: compound,protein,gene,pathway,rna,dna,phenotype")
      }
      tmparg <- try(searchby <- match.arg(tolower(searchby), searchKeys, several.ok = FALSE), silent = TRUE)
      if (inherits(tmparg, "try-error")) {
        stop("argument 'searchby' is not valid, choose one from the list: xref,name,synonyms,grinnid,neo4jid")
      }
      # Validate the output type before any query is sent, so a typo does not
      # cost a full round of database lookups.
      if (!is.character(returnas) || length(returnas) != 1L || !(returnas %in% returnTypes)) {
        stop("Error: incorrect 'returnas' type")
      }

      if (!is.null(dim(x))) { # data frame input
        ids <- stringr::str_trim(unlist(x[, 1])) # remove surrounding whitespace
        keep <- !duplicated(ids)
        txtinput <- unique(ids)
        isDF <- ncol(x) > 1
        # Drop the duplicated rows from x as well, so that x[i, ] is the row
        # belonging to txtinput[i] when the extra columns are attached below.
        x <- x[keep, , drop = FALSE]
      } else { # vector / list input
        txtinput <- unique(stringr::str_trim(unlist(x))) # remove whitespace, duplicates
        isDF <- FALSE
      }
      if (length(txtinput) == 0L) {
        stop("argument 'x' contains no input entities")
      }

      # construct query
      nodetype <- Hmisc::capitalize(nodetype)
      cat("Converting ids ...\n")
      cat("Register parallel computing ...\nWarning: querying a large number of nodes will take long time. \n")

      # One lookup per distinct entity (run sequentially); the per-entity
      # frames are bound together once at the end.
      rows <- lapply(seq_along(txtinput), function(i) {
        res <- formatNode.LIST(x = txtinput[i], y = nodetype, z = searchby)[, 1:2] # id and gid
        if (isDF) {
          # combine with the rest of the input row
          data.frame(txtinput[i], res, x[i, 2:ncol(x)], stringsAsFactors = FALSE)
        } else {
          data.frame(txtinput[i], res, stringsAsFactors = FALSE)
        }
      })
      nodes <- do.call(rbind, rows)
      colnames(nodes) <- c("input", "neo4jid", "grinnid", if (isDF) colnames(x)[2:ncol(x)])
      row.names(nodes) <- NULL

      cat("Format and returning output of size ",nrow(nodes)," ...\n")
      ## output
      switch(returnas,
        dataframe = nodes,
        list = split(nodes, seq_len(nrow(nodes))),
        json = jsonlite::toJSON(nodes))
    },
    error = function(e) {
      # Print only the text: message(e) would re-signal the error condition
      # and trigger error handlers established by the caller.
      message(conditionMessage(e), appendLF = FALSE)
      cat("\nError: RETURN no data ..\n")
      emptyResult()
    })
  out
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.