.getClass <- function(obj) {
cls <- class(obj)
return(cls[1])
}
mapToSymbol <- function(inputData,organism="hsapiens",inputType="genelist",idType="auto",edgeType="unweighted", collapse_mode="maxSD", is_outputFile=FALSE, outputFileName="", verbose=TRUE){
organisms <- c("hsapiens","mmusculus","rnorvegicus","drerio","celegans","scerevisiae","cfamiliaris","dmelanogaster","athaliana")
if(length(which(organisms==organism))==0){
stop("Currently, the package supports the following nine organisms: hsapiens, mmusculus, rnorvegicus, drerio, celegans, scerevisiae, cfamiliaris, dmelanogaster and athaliana!")
}
if(length(which(inputType %in% c("genelist","network","sbt","sct","matrix")))==0){
stop("The input 'inputType' is invalide! Please select an option from 'genelist', 'netwrok', 'sbt', 'sct', and 'matrix'!\n")
}
if(length(which(collapse_mode %in% c("mean","median","maxSD","maxIQR","max","min")))==0){
stop("The input 'collapse_mode' is invalide! Please select an option from 'mean', 'median', 'maxSD', 'maxIQR', 'max' and 'min' (use mean, median, max standard derivation, max interquartile range, maximum and minimum of duplicate genes for each sample)!\n")
}
if(inputType=="genelist"){
if(is_outputFile==TRUE && outputFileName==""){
outputFileName <- "geneList_symbol.txt"
}
mapresult <- .GeneToSymbol(geneList=inputData,organism=organism,idType=idType,verbose=verbose)
if(is.null(mapresult)){
if(verbose==TRUE){
stop("The ids can not be transformed to gene symbols! Please check whether the inut idType is correct!\n",sep="")
}
}else{
if(.getClass(mapresult)=="numeric"){
if(verbose==TRUE){
stop(paste("Only ",round(mapresult*100),"% ids can be transformed to gene symbols. Please check whether the inut idType is correct!\n",sep=""))
}
mapresult <- NULL
}else{
if(is_outputFile==TRUE){
write.table(mapresult,file=outputFileName,row.names=F,col.names=F,sep="\t",quote=F)
}
}
}
}
if(inputType=="network"){
if(is_outputFile==TRUE && outputFileName==""){
outputFileName <- "network_symbol.net"
}
mapresult <- .NetToSymbol(inputNetwork=inputData, organism=organism, idType=idType, edgeType=edgeType, verbose=verbose)
if(is.null(mapresult)){
if(verbose==TRUE){
stop("The ids in the input network can not be transformed to gene symbols! Please check whether the inut idType is correct!\n",sep="")
}
}else{
if(.getClass(mapresult)=="numeric"){
if(verbose==TRUE){
stop(paste("Only ",round(mapresult*100),"% ids in the input network can be transformed to gene symbols. Please check whether the inut idType is correct!\n",sep=""))
}
mapresult <- NULL
}else{
if(is_outputFile==TRUE){
net <- mapresult$network
write.table(net,file=outputFileName,row.names=F,col.names=F,sep="\t",quote=F)
}
}
}
}
if(inputType=="matrix"){
if(is_outputFile==TRUE && outputFileName==""){
outputFileName <- "matrix_symbol.cct"
}
mapresult <- .MatToSymbol(inputMat=inputData, organism=organism, idType=idType, collapse_mode=collapse_mode, verbose=verbose)
if(is.null(mapresult)){
if(verbose==TRUE){
stop("The ids in the input matrix can not be transformed to gene symbols! Please check whether the inut idType is correct!\n")
}
}else{
if(.getClass(mapresult)=="numeric"){
if(verbose==TRUE){
stop(paste("Only ",round(mapresult*100),"% ids in the input matrix can be transformed to gene symbols. Please check whether the inut idType is correct!\n",sep=""))
}
mapresult <- NULL
}else{
if(is_outputFile==TRUE){
matrix <- mapresult$data_symbol
matrix <- cbind(rownames(matrix),matrix)
colnames(matrix)[1] <- "GeneSymbol"
write.table(matrix,file=outputFileName,row.names=F,col.names=T,sep="\t",quote=F)
}
}
}
}
if(inputType=="sbt"){
if(is_outputFile==TRUE && outputFileName==""){
outputFileName <- "sbt_symbol.sbt"
}
mapresult <- .sbtToSymbol(inputFile=inputData, organism=organism, idType=idType, verbose=verbose)
if(is.null(mapresult)){
if(verbose==TRUE){
stop("The ids in the SBT file can not be transformed to gene symbols! Please check whether the inut idType is correct!\n")
}
}else{
l <- mapresult$oriLength
mapresult <- mapresult$transformedData
if(l!=nrow(mapresult)){
if(verbose==TRUE){
warning(paste("The ids in ",nrow(mapresult)," tracks among all ",l," tracks in the input SBT file can be transformed to gene symbols!\n",sep=""))
}
}
if(is_outputFile==TRUE){
write.table(mapresult,file=outputFileName,row.names=F,col.names=F,sep="\t",quote=F)
}
}
}
if(inputType=="sct"){
if(is_outputFile==TRUE && outputFileName==""){
outputFileName <- "sct_symbol.sct"
}
mapresult <- .sctToSymbol(inputFile=inputData, organism=organism, idType=idType, verbose=verbose, collapse_mode=collapse_mode)
if(is.null(mapresult)){
if(verbose==TRUE){
stop("The ids in the SCT file can not be transformed to gene symbols! Please check whether the inut idType is correct!\n")
}
}else{
if(.getClass(mapresult)=="numeric"){
if(verbose==TRUE){
stop(paste("Only ",round(mapresult*100),"% ids in the SCT file can be transformed to gene symbols. Please check whether the inut idType is correct!\n",sep=""))
}
mapresult <- NULL
}else{
sctM <- mapresult$sct_symbol
sctM <- cbind(rownames(sctM),sctM)
colnames(sctM)[1] <- "GeneSymbol"
if(is_outputFile==TRUE){
write.table(sctM,file=outputFileName,row.names=F,col.names=T,sep="\t",quote=F)
}
}
}
}
return(mapresult)
}
.GeneToSymbol <-
function(geneList, organism="hsapiens", idType="auto",verbose=TRUE){
#require(biomaRt) || stop("Package biomaRt version 2.18.0 is required!")
if(!is.null(dim(geneList))){
stop("The input gene list should be a vector!!\n")
}
if(organism=="athaliana"){
mart <- useMart("plants_mart",host="plants.ensembl.org")
m <- try(mart <- useDataset(paste(organism,"_eg_gene",sep=""),mart),silent=TRUE)
}else{
mart <- useMart("ENSEMBL_MART_ENSEMBL",host="https://www.ensembl.org")
m <- try(mart <- useDataset(paste(organism,"_gene_ensembl",sep=""),mart),silent=TRUE)
}
if(.getClass(m)=="try-error"){
stop(paste("The function can not connect to Biomart. Please try again!\n",sep=""))
}
allMartAttr <- listAttributes(mart)
allFilter <- listFilters(mart)
finalAttr <- intersect(allMartAttr[,1],allFilter[,1])
genesymbol <- ""
if(organism=="hsapiens"){
noAttr <- c("strand","transcript_status","atlas_diseasestate","transcript_count","phenotype_description","source","go_id","transcript_source","goslim_goa_accession","ens_hs_gene","chromosome_name","status","mim_morbid_accession","atlas_celltype","germ_line_variation_source")
genesymbol <- "hgnc_symbol"
}
if(organism=="mmusculus"){
noAttr <- c("chromosome_name","go_id","goslim_goa_accession","strand","ikmcs_no_products_available_yet","status","germ_line_variation_source","hgnc_id","hgnc_symbol","hgnc_transcript_name","prints","phenotype_description","source")
genesymbol <- "mgi_symbol"
}
if(organism=="rnorvegicus"){
noAttr <- c("chromosome_name","go_id","goslim_goa_accession","strand","status","germ_line_variation_source","mgi_id","mgi_symbol","mgi_transcript_name","prints","source","transcript_source","transcript_count","transcript_biotype","transcript_status")
genesymbol <- "rgd_symbol"
}
if(organism=="drerio"){
noAttr <- c("chromosome_name","go_id","goslim_goa_accession","strand","status","status","germ_line_variation_source","hgnc_id","hgnc_symbol","hgnc_transcript_name","prints","phenotype_description","source","transcript_source","transcript_count","transcript_biotype","transcript_status")
genesymbol <- "zfin_symbol"
}
if(organism=="celegans"){
noAttr <- c("chromosome_name","go_id","prints","source","status","strand","transcript_source","transcript_count","transcript_biotype","transcript_status")
genesymbol <- "wormbase_locus"
}
if(organism=="scerevisiae"){
noAttr <- c("chromosome_name","germ_line_variation_source","go_id","goslim_goa_accession","prints","source","status","strand","transcript_source","transcript_count","transcript_biotype","transcript_status")
genesymbol <- "external_gene_name"
}
if(organism=="cfamiliaris"){
noAttr <- c("chromosome_name","germ_line_variation_source","go_id","goslim_goa_accession","hgnc_id","hgnc_symbol","hgnc_transcript_name","phenotype_description","prints","source","status","strand","transcript_source","transcript_count","transcript_biotype","transcript_status")
genesymbol <- "external_gene_name"
}
if(organism=="dmelanogaster"){
noAttr <- c("chromosome_name","germ_line_variation_source","go_id","goslim_goa_accession","prints","source","status","strand","transcript_source","transcript_count","transcript_biotype","transcript_status")
genesymbol <- "external_gene_name"
}
if(organism=="athaliana"){
noAttr <- c("chromosome_name","prints_pf","strand")
genesymbol <- "tair_symbol"
}
finalAttr <- setdiff(finalAttr,noAttr)
finalAttr <- sort(finalAttr)
if(idType!="auto" && length(which(finalAttr==idType))==0){
cat("The input 'idType' is not supported by Biomart. Please select one id type from the following list:\n")
cat(finalAttr,sep="\n")
stop("The users can also set 'idType' as 'auto' and the function will automatically search idtype based on the input data. However, this function may take 20-60 minutes based on users' internet speed\n")
}
if(idType=="auto"){
idtype <- .autoSearchIdType(geneList,finalAttr,mart,genesymbol,verbose)
if(!is.null(idtype)){
idmap <- getBM(attributes=c(idtype,genesymbol),filters=idtype,values=geneList,mart=mart)
idmap <- idmap[idmap[,2]!="",]
t <- tapply(idmap[,2],idmap[,1],paste,collapse="|")
idmap <- data.frame(id=names(t),genesymbol=t,stringsAsFactors=F)
return(idmap)
}else{
return(NULL)
}
}else{
idmap <- getBM(attributes=c(idType,genesymbol),filters=idType,values=geneList,mart=mart)
if(nrow(idmap)==0){
return(NULL)
}else{
idmap <- idmap[idmap[,2]!="",]
t <- tapply(idmap[,2],idmap[,1],paste,collapse="|")
idmap <- data.frame(id=names(t),genesymbol=t,stringsAsFactors=F)
if(nrow(idmap)/length(geneList)<0.1){
return(nrow(idmap)/length(geneList))
}else{
return(idmap)
}
}
}
}
.autoSearchIdType <- function(geneList,finalAttr,mart,genesymbol,verbose=TRUE){
if(length(geneList)>40){
randomNum <- 40
}else{
randomNum <- length(geneList)
}
if(verbose==TRUE){
cat("\nSearching the array type...\n")
}
randomId <- sample(geneList,randomNum)
finalIdtype <- c()
tempIdtype <- data.frame(id="",rate=0,stringsAsFactors=F)
ti <- 1
rang <- ifelse(length(finalAttr)>10,10,length(finalAttr))
startA <- 1
endA <- rang
startB <- length(finalAttr)-rang+1
endB <- length(finalAttr)
l <- 0
u <- 0
fi <- 0
while(u<length(finalAttr)){
if(l==0){
s <- startA
e <- endA
}else{
s <- startB
e <- endB
}
for(i in c(s:e)){
idtype <- finalAttr[i]
idmap <- getBM(attributes=c(idtype,genesymbol),filters=idtype,values=randomId,mart=mart)
if(length(unique(idmap[,1]))>(0.8*length(randomId))){
finalIdtype <- idtype
fi <- 1
break
}else{
if(length(unique(idmap[,1]))>(0.6*length(randomId))){
tempIdtype[ti,1] <- idtype
tempIdtype[ti,2] <- (length(unique(idmap[,1]))/length(randomId))
ti <- ti + 1
}
}
}
if(fi==1){
break
}
if(l==0){
l <- 1
u <- u+endA-startA+1
startA <- startA + rang
endA <- startA + rang - 1
if(endA>=startB){
endA <- startB-1
}
}else{
l <- 0
u <- u+endB-startB+1
endB <- startB-1
startB <- endB - rang + 1
if(startB<endA){
startB <- endA+1
}
}
}
is.map <- TRUE
if(length(finalIdtype)>0){
idtype <- finalIdtype
is.map <- TRUE
}else{
if(tempIdtype[1,1]!=""){
tempIdtype <- tempIdtype[order(-tempIdtype[,2]),]
idtype <- tempIdtype[1,1]
is.map <- TRUE
}else{
is.map <- FALSE
}
}
if(is.map==TRUE){
if(verbose==TRUE){
cat("The input Ids can map to ",idtype, "!\n")
}
return(idtype)
}else{
if(verbose==TRUE){
cat("The function can not find the matched id type from biomart!\n")
}
return(NULL)
}
}
.NetToSymbol <-
function(inputNetwork, organism="hsapiens", idType="auto", edgeType="unweighted", verbose=TRUE){
#require(tools) || stop("Package tools version 3.0.0 is required!")
#load Network
if(verbose==TRUE){
cat("Transforming the ids in the input network to gene symbols...\n")
}
if(.getClass(inputNetwork)=="character"){
if(file_ext(inputNetwork)!="net"){
stop("The extension of the input file should be 'net'!\n")
}else{
network <- read_graph_ncol(inputNetwork)
if(edgeType=="weighted"){
if(is.null(E(network)$weight)){
stop("The input network does not contain edge weights. Please add the edge weights or change parameter 'edgeType' to 'unweigthed'!")
}
}else{
if(!is.null(E(network)$weight)){
stop("The input network contains edge weights. Please remove the edge weights or change parameter 'edgeType' to 'weigthed'!")
}
}
}
}else{
if(.getClass(inputNetwork)=="data.frame" || .getClass(inputNetwork)=="matrix"){
if(edgeType=="weighted"){
if(ncol(inputNetwork)!=3){
stop("Data object should contain three columns: interactor1, interactor2 and edge weight!")
}else{
weight <- inputNetwork[,3]
if(!is.numeric(weight)){
stop("The edge weight should be numeric!")
}else{
inputNetwork <- as.matrix(inputNetwork[,c(1,2)])
inputNetwork_S <- as.character(inputNetwork)
inputNetwork_S <- array(inputNetwork_S,dim=dim(inputNetwork))
network <- graph.edgelist(inputNetwork_S,directed=F)
E(network)$weight <- weight
}
}
}else{
if(ncol(inputNetwork)!=2){
stop("data object should contain two columns!\n");
}else{
inputNetwork <- as.matrix(inputNetwork)
inputNetwork_S <- as.character(inputNetwork)
inputNetwork_S <- array(inputNetwork_S,dim=dim(inputNetwork))
network <- graph.edgelist(inputNetwork_S,directed=F)
}
}
rm(inputNetwork,inputNetwork_S)
gc()
}else{
if(.getClass(inputNetwork)=="igraph"){
if(edgeType=="unweighted"){
if(!is.null(E(inputNetwork)$weight)){
stop("The input network contains edge weights. Please remove the edge weights or change parameter 'edgeType' to 'weigthed'!")
}else{
network <- inputNetwork
}
}else{
if(is.null(E(inputNetwork)$weight)){
stop("The input network does not contain edge weights. Please add the edge weights or change parameter 'edgeType' to 'unweigthed'!")
}else{
network <- inputNetwork
}
}
rm(inputNetwork)
gc()
}else{
if(.getClass(inputNetwork)=="graphNEL"){
network <- igraph.from.graphNEL(inputNetwork)
if(edgeType=="unweighted"){
if(!is.null(E(network)$weight)){
stop("The input network contains edge weights. Please remove the edge weights or change parameter 'edgeType' to 'weigthed'!")
}
}else{
if(is.null(E(network)$weight)){
stop("The input network does not contain edge weights. Please add the edge weights or change parameter 'edgeType' to 'unweigthed'!")
}
}
rm(inputNetwork)
gc()
}else{
stop("The input network should be from a file or a data object with data.frame, matrix, graphNEL or igraph class. Other types of input are invalid!\n")
}
}
}
}
netNode <- get.vertex.attribute(network,"name")
idmap <- .GeneToSymbol(netNode,organism,idType,verbose)
if(is.null(idmap)){
return(NULL)
}else{
if(.getClass(idmap)=="numeric"){
return(idmap)
}else{
network <- .extractSubNetwork(network,idmap[,1],edgeType)
rownames(idmap) <- idmap[,1]
idmap <- idmap[get.vertex.attribute(network,"name"),]
V(network)$name <- idmap[,2]
network <- simplify(network)
# if(verbose==TRUE){
# cat("After mapping to the gene symbol, the network contains ",vcount(network)," nodes and ",ecount(network)," edges!\n",sep="")
#}
network_edgelist <- get.edgelist(network)
network_edgelist <- as.data.frame(network_edgelist)
if(edgeType=="weighted"){
wei <- get.edge.attribute(network,name="weight")
network_edgelist$weight <- wei
}
re <- list(network=network_edgelist,idmap=idmap)
return(re)
}
}
}
.extractSubNetwork <- function(network,subList,edgeType){
if(edgeType=="unweighted"){
x <- get.edgelist(network)
x <- x[x[,1] %in% subList & x[,2] %in% subList,]
network <- graph.edgelist(x,directed=F)
}else{
x <- get.edgelist(network)
weight <- get.edge.attribute(network,"weight")
ind <- which(x[,1] %in% subList & x[,2] %in% subList)
x <- x[ind,]
weight <- weight[ind]
network <- graph.edgelist(x,directed=F)
E(network)$weight <- weight
}
return(network)
}
.MatToSymbol <-
function(inputMat, organism="hsapiens", idType="auto", collapse_mode="maxSD", verbose=TRUE){
#require(biomaRt) || stop("Package biomaRt version 2.18.0 is required!")
#require(tools) || stop("Package tools version 3.0.0 is required!")
#require(multicore) || stop("Package multicore version 0.1-7 is required!")
#require(doSNOW) || stop("Package doSNOW version 1.0.6 is required!")
#require(foreach) || stop("Package foreach version 1.4.0 is required!")
#mart <- useMart("ensembl")
#m <- try(mart <- useDataset(paste(organism,"_gene_ensembl",sep=""),mart),silent=TRUE)
#if(class(m)=="try-error"){
# stop(paste("The organism ",organism," your input is not valid! Correct organism names can be obtained with the listDatasets function.\n",sep=""))
#}
re <- testFileFormat(inputMat=inputMat, collapse_mode=collapse_mode)
inputMat <- re$inputMat
inputId <- rownames(inputMat)
idmap <- .GeneToSymbol(inputId,organism,idType,verbose)
if(!is.null(idmap)){
if(.getClass(idmap)=="numeric"){
return(idmap)
}else{
inputMat <- inputMat[idmap[,1],]
id <- as.vector(idmap[,2])
inputMat <- mergeDuplicate(id,inputMat,collapse_mode)
re <- list(data_symbol=inputMat,idmap=idmap)
return(re)
}
}else{
return(NULL)
}
}
.sbtToSymbol <- function(inputFile, organism="hsapiens", idType="auto", verbose=TRUE){
if(.getClass(inputFile)=="character"){
if(file_ext(inputFile)!="sbt"){
stop("The extension of the input file should be 'sbt'. The detail of the 'sbt' file format can be found in the NetGestalt (www.netgestalt.org)!\n")
}else{
inputMat <- read.csv(inputFile,header=FALSE,sep="\t",stringsAsFactors=FALSE)
}
}else{
stop("The 'inputFile' should be the directory of a SBT file\n")
}
.calculateLength <- function(vector){
vector <- vector[vector!=""]
return(length(vector))
}
sbtLength <- apply(inputMat,1,.calculateLength)
if(length(which(sbtLength<3))>0){
stop("The SBT file should contain at least three columns. The following rows of the input file only contain one or two columns: ",which(sbtLength<3),"\n",sep="")
}
transformedData <- data.frame(name="",des="",genelist="",stringsAsFactors=F)
ti <- 1
for(i in c(1:nrow(inputMat))){
geneL <- inputMat[i,3:ncol(inputMat)]
geneL <- geneL[geneL!=""]
idmap <- .GeneToSymbol(geneL,organism,idType,verbose)
mapNum <- c()
if(!is.null(idmap) && .getClass(idmap)!="numeric"){
mappedL <- unique(idmap[,2])
transformedData[ti,1] <- inputMat[i,1]
transformedData[ti,2] <- inputMat[i,2]
mappedL <- paste(mappedL,collapse="\t")
transformedData[ti,3] <- mappedL
ti <- ti+1
}
}
if(transformedData[1,1]!=""){
re <- list(transformedData=transformedData,oriLength=nrow(inputMat))
return(re)
}else{
return(NULL)
}
}
.sctToSymbol <- function(inputFile, organism="hsapiens", idType="auto", verbose=TRUE, collapse_mode="min"){
if(.getClass(inputFile)=="character"){
if(file_ext(inputFile)!="sct"){
stop("The extension of the input file should be 'sct'. The detail of the 'sct' file format can be found in the NetGestalt (www.netgestalt.org)!\n")
}else{
inputMat <- read.csv(inputFile,header=TRUE,sep="\t",stringsAsFactors=FALSE,check.names=FALSE)
}
}else{
stop("The 'inputFile' should be the directory of a SCT file\n")
}
if(ncol(inputMat)<2){
stop("The SCT file should contain at least two columns (gene id and statistic score). \n")
}
id <- inputMat[,1]
inputMat <- as.matrix(inputMat[,2:ncol(inputMat)])
if(length(unique(id))<length(id)){
cat("The SCT file contains duplidate Ids. The function will use 'median' to collapse duplicate Ids for each statistic.\n")
inputMat <- mergeDuplicate(id,inputMat,collapse_mode)
}else{
rownames(inputMat) <- id
}
idmap <- .GeneToSymbol(id,organism,idType,verbose)
if(!is.null(idmap)){
if(.getClass(idmap)=="numeric"){
return(idmap)
}else{
inputMat <- as.matrix(inputMat[idmap[,1],])
id <- as.vector(idmap[,2])
inputMat <- mergeDuplicate(id,inputMat,collapse_mode)
re <- list(sct_symbol=inputMat,idmap=idmap)
return(re)
}
}else{
return(NULL)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.