R/clicky11_linux.R

Defines functions gestalt_save gestalt_plot gestalt_read gestalt_run clickr gestaltheat Heatp rmerge

#library(colorout)



rmerge<-function(dat_mat1,dat_mat2,all=T){
  dat_out=merge(dat_mat1,dat_mat2,by="row.names",all=all)
    rownames(dat_out)=dat_out$Row.names
  dat_out=dat_out[,-which(colnames(dat_out)=="Row.names")]
  return(dat_out)
}




Heatp<-function(pval_mat,sig=T,dat_descr=''){
  library(gplots)
#  if(breaks=="default"){breaks=seq(0,max(-log10(pval_mat)),length=(102))}
#  if(col=="default"){col=colorRampPalette(c("white","#ffffbf","#fee090","#fdae61","#f46d43","#d73027","#a50026","darkred"))(length(breaks)-1)}

margin=c(max(13,(125-nrow(pval_mat))),12)
lheight=c(0.06,0.94)

  if(sig){
      dat_sig=as.data.frame(pval_mat)
      dummy=pval_mat<=1e-5
      dat_sig[dummy]="****"
      dummy=pval_mat<=1e-4 & pval_mat>1e-5
      dat_sig[dummy]="***"
      dummy=pval_mat<=1e-3 & pval_mat>1e-4
      dat_sig[dummy]="**"
      dummy=pval_mat<=1e-2 & pval_mat>1e-3
      dat_sig[dummy]="*"
      dummy=pval_mat<=0.05 & pval_mat>1e-2
      dat_sig[dummy]="+"
      dummy=pval_mat>0.05
      dat_sig[dummy]=""
    heatmap.2(-log10(pval_mat),margins=margin,lhei=lheight,cellnote=(dat_sig),col=colorRampPalette(c("white","#ffffbf","#fee090","#fdae61","#f46d43","#d73027","#a50026","darkred"))(101),breaks=seq(0,max(-log10(pval_mat)),length=(102)),notecol="black",tracecol=F,dendrogram="none",Rowv=F,Colv=F,density.info="none",keysize=1,cexCol=0.7,cexRow=0.7,symkey=F,lwid=c(0.7,0.3),lmat=rbind(c(4,3),c(1,2)))
  }
 if(!sig){
    heatmap.2(-log10(pval_mat),margins=margin,lhei=lheight,col=colorRampPalette(c("white","#ffffbf","#fee090","#fdae61","#f46d43","#d73027","#a50026","darkred"))(101),breaks=seq(0,max(-log10(pval_mat)),length=(102)),tracecol=F,dendrogram="none",Rowv=F,Colv=F,density.info="none",keysize=1,cexCol=0.7,cexRow=0.7,symkey=F,lwid=c(0.7,0.3),lmat=rbind(c(4,3),c(1,2)))
  }
  ## text in top right corner
	par(xpd = NA)
	mtext(dat_descr, adj = 1, side = 3)
	par(xpd = F)
}



gestaltheat<-function(dat_mat,dat_descr,multi_page=F){
  if(multi_page){
    if(nrow(dat_mat>100)){
      k=0
      remainder=(nrow(dat_mat)-(k+100))
      cat("\tmulti-page plot :\n")

      while(remainder>0){
        cat("\t",k+1,k+100,"\n")
        dat_plot=dat_mat[(k+1):(k+100),]
        k=k+100
        remainder=(nrow(dat_mat)-(k+100))
        Heatp(dat_plot,sig=T,dat_descr=dat_descr)
      }

      if(remainder<0){
        cat("\t",k+1,k+(100+remainder),"\n")
        dat_plot=dat_mat[(k+1):(k+(100+remainder)),]
        Heatp(dat_plot,sig=T,dat_descr=dat_descr)
      }
    }
  }
}




clickr<-function(module_genes_list,module_bkrnd,out_dir=getwd(),dat_descr='',id_type="hsapiens__gene_symbol",ORG='hsapiens',enrich_terms=c("Wikipathways Analysis","GO Analysis","KEGG Analysis","Pathway Commons Analysis","Transcription Factor Target Analysis"),SIG=0.1){
  if(dat_descr!=''){dat_descr=paste0(dat_descr,'_')}

  if(class(module_genes_list)!='list'){
    stop('\n\tERROR :\trequire module_genes_list as list, each element -  vector or matrix of module genes')
  }


  current_dir=getwd()
  setwd(out_dir)
  
  system('mkdir tmp_in_files')
  system('mkdir tmp_out_files')
  write.table(module_bkrnd,file="tmp_in_files/tmp_bkgrnd.txt", quote=F, sep="\t", row.names=F, col.names=F)  

  for(imod in 1:length(module_genes_list)){
   cat('\t',names(module_genes_list)[imod],dat_descr,'\t',length(module_genes_list[[1]]),'\t',length(module_bkrnd),'\n')
    write.table(module_genes_list[[imod]],file="tmp_in_files/tmp_genelist.txt", quote=F, sep="\t", row.names=F, col.names=F)
    gestalt_run(
        module_path=paste0(out_dir,"/tmp_in_files/tmp_genelist.txt")                     # currenlty read from disk - input is a file path
        ,bkgrnd_path=paste0(out_dir,"/tmp_in_files/tmp_bkgrnd.txt")                      # currenlty read from disk - input is a file path
        ,id_type=id_type
        ,dat_descr=paste0(dat_descr,names(module_genes_list)[imod])

#        ,enrich_terms=c("Wikipathways Analysis","GO Analysis","KEGG Analysis","Pathway Commons Analysis","Transcription Factor Target Analysis")
	,enrich_terms=enrich_terms
        ,out_dir=paste0(out_dir,'/tmp_out_files/')
        ,email="[email protected]"
	,ORG=ORG
	,SIG=SIG
        )
  }
  setwd(current_dir)
  cat('\n\tall output files saved to:   ',paste0(out_dir,'/tmp_out_files/'),'\n')
}



gestalt_run<-function(
                    module_path
                    ,bkgrnd_path
                    ,id_type="hsapiens__gene_symbol"       # "hsapiens__entrezgene"
#                    ,id_type="hsapiens__entrezgene"
                    ,dat_descr=''
                    ,enrich_terms=c("GO Analysis","KEGG Analysis","Wikipathways Analysis","Pathway Commons Analysis","Transcription Factor Target Analysis")
                    ,out_dir=getwd()
                    ,email="[email protected]"
		    ,sleep=5
		    ,ORG="hsapiens"
		    ,SIG=0.1
                    ){

    library(RSelenium)
    RSelenium::checkForServer()
    RSelenium::startServer()

    SLEEP <- function(x)
         {
             p1 <- proc.time()
             Sys.sleep(x)
             proc.time() - p1 # The cpu usage should be negligible
         }



    remDr <- remoteDriver(remoteServerAddr = "localhost" 
                          , port = 4444
                          , browserName = "firefox"
                          )
    remDr$open(silent = TRUE)
#    remDr$open()
  
    remDr$navigate("http://bioinfo.vanderbilt.edu/webgestalt")
    start = remDr$findElement("link text",'START')
    start$clickElement()
    cat('\t####Page2\n')
    login_form = remDr$findElement("id","loginForm")
    login_form_email =login_form$findChildElement("id","email")
    login_form_email$sendKeysToElement(list(email, key="enter"))
    cat('\t####Page3\n')
    dropmenu.1=remDr$findElement("name","organism")
#    dropmenu.1$sendKeysToElement(list("hsapiens",key="enter"))
    dropmenu.1$sendKeysToElement(list(ORG,key="enter"))	
    SLEEP(2)
    dropmenu.2=remDr$findElement("name","idtype")
    dropmenu.2$sendKeysToElement(list(id_type,key="enter"))
    SLEEP(2)
    inputfiles=remDr$findElement("name","inputfile")
    inputfiles$sendKeysToElement(list(module_path))
    SLEEP(2)
    enter = remDr$findElement("css selector" ,'[value=ENTER]')
    enter$clickElement()

## this is page 4, but if the element in the following line is not found there is likely a fault with the inputs
    analysis_menu = remDr$findElement("class name",'dropdown')
    cat('\t####Page4\n')
    analysis_menu$clickElement()
    analysis = remDr$findElement("link text", enrich_terms[1])
    analysis$clickElement()
    SLEEP(2)
    refsetfile = remDr$findElement("name",'refsetfile')
    refsetfile$sendKeysToElement(list(bkgrnd_path))
    SLEEP(2)
    dropmenu.4.1=remDr$findElement("name","upload_idtype")
    dropmenu.4.1$sendKeysToElement(list(id_type,key="enter"))
    SLEEP(2)
    dropmenu.4.2=remDr$findElement("name","cutoff")
#    dropmenu.4.2$sendKeysToElement(list(".1",key="enter"))
    dropmenu.4.2$sendKeysToElement(list(SIG,key="enter"))
    SLEEP(2)
    submit = remDr$findElement("css selector",'[value="Run Enrichment Analysis"]')
    submit$clickElement()
    WINDOW_ID=submit$getWindowHandles()[[1]][1]
    ANALYSIS_LIST_WINDOW_ID.tmp=list()
    j=1
#    ANALYSIS_LIST_WINDOW_ID=c(submit$getWindowHandles()[[1]][2])
    ANALYSIS_LIST_WINDOW_ID.tmp[[j]]=submit$getWindowHandles()[[1]][2]
    a=3
    for(i in enrich_terms[2:length(enrich_terms)])
    {
	    j=j+1
            submit$switchToWindow(submit$getWindowHandles()[[1]][1])
            analysis_menu$clickElement()
            analysis = remDr$findElement("link text",i)
            analysis$clickElement()
            submit = remDr$findElement("css selector",'[value="Run Enrichment Analysis"]')
            submit$clickElement()
#            ANALYSIS_LIST_WINDOW_ID=c(ANALYSIS_LIST_WINDOW_ID,submit$getWindowHandles()[[1]][a])
		ANALYSIS_LIST_WINDOW_ID.tmp[[j]]=submit$getWindowHandles()[[1]][a]
            a=a+1
            SLEEP(3)
    }
 ANALYSIS_LIST_WINDOW_ID=unlist(ANALYSIS_LIST_WINDOW_ID.tmp)
   SLEEP(sleep)
#cat('\t####',length(enrich_terms),length(ANALYSIS_LIST_WINDOW_ID),'\n')

    for (i in 1:length(ANALYSIS_LIST_WINDOW_ID))
    {
#cat('\t####',i,enrich_terms[i],"\n")
	    tmp.res=FALSE
            submit$switchToWindow(ANALYSIS_LIST_WINDOW_ID[i])
            tmp=remDr$getPageSource()
#            tmp.res=length(grep("Export TSV",tmp))
#	    tmp.res=grepl("Export TSV",tmp)
            NORES=0
#SLEEP(10)
#cat('\t####',i,"tmp.res=",tmp.res,"nores=",NORES,"-- entering while loop\n")

            while(!tmp.res)
            {
                    SLEEP(5)
                    tmp=remDr$getPageSource()
                    tmp.res=grepl("Export TSV",tmp)
#cat('\t####',i,tmp.res,"-- checkiing if run is complete \n")

                    if(!tmp.res)
                    {
                            tmp.res=length(grep("Close",tmp))
                            if(tmp.res)
                            {
                                    NORES=1
#cat('\t####',i,"tmp.res=",tmp.res,"nores=",NORES,"-- found close\n")

                            }
                    }
            }
#cat('\t####',i,"tmp.res=",tmp.res,"nores=",NORES,"-- exited while loop\n")

            if(NORES==0)
            {
#		    SLEEP(10)
                    url.tmp = remDr$findElement("link text",'Export TSV Only')
                    url=url.tmp$getElementAttribute("href")
		    print (url)
                    NAME=gsub(" ","_",enrich_terms[i])
                    outfile=paste(out_dir,"/",dat_descr,"_",NAME,".tsv",sep="")
                    system(paste("wget -O",outfile," '",as.character(url),"'",sep=""))
#cat('\t####',i,"tmp.res=",tmp.res,"nores=",NORES,"-- downloading file URL=",unlist(url),"\n")
#		   SLEEP(10)
            }
#SLEEP(10) 
           NAME=gsub(" ","_",enrich_terms[i])
            outfile=paste(out_dir,"/",dat_descr,"_",NAME,".tsv",sep="")
            if(NORES==1)
            {
                    text="No Significant results found";
                    write.table(file=outfile,text)
#cat('\t#### i=',i,"NORES=",NORES,"no signifcant file written\n")
		   NORES=0
#		SLEEP(10)
            }
#cat('\t####',i,NORES,tmp.res,"\n")
#SLEEP(10)
    }

    remDr$close()

}




gestalt_read<-function(enrich_type=c('KEGG','Commons','GO','Wikipathways','Transcription'),mod_names='',in_path=getwd(),out_path='',dat_descr='',verbose=T){
#if(verbose){
# cat('\tUSE\t: read and process files saved from WebGestalt from specified folder, default: in_path=getwd()\n')
# cat('\tNOTE\t: if out_path is provided, the new tables will also be saved at the provided location\n')
# cat('\tINPUTS\t: enrich_type currently supported  : 'KEGG', 'WIKI', 'Commons', 'GO'\n')
# cat('\tOUTPUT\t: list of module enrichments for use with gestalt_plot() function\n\n')
#}
	options(stringsAsFactors=F)

## get file names matching input criteria     --------------------------------------
	inf=list.files(in_path)#,pattern=paste(enrich_type,'&',mod_names,collapse=''))
	inf=inf[grep(paste(mod_names,collapse='|'),inf)]
	inf=inf[grep(paste(enrich_type,collapse='|'),inf)]

	if(length(inf)==0){
		stop(paste0('no files matching criteria: ',paste(mod_names,collapse=', '),', ',paste(enrich_type,collapse=', '),'\nfound at: ',in_path,'\n'))
	}

	 cat('\t',length(inf),'files match criteria\n')
	enrich=list()

##  simplify search terms and output naming     --------------------------------------
key=as.data.frame(c('KEGG','Commons','GO','GO','GO','Wikipathways','Transcription'))
	colnames(key)='enrich_types'
key$name=c('KEGG','PathwayCommons','GO_bp','GO_cc','GO_mf','Wikipathways','Transcription')
key$search=c('KEGG pathway','Pathway Commons','biological process|cellular component|molecular function','biological process|cellular component|molecular function','biological process|cellular component|molecular function','Wikipathways','Transcription')
key$key=c('KEGG pathway','Pathway Commons','biological process','cellular component','molecular function','Wikipathways','Transcription')
key=key[key$enrich_types%in%enrich_type,]

##  create a database for all terms and modules, can be done while processing but apparently less good     ----------------------------
	dtb=vector(mode='list', length=length(key$name))
	names(dtb)=key$name
	for(ilis in 1:length(dtb)){
		dtb[[ilis]]=vector(mode='list', length=length(mod_names))
		names(dtb[[ilis]])=mod_names
	}


  	for(ityp in 1:nrow(key)){
		 cat('\t',key$name[ityp],'\t================================================\n')
  		for(imod in 1:length(mod_names)){
			 cat('\t\t',mod_names[imod],'    \t')
  			path=list()

  			if(length(inf[grepl(key$enrich_types[ityp],inf)&grepl(paste0(mod_names[imod],'_'),inf)])>1){
  				stop(paste0('found multiple files matching :\t',mod_names[imod],' & ',key$enrich_types[ityp]))
  			###	print(inf[grepl(key$enrich_types[ityp],inf)&grepl(mod_names[imod],inf)])
  			}
  			if(length(inf[grepl(key$enrich_types[ityp],inf)&grepl(paste0(mod_names[imod],'_'),inf)])==0){
  				warning(paste0('found no files matching :\t',mod_names[imod],' & ',key$enrich_types[ityp]))
  			###	print(inf[grepl(key$enrich_types[ityp],inf)&grepl(paste0(mod_names[imod],'_'),inf)])
  			}

  			mod=readLines(paste(in_path,'/',(inf[grepl(key$enrich_types[ityp],inf)&grepl(paste0(mod_names[imod],'_'),inf)]),sep=''))
			mod=mod[10:length(mod)]	# risky but somewhat necessary, alternative is to remove the first match
			mod=mod[mod!='']

			pos=grep(key$search[ityp],mod)


  		if(length(pos)==0){
			cat('no significant terms\n')
		}

		if(length(pos)>0){
			path=list(type=paste(gsub('\t.*','',mod[pos])))
			path$term=as.vector(t(as.data.frame(strsplit(mod[pos],'\t'))[2,]))
		 		
			path$nGenes=gsub('.*O=|;.*','',mod[pos+1])
			path$rawPval=gsub('.*rawP=|;.*','',mod[pos+1])
			path$adjPval=gsub('.*adjP=','',mod[pos+1])

			endc=c((pos[-1]-1),length(mod))

			gen_list=list()
		for(ipat in 1:length(pos)){
			gen=(mod[(pos[ipat]+2):(endc[ipat])])
			gen_list[[ipat]]=paste(gsub("(.*)\t.*\t.*\t.*\t.*\t.*",'\\1',gen),collapse=', ')
		}
		path$genes=unlist(gen_list)
		path=as.data.frame(path)
		path=path[path$type==key$key[ityp],]		## yes doing the work x3 for GO terms, but i cant see a better way without making GO a special case
		cat('\tterms found:',nrow(path),'\n')
	}
	dtb[[key$name[ityp]]][[mod_names[imod]]]=path
}
}
	return(dtb)
}



gestalt_plot<-function(enrich_list,dat_descr='',p_threshold=0.1,height=22,width=11,margins=c(5, 20, 4, 2),points_sig=T){

	##  'simplify' search terms and output naming     --------------------------------------
	key=as.data.frame(c('KEGG','PathwayCommons','GO_bp','GO_cc','GO_mf','Wikipathways','Transcription'))
		colnames(key)='name'
	key$key=c('KEGG pathway','Pathway Commons','GO: biological process','GO: cellular component','GO: molecular function','Wikipathways Pathway','Transcription Factor')
	key=key[key$name%in%names(enrich_list),]
		print(key)
	names(enrich_list)=key$key

for(ityp in 1:length(enrich_list)){
	  cat('\t==================  ',names(enrich_list)[ityp],'  ==================\n')


	for(imod in 1:length(enrich_list[[ityp]])){
		if(length(enrich_list[[ityp]][[imod]])>0){
		  cat('\t\t',names(enrich_list[[ityp]])[imod],'\n')
		  rownames(enrich_list[[ityp]][[imod]])=enrich_list[[ityp]][[imod]]$term
		  enrich_list[[ityp]][[imod]]=enrich_list[[ityp]][[imod]][,'adjPval',drop=F]
		  colnames(enrich_list[[ityp]][[imod]])=names(enrich_list[[ityp]][imod])

		if(imod==1){enrich=as.data.frame(enrich_list[[ityp]][[imod]])}
		if(imod>1){enrich=rmerge(enrich,(enrich_list[[ityp]][[imod]]))}
		}

		if(length(enrich_list[[ityp]][[imod]])==0){
		  cat('\t\t',names(enrich_list[[ityp]])[imod],'\n')
		  enrich_list[[ityp]][[imod]]=as.data.frame(1)
		  rownames(enrich_list[[ityp]][[imod]])=''
		  colnames(enrich_list[[ityp]][[imod]])=names(enrich_list[[ityp]][imod])

		if(imod==1){enrich=as.data.frame(enrich_list[[ityp]][[imod]])}
		if(imod>1){enrich=rmerge(enrich,(enrich_list[[ityp]][[imod]]))}
		}

	}


	enrich=make.numeric(enrich)

	if(''%in%rownames(enrich)){
		enrich=(enrich[-which(rownames(enrich)==''),,drop=F])
	}

	enrich[is.na(enrich)]=1
	enrich[enrich>p_threshold]=1

# remove rows where all values == 1 (ie. missing/ns)  ||    enrich[-0,] breaks, hence the logic check first
	if(sum(apply(enrich,1,sum)==ncol(enrich))>0){
		enrich=(enrich[!(apply(enrich,1,sum)==ncol(enrich)),,drop=F])
	}

#	pdf(paste('~/Dropbox/bin/clicky/graphics/dummy.',names(enrich_list)[ityp],'.pdf',sep=''),width=width,height=height)
	if(nrow(enrich)==0){
		cat('\tno plot\n')
		plot.new()
		legend('top',legend=paste(dat_descr,names(enrich_list)[ityp],colnames(enrich),'\n','NO SIGNIFICANT TERMS at P<',p_threshold,'\n'),cex=2)

	}

	if(nrow(enrich)>0){

	if(ncol(enrich)==1){
		cat('\tsingle plot\n')
#		print(dim(enrich))
		enrich=-log10(make.numeric(enrich))
		enrich=enrich[order(enrich[,1]),,drop=F]
		margins=c(100-(nrow(enrich)*1),(max(nchar(rownames(enrich)))/1.8),2.5,1)
		par(mar=margins)
		barplot(t(enrich),horiz=T,las=1,main=paste(names(enrich_list)[ityp],'\n',dat_descr,colnames(enrich)))
		abline(v=-log10(0.01),col='dodgerblue')
		abline(v=-log10(0.05),col='red')
		margins=c(5.1,4.1,4.1,2.1)	# reset default margins
	}

	if(ncol(enrich)>1){
		cat('\tmulti plot\n')
		#library(corrplot)
		enrich=enrich[do.call(order, (lapply(1:NCOL(enrich), function(i) enrich[, i]))), ]
		gestaltheat(enrich,names(enrich_list)[ityp],T)
		enrich=-log10(make.numeric(enrich))
	}
	}
	rm(enrich)
}
return(invisible(enrich_list))
}





gestalt_save<-function(enrich_list,out_path=getwd(),dat_descr='',sep='\t',enrich_type=c('KEGG','Commons','GO','Wikipathways','Transcription'),mod_names=''){
	# save processed files
	# using sep=',' allows for gene names to have individual columns if opened in excel
	# using sep='\t', etc, preserves gene names as single column with coma separated values
	if(dat_descr!=''){dat_descr=paste0(dat_descr,'_')}

	key=as.data.frame(c('KEGG','Commons','GO','GO','GO','Wikipathways','Transcription'))
		colnames(key)='enrich_types'
	key$name=c('KEGG','PathwayCommons','GO_bp','GO_cc','GO_mf','Wikipathways','Transcription')
	key=key[key$enrich_types%in%enrich_type,]

	enrich_list=enrich_list[key$name]
	for(ityp in 1:length(enrich_list)){
		for(imod in 1:length(enrich_list[[ityp]])){
			if(mod_names!='' & names(enrich_list[[ityp]])[imod]%in%mod_names){
				 cat('\tsaving',paste0(out_path,'/',dat_descr,names(enrich_list[[ityp]])[imod],'_',names(enrich_list)[ityp],'.txt'),'\n')
				write.table(enrich_list[[names(enrich_list)[ityp]]][[names(enrich_list[[ityp]])[imod]]],file=paste0(out_path,'/',dat_descr,names(enrich_list[[ityp]])[imod],'_',names(enrich_list)[ityp],'.txt'),row.names=F,col.names=T)
			}
			if(mod_names==''){
				 cat('\tsaving',paste0(out_path,'/',dat_descr,names(enrich_list[[ityp]])[imod],'_',names(enrich_list)[ityp],'.txt'),'\n')
				write.table(enrich_list[[names(enrich_list)[ityp]]][[names(enrich_list[[ityp]])[imod]]],file=paste0(out_path,'/',dat_descr,names(enrich_list[[ityp]])[imod],'_',names(enrich_list)[ityp],'.txt'),row.names=F,col.names=T)
			}

		}
	}
}
ks471/clickyLinux documentation built on May 19, 2017, 6:22 a.m.