#' TCGA detail information for one cancer cohort
#'
#' @description Lists, for every Broad GDAC Firehose `stddata` run, the data
#' that can be downloaded for a single cancer cohort. Use it together with
#' `TcgaCancerName()` to look up valid abbreviations. To increase speed, the
#' per-run pages are scraped concurrently on a local cluster.
#'
#' @param cancer A single cancer abbreviation (character string), e.g. `"OV"`.
#' @return A data frame with one row per run table row that matches `cancer`.
#'   The first column holds the run date and is named literally
#'   `cancertime[i]` (kept for backward compatibility); the remaining columns
#'   are sorted by name, and columns missing from a given run are filled
#'   with 0.
#' @export
#'
#' @examples
#' \dontrun{
#' TcgaCancerCanBeDownFor_Detail("OV")
#' }
TcgaCancerCanBeDownFor_Detail <- function(cancer) {
  # Validate before touching the network so bad input fails fast.
  if (!is.character(cancer) || length(cancer) != 1L || is.na(cancer)) {
    stop("`cancer` must be a single, non-missing character string such as ",
         "\"OV\".", call. = FALSE)
  }

  home_url <- "http://gdac.broadinstitute.org/"
  runs_url <- "http://gdac.broadinstitute.org/runs/"
  date_col <- "cancertime[i]"

  # Cohort table on the landing page; its second column is matched against
  # `cancer` after stripping blanks.
  home_tables <- rvest::html_table(
    rvest::html_nodes(rvest::read_html(home_url), "table#counts_table")
  )
  if (length(home_tables) == 0L) {
    stop("could not find the cohort table at ", home_url, call. = FALSE)
  }
  # html_table() may return a tibble; convert so column access is uniform.
  home <- as.data.frame(home_tables[[1]], stringsAsFactors = FALSE)
  known_cohorts <- gsub(" ", "", as.character(home[[2]]))
  if (!cancer %in% known_cohorts) {
    stop("your cancer name is WRONG. Please use function TcgaCancerName() to verify your input.")
  }

  # The runs index is read once; every stddata run becomes one URL to scrape.
  link_text <- rvest::html_text(
    rvest::html_nodes(rvest::read_html(runs_url), "td a")
  )
  run_dirs <- link_text[grepl("stddata__[0-9]", link_text)]
  if (length(run_dirs) == 0L) {
    stop("no stddata runs are listed at ", runs_url, call. = FALSE)
  }
  run_dates <- gsub("/", "", gsub("stddata__", "", run_dirs))
  run_urls <- paste0(runs_url, run_dirs, "ingested_data.html")
  # The 2012_11_02 run is read from a differently named page.
  run_urls[grepl("2012_11_02", run_dates)] <-
    "http://gdac.broadinstitute.org/runs/stddata__2012_11_02/gdac_counts.html"

  n_workers <- max(
    1L,
    min(length(run_urls), parallel::detectCores(), na.rm = TRUE)
  )
  cl <- parallel::makeCluster(n_workers)
  # Guarantee the workers are shut down even if scraping fails.
  on.exit(parallel::stopCluster(cl), add = TRUE)
  results <- parallel::parLapply(cl, run_urls, .tcga_fetch_tables)

  per_run <- vector("list", length(results))
  for (i in seq_along(results)) {
    tables <- results[[i]]
    if (length(tables) == 0L) {
      message("data for ", run_dates[i], " can not be done")
      next
    }
    # list() wrapper keeps the slot when the helper returns NULL.
    per_run[i] <- list(.tcga_cohort_rows(tables[[1]], run_dates[i], cancer))
  }
  per_run <- Filter(Negate(is.null), per_run)

  if (length(per_run) == 0L) {
    warning("no run lists data for cancer \"", cancer, "\".", call. = FALSE)
    empty <- data.frame(character(0), stringsAsFactors = FALSE)
    names(empty) <- date_col
    return(empty)
  }

  combined <- .tcga_bind_fill(per_run)
  other_cols <- sort(setdiff(names(combined), date_col))
  combined[, c(date_col, other_cols), drop = FALSE]
}

#' Download one run page and parse every HTML table on it
#'
#' Self-contained (namespace-qualified calls only) so it can be shipped to
#' cluster workers. A page that cannot be read yields an empty list, which
#' the caller reports as a run that "can not be done".
#'
#' @param url URL of the page to scrape.
#' @return A list of tables, possibly empty.
#' @noRd
.tcga_fetch_tables <- function(url) {
  tryCatch(
    rvest::html_table(
      rvest::html_nodes(rvest::read_html(url), "table"),
      fill = TRUE, header = TRUE
    ),
    error = function(e) list()
  )
}

#' Extract the rows of one run table that belong to a cohort
#'
#' Drops a leading row whose first cell is empty, removes the summary rows
#' ("PANCANCER", otherwise "Totals"), keeps the rows whose first column equals
#' `cancer`, and replaces that column with the run date.
#'
#' @param tbl First table scraped from a run page.
#' @param run_date Run date string stored in the `cancertime[i]` column.
#' @param cancer Cancer abbreviation to keep.
#' @return A data frame, or `NULL` when the run has no row for `cancer`.
#' @noRd
.tcga_cohort_rows <- function(tbl, run_date, cancer) {
  tbl <- as.data.frame(tbl, stringsAsFactors = FALSE)
  if (ncol(tbl) == 0L || nrow(tbl) == 0L) {
    return(NULL)
  }
  if (identical(as.character(tbl[[1]][1]), "")) {
    tbl <- tbl[-1, , drop = FALSE]
  }

  # Same precedence as before: only the first summary label found is removed.
  cohort <- as.character(tbl[[1]])
  for (label in c("PANCANCER", "Totals")) {
    if (label %in% cohort) {
      tbl <- tbl[!grepl(label, cohort), , drop = FALSE]
      break
    }
  }

  # which() ignores NA cohort names instead of producing NA-filled rows.
  keep <- which(as.character(tbl[[1]]) == cancer)
  if (length(keep) == 0L) {
    return(NULL)
  }
  rows <- tbl[keep, -1, drop = FALSE]
  data.frame(
    "cancertime[i]" = rep(run_date, nrow(rows)),
    rows,
    check.names = FALSE,
    stringsAsFactors = FALSE
  )
}

#' Row-bind data frames with differing columns, filling gaps with 0
#'
#' @param frames Non-empty list of data frames.
#' @return One data frame containing the union of all columns, with automatic
#'   row names.
#' @noRd
.tcga_bind_fill <- function(frames) {
  all_cols <- unique(unlist(lapply(frames, names), use.names = FALSE))
  padded <- lapply(frames, function(df) {
    for (col in setdiff(all_cols, names(df))) {
      df[[col]] <- 0
    }
    df[all_cols]
  })
  out <- do.call(rbind, padded)
  rownames(out) <- NULL
  out
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.