#' Tcga Detail information for a special Date
#'
#' @param tcgadate date form should be like "2013_09_23" for day, or "2013_09" for month, or "2013" for year
#'
#' @description Detail information for a special Date which can be dowanload,
#' this function could be used with function TcgaCancerDate. Into increasing the speed, we use
#' concurrency spider.
#'
#' @return A big data frame
#' @export
#'
#' @examples Tcga_DateCanBeDownFor_Cancer(date="2013")
#' @examples Tcga_DateCanBeDownFor_Cancer(date="2013_09")
#' @examples Tcga_DateCanBeDownFor_Cancer(date="2013_09_23")
Tcga_DateCanBeDownFor_Cancer<-function(tcgadate){
library(rvest)
urlTime="http://gdac.broadinstitute.org/runs/"
#get whole url
arrayTime=read_html(urlTime) %>% html_nodes("td a") %>% html_text()#long time
Judge_stddata=grepl(pattern = "stddata__[0-9]",x = arrayTime )
stddata=arrayTime[Judge_stddata==TRUE]
stddataT=gsub(pattern = "stddata__",replacement = "",x = stddata)
cancertime=gsub(pattern = "/",replacement = "",x = stddataT)
system.time({
loopcraw<-function(i){
library(rvest)
urlTime="http://gdac.broadinstitute.org/runs/"
#get whole url
arrayTime=read_html(urlTime) %>% html_nodes("td a") %>% html_text()
Judge_stddata=grepl(pattern = "stddata__[0-9]",x = arrayTime )
stddata=arrayTime[Judge_stddata==TRUE]
stddataT=gsub(pattern = "stddata__",replacement = "",x = stddata)
cancertime=gsub(pattern = "/",replacement = "",x = stddataT)
B=paste0(urlTime,stddata,"ingested_data.html")
if (grepl(x=cancertime[i],pattern = "2012_11_02")==TRUE){
"http://gdac.broadinstitute.org/runs/stddata__2012_11_02/gdac_counts.html" %>%
read_html() %>% html_nodes("table") %>% html_table(fill=TRUE,header=TRUE)
}else{
B[i] %>% read_html() %>% html_nodes("table") %>% html_table(fill=TRUE,header=TRUE)
}
}
library(parallel)
x <- 1:length(stddata)
core=detectCores()
cl <- makeCluster(core) # make cluster core
results <- parLapply(cl,x,loopcraw) # par cluster
stopCluster(cl) # close cluster
takement=data.frame()
for (i in 1:length(stddata)){
A=results[[i]]
if (length(A)==0){
cat("")
}else{
if (A[[1]][1,1]==""){
A_1=A[[1]][-1,]
}else{
A_1=A[[1]]
}
if ("PANCANCER" %in% A_1[,1]){
norow=grep(x = A_1[,1],pattern = "PANCANCER")
takement1=cbind(cancertime[i],A_1[-norow,])
takement=c(takement,list(takement1))
}else if ("Totals" %in% A_1[,1]){
norow=grep(x = A_1[,1],pattern = "Totals")
takement1=cbind(cancertime[i],A_1[-norow,])
takement=c(takement,list(takement1))
}else{
takement1=cbind(cancertime[i],A_1)
takement=c(takement,list(takement1))
}
}
}
#set a check to be change
checktcgadate=0
if (nchar(tcgadate)==10){
for (i in 1:length(takement)){
if (grepl(tcgadate,takement[[i]][,1][1])){
checktcgadate=i
CancerCanBeSortName=takement[[i]]
return(CancerCanBeSortName)
}
}
}
if (nchar(tcgadate)==7){
tcagdate7=data.frame()
for (i in 1:length(takement)){
if (grepl(tcgadate,takement[[i]][,1][1])){
checktcgadate=i
#complite dif
if (nrow(tcagdate7) !=0){
tcagdate7[setdiff(names(takement[[i]]), names(tcagdate7))] <- 0
}
takement[[i]][setdiff(names(tcagdate7), names(takement[[i]]))] <- 0
#combine
tcagdate7=rbind(tcagdate7,takement[[i]])
}
}
return(tcagdate7)
}
if (nchar(tcgadate)==4){
tcagdate4=data.frame()
for (i in 1:length(takement)){
if (grepl(tcgadate,takement[[i]][,1][1])){
checktcgadate=i
#complite dif
if (nrow(tcagdate4) !=0){
tcagdate4[setdiff(names(takement[[i]]), names(tcagdate4))] <- 0
}
takement[[i]][setdiff(names(tcagdate4), names(takement[[i]]))] <- 0
#combine
tcagdate4=rbind(tcagdate4,takement[[i]])
}
}
return(tcagdate4)
}
if (nchar(tcgadate)==0){
tcagdate0=data.frame()
for (i in 1:length(takement)){
checktcgadate=i
#complite dif
if (nrow(tcagdate0) !=0){
tcagdate0[setdiff(names(takement[[i]]), names(tcagdate0))] <- 0
}
takement[[i]][setdiff(names(tcagdate0), names(takement[[i]]))] <- 0
#combine
tcagdate0=rbind(tcagdate0,takement[[i]])
}
rownames(tcagdate0)=1:nrow(tcagdate0)
TumorColn=grep(pattern = "Tumor",x = colnames(tcagdate0))
tcagdateall=cbind(tcagdate0[,c(1,TumorColn)],tcagdate0[,c(-1,-TumorColn)])
return(tcagdateall)
}
#if checktcgadate stll equal 0, that indicates no date can be found
if (checktcgadate==0){
cat("There is no data for your date. Please check your date fromat using function TcgaCancerDate()","\n")
}
})#detect time
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.