# R/CancerCanBeDownFor_Detail.R

#' Detail information for a specific cancer
#'
#' Detail information for a specific cancer which can be downloaded.
#'
#' The function reads the run index at \url{http://gdac.broadinstitute.org/runs/},
#' keeps every entry whose link text matches \code{stddata__<digit>}, and reads
#' the first HTML table of each run's \code{ingested_data.html} page
#' (\code{gdac_counts.html} for the \code{2012_11_02} run). From each table the
#' rows whose second column equals \code{cancer} are collected and stacked.
#' Progress is printed to the console with \code{cat()}.
#'
#' @param cancer The cancer name abbreviation, given as a single character
#'   string (for example \code{"OV"}).
#' @import rvest
#' @return CancerCanBeDownD, a data frame with one row per matching table row.
#'   The first column holds the run date taken from the run name; the other
#'   columns are the table's columns without the cancer-name column. Columns
#'   that a run's table does not have are filled with \code{0}. If no run lists
#'   \code{cancer}, an empty data frame is returned with a warning.
#' @export
#'
#' @examples
#' \dontrun{
#' CancerCanBeDownFor_Detail("OV")
#' }
CancerCanBeDownFor_Detail <- function(cancer) {
  if (!is.character(cancer) || length(cancer) != 1L || is.na(cancer)) {
    stop("`cancer` must be a single, non-missing character string.",
         call. = FALSE)
  }
  # A function should not install or attach packages as a side effect; fail
  # with an actionable message instead and call rvest through its namespace.
  if (!requireNamespace("rvest", quietly = TRUE)) {
    stop("Package 'rvest' is required; install it with ",
         "install.packages(\"rvest\").", call. = FALSE)
  }

  urlTime <- "http://gdac.broadinstitute.org/runs/"

  # Link texts of the run index; keep only the "stddata__<date>" entries.
  arrayTime <- rvest::html_text(
    rvest::html_nodes(rvest::read_html(urlTime), "td a")
  )
  stddata <- arrayTime[grepl(pattern = "stddata__[0-9]", x = arrayTime)]
  if (length(stddata) == 0L) {
    stop("No 'stddata__' runs were found at ", urlTime, call. = FALSE)
  }

  # Run date = run name without the "stddata__" prefix and without any "/".
  cancertime <- gsub(
    pattern = "/", replacement = "",
    x = gsub(pattern = "stddata__", replacement = "", x = stddata)
  )

  # One page per run; the 2012_11_02 run is read from gdac_counts.html.
  page_urls <- paste0(urlTime, stddata, "ingested_data.html")
  page_urls[cancertime == "2012_11_02"] <-
    "http://gdac.broadinstitute.org/runs/stddata__2012_11_02/gdac_counts.html"

  # Preallocated; runs without any table keep a NULL slot and are skipped later.
  takement <- vector("list", length(stddata))
  cat(rep("-", length(stddata)), "\n")
  for (i in seq_along(stddata)) {
    tables <- rvest::html_table(
      rvest::html_nodes(rvest::read_html(page_urls[i]), "table"),
      fill = TRUE, header = TRUE
    )

    if (length(tables) == 0) {
      cat("\n")
      cat("data for", cancertime[i], "can not be done", "\n")
      next
    }

    # Newer rvest versions return tibbles, for which `x[, 1]` is not a vector;
    # work on a plain data frame and extract columns with `[[`.
    run_table <- as.data.frame(tables[[1]])

    # Drop a leading row whose first cell is empty. isTRUE() keeps an NA cell
    # (or a table without rows) from aborting the `if`.
    if (isTRUE(run_table[[1]][1] == "")) {
      run_table <- run_table[-1, , drop = FALSE]
    }

    # Remove the summary rows: "PANCANCER" rows when such a row exists,
    # otherwise "Totals" rows. An exact match decides, a regex match removes.
    row_labels <- run_table[[1]]
    if ("PANCANCER" %in% row_labels) {
      run_table <- run_table[-grep(pattern = "PANCANCER", x = row_labels), ,
                             drop = FALSE]
    } else if ("Totals" %in% row_labels) {
      run_table <- run_table[-grep(pattern = "Totals", x = row_labels), ,
                             drop = FALSE]
    }

    # NOTE(review): cbind() appears to name the new first column after the
    # expression `cancertime[i]`; keep that expression unchanged so the column
    # name seen by callers stays the same.
    takement[[i]] <- cbind(cancertime[i], run_table)
    cat("- ")
  }
  cat("\n")

  # Seed with a throw-away column/row so that the first matching run goes
  # through the same fill-and-rbind path as all later ones; this keeps the
  # column order and type coercion of the accumulated result as before. Built
  # directly instead of through a textConnection that was never closed.
  combined <- data.frame(ttttt = 1L)
  found <- FALSE

  for (run_rows in takement) {
    if (is.null(run_rows)) {
      next
    }
    # %in% never yields NA, so NA cancer names cannot select all-NA rows.
    is_hit <- run_rows[[2]] %in% cancer
    if (!any(is_hit)) {
      next
    }
    found <- TRUE

    # Second column is the cancer name, so it is dropped.
    hits <- run_rows[is_hit, -2, drop = FALSE]

    # Align the columns of both sides, filling absent ones with 0.
    hits[setdiff(names(combined), names(hits))] <- 0
    combined[setdiff(names(hits), names(combined))] <- 0

    # The number of runs is small, so growing with rbind() is acceptable here.
    combined <- rbind(combined, hits)
  }

  if (!found) {
    warning("No run lists data for cancer '", cancer, "'.", call. = FALSE)
    return(data.frame())
  }

  # Remove the seed row and the seed column, then renumber the rows.
  CancerCanBeDownD <- combined[-1, , drop = FALSE]
  rownames(CancerCanBeDownD) <- NULL
  CancerCanBeDownD[, -grep(pattern = "ttttt", x = names(CancerCanBeDownD)),
                   drop = FALSE]
}
# yikeshu0611/ConvTCGA documentation built on May 17, 2019, 7:58 a.m.