# draft/getTwse.R

#' Scrape the TWSE broker head-office and branch-office listings.
#'
#' Fetches two pages from www.twse.com.tw (the head-office list and the
#' branch-office list), normalises each into the same six columns, and
#' stacks them into one data frame.
#'
#' NOTE(review): the helpers used here (`GET`, `content`, `user_agent`,
#' `htmlParse`, `readHTMLTable`, `str_replace_all`, `str_split`, `cssApply`,
#' `cssCharacter`) are expected to be attached by the caller -- presumably
#' from httr, XML, stringr and CSS; confirm against the package imports.
#'
#' @return A data.frame with columns `brand_nm`, `store_id`, `store_nm`,
#'   `open_dt`, `addr` and `tel_no`; head-office rows come first, followed by
#'   branch-office rows. `brand_nm` is the constant '證交所' for every row.
getTwse <- function() {
  # Securities firms: head offices

  url           <- 'http://www.twse.com.tw/ch/products/broker_service/broker_list.php'
  htmldoc1      <- content(GET(url, encoding='utf8'), as='text', encoding='utf8')
  htmldoc2      <- htmlParse(htmldoc1, encoding='utf8')
  # Only the first table on the page is used.
  tables        <- readHTMLTable(htmldoc2)[[1]]
  names(tables) <- c("store_id","store_nm","open_dt","addr","tel_no","aa")
  # The sixth column is not part of the output.
  tables$aa     <- NULL

  # Strip parenthesised text from the address (greedy: from the first "("
  # to the last ")").
  col1          <- str_replace_all(tables$addr,'\\(.+\\)',"")
  datas         <- data.frame('證交所',tables$store_id, tables$store_nm, tables$open_dt,col1,tables$tel_no)
  names(datas)  <- c("brand_nm","store_id","store_nm","open_dt","addr","tel_no")


  # Securities firms: branch offices

  url1   <- 'http://www.twse.com.tw/ch/products/broker_service/broker2_list.php'
  HTML11 <- GET(url1, encoding='utf8',user_agent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.130 Safari/537.36"))
  HTML21 <- htmlParse(HTML11, asText = TRUE, encoding='utf8')
  # Text of every node matched by the ".basictxt" selector, re-encoded from
  # UTF-8 to Latin-1.
  # NOTE(review): the Latin-1 round trip looks like a platform-specific
  # encoding workaround -- confirm it is still needed.
  INFO1  <- iconv(cssApply(HTML21," .basictxt " , cssCharacter),"utf8","LATIN1") 
  # One character vector of newline-separated fields per matched node.
  INFO2  <- str_split(INFO1,"\n")

  # Drop nodes that produced a single field (no newline in their text).
  # A logical mask is used instead of `INFO2[-which(...)]`: with a negative
  # index, an empty `which()` result would select nothing and silently
  # discard every row.
  n_fields <- vapply(INFO2, length, integer(1))
  INFO3    <- INFO2[n_fields != 1]

  if (length(INFO3) == 0) {
    stop("getTwse: no branch-office rows could be parsed from ", url1,
         call. = FALSE)
  }

  INFO4 <- as.data.frame(do.call(rbind,INFO3))
  # Remove all spaces from the first five fields of each branch row.
  DATAS <- data.frame('證交所',
                      str_replace_all(INFO4$V1, ' ', ''),
                      str_replace_all(INFO4$V2, ' ', ''),
                      str_replace_all(INFO4$V3, ' ', ''),
                      str_replace_all(INFO4$V4, ' ', ''),
                      str_replace_all(INFO4$V5, ' ', ''))


  names(DATAS) <- c("brand_nm","store_id","store_nm","open_dt","addr","tel_no")

  # Return the stacked result visibly (head offices first, then branches).
  rbind(datas,DATAS)
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.