# draft/getStandard.R

#' Scrape the Standard Chartered (Taiwan) branch locator.
#'
#' Downloads every page of the branch table, stacks the pages, drops rows
#' whose address (second column) is an empty string, and returns the first
#' four columns under fixed names.
#'
#' @return A data.frame with character columns `brand_nm` (always
#'   '渣打銀行'), `store_nm`, `addr`, `tel_no` and `tm`, taken from table
#'   columns 1-4 in that order.
getStandard <- function() {
  url_template <- 'http://www.standardchartered.com.tw/locator/branch-table.asp?CurrPage=%s'
  agent <- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.130 Safari/537.36"

  # Download one result page and parse it into an HTML document.
  fetch_page <- function(page_no) {
    resp <- GET(sprintf(url_template, page_no), encoding = 'utf8',
                user_agent(agent))
    htmlParse(resp, asText = TRUE, encoding = 'utf8')
  }

  # Total number of pages, read from the pager link on the first page.
  first_doc <- fetch_page(1L)
  pager     <- cssApply(first_doc, "#totalpage>a", cssCharacter)
  n_pages   <- suppressWarnings(as.integer(str_extract(pager, '[0-9]+')))
  n_pages   <- n_pages[!is.na(n_pages)]
  if (length(n_pages) == 0L || n_pages[1L] < 1L) {
    stop("getStandard: could not determine the number of result pages",
         call. = FALSE)
  }
  # Only the first match is used, as `1:page1` did in the original loop.
  n_pages <- n_pages[1L]

  # Collect the branch table (second <table>) of every page; the already
  # downloaded first page is reused instead of being requested again.
  page_tables <- vector("list", n_pages)
  for (i in seq_len(n_pages)) {
    doc <- if (i == 1L) first_doc else fetch_page(i)
    page_tables[[i]] <- readHTMLTable(doc)[[2]]
  }

  branches <- as.data.frame(do.call(rbind, page_tables))[, 1:4]

  # Keep rows whose address is not the empty string. A logical mask is used
  # because negative indexing with an empty `which()` result (`df[-integer(0), ]`)
  # would drop every row. `%in%` never yields NA, so NA addresses are kept.
  addr <- as.character(branches[[2]])
  keep <- !(addr %in% "")

  # as.character() guarantees plain strings even if the parsed tables hold
  # factors (the default under older R / XML versions).
  data.frame(
    brand_nm = rep('渣打銀行', sum(keep)),
    store_nm = as.character(branches[[1]])[keep],
    addr     = addr[keep],
    tel_no   = as.character(branches[[3]])[keep],
    tm       = as.character(branches[[4]])[keep],
    stringsAsFactors = FALSE
  )
}
# Source: leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.