# draft/getFE.R

# Scrape the branch directory published on feib.com.tw.
#
# Reads the service-location index page, collects the per-city page links
# (skipping the Hong Kong entry and anchors that are not city pages), then
# downloads each city page and extracts branch name, address and phone number.
#
# Returns: a data.frame with character columns brand_nm (always
#   "遠東銀行分行"), store_nm, addr and tel_no, one row per branch; NULL when
#   the index page yields no city links.
# Errors: stops when an HTTP request fails, or when the name/address/phone
#   fields found on a city page cannot be paired up.
getFE <- function() {
  # Download `url` and parse the response body as UTF-8 HTML.
  fetch_html <- function(url) {
    # NOTE(review): `encoding` is kept from the original call; it does not look
    # like a documented GET() argument -- confirm whether it can be dropped.
    res <- GET(url, encoding = "utf8")
    # Fail here with the HTTP status instead of parsing an error page as data.
    httr::stop_for_status(res)
    htmlParse(content(res, "text", encoding = "utf8"), encoding = "utf8")
  }

  # Extract one row per branch from a single city page.
  parse_city_page <- function(url) {
    doc <- fetch_html(url)
    cells <- cssApply(doc, "tr > td.mainText", cssCharacter)
    cells <- str_replace_all(cells, "^[:space:]+", "")

    # The first line of each cell carries the value; cells mentioning safe
    # deposit boxes or containing the "‧" bullet are not name/address fields.
    first_line <- str_extract(cells, "^.+\r\n")
    first_line <- first_line[!is.na(first_line)]
    first_line <- first_line[!str_detect(first_line, "保管箱|\\‧")]
    first_line <- str_replace(first_line, "\r\n", "")

    # Phone cells start with a parenthesised area code.
    tel_no <- str_extract(cells, "^\\(.+[0-9]")
    tel_no <- tel_no[!is.na(tel_no)]

    # Each branch contributes a name, an address and a phone number. Refuse to
    # continue on a mismatch: matrix() and data.frame() would otherwise recycle
    # values and silently attach fields to the wrong branch.
    if (length(first_line) != 2L * length(tel_no)) {
      stop(
        "Unexpected page layout at ", url, ": found ", length(first_line),
        " name/address fields for ", length(tel_no), " phone numbers.",
        call. = FALSE
      )
    }

    fields <- matrix(first_line, ncol = 2, byrow = TRUE)
    data.frame(
      # rep() keeps the column length at zero for a page without branches,
      # so such a page yields an empty data.frame instead of an error.
      brand_nm = rep("遠東銀行分行", nrow(fields)),
      store_nm = fields[, 1],
      addr = fields[, 2],
      tel_no = tel_no,
      stringsAsFactors = FALSE
    )
  }

  index <- fetch_html(
    "https://www.feib.com.tw/servicelocation/servicelocation01a.aspx"
  )
  page_url <- str_extract(
    cssApply(index, "tr > td > a", cssLink),
    "^servicelocation01.+aspx$"
  )
  page_city <- cssApply(index, "tr > td > a", cssCharacter)

  # Exclude regions outside Taiwan (the Hong Kong link) and anchors that are
  # not city pages. %in% never yields NA, unlike ==.
  keep <- !is.na(page_url) & !(page_city %in% "香港")

  # Absolute links to each county/city page.
  want_pages <- sprintf(
    "https://www.feib.com.tw/servicelocation/%s",
    page_url[keep]
  )

  # Branch information for every city, stacked into one data.frame.
  do.call(rbind, lapply(want_pages, parse_city_page))
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.