# draft/getFubon.R

#' Scrape the Fubon Bank branch list (branch name and address) from fubon.com
#'
#' Downloads the branch-locator landing page, reads the list of cities from
#' it, and then visits one result page per city. The first city in the list
#' (Taipei City, per the original author's note) is further split into
#' districts on the site, so it is fetched district by district instead.
#'
#' Requires `GET`/`content` (httr), `htmlParse`/`xpathSApply`/`xmlValue` (XML)
#' and `str_replace_all` (stringr) to be available.
#'
#' @return A data.frame with character columns `brand_nm`, `store_nm`, `addr`
#'   and `data_dt` (the scrape date formatted as YYYYMMDD), one row per branch.
#'   Signals an error if a request fails or the landing page lists no cities.
getFubon <- function() {
  base_url <- "https://www.fubon.com/Fubon_Portal/financial/local.jsp?bu=T&maptype=branch"
  # Table rows holding one branch each: td[1] is the name, td[2] the address.
  branch_row_xpath <- '//div[@class="post"]/table/tr/td/table/tr/td/table/tr'
  # Evaluate the date once so every row carries the same stamp, even if the
  # scrape happens to run across midnight.
  scrape_dt <- format(Sys.Date(), "%Y%m%d")

  # Download `url` and return the parsed HTML document. HTTP errors are raised
  # instead of being parsed as if they were a valid (empty) result page.
  fetch_page <- function(url) {
    resp <- GET(url)
    httr::stop_for_status(resp)
    htmlParse(content(resp, "text", encoding = "UTF-8"), encoding = "UTF-8")
  }

  # Text of every node matching `xpath`, always as a character vector
  # (xpathSApply returns an empty list when nothing matches).
  node_text <- function(doc, xpath) {
    as.character(unlist(xpathSApply(doc, xpath, xmlValue)))
  }

  # Extract the branches listed on one result page as a data.frame; a page
  # without branches yields a zero-row frame rather than an error.
  branch_table <- function(doc) {
    store_nm <- node_text(doc, paste0(branch_row_xpath, "/td[1]"))
    addr <- node_text(doc, paste0(branch_row_xpath, "/td[2]"))
    if (length(store_nm) != length(addr)) {
      # Without this check data.frame() could silently recycle the shorter
      # column when the lengths happen to be multiples of each other.
      stop("Fubon branch page has ", length(store_nm), " name cells but ",
           length(addr), " address cells; the page layout may have changed.",
           call. = FALSE)
    }
    data.frame(
      brand_nm = rep("富邦分行", length(store_nm)),
      store_nm = store_nm,
      addr = addr,
      data_dt = rep(scrape_dt, length(store_nm)),
      stringsAsFactors = FALSE
    )
  }

  # City tabs on the landing page; the English-index tab is not a city.
  landing <- fetch_page(base_url)
  cities <- node_text(
    landing,
    '//div[@class="map-area"]/table/tr/td[1]/table/tr[9]/td[2]/table/tr'
  )
  cities <- cities[cities != "分行英文檢索"]
  if (length(cities) == 0L) {
    stop("No city entries found on the Fubon branch page; ",
         "the page layout may have changed.", call. = FALSE)
  }
  city_urls <- sprintf("%s&temp_zoned=%s", base_url, cities)

  # First city: has district sub-pages. The first <li> of the selector is
  # dropped, as in the original implementation.
  first_city <- fetch_page(city_urls[1])
  districts <- node_text(first_city, '//div[@class="area-selecter"]/ul/li')
  districts <- str_replace_all(districts, '(\r|\n| |\t)+', '')
  districts <- districts[-1]
  if (length(districts) > 0L) {
    # Build the URL with an explicit format so no separator space sneaks in
    # and the city URL itself is never interpreted as a format string.
    district_urls <- sprintf("%s&zoned_kind=%s", city_urls[1], districts)
    first_city_frames <- lapply(
      district_urls,
      function(u) branch_table(fetch_page(u))
    )
  } else {
    # No district selector found: fall back to whatever the city page lists.
    first_city_frames <- list(branch_table(first_city))
  }

  # Remaining cities: one page each, no districts. `[-1]` is empty (not
  # c(2, 1)) when only one city exists.
  other_frames <- lapply(
    city_urls[-1],
    function(u) branch_table(fetch_page(u))
  )

  branches <- do.call(rbind, c(first_city_frames, other_frames))

  # Strip all whitespace from names; drop everything from the first "(" on
  # in addresses.
  branches$store_nm <- str_replace_all(branches$store_nm, '(\r|\n| |\t)+', '')
  branches$addr <- str_replace_all(branches$addr, '([(]).+$', '')
  branches
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.