# draft/getTbb.R

getTbb <- function(){
  # Scrape the branch directory of Taiwan Business Bank (TBB) from
  # www.tbb.com.tw.
  #
  # Takes no arguments. Reads the listing index page, collects the links found
  # in its pager cell, downloads every linked page and row-binds the branch
  # tables found there.
  #
  # Returns a data.frame with character columns brand_nm, store_nm, addr,
  # tel_no, Email and SEC_ID (one row per branch), or NULL when the index page
  # yields no page links. Stops with an error on an HTTP failure or when the
  # columns scraped from a page do not line up.
  #
  # NOTE(review): only pages linked from the pager are fetched; if the pager
  # does not link to the page currently displayed, that page's branches are
  # not collected -- confirm against the live site.

  # Download `url` and parse it into an XML-package HTML document.
  # The body is fetched as text and parsed with XML::htmlParse() because
  # httr >= 1.0.0 parses HTML with xml2, and XML::xpathSApply() cannot query
  # the objects that produces.
  fetchDoc <- function(url){
    res <- httr::GET(url)
    # Fail loudly instead of scraping an error page.
    httr::stop_for_status(res)
    html <- httr::content(res, as = 'text', encoding = 'UTF-8')
    XML::htmlParse(html, asText = TRUE, encoding = 'UTF-8')
  }

  # Text content of every node matching `path`, always as a character vector
  # (xpathSApply() returns an empty list when nothing matches).
  nodeText <- function(doc, path){
    as.character(unlist(XML::xpathSApply(doc, path, XML::xmlValue)))
  }

  # Collect the absolute URLs of the listing pages linked from the index page.
  getsidurl <- function(){
    wantURL <- 'http://www.tbb.com.tw/wps/wcm/connect/TBBInternet/index/aboutTBB/e04/e0401/e040102'
    doc <- fetchDoc(wantURL)

    # Read only the href attribute: taking every attribute of the <a> nodes
    # would turn any extra attribute (class, title, ...) into a bogus URL.
    hrefs <- XML::xpathSApply(
      doc,
      '/html/body/table[2]/tr/td[2]/table[5]/tr/td[3]/table/tr/td/a',
      XML::xmlGetAttr, 'href'
    )
    # unlist() drops anchors without an href; as.character() drops names so
    # they do not leak into the row names of the final rbind().
    hrefs <- as.character(unlist(hrefs))

    # The hrefs are assumed to be site-relative paths starting with "/".
    sprintf('http://www.tbb.com.tw%s', hrefs)
  }

  # Scrape one listing page into a data.frame of branches.
  getAddr <- function(URL){
    doc <- fetchDoc(URL)
    rowPath <- '/html/body/table[2]/tr/td[2]/table[5]/tr/td[1]/table[2]/tr'

    sec_id <- nodeText(doc, paste0(rowPath, '/td[2]/a'))
    name   <- nodeText(doc, paste0(rowPath, '/td[3]/a'))
    addr   <- nodeText(doc, paste0(rowPath, '/td[4]'))
    tel    <- nodeText(doc, paste0(rowPath, '/td[5]'))
    email  <- nodeText(doc, paste0(rowPath, '/td[6]/a'))

    # The first td[4] / td[5] match is discarded; presumably it is the table
    # header row, which the link-based columns above never match.
    addr <- addr[-1]
    tel  <- tel[-1]

    # data.frame() silently recycles columns whose lengths are multiples of
    # each other, which would misalign branches; refuse that explicitly.
    lens <- c(SEC_ID = length(sec_id), store_nm = length(name),
              addr = length(addr), tel_no = length(tel),
              Email = length(email))
    if (length(unique(lens)) != 1L) {
      stop(
        sprintf('Column lengths differ on %s: %s', URL,
                paste(names(lens), lens, sep = '=', collapse = ', ')),
        call. = FALSE
      )
    }

    # rep() keeps the constant column valid for a page with zero rows.
    data.frame(
      brand_nm = rep('台灣企銀', lens[[1]]),
      store_nm = name,
      addr = addr,
      tel_no = tel,
      Email = email,
      SEC_ID = sec_id,
      stringsAsFactors = FALSE
    )
  }

  pages <- lapply(getsidurl(), getAddr)
  # do.call(rbind, list()) is NULL, so no page links means a NULL result.
  do.call(rbind, pages)
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.