# draft/getNanShan.R

# Scrape the Nan Shan Life office directory into a single data frame.
#
# Downloads the office index page, follows every link matched by the
# 'div > a' selector (one page per city/county), turns the table rows of
# each page into a data frame and row-binds the per-page results.
#
# Returns a data.frame with character columns brand_nm, store_nm,
# store_dept, addr, tel_no, fax, set_dt and city, or NULL when the index
# page yields no links. The value is returned visibly.
# Signals an error when any HTTP request comes back with a failure status.
getNanShan <- function(){
  # Download a page and parse it as UTF-8 HTML. Shared by the index page
  # and the per-city pages. The charset is applied when the body is decoded
  # by content(), so no encoding argument is passed to GET().
  fetchDoc <- function(URL){
    res <- GET(URL)
    # Fail loudly on 4xx/5xx instead of parsing an error page as data.
    httr::stop_for_status(res)
    htmlParse(content(res, "text", encoding = "utf8"), encoding = "utf8")
  }

  indexUrl <- 'https://www.nanshanlife.com.tw/Public_web/Service/Place/Office/office.html'
  links <- cssApply(fetchDoc(indexUrl), 'div > a', cssLink)

  # Absolute URLs of the per-city/county pages
  wantPages <- sprintf('https://www.nanshanlife.com.tw%s', links)

  # Example page:
  # 'https://www.nanshanlife.com.tw/eServicePublic/publicweb/office/OfficeArticle.action?chanelMap=1'
  #
  # Parse one city/county page into a data frame of offices.
  getInfo <- function(URL){
    rows <- cssApply(fetchDoc(URL), 'tr ', cssCharacter)
    cells <- unlist(strsplit(rows, '\r\n\t\t\t'))
    cells <- str_replace_all(cells, '[:space:]', '')
    # Drop empty cells, then the first remaining one (the table header)
    cells <- cells[nchar(cells) > 0][-1]
    # The remaining cells are laid out row by row, seven per office
    tbl <- matrix(cells, ncol = 7, byrow = TRUE)

    # Strip the time-of-day suffix from the sixth column
    set_dt <- str_replace(tbl[,6], '00:00:00.0', '')

    # brand_nm is repeated explicitly so that a page without any data rows
    # produces a zero-row data frame instead of a row-count mismatch error.
    data.frame(brand_nm=rep('南山人壽', nrow(tbl)),
               store_nm=tbl[,1],
               store_dept=tbl[,2],
               addr=tbl[,3],
               tel_no=tbl[,4],
               fax=tbl[,5],
               set_dt=set_dt,
               city=tbl[,7],
               stringsAsFactors=FALSE)
  }

  allList <- lapply(wantPages, getInfo)
  do.call(rbind, allList)
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.