# draft/getIroo.R

# Scrape the IROO store list from www.iroo.com.tw (2015SS site).
#
# Reads the region entries under the element with id "taiwan_city_list" on the
# store index page, then downloads one store-list page per region and collects
# each store's name, address and phone number.
#
# Relies on GET()/content(), htmlParse()/xpathApply()/xmlAttrs(),
# cssApply()/cssCharacter() and str_replace_all() already being attached
# (presumably from httr, XML, CSS and stringr -- confirm against the package
# imports).
#
# Returns: a data.frame with character columns brand_nm (always "IROO"),
#   store_nm, addr and tel_no (all whitespace removed), one row per store,
#   with the rows of every region page stacked in page order.
# Errors: stops with a message when the index page lists no regions.
getIroo <- function(){
  indexURL <- 'http://www.iroo.com.tw/2015SS/store.html?woman_taiwan'
  # Template for the per-region store list page; %s is the region id.
  # e.g. 'http://www.iroo.com.tw/2015SS/city_store_list.html?iroo_woman_northern_taiwan'
  listURL <- 'http://www.iroo.com.tw/2015SS/city_store_list.html?iroo_%s'

  # Download a page and parse it as UTF-8 HTML. GET() has no `encoding`
  # argument (extra named arguments are treated as URL components), so the
  # encoding is only given where the body is decoded and parsed.
  fetchPage <- function(pageURL) {
    res <- GET(pageURL)
    htmlParse(content(res, "text", encoding = "utf8"), encoding = "utf8")
  }

  indexDoc <- fetchPage(indexURL)

  # One attribute vector per <li> of the region list.
  liAttrs <- xpathApply(indexDoc, '//*[@id="taiwan_city_list"]/li', xmlAttrs)
  if (length(liAttrs) == 0) {
    stop("No region entries found under #taiwan_city_list at ", indexURL,
         call. = FALSE)
  }
  # Assumes every <li> carries exactly three attributes and that the third one
  # holds the region id used in the list page URL -- TODO confirm against the
  # live markup.
  areaURL <- matrix(unlist(liAttrs), ncol = 3, byrow = TRUE)[, 3]

  # Collect the stores of one region page as a data.frame.
  getInfo <- function(area) {
    doc <- fetchPage(sprintf(listURL, area))

    # Text of every <div class = cls> directly inside the region's list items.
    # as.character(unlist()) normalises an empty match to character(0).
    pick <- function(cls) {
      selector <- sprintf('ul#iroo_%s > li > div.%s', area, cls)
      as.character(unlist(cssApply(doc, selector, cssCharacter)))
    }

    store_nm <- pick('store_name')
    addr <- pick('store_add')
    tel_no <- str_replace_all(pick('store_phone'), '[:space:]', '')

    # rep() keeps the brand column the same length as the others, so a region
    # without stores gives a zero-row data.frame instead of an error.
    data.frame(brand_nm = rep('IROO', length(store_nm)),
               store_nm = store_nm,
               addr = addr,
               tel_no = tel_no,
               stringsAsFactors = FALSE)
  }

  allList <- lapply(areaURL, getInfo)
  # Last expression is the value itself (not an assignment) so the result is
  # returned visibly.
  do.call(rbind, allList)
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.