# draft/getYonex.R

#' Scrape the YONEX Taiwan store list
#'
#' Downloads the store index page (`http://www.yonex.com.tw/store.php`),
#' follows every `h2 > a` link found on it, and collects the stores listed in
#' the `tr > td` table cells of each linked page.
#'
#' @return A data.frame with character columns `brand_nm` (always "YONEX"),
#'   `store_nm`, `addr` and `tel_no`, one row per store found. The value is
#'   returned visibly.
getYonex <- function(){
  base_url <- "http://www.yonex.com.tw/"

  # Download one page and parse it as UTF-8 HTML. HTTP errors are raised
  # here so that an error page is never mistaken for a store listing.
  fetchPage <- function(url){
    res <- GET(url)
    httr::stop_for_status(res)
    htmlParse(content(res, "text", encoding = "UTF-8"), encoding = "UTF-8")
  }

  # Build the store table for a single listing page.
  getInfo <- function(URL){
    cells <- cssApply(fetchPage(URL), "tr > td", cssCharacter)
    cells <- str_replace_all(cells, "[:space:]", "")
    # The first three cells are skipped (presumably header cells -- TODO
    # confirm against the live page); the remaining cells are read row-wise
    # as (store name, details) pairs.
    store <- matrix(data = cells[-seq_len(3)], ncol = 2, byrow = TRUE)
    details <- store[, 2]

    # Address: the text in front of the "電話" (telephone) label.
    addr <- str_extract(details, "^.+電話")
    addr <- str_replace(addr, "電話", "")

    # Phone number: from the "電話" label up to the last digit in the cell.
    tel_no <- str_extract(details, "電話.+[0-9]")
    tel_no <- str_replace(tel_no, "電話:", "")

    # rep() keeps the brand column the same length as the others, so a page
    # without any store rows gives a zero-row data.frame instead of an error.
    data.frame(
      brand_nm = rep("YONEX", nrow(store)),
      store_nm = store[, 1],
      addr = addr,
      tel_no = tel_no,
      stringsAsFactors = FALSE
    )
  }

  # Links to each sub-page, taken from the index page.
  resno <- cssApply(fetchPage(paste0(base_url, "store.php")), "h2 > a", cssLink)
  wantPages <- sprintf(paste0(base_url, "%s"), resno)

  allList <- lapply(wantPages, getInfo)
  do.call(rbind, allList)
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.