draft/getWaCoal.R

#' Scrape the Wacoal Taiwan store list
#'
#' Downloads the store index page, reads the pagination links to find out how
#' many result pages exist, then downloads every result page and pulls the
#' store name, phone number and address out of the text of each matched
#' table.
#'
#' Relies on `GET`/`content`, `htmlParse`, `cssApply`/`cssLink`/`cssCharacter`
#' and `str_extract`/`str_replace_all` being available in the calling
#' environment (they are not loaded here).
#'
#' @return A data.frame with the character columns `brand_nm`, `store_nm`,
#'   `tel_no` and `addr`, one row per element returned by `cssApply()` for the
#'   `"tr > td > table"` selector, over all pages. Signals an error when no
#'   numeric page link can be found on the index page.
getWaCoal <- function() {
  index_url   <- 'http://www.wacoal.com.tw/store/index'
  page_prefix <- 'http://www.wacoal.com.tw/store/index/0/0/0/0/'

  # Download a URL and parse the response body as HTML.
  read_page <- function(page_url) {
    body_text <- content(GET(page_url), 'text', encoding = 'utf8')
    htmlParse(body_text, encoding = "utf8")
  }

  # Extract the line following `label` from each table text, then strip the
  # label itself, the CR/LF pairs and any remaining whitespace.
  # The CR/LF escapes are kept exactly as they were originally written.
  # Earlier versions also removed every literal "t" (a bare `t` alternative,
  # most likely a mistyped tab escape); tabs are already covered by
  # `[:space:]`, so that alternative is gone and real "t" characters survive.
  extract_field <- function(table_text, label) {
    matched <- str_extract(table_text, paste0(label, '\\\r\\\n.+\\\r\\\n'))
    str_replace_all(matched, paste0(label, '|\\\r\\\n|[:space:]'), '')
  }

  # Scrape one result page into a data.frame.
  scrape_page <- function(page_no) {
    page_html  <- read_page(paste0(page_prefix, page_no))
    table_text <- cssApply(page_html, "tr > td > table", cssCharacter)

    store_nm <- extract_field(table_text, '店點名稱')
    tel_no   <- extract_field(table_text, '連絡電話')
    addr     <- paste0(
      extract_field(table_text, '縣市區域'),
      extract_field(table_text, '詳細地址')
    )

    data.frame(
      # Sized to match the other columns so a page with no matched tables
      # gives a zero-row data.frame instead of a recycling error.
      brand_nm = rep('華歌爾', length(store_nm)),
      store_nm,
      tel_no,
      addr,
      stringsAsFactors = FALSE
    )
  }

  # The pagination links carry the page number as the last URL segment.
  page_links <- cssApply(
    read_page(index_url),
    'div.height-35.padding-top-15.float-right > table > tr > td > a',
    cssLink
  )
  page_ids <- str_replace_all(page_links, page_prefix, '')

  # Several links may be present; use the largest page number rather than
  # whichever link happens to come first. Non-numeric leftovers become NA
  # (with a coercion warning that is deliberately silenced) and are dropped.
  page_nums <- suppressWarnings(as.integer(page_ids))
  page_nums <- page_nums[!is.na(page_nums) & page_nums >= 1L]
  if (length(page_nums) == 0L) {
    stop("no numeric page links found on ", index_url, call. = FALSE)
  }
  last_page <- max(page_nums)

  pages <- lapply(seq_len(last_page), scrape_page)

  # Returned visibly (the value itself is the same combined data.frame).
  do.call(rbind, pages)
}
leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.