# draft/getKavalanWhisky.R

#' Scrape Kavalan Whisky store locations
#'
#' Downloads two store-locator pages from www.kavalanwhisky.com (the
#' `index5.aspx` page, labelled here as the liquor-specialty channel, and the
#' `index.aspx` page, labelled as the showroom/retail channel), extracts the
#' text of the nodes with class `title`, `addr`, `phone` (and `time` for the
#' second page), cleans it, and stacks both result sets.
#'
#' @return A data.frame with character columns `brand_nm`, `type`,
#'   `store_nm`, `addr`, `tel_no`, `open_time` and `data_dt` (today's date as
#'   `yyyymmdd`), one row per store found. A page that yields no stores
#'   contributes zero rows and raises a warning.
getKavalanWhisky <- function() {

  brand <- "金車噶瑪蘭威士忌"
  snapshot_dt <- gsub("-", "", Sys.Date())

  # Download `url` and parse the body as UTF-8 HTML.
  fetch_page <- function(url) {
    resp <- GET(url)
    # Fail loudly on HTTP errors instead of scraping an error page.
    if (!is.null(resp$status_code) && resp$status_code >= 400) {
      stop(sprintf("Request to %s failed with HTTP status %s",
                   url, resp$status_code), call. = FALSE)
    }
    htmlParse(content(resp, "text", encoding = "utf8"), encoding = "utf8")
  }

  # Text of every node carrying `css_class`, with CR/LF/space runs removed.
  # Always returns a character vector (character(0) when nothing matches).
  node_text <- function(doc, css_class) {
    xpath <- sprintf('//*[@class="%s"]', css_class)
    txt <- as.character(unlist(xpathSApply(doc, xpath, xmlValue)))
    str_replace_all(txt, "(\r|\n| )+", "")
  }

  # Remove a literal field label (optionally followed by a colon).
  # The label is matched as a sequence, NOT as a character class, so colons
  # inside times ("09:00") and label characters that legitimately occur in
  # the value (e.g. "地" in "地下1樓") are preserved.
  strip_label <- function(x, label) {
    str_replace_all(x, paste0(label, "[::]?"), "")
  }

  # Assemble one channel's rows, refusing to silently recycle misaligned
  # fields onto the wrong stores.
  build_rows <- function(type, store_nm, addr, tel_no, open_time) {
    n <- length(store_nm)
    field_lengths <- c(addr = length(addr), tel_no = length(tel_no),
                       open_time = length(open_time))
    if (any(field_lengths != n)) {
      stop(sprintf(
        "Scraped field counts differ for '%s': store_nm=%d, addr=%d, tel_no=%d, open_time=%d",
        type, n, field_lengths[["addr"]], field_lengths[["tel_no"]],
        field_lengths[["open_time"]]
      ), call. = FALSE)
    }
    if (n == 0L) {
      warning(sprintf("No stores found for '%s'; page layout may have changed",
                      type), call. = FALSE)
    }
    data.frame(
      brand_nm = rep(brand, n),
      type = rep(type, n),
      store_nm = store_nm,
      addr = addr,
      tel_no = tel_no,
      open_time = open_time,
      data_dt = rep(snapshot_dt, n),
      stringsAsFactors = FALSE
    )
  }

  # Liquor-specialty channel: this page exposes no opening hours, so the
  # column is filled with the placeholder "無資料".
  doc <- fetch_page("http://www.kavalanwhisky.com/location/index5.aspx")
  store_nm <- node_text(doc, "title")
  specialty <- build_rows(
    type = "酒專通路",
    store_nm = store_nm,
    addr = node_text(doc, "addr"),
    tel_no = node_text(doc, "phone"),
    open_time = rep("無資料", length(store_nm))
  )

  # Showroom/retail channel: fields carry a leading label that is stripped.
  doc <- fetch_page("http://www.kavalanwhisky.com/location/index.aspx")
  addr <- strip_label(node_text(doc, "addr"), "地址")
  # Drop everything from the first "(" to the end of the address.
  addr <- str_replace_all(addr, "([(]).+$", "")
  showroom <- build_rows(
    type = "展售通路",
    store_nm = node_text(doc, "title"),
    addr = addr,
    tel_no = strip_label(node_text(doc, "phone"), "電話"),
    open_time = strip_label(node_text(doc, "time"), "營業時間")
  )

  # Return visibly (the original ended in an assignment, hiding the result).
  rbind(specialty, showroom)
}
# leoluyi/address_crawler documentation built on May 21, 2019, 5:09 a.m.