# draft/getPuma.R

#' Scrape the PUMA Night Run store list
#'
#' Parses the store-map page, reads the text of the first three table
#' columns under the element with id `shop_map_main` (store name, address,
#' phone number) and assembles them into one data frame.
#'
#' @return A data frame with character columns `brand_nm` (always "PUMA"),
#'   `store_nm`, `addr`, `tel_no` and `data_dt` (the current date formatted
#'   as `YYYYMMDD`).
#' @details Stops with an error when the three scraped columns are empty or
#'   do not have the same length, instead of letting `data.frame()` recycle
#'   them into misaligned rows.
getPuma <- function() {
  want_url <- "http://www.puma-nightrun.com.tw/all_shop_map.html"
  page <- htmlParse(want_url, encoding = "utf8")

  # Text of the `col`-th cell of every row in the store table.
  # as.character() normalises the empty list xpathSApply() returns when
  # nothing matches.
  cell_text <- function(col) {
    xpath <- sprintf('//*[@id="shop_map_main"]/table/tr/td[%d]', col)
    as.character(xpathSApply(page, xpath, xmlValue))
  }

  # Region heading cells are filtered out of the first column only.
  # NOTE(review): presumably those heading rows contain a single cell, so
  # they never show up in columns 2 and 3 -- confirm against the page.
  region_pattern <- "北部門市|中部門市|南部門市|東部門市|外島門市"
  store_nm <- cell_text(1L)
  store_nm <- store_nm[!grepl(region_pattern, store_nm)]

  # The first remaining entry of each column is dropped
  # (presumably the table's header row -- confirm against the page).
  store_nm <- store_nm[-1]
  addr <- cell_text(2L)[-1]
  tel_no <- cell_text(3L)[-1]

  # data.frame() silently recycles vectors whose lengths divide evenly, which
  # would pair stores with the wrong address/phone; fail loudly instead.
  n_store <- length(store_nm)
  if (n_store == 0L || length(addr) != n_store || length(tel_no) != n_store) {
    stop(
      "Unexpected store table layout: ", n_store, " store names, ",
      length(addr), " addresses, ", length(tel_no), " phone numbers.",
      call. = FALSE
    )
  }

  # Last expression is the data frame itself, so the result is returned
  # visibly rather than as the invisible value of an assignment.
  data.frame(
    brand_nm = "PUMA",
    store_nm = store_nm,
    addr = addr,
    tel_no = tel_no,
    data_dt = format(Sys.Date(), "%Y%m%d"),
    stringsAsFactors = FALSE
  )
}
# leoluyi/address_crawler documentation built on May 21, 2019, 5:09 a.m.