# draft/getMaywufa.R

# Scrape the Maywufa store list page into a data frame.
#
# Args:
#   url: Location of the store list page; anything htmlParse() accepts.
#        Defaults to the address that used to be hard-coded, so existing
#        zero-argument calls behave as before.
#
# Returns:
#   A data.frame of character columns: brand_nm (constant brand label),
#   store_nm, addr, tel_no, open_time (taken from table cells td[1]..td[4]
#   of the store table) and data_dt (the run date as YYYYMMDD). When the page
#   contains no matching cells a zero-row data.frame with the same columns is
#   returned and a warning is raised.
#
# Raises:
#   An error if the four table columns yield different numbers of cells,
#   because the rows could then no longer be aligned reliably.
getMaywufa <- function(url = 'http://www.maywufa.com.tw/store_list.php') {
  doc <- htmlParse(url, encoding = 'utf8')

  # Characters removed from every cell: CR, LF, tab, plain space and the
  # non-breaking space. U+00A0 is spelled with a Unicode escape rather than
  # the raw bytes "\xc2\xa0" so the pattern means the same thing regardless of
  # the session's native encoding.
  junk_chars <- '[\r\n\t \u00a0]+'

  # Extract the text of the index-th <td> of every row of the store table
  # and strip the junk characters from it.
  read_column <- function(index) {
    xpath <- sprintf(
      '//*[@class="papers prohealthcare zebra"]/tr/td[%d]',
      index
    )
    cells <- xpathSApply(doc, xpath, xmlValue)
    # xpathSApply() gives back an empty list when nothing matches; normalise
    # to a character vector so the result type does not depend on the page.
    cells <- as.character(unlist(cells))
    str_replace_all(cells, junk_chars, '')
  }

  store_nm <- read_column(1L)
  addr <- read_column(2L)
  # Drop everything from the first "(" to the end of the address.
  addr <- str_replace_all(addr, '([(]).+$', '')
  tel_no <- read_column(3L)
  open_time <- read_column(4L)

  # data.frame() silently recycles columns whose lengths divide each other,
  # which would pair store names with the wrong addresses. Fail loudly.
  cell_counts <- c(
    store_nm = length(store_nm),
    addr = length(addr),
    tel_no = length(tel_no),
    open_time = length(open_time)
  )
  if (length(unique(cell_counts)) != 1L) {
    stop(
      'getMaywufa: table columns have different lengths (',
      paste(names(cell_counts), cell_counts, sep = '=', collapse = ', '),
      '); the page layout may have changed.',
      call. = FALSE
    )
  }

  n_rows <- cell_counts[['store_nm']]
  if (n_rows == 0L) {
    warning(
      'getMaywufa: no store rows found at ', url,
      '; returning an empty data frame.',
      call. = FALSE
    )
  }

  # rep() keeps the constant columns at zero length when no rows were found,
  # so the empty case yields a well-formed zero-row data frame.
  data.frame(
    brand_nm = rep('美吾華-博登藥局', n_rows),
    store_nm = store_nm,
    addr = addr,
    tel_no = tel_no,
    open_time = open_time,
    data_dt = rep(format(Sys.Date(), '%Y%m%d'), n_rows),
    stringsAsFactors = FALSE
  )
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.