# File: draft/getJytnet.R

#' Scrape the store listing published on www.jytnet.com.tw
#'
#' Requests `http://www.jytnet.com.tw/search.asp?ar=<page>` once for every
#' element of `pages`, reads columns 2-5 of the table found under the element
#' with id `form3`, and stacks the rows of all pages into one data frame.
#'
#' @param pages Values substituted for the `ar` query parameter, one request
#'   per value. Defaults to 1..8, the range that used to be hard-coded.
#' @return A data.frame with the character columns `brand_nm`, `store_nm`,
#'   `addr`, `zip_cd`, `tel_no`, `fax_no` and `data_dt` (the current date as
#'   `yyyymmdd`). The first matched cell of every column is dropped
#'   (presumably the table's header row -- confirm against the live page).
#'   A page without data rows contributes zero rows; if `pages` is empty a
#'   zero-row data frame with the same columns is returned.
getJytnet <- function(pages = seq_len(8L)) {
  page_url_fmt <- "http://www.jytnet.com.tw/search.asp?ar=%s"
  cell_xpath_fmt <- '//*[@id="form3"]/table/tr/td[%d]'
  brand <- "金玉堂"
  # Computed once so every row carries the same date even if the run
  # crosses midnight.
  snapshot_dt <- format(Sys.Date(), "%Y%m%d")

  # Text of every cell in table column `col`, minus the first match.
  read_column <- function(doc, col) {
    cells <- XML::xpathSApply(doc, sprintf(cell_xpath_fmt, col), XML::xmlValue)
    # xpathSApply() yields list() when nothing matches; normalise to a
    # character vector so the zero-row case below stays well-typed.
    as.character(unlist(cells))[-1]
  }

  # Assemble the output columns; works for zero-length inputs as well.
  build_frame <- function(store_nm, tel_no, fax_no, raw_addr) {
    # The address cell starts with a 3-character zip code.
    zip_cd <- substr(raw_addr, 1, 3)
    addr <- substr(raw_addr, 4, 100)
    # Drop all whitespace, including no-break (U+00A0) and ideographic
    # (U+3000) spaces, spelled as escapes so they survive editing.
    addr <- stringr::str_replace_all(addr, "[\\s\u00a0\u3000]+", "")
    data.frame(
      # rep() instead of a recycled scalar: a scalar next to zero-length
      # columns makes data.frame() fail.
      brand_nm = rep(brand, length(store_nm)),
      store_nm = store_nm,
      addr = addr,
      zip_cd = zip_cd,
      tel_no = tel_no,
      fax_no = fax_no,
      data_dt = rep(snapshot_dt, length(store_nm)),
      stringsAsFactors = FALSE
    )
  }

  # Download one listing page and turn its table into a data frame.
  scrape_page <- function(page) {
    res <- httr::GET(sprintf(page_url_fmt, page))
    # Fail with the HTTP status instead of an opaque parsing error later on.
    httr::stop_for_status(res)
    # Parse explicitly with XML: the auto-parsed result of content() is an
    # xml2 document in httr >= 1.0, which XML::xpathSApply() cannot query.
    html <- httr::content(res, as = "text", encoding = "UTF-8")
    doc <- XML::htmlParse(html, asText = TRUE, encoding = "UTF-8")
    build_frame(
      store_nm = read_column(doc, 2L),
      tel_no = read_column(doc, 3L),
      fax_no = read_column(doc, 4L),
      raw_addr = read_column(doc, 5L)
    )
  }

  page_frames <- lapply(pages, scrape_page)
  if (length(page_frames) == 0L) {
    # do.call(rbind, list()) would give NULL; keep the return type stable.
    return(build_frame(character(0), character(0), character(0), character(0)))
  }
  # Returned visibly (the previous trailing assignment hid the result).
  do.call(rbind, page_frames)
}
# Source: leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.