# draft/getTai168.R

#' Scrape the store list of the 太師傅 chain from tai168.com.tw
#'
#' Downloads the store index page, collects the pagination links found on it,
#' then visits each linked page and extracts the text of the elements whose
#' class is "title" (store name), "tel" (telephone) and "add" (address).
#'
#' Relies on `GET`/`content`, `cssApply`/`cssLink` and `xpathSApply`/`xmlValue`
#' being available to the function (presumably from httr, CSS and XML --
#' TODO confirm against the package imports).
#'
#' @return A data.frame with character columns `brand_nm`, `store_nm`, `addr`,
#'   `tel_no` and `data_dt` (the scrape date as "YYYYMMDD"), one row per store.
#'   A zero-row data.frame with the same columns is returned when no page
#'   links or no stores are found.
getTai168 <- function() {
  # Computed once so every row carries the same date, even across midnight.
  data_dt <- format(Sys.Date(), '%Y%m%d')

  # Builds the result rows; rep() keeps the constant columns valid for n == 0.
  store_frame <- function(store_nm, addr, tel_no) {
    n <- length(store_nm)
    data.frame(
      brand_nm = rep('太師傅', n),
      store_nm = store_nm,
      addr = addr,
      tel_no = tel_no,
      data_dt = rep(data_dt, n),
      stringsAsFactors = FALSE
    )
  }

  # Text of all nodes matching `xpath`, always as a plain character vector
  # (xpathSApply yields an empty list when nothing matches).
  node_text <- function(doc, xpath) {
    as.character(unlist(xpathSApply(doc, xpath, xmlValue), use.names = FALSE))
  }

  # Downloads one listing page and returns its stores as a data.frame.
  scrape_page <- function(url) {
    res <- GET(url, encoding = 'utf8')
    doc <- content(res, encoding = 'utf8')
    store_nm <- node_text(doc, '//*[@class="title"]')
    tel_no <- node_text(doc, '//*[@class="tel"]')
    addr <- node_text(doc, '//*[@class="add"]')
    # Fail loudly rather than letting data.frame() recycle misaligned fields.
    if (length(tel_no) != length(store_nm) || length(addr) != length(store_nm)) {
      stop(
        sprintf(
          'Field counts differ on %s: %d names, %d addresses, %d phone numbers',
          url, length(store_nm), length(addr), length(tel_no)
        ),
        call. = FALSE
      )
    }
    store_frame(store_nm, addr, tel_no)
  }

  res <- GET('http://www.tai168.com.tw/store.php?act=list', encoding = 'utf8')
  index_doc <- content(res, encoding = 'utf8')

  page_list <- cssApply(
    index_doc, 'tr > td:nth-child(2) > div > a:nth-child', cssLink
  )
  page_list <- as.character(unlist(page_list, use.names = FALSE))
  # The first and last links are discarded (presumably non-page navigation
  # links such as previous/next -- TODO confirm against the live markup).
  page_list <- page_list[-1]
  if (length(page_list) > 0L) {
    page_list <- page_list[-length(page_list)]
  }

  if (length(page_list) == 0L) {
    return(store_frame(character(0), character(0), character(0)))
  }

  page_urls <- sprintf('http://www.tai168.com.tw/%s', page_list)
  pages <- lapply(page_urls, scrape_page)
  do.call(rbind, unname(pages))
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.