# draft/getPgo.R

#' Scrape the PGO dealer list from www.pgo.com.tw
#'
#' Downloads the dealer ("sellService") landing page, reads the values of the
#' `#city` drop-down options, then requests one page per city and extracts the
#' 2nd, 3rd and 4th cells of each row of the `#dealer_list` table as store
#' name, address and telephone number.
#'
#' Relies on `GET()`, `content()`, `htmlParse()`, `xpathSApply()`, `xmlValue`,
#' `cssApply()` and `cssValue` being in scope (presumably from the httr, XML
#' and CSS packages -- confirm against the package imports).
#'
#' @return A data frame with character columns `brand_nm`, `store_nm`, `addr`,
#'   `tel_no` and `data_dt` (the current date formatted as `YYYYMMDD`), one
#'   row per dealer row found. `NULL` when no city page yields any rows,
#'   including the case where no city options are found at all.
getPgo <- function() {
  # Download a URL and parse the body as UTF-8 HTML.
  fetch_html <- function(url) {
    response <- GET(url)
    htmlParse(content(response, 'text', encoding = 'utf8'), encoding = 'utf8')
  }

  # Text of the n-th <td> of every row in the dealer table.
  dealer_column <- function(doc, column) {
    xpathSApply(
      doc,
      sprintf('//*[@id="dealer_list"]/table/tr/td[%d]', column),
      xmlValue
    )
  }

  landing <- fetch_html("http://www.pgo.com.tw/sellService.php")

  # NOTE(review): `:nth-child` is used without an argument; kept exactly as in
  # the original selector -- confirm it matches every <option> as intended.
  city_values <- cssApply(landing, '#city > option:nth-child', cssValue)
  # The first option is dropped (presumably a placeholder entry -- confirm).
  city_values <- city_values[-1]
  city_urls <- sprintf(
    'http://www.pgo.com.tw/sellService.php?city=%s&area=#a',
    city_values
  )

  # Computed once so every row of a single run carries the same date stamp.
  snapshot_date <- format(Sys.Date(), "%Y%m%d")

  # seq_along() keeps the iteration empty when no city was found; the previous
  # `1:length(...)` form would have run with i = 1 and i = 0 and requested NA.
  per_city <- lapply(seq_along(city_urls), function(i) {
    page <- fetch_html(city_urls[i])
    store_nm <- dealer_column(page, 2L)
    addr <- dealer_column(page, 3L)
    tel_no <- dealer_column(page, 4L)

    # Pages without any dealer rows contribute nothing to the result.
    if (length(tel_no) == 0) {
      return(NULL)
    }

    data.frame(
      brand_nm = 'PGO-摩特動力機車',
      store_nm = store_nm,
      addr = addr,
      tel_no = tel_no,
      data_dt = snapshot_date,
      stringsAsFactors = FALSE
    )
  })

  # rbind() ignores the NULL entries; an all-NULL (or empty) list gives NULL.
  do.call(rbind, per_city)
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.