# File: draft/getSum.R

#' Scrape the SUM (尚盟汽車) store locator into one data frame.
#'
#' Queries the `siteQuery2.php` listing on www.sum.com.tw for two listing
#' kinds -- `kindtype=1` (labelled '車商', i.e. dealers) and `kindtype=2`
#' (labelled '保修', i.e. maintenance/repair) -- reads the page count from the
#' first response, then walks every result page and collects the table cells.
#'
#' Uses `GET`, `content`, `xpathSApply`, `xmlValue` and `str_replace_all`,
#' which must be available in the calling environment (presumably from
#' httr, XML and stringr -- confirm against the package imports).
#'
#' @return Invisibly (as before, when the result was the value of a trailing
#'   assignment), a data.frame with character columns `brand_nm`, `type`,
#'   `store_nm`, `addr`, `boss`, `area` and `data_dt` (run date, yyyymmdd).
#'   Dealer rows come first, followed by repair rows.
getSum <- function() {
  url_template <- "http://www.sum.com.tw/siteQuery2.php?Action=Query&kindtype=%s&choice=&zone1=&zone2=&address=&compname="

  # One date stamp for the whole run, so every row carries the same value.
  run_date <- format(Sys.Date(), "%Y%m%d")

  # Download one URL and hand back the parsed document.
  fetch_doc <- function(url) {
    content(GET(url, encoding = "utf8"), encoding = "utf8")
  }

  # Text of every cell in the given column of the result table; always a
  # character vector (length 0 when the page has no matching cells).
  cell_text <- function(doc, column) {
    xpath <- sprintf('//*[@class="searchResults"]/table/tr/td[%d]', column)
    as.character(unlist(xpathSApply(doc, xpath, xmlValue)))
  }

  # Read the total page count from the pager text of a listing page.
  count_pages <- function(doc, url) {
    pager <- xpathSApply(
      doc,
      '//*[@id="wrapper"]/div[1]/div/div[4]/div[2]/p/span/text()[1]',
      xmlValue
    )
    # Strip line breaks, spaces and the surrounding '‧共 ... 頁,' decoration.
    pager <- str_replace_all(
      as.character(unlist(pager)), "(\r|\n| |[‧共]|[頁,])+", ""
    )
    # If several nodes match, only the first is used (what `1:page_num`
    # did implicitly, with a warning).
    page_count <- suppressWarnings(as.integer(pager[1]))
    if (is.na(page_count) || page_count < 0L) {
      stop("Could not read the page count from ", url, call. = FALSE)
    }
    page_count
  }

  # Assemble the output rows for one listing kind; constant columns are sized
  # to the number of stores so that an empty page yields a 0-row data frame.
  make_rows <- function(type_label, store_nm, addr, boss, area) {
    n_rows <- length(store_nm)
    data.frame(
      brand_nm = rep("SUM-尚盟汽車", n_rows),
      type = rep(type_label, n_rows),
      store_nm = store_nm,
      addr = addr,
      boss = boss,
      area = area,
      data_dt = rep(run_date, n_rows),
      stringsAsFactors = FALSE
    )
  }

  # Scrape every result page of one listing kind.
  scrape_kind <- function(kind_type, type_label) {
    list_url <- sprintf(url_template, kind_type)
    page_count <- count_pages(fetch_doc(list_url), list_url)
    if (page_count == 0L) {
      return(make_rows(
        type_label, character(0), character(0), character(0), character(0)
      ))
    }

    page_urls <- sprintf("%s&page=%s", list_url, seq_len(page_count))
    pages <- lapply(page_urls, function(page_url) {
      doc <- fetch_doc(page_url)
      area <- cell_text(doc, 1L)
      store_nm <- cell_text(doc, 2L)
      boss <- cell_text(doc, 3L)
      addr <- cell_text(doc, 5L)

      # Cut off whatever trails the address in the same cell.
      # NOTE(review): `[滿意度]` and `[評價總]` are character classes, so the
      # address is truncated at the first occurrence of ANY one of those
      # characters, not at the whole phrase. Patterns are kept unchanged here;
      # confirm against the live page text before tightening them.
      addr <- str_replace_all(addr, "([滿意度]).+$", "")
      addr <- str_replace_all(addr, "([評價總]).+$", "")
      addr <- str_replace_all(addr, "([(]).+$", "")

      make_rows(type_label, store_nm, addr, boss, area)
    })
    do.call(rbind, pages)
  }

  # Dealers ----
  dealers <- scrape_kind(1L, "車商")

  # Maintenance / repair ----
  repair <- scrape_kind(2L, "保修")

  invisible(rbind(dealers, repair))
}
# Source: leoluyi/CRMaddress (documentation built on May 21, 2019, 5:08 a.m.)