# draft/getTCat.R

#' Scrape the T-Cat service-location ("foothold") list
#'
#' Downloads the first listing page to find out how many result pages the
#' pager links to, then downloads every page and stacks the rows of the
#' `tablelist` table into a single data frame.
#'
#' @return A data frame with one row per listed location and the character
#'   columns `brand_nm`, `area`, `store_nm`, `addr`, `tel_no` and `data_dt`
#'   (the scrape date formatted as `YYYYMMDD`). The value is returned
#'   visibly.
getTCat <- function() {
  first_url <- 'http://www.t-cat.com.tw/inquire/Foothold_List.aspx?Area=All'
  page_fmt <- 'http://www.t-cat.com.tw/Inquire/Foothold_List.aspx?Area=All&Page=%s'

  # Download one page and parse it; fail loudly on an HTTP error instead of
  # trying to scrape an error page.
  fetch_page <- function(url) {
    res <- GET(url, encoding = 'utf8')
    httr::stop_for_status(res)
    content(res, encoding = 'utf8')
  }

  # Text of the idx-th cell of every table row, without the first row
  # (the first row is dropped exactly as the per-column `[-1]` did before).
  # `unlist()` turns the empty list returned for "no match" into character(0).
  read_column <- function(doc, idx) {
    cells <- xpathSApply(
      doc,
      sprintf('//*[@class="tablelist"]/tr/td[%d]', idx),
      xmlValue
    )
    as.character(unlist(cells))[-1]
  }

  # Work out the number of pages from the pager links: collect every
  # attribute value of every pager anchor, pull out the `Page=<n>` numbers
  # and take the largest one.
  pager_attrs <- as.character(unlist(xpathSApply(
    fetch_page(first_url),
    '//*[@id="ctl00_ContentPlaceHolder1_Pager_lblPage"]/a',
    xmlAttrs
  )))
  page_hits <- regmatches(pager_attrs, regexpr("Page=[0-9]+", pager_attrs))
  page_nums <- as.integer(sub("Page=", "", page_hits, fixed = TRUE))
  if (length(page_nums) == 0L || anyNA(page_nums)) {
    stop("Could not determine the number of result pages from the pager links",
         call. = FALSE)
  }
  max_page <- max(page_nums)

  # Computed once so every row carries the same date.
  scrape_date <- format(Sys.Date(), "%Y%m%d")

  pages <- lapply(seq_len(max_page), function(i) {
    doc <- fetch_page(sprintf(page_fmt, i))

    area <- read_column(doc, 1L)
    store_nm <- read_column(doc, 2L)
    addr <- read_column(doc, 3L)
    tel_no <- read_column(doc, 4L)
    # The pattern is built from character classes, so it removes any run of
    # these individual characters (the hotline label text) from the cell.
    tel_no <- str_replace_all(tel_no, '([請洽客戶服務專線:]|[客服專線:])+', '')

    # Repeat the constants explicitly so a page without data rows yields a
    # zero-row data frame instead of a length-mismatch error.
    n_rows <- length(area)
    data.frame(
      brand_nm = rep('黑貓宅急便', n_rows),
      area = area,
      store_nm = store_nm,
      addr = addr,
      tel_no = tel_no,
      data_dt = rep(scrape_date, n_rows),
      stringsAsFactors = FALSE
    )
  })

  do.call(rbind, pages)
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.