# draft/getToto.R

# Scrape the TOTO dealer listing on www.twtoto.com.tw into one data frame.
#
# Reads the pagination links on the dealer page with id=3 to work out which
# listing pages exist, downloads each of those pages, and stacks the rows of
# their dealer tables.
#
# Calls GET(), content(), xpathSApply(), xmlAttrs(), xmlValue(),
# str_extract(), str_replace() and str_replace_all() unqualified, so they
# must already be in scope (presumably from httr, XML and stringr -- confirm
# against the package imports).
#
# Returns: a data.frame with character columns brand_nm (always "TOTO"),
#   store_nm, addr and tel_no, one row per table row found. NULL when no
#   listing pages are found. The value is returned visibly.
getToto <- function() {
  # Download `url` and return its parsed body.
  # NOTE(review): `encoding` is forwarded to GET() exactly as before; it is
  # unclear from this file whether GET() acts on it -- confirm.
  fetch_page <- function(url) {
    response <- GET(url, encoding = "utf8")
    content(response, encoding = "utf8")
  }

  # Build the URLs of the listing pages from the pagination links.
  # Returns a character vector of URLs named by page id.
  getsidurl <- function() {
    page <- fetch_page("http://www.twtoto.com.tw/dealer.aspx?id=3")

    link_xpath <- '//*[@id="main-content"]/article/nav/ul/li/a'
    link_attrs <- xpathSApply(page, link_xpath, xmlAttrs)

    # Every attribute value is inspected; values that are not a dealer link
    # come back as NA from str_extract().
    hrefs <- str_extract(unlist(link_attrs), "dealer.aspx\\?id=[0-9]+$")
    page_ids <- str_replace(hrefs, "(dealer.aspx\\?id=)|(current)", "")

    # Drops the third extracted entry, exactly as before.
    # NOTE(review): presumably that entry is not a real listing page on the
    # live site -- confirm; a positional drop is fragile if the markup moves.
    page_ids <- page_ids[-3]

    # sprintf() is vectorised; the names keep the ids attached to each URL so
    # the row names of the combined result are unchanged.
    page_urls <- sprintf("http://www.twtoto.com.tw/dealer.aspx?id=%s", page_ids)
    names(page_urls) <- page_ids
    page_urls
  }

  # Extract the dealer table of one listing page.
  # URL: address of the listing page.
  # Returns a data.frame with brand_nm, store_nm, addr and tel_no.
  getAddr <- function(URL) {
    page <- fetch_page(URL)

    cell_xpath <- '//*[@id="main-content"]/article/div/table/tr/td[%d]'
    # Flatten to a plain character vector so a page without table rows gives
    # zero-length columns instead of an empty list.
    read_column <- function(index) {
      cells <- xpathSApply(page, sprintf(cell_xpath, index), xmlValue)
      as.character(unlist(cells))
    }

    name <- read_column(1L)
    addr <- read_column(2L)
    tel <- read_column(3L)

    # Remove everything from the first "(" to the end of the line, and all
    # whitespace. The original ran two passes whose space patterns look
    # identical in this file; one may have been a non-ASCII space
    # (TODO confirm). `\\s` covers ASCII and Unicode space separators.
    addr <- str_replace_all(addr, "\\(.+|\\s", "")

    # Repeat the brand per row: a length-one value next to zero-length
    # columns makes data.frame() fail on a page with no rows.
    data.frame(
      brand_nm = rep("TOTO", length(name)),
      store_nm = name,
      addr = addr,
      tel_no = tel,
      stringsAsFactors = FALSE
    )
  }

  page_tables <- lapply(getsidurl(), getAddr)

  # Last expression is the value itself (not an assignment) so the result is
  # returned visibly.
  do.call(rbind, page_tables)
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.