# draft/getSym.R

#' Scrape the SYM dealer locations listed on sbc.sym.com.tw
#'
#' Reads the first listing page to find how many result pages the pager
#' links to, downloads every page, and stacks the table cells of each page
#' into a single data frame.
#'
#' @return A data.frame with character columns `brand_nm`, `city`,
#'   `section`, `store_nm`, `addr` and `tel_no`; one row is built from every
#'   five `<td>` cells found on the listing pages.
getSym <- function() {
  base_url <- "http://sbc.sym.com.tw/location.php"
  fields <- c("city", "section", "store_nm", "addr", "tel_no")

  # Download `URL` and parse the body as UTF-8 HTML with the XML package.
  # The body is parsed explicitly because `content()` without `as = "text"`
  # chooses its own parser, and current httr returns an xml2 document for
  # HTML, which `xpathSApply()` cannot query.
  getSymHtml <- function(URL) {
    res <- GET(URL)
    # Fail loudly on an HTTP error instead of scraping an error page.
    httr::stop_for_status(res)
    htmlParse(content(res, "text", encoding = "UTF-8"), encoding = "UTF-8")
  }

  # Build the URL of every listing page, based on the highest `?page=N`
  # link found in the pager of the page at `URL`.
  getSymURL <- function(URL) {
    doc <- getSymHtml(URL)

    # Look the href up by name; relying on the position of the attribute
    # inside the anchor breaks as soon as the markup order changes.
    hrefs <- xpathSApply(
      doc,
      '//div[@class="page"]/ul/li/a',
      function(node) xmlAttrs(node)["href"]
    )
    hrefs <- as.character(unlist(hrefs, use.names = FALSE))

    page_query <- str_extract(hrefs, "\\?page=[0-9]+$")
    page_nums <- as.integer(str_replace(page_query, "\\?page=", ""))
    page_nums <- page_nums[!is.na(page_nums)]

    if (length(page_nums) == 0L) {
      # No pager links found: treat the listing as a single page, but say so,
      # because it can also mean the page layout has changed.
      warning("No pager links found at ", URL, "; reading one page only.",
              call. = FALSE)
      max_page <- 1L
    } else {
      max_page <- max(page_nums)
    }

    sprintf("%s?page=%s", URL, seq_len(max_page))
  }

  # Turn the `<td>` cells of one listing page into a data frame of dealers.
  getSyminfo <- function(URL) {
    doc <- getSymHtml(URL)

    cells <- xpathSApply(doc, "//td", xmlValue)
    cells <- as.character(unlist(cells, use.names = FALSE))
    cells <- str_replace_all(cells, "[:space:]", "")

    # `matrix()` would silently recycle values into a bogus last row.
    if (length(cells) %% length(fields) != 0L) {
      stop("Expected a multiple of ", length(fields), " table cells at ", URL,
           " but found ", length(cells), ".", call. = FALSE)
    }

    info <- as.data.frame(
      matrix(cells, ncol = length(fields), byrow = TRUE),
      stringsAsFactors = FALSE
    )
    names(info) <- fields

    # `rep()` keeps this valid for a page that yields zero rows.
    data.frame(
      brand_nm = rep("三陽", nrow(info)),
      info,
      stringsAsFactors = FALSE
    )
  }

  page_urls <- getSymURL(base_url)
  all_pages <- lapply(page_urls, getSyminfo)
  do.call(rbind, all_pages)
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.