# draft/getTatung.R

#' Scrape the Tatung service-location listing into a data frame
#'
#' Downloads every page of
#' `http://tcpc.tatung.com/Service/Index?search_tree1=21`, reads the store
#' table rows from each page and stacks them into a single data frame.
#'
#' The total number of pages is read from the pager element (`id="page"`) of
#' the first page: the digits following the `/` and preceding `頁`.
#'
#' Relies on `GET()`/`content()`, `xpathSApply()`/`xmlValue()` and
#' `str_extract()`/`str_replace_all()` being available in the calling
#' environment (presumably from httr, XML and stringr -- confirm against the
#' package imports).
#'
#' @return A data frame of character columns, one row per store:
#'   \describe{
#'     \item{brand_nm}{Always `'大同'`.}
#'     \item{store_nm}{Text of table row 1.}
#'     \item{tel_no}{Text of table row 4 with `TEL`, colons, spaces,
#'       parentheses and hyphens removed.}
#'     \item{addr}{Text of table row 3 with `地址`, colons and spaces removed.}
#'     \item{open}{Text of table row 5 with `營業時間:` and spaces removed,
#'       upper-cased.}
#'   }
#'   A zero-row data frame with the same columns is returned when no page
#'   lists any store.
getTatung <- function(){
  # Listing URL template; `%s` is the 1-based page number.
  page_url <- 'http://tcpc.tatung.com/Service/Index?search_tree1=21&page=%s'

  # Download and parse a single listing page.
  fetch_page <- function(url) {
    content(GET(url), encoding = 'utf8')
  }

  # Text of every node matching `xpath`, always as a character vector.
  # xpathSApply() yields an empty list when nothing matches, which would
  # otherwise give the per-page data frame inconsistent columns.
  node_text <- function(doc, xpath) {
    as.character(unlist(xpathSApply(doc, xpath, xmlValue)))
  }

  # Turn one parsed page into a data frame of its stores.
  parse_page <- function(doc, page_no) {
    row_xpath <- '//*[@id="store_name"]/div/div/table/tr[%d]/td'

    store_nm <- node_text(doc, sprintf(row_xpath, 1L))
    addr <- str_replace_all(node_text(doc, sprintf(row_xpath, 3L)),
                            '地址|:| ', '')
    tel_no <- str_replace_all(node_text(doc, sprintf(row_xpath, 4L)),
                              'TEL|:| |\\(|\\)|-', '')
    open <- toupper(str_replace_all(node_text(doc, sprintf(row_xpath, 5L)),
                                    '(營業時間:)| ', ''))

    # data.frame() silently recycles vectors whose lengths divide each other,
    # which would misalign store records; refuse instead of guessing.
    n_found <- c(store_nm = length(store_nm), tel_no = length(tel_no),
                 addr = length(addr), open = length(open))
    if (length(unique(n_found)) != 1L) {
      stop(sprintf('Page %d: field counts differ (%s).', page_no,
                   paste(names(n_found), n_found, sep = '=', collapse = ', ')),
           call. = FALSE)
    }

    data.frame(store_nm, tel_no, addr, open, stringsAsFactors = FALSE)
  }

  # The first page supplies both the page count and the first batch of stores,
  # so it is downloaded once and reused below.
  first_page <- fetch_page(sprintf(page_url, 1L))
  pager <- node_text(first_page, '//*[@id="page"]/div')
  page_count <- str_extract(str_extract(pager, '/.+頁'), '[0-9]+')
  page_count <- as.integer(page_count[!is.na(page_count)])
  if (length(page_count) == 0L || is.na(page_count[1L]) ||
      page_count[1L] < 1L) {
    stop('Could not determine the number of pages from the first page.',
         call. = FALSE)
  }
  max_page <- page_count[1L]

  pages <- vector('list', max_page)
  for (i in seq_len(max_page)) {
    doc <- if (i == 1L) first_page else fetch_page(sprintf(page_url, i))
    pages[[i]] <- parse_page(doc, i)
  }

  stores <- do.call(rbind, pages)
  # rep() keeps this valid when no store was found (zero rows).
  data.frame(brand_nm = rep('大同', nrow(stores)), stores,
             stringsAsFactors = FALSE)
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.