# draft/getSlc.R

#' Scrape the store list of the SLC (山隆) gas-station chain
#'
#' Downloads the store-locator page from www.slc.com.tw, takes the text of
#' the table cell that holds the listing, and reshapes its
#' whitespace-separated tokens into one row per store (seven tokens per row).
#'
#' @return A data.frame of character columns `brand_nm`, `store_nm`, `addr`,
#'   `tel_no`, `fax` and `area`. Parentheses and hyphens are stripped from
#'   `tel_no` and `fax`.
getSlc <- function() {
  res <- GET("http://www.slc.com.tw/08/index1.php?mp=2-3")
  # Fail early with the HTTP status instead of parsing an error page.
  httr::stop_for_status(res)

  # Decode the body as Big5, then keep only the <table>...</table> markup.
  page_text <- content(res, "text", encoding = "big5")
  table_html <- str_extract_all(page_text, "<table(.| |\r|\n)+/table>")[[1]]
  if (length(table_html) == 0) {
    stop("No <table> markup found in the SLC store page.", call. = FALSE)
  }
  table_html <- toUTF8(table_html)
  doc <- htmlParse(table_html, encoding = "UTF8")

  # The listing is read from the eighth <td> matched by the XPath query.
  cell_text <- xpathSApply(doc, "//table/tr/td", xmlValue)[8]

  # Collapse every run of whitespace to a single space so the cell can be
  # tokenised, and re-join the map-link label that the collapse splits in two.
  cell_text <- str_replace_all(cell_text, "(\r|\t|\n| )+", " ")
  cell_text <- str_replace_all(cell_text, "查 看地圖", "查看地圖")
  tokens <- strsplit(cell_text, " ")[[1]]

  # Drop everything before the first row index ("1"). Selecting the tail
  # directly avoids the `1:(n - 1)` pitfall, which removes the first element
  # when the marker is already at position 1.
  first_row <- which(tokens == "1")[1]
  if (is.na(first_row)) {
    stop("Could not locate the first store row in the SLC store page.",
         call. = FALSE)
  }
  tokens <- tokens[first_row:length(tokens)]

  # Seven tokens per store, in the order named below.
  # NOTE(review): matrix() warns and recycles if the token count is not a
  # multiple of 7 (e.g. an address containing a space) - confirm against the
  # live page.
  store_matrix <- matrix(tokens, ncol = 7, byrow = TRUE)
  result <- as.data.frame(store_matrix, stringsAsFactors = FALSE)
  names(result) <- c("index", "area", "store_nm", "addr", "map", "tel_no", "fax")

  # Keep only the digits-and-text part of phone and fax numbers.
  phone_punct <- "\\(|\\)|-"
  result$tel_no <- str_replace_all(result$tel_no, phone_punct, "")
  result$fax <- str_replace_all(result$fax, phone_punct, "")

  result <- result[, c("store_nm", "addr", "tel_no", "fax", "area")]
  data.frame(brand_nm = "山隆加油站", result, stringsAsFactors = FALSE)
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.