# draft/getDonhi.R

#' Scrape the 大東海 branch listing from donhi.com.tw
#'
#' Downloads the class/branch listing page, collects the text of every
#' `.shadow` element, and splits each entry into a store name, a telephone
#' number and an address using regular expressions.
#'
#' @return A data.frame with character columns `brand_nm` (always "大東海"),
#'   `store_nm`, `tel_no` and `addr`, one row per scraped entry. Fields that
#'   do not match their pattern are `NA`. Zero rows are returned when the
#'   page yields no usable entries.
#' @details Signals an error if the HTTP request does not succeed.
getDonhi <- function() {
  url  <- 'http://donhi.com.tw/modules/myclass/'
  resp <- GET(
    url,
    user_agent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.130 Safari/537.36")
  )

  # Fail loudly on an HTTP error instead of parsing an error page.
  httr::stop_for_status(resp)

  # htmlParse(asText = TRUE) needs the page as a character string, so decode
  # the response body explicitly rather than passing the response object.
  page_text <- httr::content(resp, as = "text", encoding = "UTF-8")
  htmldoc2  <- htmlParse(page_text, asText = TRUE, encoding = 'utf8')

  raw_entries <- cssApply(htmldoc2, ".shadow", cssCharacter)

  # Strip the brand prefix, layout whitespace, parentheses and spaces.
  cleaned <- gsub("(\n    大東海 - |\n    |\\(|\\)| )", "", raw_entries)

  # Drop the last two matches (presumably non-branch elements on the page --
  # TODO confirm). head(x, -2) is also safe when fewer than two elements exist.
  entries <- utils::head(cleaned, -2)

  # Store name: everything up to and including the last "班".
  store_nm <- str_extract(entries, '^.+班')
  tel_no   <- str_extract(entries, '[0-9]{2}-[0-9]{8}|([0-9]{5}-[0-9]{4})|[0-9]{10}|[0-9]{6}-[0-9]{3}|[0-9]{9}')
  # Address: what remains after removing the store name and the leading
  # run of digits/dashes (the phone number).
  addr     <- str_replace(str_replace(entries, '^.+班', ''), '^([0-9]|-)+', '')

  # Repeat the brand explicitly so an empty scrape yields a zero-row frame
  # instead of a "differing number of rows" error.
  datas <- data.frame(
    brand_nm = rep('大東海', length(store_nm)),
    store_nm = store_nm,
    tel_no   = tel_no,
    addr     = addr,
    stringsAsFactors = FALSE
  )
  return(datas)
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.