# draft/getHucc.R

#' Scrape the store list published at hucc-coop.tw
#'
#' Downloads the "stores-all" page, reads every HTML table found on it,
#' stacks the tables row-wise and tidies the address and phone columns.
#'
#' The page layout is assumed to be: column 1 = store name, column 2 =
#' address, column 3 = phone number (this is how the columns are labelled in
#' the result) -- TODO confirm against the live page if the output looks off.
#'
#' @return A data.frame with the character columns `brand_nm` (constant
#'   "主婦聯盟"), `store_nm`, `addr` and `tel_no`, one row per table row.
#' @details Stops with an error when the HTTP request fails or when the page
#'   contains no HTML tables.
getHucc <- function() {
  url <- "https://www.hucc-coop.tw/stores-all"

  # Fail early on HTTP errors instead of parsing an error page.
  res <- GET(url)
  httr::stop_for_status(res)
  page_text <- content(res, "text", encoding = "utf8")
  html <- htmlParse(page_text, encoding = "utf8")

  tables <- readHTMLTable(html)
  if (length(tables) == 0) {
    stop("No HTML tables found at ", url, call. = FALSE)
  }

  # Stack all tables in one call rather than growing a result in a loop.
  # unname() keeps the list names from leaking into the row names.
  stores <- do.call(rbind, unname(tables))

  # Address: strip a leading 5- or 3-digit postal code, all whitespace, and
  # any parenthetical note. The parentheses must be escaped; an unescaped
  # `(.+)` is a capture group that matches (and deletes) the whole address.
  addr <- str_replace_all(
    as.character(stores[[2]]),
    "^[0-9]{5}|^[0-9]{3}|[:space:]|\\([^)]*\\)",
    ""
  )

  # Phone: keep only the part before the first ";", keep the first three
  # characters untouched and drop every "-" from the remainder.
  tel_raw <- str_replace_all(as.character(stores[[3]]), ";.+", "")
  tel_head <- substr(tel_raw, 1, 3)
  tel_tail <- str_replace_all(substring(tel_raw, 4), "-", "")
  tel_no <- paste0(tel_head, tel_tail)

  result <- data.frame(
    "主婦聯盟",
    as.character(stores[[1]]),
    addr,
    tel_no,
    stringsAsFactors = FALSE
  )
  colnames(result) <- c("brand_nm", "store_nm", "addr", "tel_no")
  result
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.