draft/getBestPals.R

#' Scrape the BestPals store list from www.bestpals.com.tw.
#'
#' Downloads the store index page, follows every link matched by
#' `#store_menubox > div > a`, reads the first HTML table on each linked
#' page (dropping its fifth column) and stacks those tables row-wise.
#' The second column is then cleaned up and returned as `addr`.
#'
#' @return A data frame with the character columns `brand_nm`, `store_nm`,
#'   `addr`, `tel_no` and `opn_time`; one row per scraped table row.
getBestPals <- function() {
  # Download a page and parse the response text as UTF-8 HTML.
  # NOTE(review): `encoding` is kept on the GET() call exactly as before;
  # confirm whether GET() actually needs it.
  fetch_html <- function(page_url) {
    resp <- GET(page_url, encoding = "utf8")
    body <- content(resp, "text", encoding = "utf8")
    htmlParse(body, encoding = "utf8")
  }

  index_url <- "http://www.bestpals.com.tw/%E9%96%80%E5%B8%82%E6%93%9A%E9%BB%9E/action-store.htm"
  links <- cssApply(fetch_html(index_url), "#store_menubox > div > a", cssLink)

  # Fail loudly instead of looping over 1:0 when the menu yields no links.
  if (length(links) == 0L) {
    stop("No store page links found on ", index_url, call. = FALSE)
  }

  # One data frame per linked page: the first table, minus its fifth column.
  page_tables <- lapply(seq_along(links), function(i) {
    page_url <- sprintf("http://www.bestpals.com.tw%s", links[i])
    first_table <- readHTMLTable(fetch_html(page_url))[[1]]
    first_table[, -5, drop = FALSE]
  })

  # Bind however many pages the site currently lists (previously fixed at 7).
  stores <- do.call(rbind, page_tables)

  # First pass replaces only the FIRST match per string: three leading
  # digits, or else the first whitespace character.
  # NOTE(review): presumably the three digits are a postal code; a string
  # starting with whitespace keeps its digits -- confirm this is acceptable.
  addr <- str_replace(unlist(stores[, 2]), "^[0-9]{3}|[:space:]", "")
  # Second pass removes every parenthesised segment and all whitespace.
  addr <- str_replace_all(unlist(addr), "(\\(.+\\))|[:space:]", "")

  # as.character() keeps the columns as plain strings even if the scraped
  # table delivered factors.
  data.frame(
    brand_nm = "百事特",
    store_nm = as.character(stores[, 1]),
    addr = addr,
    tel_no = as.character(stores[, 3]),
    opn_time = as.character(stores[, 4]),
    stringsAsFactors = FALSE
  )
}
leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.