# draft/getBobson.R

#' Scrape the Bobson store list
#'
#' Downloads the store-locator page at
#' `http://www.bobsonjeans.com.tw/location.php` and pulls store names, phone
#' numbers and addresses out of it.
#'
#' @return A data frame with one row per store and four character columns:
#'   `brand_nm` (always `"bobson"`), `store_nm`, `tel_no` and `addr`. `addr`
#'   is `NA` for an address that does not contain any of the expected
#'   terminators (see the comment in the body).
getBobson <- function() {
  url <- "http://www.bobsonjeans.com.tw/location.php"
  res <- GET(url)
  # Fail loudly on an HTTP error instead of parsing an error page.
  httr::stop_for_status(res)
  page_text <- content(res, "text", encoding = "utf8")
  html <- htmlParse(page_text, encoding = "utf8")

  # Store names: text of the links inside the #storelistarea list, with all
  # whitespace removed.
  store <- as.character(
    unlist(cssApply(html, "#storelistarea > li > a", cssCharacter))
  )
  store <- str_replace_all(store, "[:space:]", "")

  # Addresses are taken from the raw page text rather than the parsed tree.
  addr <- unlist(str_extract_all(page_text, "地址:.+</dd"))
  addr <- str_replace_all(addr, "地址:|</dd|[:space:]", "")
  # Strip five leading digits (presumably a postal code -- confirm).
  addr <- str_replace_all(addr, "^[0-9]{5}", "")
  # Keep the text up to the last 號 / 樓 / F and drop whatever follows.
  # str_extract() yields exactly one value per address (NA when nothing
  # matches), so rows stay aligned with the other columns.
  addr <- str_extract(addr, ".+(號|樓|F)")

  # Phone numbers: whatever sits between the phone label and the closing tag
  # on a line that also carries the address label.
  tel <- unlist(str_extract_all(page_text, "地址.+門市電話:.+</dd"))
  tel <- str_replace_all(tel, "地址.+門市電話:", "")
  tel <- str_replace_all(tel, "</dd", "")

  # data.frame() recycles vectors whose lengths divide evenly, which would
  # silently pair stores with the wrong phone/address; refuse instead.
  if (length(tel) != length(store) || length(addr) != length(store)) {
    stop(
      "Scraped ", length(store), " store names, ", length(tel),
      " phone numbers and ", length(addr),
      " addresses; the page layout may have changed.",
      call. = FALSE
    )
  }

  data.frame(
    brand_nm = rep("bobson", length(store)),
    store_nm = store,
    tel_no = tel,
    addr = addr,
    stringsAsFactors = FALSE
  )
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.