# draft/getHandsTea.R

#' Scrape the store list of the brand "茶的魔手" from twcoupon.com
#'
#' Downloads the brand page, parses it as HTML and pulls three parallel
#' character vectors out of the store list via CSS selectors: store name,
#' address and telephone number. The vectors are cleaned up and combined
#' into one data frame.
#'
#' Relies on `GET()`, `content()`, `htmlParse()`, `cssApply()`,
#' `cssCharacter` and `str_replace_all()` being available in the calling
#' environment (presumably from httr, XML, CSSApply and stringr -- confirm
#' against the package imports).
#'
#' @return A data frame with character columns `brand_nm` (always
#'   "茶的魔手"), `store_nm`, `addr` and `tel_no`, one row per store found
#'   on the page.
getHandsTea <- function() {
  url <- "http://twcoupon.com/brandshop-%E8%8C%B6%E7%9A%84%E9%AD%94%E6%89%8B-%E9%9B%BB%E8%A9%B1-%E5%9C%B0%E5%9D%80.html"

  # NOTE(review): `encoding` is passed to GET() exactly as before; confirm
  # that GET() actually makes use of it.
  res <- GET(url, encoding = "utf8")
  page_text <- content(res, "text", encoding = "utf8")
  html <- htmlParse(page_text, encoding = "utf8")

  # Every field lives under the same <li> of the store list; only the tail
  # of the selector differs per field.
  item_sel <- "#form1 > div.WRAPPER > div.CONTENT_Q > div.brand_shop > div.box > div > div.right > li"

  # Store name: drop the brand name so only the branch name remains.
  raw_name <- cssApply(html, paste0(item_sel, " > span > a"), cssCharacter)
  store_nm <- str_replace_all(raw_name, "茶的魔手", "")

  # Address: strip three leading digits (presumably the postal code), all
  # whitespace, and any parenthesised remark.
  raw_addr <- cssApply(html, paste0(item_sel, "> span.add > b"), cssCharacter)
  addr <- str_replace_all(raw_addr, "^[0-9]{3}|[:space:]|\\(.+\\)", "")

  # Telephone: keep the first two characters as a prefix (presumably the
  # area code), remove every dash from the remainder, then join the two
  # parts with a single dash.
  raw_tel <- cssApply(html, paste0(item_sel, "> span.tel > b"), cssCharacter)
  tel_prefix <- substr(raw_tel, 1, 2)
  tel_rest <- str_replace_all(substring(raw_tel, 3), "-", "")
  tel_no <- paste0(tel_prefix, "-", tel_rest)

  # The original call misspelled `stringsAsFactors`, which silently added a
  # fifth column of FALSE (dropped afterwards) and left the text columns as
  # factors on R < 4.0. Spelling it correctly removes the need for both the
  # column drop and the separate renaming step.
  data.frame(
    brand_nm = "茶的魔手",
    store_nm = store_nm,
    addr = addr,
    tel_no = tel_no,
    stringsAsFactors = FALSE
  )
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.