# draft/getHilltop.R

#' Scrape the Hilltop store list
#'
#' Downloads the Hilltop sales page and builds one row per store from the
#' `#sales_content > a` entries: the `<h1>` text is taken as the store name and
#' the `<p>` text is split into a phone number and the remaining address text.
#'
#' @return A data.frame with character columns `brand_nm` (always
#'   `"hilltop"`), `store_nm`, `tel_no` (`NA` when the entry contains no
#'   `digits-digits` phone number) and `addr` (the entry text with the phone
#'   number removed, or the unchanged entry text when no phone was found).
getHilltop <- function(){
  url <- 'http://www.hilltop.tw/sales.php'
  res <- GET(url)
  # Fail loudly on an HTTP error instead of silently parsing an error page.
  httr::stop_for_status(res)
  res <- content(res, 'text', encoding = 'utf8')
  html <- htmlParse(res, encoding = "utf8")

  store_nm <- cssApply(html, '#sales_content > a > h1 ', cssCharacter)
  data     <- cssApply(html, '#sales_content > a > p', cssCharacter)

  tel_no <- str_extract(data, '[0-9]+-[0-9]+')

  # Address and phone are run together in the same text, so the address is
  # obtained by removing the already-extracted phone number from it.
  # Only strip where a phone was actually found: an NA pattern would turn the
  # whole address into NA. The phone consists of digits and "-" only, so it is
  # safe to use as a regex pattern without escaping.
  addr <- as.character(data)
  has_tel <- !is.na(tel_no)
  if (any(has_tel)) {
    addr[has_tel] <- str_replace_all(addr[has_tel], tel_no[has_tel], '')
  }

  data.frame(
    brand_nm = 'hilltop',
    store_nm,
    tel_no,
    addr,
    stringsAsFactors = FALSE
  )
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.