# draft/getBagtoyou.R

# Scrape the Bagtoyou store list from www.btu.com.tw.
#
# Reads the page count from the '#store_page > span' label on the store-search
# landing page, then downloads every numbered result page and keeps the first
# three columns of the first HTML table found on each page.
#
# Returns: a data.frame with columns brand_nm (always 'Bagtoyou'), store_nm,
#   addr and tel_no; one row per table row whose first column is non-empty.
# Raises: an error if an HTTP request fails, if the page count cannot be read
#   from the landing page, or if a result page contains no HTML table.
getBagtoyou <- function(){
  base_url <- 'http://www.btu.com.tw/%E9%96%80%E5%B8%82%E6%90%9C%E5%B0%8B/action-store_share-class_id-ss'

  # Download one URL and parse the body as HTML. Fail on HTTP errors here so
  # an error page is not silently parsed as if it were a result page.
  fetch_html <- function(page_url){
    res <- GET(page_url)
    httr::stop_for_status(res)
    res_text <- content(res, 'text', encoding = 'utf8')
    htmlParse(res_text, encoding = "utf8")
  }

  # 1. Find the last page number.
  # The pager label presumably reads '1 / N' (TODO confirm against the live
  # site); stripping the '1 / ' prefix leaves N.
  html_link  <- fetch_html(paste0(base_url, '.htm'))
  page_label <- cssApply(html_link, '#store_page > span', cssCharacter)
  page_label <- str_replace_all(page_label, '1 / ', '')
  # Convert explicitly instead of relying on `1:"N"` coercion; `[1]` keeps the
  # first match if the selector hits several nodes and yields NA if it hits none.
  n_pages <- suppressWarnings(as.integer(page_label))[1]
  if (is.na(n_pages) || n_pages < 1L) {
    stop("Could not determine the number of result pages from '#store_page > span'.",
         call. = FALSE)
  }

  # 2. Fetch each result page and extract the store rows.
  pages <- lapply(seq_len(n_pages), function(page){
    html   <- fetch_html(sprintf('%s-page-%s.htm', base_url, page))
    tables <- readHTMLTable(html)
    if (length(tables) == 0L) {
      stop(sprintf('No HTML table found on result page %s.', page), call. = FALSE)
    }
    first_table <- tables[[1]]
    # Locate rows by the first column (store name), then keep columns 1-3.
    # as.character() covers the case where the column was read as a factor;
    # the is.na() guard stops NA names from producing all-NA rows.
    store_nm <- as.character(first_table[, 1])
    first_table[!is.na(store_nm) & nzchar(store_nm), 1:3]
  })

  # 3. Stack all pages and prepend the constant brand column.
  data_fin <- do.call(rbind, pages)
  names(data_fin) <- c('store_nm', 'addr', 'tel_no')
  # rep() over nrow() keeps cbind() valid even when no rows were scraped.
  data_fin <- cbind(rep('Bagtoyou', nrow(data_fin)), data_fin)
  names(data_fin) <- c('brand_nm','store_nm', 'addr', 'tel_no')
  data_fin
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.