# draft/getHoliday.R

#' Scrape the store listings published on www.holiday.com.tw
#'
#' Fetches the room index page, collects the links matched by the CSS
#' selector `.style2 > p > a`, then downloads every linked page and pulls the
#' store name, telephone number and address cells out of its listing table.
#'
#' Relies on `GET()`/`content()`, `htmlParse()`/`xpathSApply()`/`xmlValue`,
#' `cssApply()`/`cssLink` and `str_replace_all()`/`str_replace()` being
#' available on the search path (presumably httr, XML, CSS and stringr --
#' confirm against the package imports).
#'
#' @return A data.frame with the character columns `brand_nm`, `store_nm`,
#'   `tel_no` and `addr`, one row per store found; `NULL` when the index
#'   page yields no links.
getHoliday <- function() {
  base_url <- "http://www.holiday.com.tw/room/"
  # Common prefix of the table rows that hold one store each.
  row_xpath <- '//*[@id="Table4"]/tr[2]/td/table/tr[2]/td/table/tr'

  # Download a page and parse it into an HTML document.
  fetch_html <- function(page_url) {
    raw <- content(GET(page_url), "text", encoding = "utf8")
    htmlParse(raw, encoding = "utf8")
  }

  # Text of every node found at `cell` (a path relative to a store row).
  cell_values <- function(doc, cell) {
    xpathSApply(doc, paste0(row_xpath, cell), xmlValue)
  }

  # Step 1: collect the relative links to the listing pages.
  index_doc <- fetch_html(paste0(base_url, "room.aspx"))
  page_nm <- cssApply(index_doc, ".style2 > p > a", cssLink)

  # Step 2: visit each listing page and scrape its contents.
  # seq_along() keeps this a no-op when no links were found; 1:length(x)
  # would iterate over c(1, 0) and fail.
  pages <- lapply(seq_along(page_nm), function(i) {
    doc <- fetch_html(sprintf(paste0(base_url, "%s"), page_nm[i]))

    store_nm <- cell_values(doc, "/td[2]/div")
    store_nm <- str_replace_all(store_nm, '[:space:]|\r ', '')
    store_nm <- str_replace_all(store_nm, '※Newopen※', '')

    tel_no <- cell_values(doc, "/td[3]/div")
    tel_no <- str_replace_all(tel_no, '[:space:]', '')

    addr <- cell_values(doc, "/td[4]")
    addr <- str_replace_all(addr, '[:space:]', '')
    # Drop a parenthesised remark from the address.
    addr <- str_replace(addr, '\\(.+\\)', '')

    # The first element of every column is dropped (it appears to be the
    # table's header row -- confirm against the page markup).
    data.frame(
      brand_nm = '好樂迪',
      store_nm = store_nm[-1],
      tel_no = tel_no[-1],
      addr = addr[-1],
      stringsAsFactors = FALSE
    )
  })

  # Returned as the last expression so the result is visible to the caller.
  do.call(rbind, pages)
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.