# draft/getShowlinSalon.R

#' Scrape the Showlin Salon store listing
#'
#' Downloads every page of `http://showlin-salon.com.tw/store.php` and
#' collects one row per listed store. The number of pages is read from the
#' last pager link on page 1 (assumed to point at the final page -- verify
#' against the live site if the layout changes).
#'
#' Relies on `GET()`/`content()`, `xpathSApply()`/`xmlAttrs()`/`xmlValue()`
#' and `str_extract()`/`str_replace_all()` being in scope (presumably from
#' httr, XML and stringr). `content()` must return a document that
#' `xpathSApply()` can query.
#'
#' @return A data.frame with character columns `brand_nm` (constant),
#'   `store_nm` (the `h6` heading of each entry), and `tel_no`, `addr`,
#'   `open`, `note` (second cell of table rows 2, 1, 3 and 4 of each entry),
#'   with whitespace and a few decorations stripped. Zero rows if no store
#'   is found.
getShowlinSalon <- function(){
  page_url <- "http://showlin-salon.com.tw/store.php?&page=%s"
  pager_xpath <- '//*[@id="store"]/div/div[@class="storeShow"]/div/div/a'
  store_xpath <- '//*[@id="store"]/div/div[2]/ul/li/div'

  # Download and parse one listing page; fail loudly on an HTTP error
  # instead of scraping an error page.
  fetch_page <- function(page) {
    resp <- GET(sprintf(page_url, page))
    httr::stop_for_status(resp)
    content(resp, encoding = 'utf8')
  }

  # Text of every node matching `suffix` below a store entry. Always a
  # character vector (character(0) when nothing matches), never a list.
  extract_text <- function(doc, suffix) {
    values <- xpathSApply(doc, paste0(store_xpath, suffix), xmlValue,
                          simplify = FALSE)
    as.character(unlist(values))
  }

  # One data.frame per page. data.frame() still errors if the fields of a
  # page have different lengths (e.g. an entry lacking one table row).
  parse_stores <- function(doc) {
    store_nm <- extract_text(doc, '/h6')
    data.frame(
      # Repeat explicitly so a page without stores gives zero rows rather
      # than a recycling error.
      brand_nm = rep('小林髮廊', length(store_nm)),
      store_nm = store_nm,
      tel_no = extract_text(doc, '/table/tr[2]/td[2]'),
      addr = extract_text(doc, '/table/tr[1]/td[2]'),
      open = extract_text(doc, '/table/tr[3]/td[2]'),
      note = extract_text(doc, '/table/tr[4]/td[2]'),
      stringsAsFactors = FALSE
    )
  }

  first_doc <- fetch_page(1L)

  # simplify = FALSE keeps one named attribute vector per anchor; with
  # simplification, anchors sharing the same attribute count collapse into
  # a matrix and the attribute names are lost.
  pager_attrs <- unlist(
    xpathSApply(first_doc, pager_xpath, xmlAttrs, simplify = FALSE)
  )
  pager_hrefs <- pager_attrs[names(pager_attrs) == 'href']
  last_href <- pager_hrefs[length(pager_hrefs)]
  n_pages <- as.integer(str_extract(last_href, '(?<=&page=)[0-9]+'))
  if (length(n_pages) != 1L || is.na(n_pages) || n_pages < 1L) {
    warning("Could not determine the number of pages; reading page 1 only.",
            call. = FALSE)
    n_pages <- 1L
  }

  # Page 1 is already downloaded; fetch only the remaining pages.
  pages <- vector("list", n_pages)
  pages[[1L]] <- parse_stores(first_doc)
  for (page in seq_len(n_pages)[-1L]) {
    pages[[page]] <- parse_stores(fetch_page(page))
  }
  result <- do.call(rbind, pages)

  result$store_nm <- str_replace_all(result$store_nm, ' ', '')
  result$tel_no <- str_replace_all(result$tel_no, '\\(|\\)| |-', '')
  result$addr <- str_replace_all(result$addr, ' |Google|Map', '')
  # NOTE(review): both alternatives below look identical; the second may have
  # been meant as a different whitespace character -- confirm against the
  # original file.
  result$open <- str_replace_all(result$open, ' | ', '')
  result$note <- str_replace_all(result$note, '[:space:]', '')
  result
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.