# draft/getFmsc.R

#' Crawl store listings from www.fmsc.com.tw
#'
#' Requests `http://www.fmsc.com.tw/retail_p.php?id=<id>` for every id in
#' `ids`, extracts the store name, phone number and address from the returned
#' HTML with regular expressions, and stacks every page that carries a
#' non-empty address into one data frame. Stores whose name matches
#' '大前店|塚店' are removed from the final result.
#'
#' @param ids Vector of page ids to request. Defaults to `1:100`, the range
#'   that was previously hard-coded.
#' @return A data frame with character columns `brand_nm`, `store_nm`,
#'   `tel_no` and `addr`; it has zero rows when no page yields an address.
getFmsc <- function(ids = 1:100) {
  ua <- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.130 Safari/537.36"

  # A field whose pattern matched nothing becomes NA so that data.frame()
  # does not fail on a zero-length column.
  or_na <- function(x) if (length(x) == 0L) NA_character_ else x

  # Preallocated; slots of skipped pages stay NULL and are ignored by rbind().
  result <- vector("list", length(ids))

  for (i in seq_along(ids)) {
    page_url <- sprintf('http://www.fmsc.com.tw/retail_p.php?id=%s', ids[[i]])
    res <- GET(page_url, user_agent(ua))
    res <- content(res, 'text', encoding = 'big5')

    # An empty or undecodable body leaves nothing to parse.
    if (length(res) != 1L || is.na(res)) {
      next
    }

    store_nm <- str_extract_all(res, '<b>.+<\\/b')[[1]]
    store_nm <- str_replace_all(store_nm, '<b>|</b|[:space:]', '')
    addr <- str_extract_all(res, '地址.+<br')[[1]]
    addr <- str_replace_all(addr, '地址:|\\<br|[:space:]', '')
    tel_no <- str_extract_all(res, '電話.+<br')[[1]]
    tel_no <- str_replace_all(tel_no, '電話:|\\<br|[:space:]', '')

    # `if` needs a single TRUE/FALSE: the previous `nchar(addr) > 0` errored
    # when the address pattern matched nothing (length-0 condition) and, on
    # R >= 4.2, when it matched more than once. The first match decides, which
    # is what older R versions did implicitly.
    if (length(addr) == 0L || is.na(addr[[1L]]) || !nzchar(addr[[1L]])) {
      next
    }

    result[[i]] <- data.frame(
      brand_nm = '鬍鬚張',
      store_nm = or_na(store_nm),
      tel_no = or_na(tel_no),
      addr,
      stringsAsFactors = FALSE
    )
  }

  data_fin <- do.call(rbind, result)

  # rbind() over nothing but NULLs returns NULL; hand back an empty frame with
  # the usual columns instead of failing on the subset below.
  if (is.null(data_fin)) {
    return(data.frame(
      brand_nm = character(0),
      store_nm = character(0),
      tel_no = character(0),
      addr = character(0),
      stringsAsFactors = FALSE
    ))
  }

  # Keep rows whose store name is unknown (NA) rather than letting an NA index
  # turn them into all-NA rows.
  excluded <- str_detect(data_fin$store_nm, '大前店|塚店')
  data_fin[is.na(excluded) | !excluded, , drop = FALSE]
}
# leoluyi/address_crawler documentation built on May 21, 2019, 5:09 a.m.