# draft/getFuJiFilm.R

# Return the text of every node matching `xpath` as a plain character vector.
# The result is always a character vector (possibly empty): a query with no
# matches, or a cell with no text, must not turn a data-frame column into a
# list.
.getFuJiFilmCells <- function(doc, xpath) {
  values <- XML::xpathSApply(doc, xpath, XML::xmlValue)
  vapply(
    values,
    function(v) if (length(v) == 0L) "" else as.character(v)[1L],
    character(1),
    USE.NAMES = FALSE
  )
}

# Download one regional store-list page and return its rows as a data frame
# with columns brand_nm, store_nm, addr, tel_no and data_dt.
.getFuJiFilmRegion <- function(url, data_dt) {
  res <- httr::GET(url)
  # Fail loudly on 4xx/5xx instead of scraping an error page.
  httr::stop_for_status(res)

  # Fetch the body as text and parse it with the XML package explicitly:
  # the automatic parser used by httr::content() returns an xml2 document in
  # current httr versions, which XML::xpathSApply() cannot query.
  html <- httr::content(res, as = "text", encoding = "UTF-8")
  doc <- XML::htmlParse(html, asText = TRUE, encoding = "UTF-8")

  # The first row of the table is dropped from every column (it is treated
  # as the header row).
  store_nm <- .getFuJiFilmCells(doc, '//*[@id="senfe"]/tr/td[1]')[-1]
  tel_no <- .getFuJiFilmCells(doc, '//*[@id="senfe"]/tr/td[2]')[-1]
  addr <- .getFuJiFilmCells(doc, '//*[@id="senfe"]/tr/td[3]')[-1]

  # Strip line breaks and spaces from names and phone numbers; addresses are
  # kept verbatim.
  # NOTE(review): the store-name pattern lists the space alternative twice;
  # one of them may originally have been a non-breaking or full-width space
  # -- confirm against the upstream source.
  store_nm <- stringr::str_replace_all(store_nm, '(\r|\n| | )+', '')
  tel_no <- stringr::str_replace_all(tel_no, '(\r|\n| )+', '')

  # data.frame() would silently recycle columns whose lengths happen to
  # divide evenly, misaligning names, addresses and phone numbers.
  n <- length(store_nm)
  if (length(tel_no) != n || length(addr) != n) {
    stop(
      "Store table columns have different lengths on ", url,
      " (store_nm: ", n, ", tel_no: ", length(tel_no),
      ", addr: ", length(addr), ")",
      call. = FALSE
    )
  }

  # rep() keeps the constant columns valid when the page has no data rows.
  data.frame(
    brand_nm = rep('富士沖印', n),
    store_nm = store_nm,
    addr = addr,
    tel_no = tel_no,
    data_dt = rep(data_dt, n),
    stringsAsFactors = FALSE
  )
}

#' Scrape the FujiFilm photo-shop store lists
#'
#' Downloads the three regional store-list pages (north, central, south) from
#' www.fujifilm.com.tw, reads the table with id "senfe" on each page and
#' stacks the rows into a single data frame.
#'
#' @return A data frame with character columns `brand_nm`, `store_nm`,
#'   `addr`, `tel_no` and `data_dt` (the current date as "YYYYMMDD"). A page
#'   without data rows contributes zero rows.
#' @details Stops with an error when a page cannot be downloaded or when the
#'   three table columns of a page do not have the same number of cells.
getFuJiFilm <- function(){
  # Unnamed on purpose: names here would leak into the row names of the
  # combined data frame.
  region_urls <- c(
    'http://www.fujifilm.com.tw/fdishop/index_001.html',  # north
    'http://www.fujifilm.com.tw/fdishop/index_002.html',  # central
    'http://www.fujifilm.com.tw/fdishop/index_003.html'   # south
  )

  # Evaluate the date once so all rows share the same stamp even if the run
  # crosses midnight.
  data_dt <- format(Sys.Date(), "%Y%m%d")

  regions <- lapply(region_urls, .getFuJiFilmRegion, data_dt = data_dt)
  do.call(rbind, regions)
}
# leoluyi/address_crawler documentation built on May 21, 2019, 5:09 a.m.