# draft/getAquanaut.R

#' Scrape the Aquanaut store list
#'
#' Downloads the Aquanaut store-locator page, uses its pager links to decide
#' how many listing pages to request, then extracts the area name, store names
#' and addresses from every listing page.
#'
#' Relies on `htmlParse()`, `xpathSApply()`, `xmlAttrs()`, `xmlValue()` and
#' `str_replace_all()` being in scope (presumably from the XML and stringr
#' packages -- confirm against the package imports).
#'
#' @return A data frame with one row per store and the character columns
#'   `brand_nm`, `area`, `store_nm`, `addr` and `data_dt` (the scrape date
#'   formatted as `yyyymmdd`), or `NULL` when no page yields any store.
getAquanaut <- function() {
  url_template <- "http://www.aquanaut.com.tw/aquanaut/stores.aspx?b=%s"
  whitespace <- "(\r|\n| )+"
  # Computed once so every row of a single run carries the same date.
  scrape_date <- format(Sys.Date(), "%Y%m%d")

  # The page count is the number of attribute entries found on the pager
  # links of page 2.
  # NOTE(review): this assumes every pager link carries exactly one attribute
  # (its href) -- confirm against the live markup.
  pager_doc <- htmlParse(sprintf(url_template, 2), encoding = "utf8")
  pager_attrs <- xpathSApply(
    pager_doc, '//*[@id="content"]/section/div[1]/div/a', xmlAttrs
  )
  page_urls <- sprintf(url_template, seq_len(length(pager_attrs)))

  # Two pages are excluded by position. The second removal is applied after
  # the first, so it drops what was originally page 13. Out-of-range negative
  # indices are ignored by R, so shorter lists pass through unchanged.
  # NOTE(review): the reason these pages are excluded is not visible here.
  page_urls <- page_urls[-10]
  page_urls <- page_urls[-12]

  # Parse one listing page into a data frame of stores (NULL if it has none).
  scrape_page <- function(page_url) {
    doc <- htmlParse(page_url, encoding = "utf8")
    area <- xpathSApply(doc, '//*[@id="cityname"]', xmlValue)
    store_nm <- xpathSApply(
      doc, '//*[@class="storesList right"]/li/span[1]', xmlValue
    )
    addr <- xpathSApply(
      doc, '//*[@class="storesList right"]/li/span[2]', xmlValue
    )

    # A page without stores would otherwise make data.frame() fail on
    # mismatched column lengths; skip it explicitly instead.
    if (length(store_nm) == 0L) {
      warning("No stores found on ", page_url, call. = FALSE)
      return(NULL)
    }

    store_nm <- str_replace_all(store_nm, whitespace, "")
    addr <- str_replace_all(addr, whitespace, "")
    # Drop everything from the first opening parenthesis to the end.
    addr <- str_replace_all(addr, "([(]).+$", "")

    data.frame(
      brand_nm = "奧可那",
      area = area,
      store_nm = store_nm,
      addr = addr,
      data_dt = scrape_date,
      stringsAsFactors = FALSE
    )
  }

  # Iterate over the URLs themselves so the loop can never run past (or stop
  # short of) the pages that are actually left after the exclusions above.
  do.call(rbind, lapply(page_urls, scrape_page))
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.