# draft/getMwd.R

#' Scrape the store list from the superqin.com.tw "mwd" customer pages
#'
#' Downloads page 1 of the paginated customer listing, reads the page count
#' from the pager element, then downloads the remaining pages. From each page
#' the first two columns of the third HTML table are kept and all pages are
#' row-bound together.
#'
#' @return A data frame with the columns `brand_nm` (the constant brand label
#'   "麥味登"), `store_nm` and `addr` (columns 1 and 2 of the scraped table).
#' @details Stops with an error when a request returns an HTTP error status,
#'   when the page count cannot be read from the pager, or when a page does
#'   not contain the expected third table.
getMwd <- function() {
  url_template <- "http://www.superqin.com.tw/mwd/index.php?act=customer&county=0&keyword=&page=%s"
  pager_css <- "#content_text > table  > tr > td:nth-child(2) > div.inside > div.page > table  > tr > td.PAGE-5 > span:nth-child(2)"

  # Download one listing page and return it as a parsed HTML document.
  fetch_page <- function(page) {
    res <- GET(sprintf(url_template, page))
    # Fail here with the HTTP status instead of later with an unrelated
    # subsetting error on an empty/partial document.
    httr::stop_for_status(res)
    txt <- content(res, "text", encoding = "utf8")
    htmlParse(txt, encoding = "utf8")
  }

  # Pull the first two columns of the third table out of a parsed page.
  extract_stores <- function(html, page) {
    tables <- readHTMLTable(html)
    if (length(tables) < 3L || is.null(tables[[3]])) {
      stop("getMwd: store table not found on page ", page, call. = FALSE)
    }
    tables[[3]][, 1:2]
  }

  first_page <- fetch_page(1L)

  # NOTE(review): the second matched pager element is taken to be the total
  # number of pages, as in the original implementation -- confirm against the
  # live page markup.
  pager <- cssApply(first_page, pager_css, cssCharacter)
  n_pages <- suppressWarnings(as.integer(trimws(pager[2])))
  if (length(n_pages) != 1L || is.na(n_pages) || n_pages < 1L) {
    stop("getMwd: could not determine the number of pages", call. = FALSE)
  }

  # Page 1 is already downloaded; only fetch pages 2..n_pages.
  # seq_len(n)[-1] is empty when there is a single page.
  other_pages <- lapply(seq_len(n_pages)[-1], function(page) {
    extract_stores(fetch_page(page), page)
  })
  pages <- c(list(extract_stores(first_page, 1L)), other_pages)

  data_fin <- do.call(rbind, pages)
  names(data_fin) <- c("store_nm", "addr")
  data_fin <- cbind("麥味登", data_fin)
  names(data_fin) <- c("brand_nm", "store_nm", "addr")
  data_fin
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.