# draft/getTokuyo.R

#' Scrape the Tokuyo store list
#'
#' Reads the area selector on the Tokuyo store-map page, then requests the map
#' page once per area and collects the store name / address pairs found in the
#' listing table of each page.
#'
#' Relies on `GET()`, `content()`, `htmlParse()`, `xpathSApply()`,
#' `xmlAttrs()`, `cssApply()`, `cssCharacter()` and `str_replace_all()` being
#' available on the search path (presumably from httr, XML, CSS and stringr --
#' confirm against the package imports).
#'
#' @return A data.frame with character columns `brand_nm` (always
#'   `"tokuyo"`), `store_nm` and `addr`, one row per store; `NULL` when no
#'   area or no store could be extracted.
getTokuyo <- function() {
  # Download a page and parse it as UTF-8 HTML.
  fetch_html <- function(page_url) {
    res <- GET(page_url)
    htmlParse(content(res, "text", encoding = "utf8"), encoding = "utf8")
  }

  # Collect the area ids from the `value` attribute of each <option>.
  # Reading only that attribute avoids picking up unrelated attributes
  # (e.g. selected="selected") and avoids mangling ids that happen to
  # contain the substrings "value" or "selected".
  html_link <- fetch_html("http://www.tokuyo.com.tw/map.php")
  link <- xpathSApply(
    html_link,
    '//*[@id="content"]/div/div[2]/div[1]/select/option',
    function(node) {
      attrs <- xmlAttrs(node)
      if ("value" %in% names(attrs)) attrs[["value"]] else NULL
    }
  )
  link <- as.character(unlist(link))
  link <- str_replace_all(link, "[:space:]", "")
  link <- link[nchar(link) != 0]

  # Scrape one area page; returns a data.frame, or NULL when the page has no
  # usable listing.
  scrape_area <- function(area) {
    html <- fetch_html(
      sprintf("http://www.tokuyo.com.tw/map.php?area=%s", area)
    )

    cells <- cssApply(
      html,
      "#content > div > div:nth-child(2) > div:nth-child(2) > div > table ",
      cssCharacter
    )
    # No matching table yields an empty result; bail out before strsplit(),
    # which rejects non-character input.
    cells <- as.character(unlist(cells))
    if (length(cells) == 0) {
      return(NULL)
    }

    # Split the table text on newlines, strip all whitespace and drop the
    # pieces that end up empty.
    cells <- as.character(unlist(strsplit(cells, "\n")))
    if (length(cells) == 0) {
      return(NULL)
    }
    cells <- str_replace_all(cells, "[:space:]", "")
    cells <- cells[nchar(cells) != 0]
    if (length(cells) == 0) {
      return(NULL)
    }

    # Assumes the remaining cells alternate store name, address. An odd
    # number of cells makes matrix() warn and recycle -- TODO confirm the
    # table layout if that warning ever shows up.
    cells <- matrix(cells, ncol = 2, byrow = TRUE)

    data.frame(
      brand_nm = "tokuyo",
      store_nm = cells[, 1],
      addr = cells[, 2],
      stringsAsFactors = FALSE
    )
  }

  # lapply() over the ids is safe for zero areas (unlike 1:length(link)) and
  # avoids growing a list inside a loop. rbind() skips the NULL entries and
  # yields NULL when nothing was collected.
  result <- lapply(link, scrape_area)
  do.call(rbind, result)
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.