# draft/getJourdeness.R

#' Scrape the Jourdeness (佐登妮絲) Taiwan store list
#'
#' Downloads the store-register page, cuts the HTML into its `<table>`
#' fragments, and reads every fragment as rows of four `<td>` cells. Column 1
#' is taken as the store name, column 2 as the telephone number and column 4
#' as the address; column 3 is ignored. Rows whose store name contains
#' '據點' (the header rows) are removed from the final result.
#'
#' Requires `httr` (GET, content), `stringr` (str_*) and `XML` (htmlParse,
#' xpathSApply, xmlValue) to be attached or imported by the caller.
#'
#' @return A data frame with character columns `brand_nm`, `store_nm`,
#'   `tel_no` and `addr`. A zero-row data frame with the same columns is
#'   returned when the page contains no usable table.
getJourdeness <- function(){
  url <- 'http://ac.jourdeness.com/je9m/register/taiwan_store'

  # Typed zero-row result so callers always get the same column layout.
  empty <- data.frame(
    brand_nm = character(0),
    store_nm = character(0),
    tel_no = character(0),
    addr = character(0),
    stringsAsFactors = FALSE
  )

  res <- GET(url)
  # Fail with a clear HTTP error instead of trying to parse an error page.
  httr::stop_for_status(res)
  res <- content(res, 'text', encoding = 'utf8')
  # Collapse the document onto one line so '.' in the regexes below can span
  # what used to be several lines.
  res <- str_replace_all(res, '\t|\n|\r', '')

  # Everything from the first '<table' to the last '/table>'.
  resTp <- str_extract(res, '<table.+/table>')
  if (length(resTp) != 1L || is.na(resTp)) {
    return(empty)
  }

  # The text before the first '<table' is always the empty string, so the
  # first piece is dropped; every remaining piece is one table minus its
  # opening '<table' token.
  chunks <- strsplit(resTp, '<table')[[1]][-1]

  result <- lapply(chunks, function(chunk) {
    body <- str_extract(chunk, '( |.)+</table>')
    if (is.na(body)) {
      # No closing tag in this piece (e.g. the outer part of a nested table).
      return(NULL)
    }

    html <- htmlParse(paste0('<table', body), asText = TRUE, encoding = "utf8")
    data <- as.character(xpathSApply(html, '//td', xmlValue))

    if (length(data) == 0L) {
      return(NULL)
    }
    if (length(data) %% 4L != 0L) {
      # matrix() would silently recycle cells and misalign every row.
      warning(
        "Skipping a table whose <td> count (", length(data),
        ") is not a multiple of 4",
        call. = FALSE
      )
      return(NULL)
    }

    data <- matrix(data, ncol = 4, byrow = TRUE)
    data.frame(
      brand_nm = '佐登妮絲',
      store_nm = data[, 1],
      tel_no = data[, 2],
      addr = data[, 4],
      stringsAsFactors = FALSE
    )
  })

  result <- Filter(Negate(is.null), result)
  if (length(result) == 0L) {
    return(empty)
  }

  data_fin <- do.call(rbind, result)
  # Drop the header rows, whose first cell carries the '據點' label.
  data_fin <- data_fin[!str_detect(data_fin$store_nm, '據點'), ]

  data_fin
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.