draft/getLevis.R

#' Scrape Levi's Taiwan store locations
#'
#' Downloads the store-locator landing page, follows every link found in the
#' element with id "cityList", and collects the text of the "addStore",
#' "addAddress" and "addPhone" nodes from each linked page.
#'
#' Requires network access. Relies on `GET`/`content` (httr), `xpathSApply`/
#' `xmlAttrs`/`xmlValue` (XML), `str_replace_all` (stringr) and a `toUTF8`
#' helper being available in the calling environment.
#'
#' @return A data.frame with character columns `brand_nm`, `store_nm`,
#'   `tel_no` and `addr`, one row per store. A zero-row data.frame with the
#'   same columns is returned when no city links or no stores are found.
getLevis <- function() {
  # Zero-row result with the same columns as the normal return value.
  empty_result <- data.frame(
    brand_nm = character(0), store_nm = character(0),
    tel_no = character(0), addr = character(0),
    stringsAsFactors = FALSE
  )

  # Text of every node matching `xpath`, always as a character vector
  # (xpathSApply yields an empty list when nothing matches).
  node_text <- function(doc, xpath) {
    as.character(unlist(xpathSApply(doc, xpath, xmlValue)))
  }

  landing <- GET("http://levi.com.tw/tw/storelocator?loc=a.%20%E5%8F%B0%E5%8C%97%E5%B8%82")
  landing <- content(landing, encoding = 'utf8')

  # Pull the href per node rather than indexing the simplified attribute
  # matrix: that indexing breaks when the anchors have a single attribute
  # (vector result) or differing attribute sets (list result).
  hrefs <- xpathSApply(
    landing, '//*[@id="cityList"]/ul/li/a',
    function(node) xmlAttrs(node)["href"]
  )
  hrefs <- as.character(unlist(hrefs))
  hrefs <- hrefs[!is.na(hrefs)]
  if (length(hrefs) == 0) {
    return(empty_result)
  }

  visitPages <- toUTF8(hrefs)
  # vapply keeps the result a plain, unnamed character vector.
  visitPages <- vapply(visitPages, URLencode, character(1), USE.NAMES = FALSE)

  pages <- lapply(visitPages, function(url) {
    doc <- content(GET(url), encoding = 'utf8')
    store_nm <- node_text(doc, '//*[@id="addStore"]')
    if (length(store_nm) == 0) {
      # A city page without stores would otherwise make data.frame() fail
      # on differing row counts and abort the whole scrape.
      return(NULL)
    }
    # Strip the whole run of leading/trailing whitespace, not just one char.
    store_nm <- str_replace_all(store_nm, '^[[:space:]]+|[[:space:]]+$', '')
    addr <- node_text(doc, '//*[@id="addAddress"]')
    tel_no <- node_text(doc, '//*[@id="addPhone"]')
    data.frame(brand_nm = "Levi's", store_nm, tel_no, addr,
               stringsAsFactors = FALSE)
  })
  pages <- Filter(Negate(is.null), pages)
  if (length(pages) == 0) {
    return(empty_result)
  }

  result <- do.call(rbind, pages)
  result$store_nm <- str_replace_all(result$store_nm, '\t', '')
  # Keep digits only in practice: drop whitespace, parentheses and dashes.
  result$tel_no <- str_replace_all(result$tel_no, '[:space:]|\\(|\\)|-', '')
  result$addr <- str_replace_all(result$addr, '\t|\r|\n', '')
  result
}
leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.