# draft/getUNIQLO.R

#' Scrape the UNIQLO Taiwan store directory
#'
#' Downloads the store index page, follows the link of every store listed
#' there, and collects the fields shown in each store page's detail table.
#'
#' @return A data.frame with one row per store and the character columns
#'   `brand_nm` (always "UNIQLO"), `store_nm` (page heading), `tel_no`,
#'   `addr`, `open`, `sell` and `car` (detail-table rows 1, 2, 3, 4 and 5
#'   respectively). A field that is absent from a store page is `NA`. When the
#'   index page lists no stores, a zero-row data.frame with the same columns
#'   is returned.
#' @details Requires network access. An HTTP error status on any request
#'   raises an error via `httr::stop_for_status()`.
getUNIQLO <- function() {
  index_url <- "http://www.uniqlo.com/tw/zh/stores/"
  detail_row_xpath <- '//*[@id="shopdetail"]/section/section/div/table/tr[%d]/td'

  # Download a page and parse it with the XML package. The response is read
  # as text and parsed explicitly because httr::content() auto-parses HTML
  # into an xml2 document on httr >= 1.0, which XML::xpathSApply() cannot
  # query.
  fetch_html <- function(url) {
    resp <- httr::GET(url)
    httr::stop_for_status(resp)
    XML::htmlParse(
      httr::content(resp, as = "text", encoding = "UTF-8"),
      asText = TRUE,
      encoding = "UTF-8"
    )
  }

  # Text of every node matching `path`, or NA when nothing matches.
  # length() == 0 covers both NULL and the empty list() that xpathSApply()
  # may return for an empty node set.
  xpath_text <- function(doc, path) {
    hits <- XML::xpathSApply(doc, path, XML::xmlValue)
    if (length(hits) == 0L) {
      return(NA_character_)
    }
    as.character(unlist(hits))
  }

  index_doc <- fetch_html(index_url)
  # Read only the href attribute; xmlAttrs would return every attribute of
  # the link and turn the extra ones into bogus URLs.
  hrefs <- XML::xpathSApply(
    index_doc,
    '//*[@id="shoplist"]/section/section/div/section/h1/a',
    XML::xmlGetAttr,
    "href"
  )
  hrefs <- as.character(unlist(hrefs))

  if (length(hrefs) == 0L) {
    return(data.frame(
      brand_nm = character(0),
      store_nm = character(0),
      tel_no = character(0),
      addr = character(0),
      open = character(0),
      sell = character(0),
      car = character(0),
      stringsAsFactors = FALSE
    ))
  }

  store_urls <- sprintf('http://www.uniqlo.com%s', hrefs)

  rows <- lapply(store_urls, function(url) {
    doc <- fetch_html(url)
    data.frame(
      brand_nm = 'UNIQLO',
      store_nm = xpath_text(doc, '//*[@id="shopdetail"]/section/section//h1'),
      tel_no = xpath_text(doc, sprintf(detail_row_xpath, 1L)),
      addr = xpath_text(doc, sprintf(detail_row_xpath, 2L)),
      open = xpath_text(doc, sprintf(detail_row_xpath, 3L)),
      sell = xpath_text(doc, sprintf(detail_row_xpath, 4L)),
      car = xpath_text(doc, sprintf(detail_row_xpath, 5L)),
      stringsAsFactors = FALSE
    )
  })
  result <- do.call(rbind, rows)

  # Normalise whitespace left over from the HTML layout.
  result$store_nm <- stringr::str_replace_all(result$store_nm, '\n', '')
  result$tel_no <- stringr::str_replace_all(result$tel_no, '[:space:]|-', '')
  result$addr <- stringr::str_replace_all(
    stringr::str_replace_all(result$addr, '\n', ' '),
    ' +',
    ' '
  )
  result$open <- stringr::str_replace_all(result$open, '[:space:]', '')
  result$sell <- stringr::str_replace_all(result$sell, '[:space:]', '')
  result$car <- stringr::str_replace_all(
    stringr::str_replace_all(result$car, '[:space:]+', ' '),
    '\n',
    ''
  )
  result
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.