# draft/getCoco.R

#' Scrape the CoCo store list from www.coco-tea.com
#'
#' Posts a store search (form field `country = 335`) to the site, reads the
#' pagination links in the first response to find the last page number, then
#' downloads every result page and pulls the store name, telephone number,
#' address and map link out of the result table.
#'
#' Relies on `POST()`, `GET()`, `content()`, `htmlParse()`, `xpathSApply()`,
#' `xmlAttrs()`, `xmlValue()` and `str_replace_all()` being in scope
#' (presumably from httr, XML and stringr -- confirm against the package
#' imports).
#'
#' @return A data.frame with character columns `brand_nm` (always "CoCo"),
#'   `store_nm`, `tel_no`, `addr` and `store_link`, one row per store. All
#'   whitespace is removed from the scraped columns; parentheses and hyphens
#'   are additionally removed from `tel_no`. The value is returned visibly.
getCoco <- function(){
  siteRoot <- 'http://www.coco-tea.com'

  res <- POST('http://www.coco-tea.com/bin/index.php?Plugin=o_coco&Action=ococosearch',
              body = list(country=335))
  res <- htmlParse(content(res,'text', encoding = 'utf8'), encoding = 'utf8')

  # Pagination anchors are the ones whose href is "pagenum"; the code below
  # treats their title attribute as the site-relative URL of a result page.
  linkAttrs <- xpathSApply(res, '//div/a', xmlAttrs)
  pageLinks <- linkAttrs["title", linkAttrs["href", ] == "pagenum"]
  if (length(pageLinks) == 0) {
    stop('getCoco: no pagination links found in the search response.',
         call. = FALSE)
  }

  # The last pagination link carries the highest page number after "P=".
  lastLink <- unname(pageLinks[length(pageLinks)])
  pagePos <- regexpr('(?<=P=)[0-9]+', lastLink, perl = TRUE)
  if (is.na(pagePos) || pagePos == -1) {
    stop('getCoco: could not find a page number in the last pagination link.',
         call. = FALSE)
  }
  pageEnd <- pagePos + attr(pagePos, 'match.length')
  maxPage <- as.integer(substr(lastLink, pagePos, pageEnd - 1))

  # Rebuild one URL per page by swapping only the digits of the page number.
  # Cutting the string around the match (instead of splitting on "&P=<n>")
  # keeps the URL intact when the page parameter is the last one, where the
  # split approach would have pasted a literal "NA" suffix.
  urlHead <- substr(lastLink, 1, pagePos - 1)
  urlTail <- substr(lastLink, pageEnd, nchar(lastLink))
  visitPage <- paste0(siteRoot, urlHead, seq_len(maxPage), urlTail)

  # Download one result page and return its stores as a data.frame.
  scrapePage <- function(pageUrl) {
    page <- GET(pageUrl)
    page <- htmlParse(content(page,'text', encoding = 'utf8'), encoding = 'utf8')
    data.frame(
      store_nm = xpathSApply(page, '//table/tbody/tr/td[@class="store_name"]', xmlValue),
      tel_no = xpathSApply(page, '//table/tbody/tr/td[@class="center"]', xmlValue),
      addr = xpathSApply(page, '//table/tbody/tr/td[3]', xmlValue),
      store_link = xpathSApply(page, '//table/tbody/tr/td/a[@class="ococomap"]',
                               xmlAttrs)["href", ],
      stringsAsFactors = FALSE
    )
  }
  result <- do.call(rbind, lapply(visitPage, scrapePage))

  # Strip whitespace column by column. Assigning into result[] keeps the
  # data.frame shape even for a single row, where sapply() would have
  # collapsed the result to a plain character vector.
  result[] <- lapply(result, str_replace_all, pattern='[:space:]', replacement='')
  result$tel_no <- str_replace_all(result$tel_no, '\\(|\\)|-', '')

  # Last expression is not an assignment, so the value is returned visibly.
  data.frame(brand_nm = rep('CoCo', nrow(result)), result,
             row.names = NULL, stringsAsFactors = FALSE)
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.