# draft/getTaGlasses.R

# Scrape the store list published on www.taglasses.com.
#
# The function downloads the index page (shopping.php), collects the
# pagination links found under `div > a`, downloads every linked page and
# turns the table cells (`tr > td`) of each page into one row per store.
#
# Requires GET()/content()/stop_for_status() from httr, htmlParse(),
# cssApply()/cssLink/cssCharacter and the stringr helpers to be in scope.
#
# Returns a data.frame with the character columns
#   brand_nm  - constant brand label
#   store_nm  - table column 6
#   addr      - table column 9, text before the "(營" marker
#   tel_no    - table column 7
#   open_time - table column 9, text from "營" up to the closing parenthesis
# A zero-row data.frame with the same columns is returned when no
# pagination links or no store cells are found.
# Raises an error when an HTTP request comes back with an error status.
getTaGlasses <- function() {
  index_url <- "http://www.taglasses.com/shopping.php"

  # Table layout constants taken from the page structure the scraper expects.
  n_cols <- 13          # cells per store row
  n_header_cells <- 17  # leading cells that are not store data
  n_footer_cells <- 3   # trailing cells that are not store data
  col_store <- 6
  col_tel <- 7
  col_detail <- 9       # address followed by "(營...)" in one cell

  # Zero-row result with the same columns as a populated one.
  empty_stores <- function() {
    data.frame(brand_nm = character(0),
               store_nm = character(0),
               addr = character(0),
               tel_no = character(0),
               open_time = character(0),
               stringsAsFactors = FALSE)
  }

  # Download `url` and parse the body as a UTF-8 HTML document.
  fetch_document <- function(url) {
    # NOTE(review): `encoding` is not a documented argument of GET(); it is
    # kept exactly as in the original call - confirm whether it can be dropped.
    resp <- GET(url, encoding = "utf8")
    # Fail loudly instead of parsing an error page as if it were data.
    httr::stop_for_status(resp)
    htmlParse(httr::content(resp, "text", encoding = "utf8"),
              encoding = "utf8")
  }

  # Drop `n_head` leading and `n_tail` trailing elements of `x`.
  # Unlike `x[a:b]`, this never counts backwards when `x` is too short;
  # it returns an empty vector instead.
  trim_edges <- function(x, n_head, n_tail) {
    n_keep <- length(x) - n_head - n_tail
    if (n_keep <= 0) {
      return(x[0])
    }
    x[seq.int(n_head + 1L, length.out = n_keep)]
  }

  # Scrape one listing page into a data.frame of stores.
  read_stores <- function(url) {
    doc <- fetch_document(url)
    cells <- cssApply(doc, "tr > td", cssCharacter)
    cells <- str_replace_all(cells, "[:space:]", "")
    cells <- trim_edges(cells, n_header_cells, n_footer_cells)
    if (length(cells) == 0) {
      return(empty_stores())
    }

    # matrix() warns (and recycles) when the cell count is not a multiple
    # of `n_cols`; that warning signals a changed page layout.
    grid <- matrix(cells, ncol = n_cols, byrow = TRUE)
    detail <- grid[, col_detail]

    # Address: everything in front of the "(營" marker.
    addr <- str_extract(detail, "^.+\\(營")
    addr <- str_replace(addr, "\\(營", "")

    # Opening hours: the "(營...)" part without its enclosing parentheses
    # (presumably 營業時間 - verify against the live page).
    open_time <- str_extract(detail, "\\(營.+\\)")
    open_time <- str_replace(open_time, "\\(", "")
    open_time <- str_replace(open_time, "\\)", "")

    data.frame(brand_nm = "上光眼鏡",
               store_nm = grid[, col_store],
               addr = addr,
               tel_no = grid[, col_tel],
               open_time = open_time,
               stringsAsFactors = FALSE)
  }

  # The first and the last `div > a` link are skipped, as in the original
  # `2:(length - 1)` selection.
  links <- cssApply(fetch_document(index_url), "div > a", cssLink)
  links <- trim_edges(links, 1L, 1L)
  if (length(links) == 0) {
    warning("No pagination links found on ", index_url, call. = FALSE)
    return(empty_stores())
  }

  page_urls <- sprintf("http://www.taglasses.com/%s", links)
  # Example page URL:
  # http://www.taglasses.com/shopping.php?Page=2&G0=0&KeyWord=

  pages <- lapply(page_urls, read_stores)
  # Return the value visibly (the original ended on an assignment, which
  # made the result invisible at the console).
  do.call(rbind, pages)
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.