# draft/get3375.R

#' Scrape the store list published on www.3375.com.tw
#'
#' Downloads one store-listing page per `aid` value, extracts the rows of the
#' result table and returns them as one data frame tagged with the brand
#' name '三商巧福'.
#'
#' @param aids Values substituted for the `aid` query parameter of the listing
#'   URL; one request is made per value. Defaults to `1:3`, the pages this
#'   function has always fetched.
#' @return A data frame with the columns `brand_nm`, `store_nm`, `tel_no`,
#'   `addr` and `tm`. It has zero rows (and a warning is raised) when no table
#'   rows were found on any page.
get3375 <- function(aids = 1:3) {
  # Positions of the cells that are kept from each scraped table row.
  required_cols <- c("V2", "V4", "V6", "V16")

  pages <- lapply(aids, function(aid) {
    url <- sprintf('http://www.3375.com.tw/stores/index.aspx?aid=%s', aid)
    # NOTE(review): `encoding` is not a documented argument of GET(); it is
    # kept exactly as in the original call -- confirm whether it is needed.
    raw_html <- content(GET(url, encoding='utf8'), as='text', encoding='utf8')
    doc      <- htmlParse(raw_html, encoding='utf8')

    rows <- cssApply(
      doc,
      " div.marginBottom > div.search_resultBar > table > tr ",
      cssCharacter
    )
    # Split each row's text on newlines so every fragment becomes a column
    # (V1, V2, ...) once the rows are bound together.
    cells <- str_split(rows, "\n")
    as.data.frame(do.call(rbind, cells))
  })

  stores   <- as.data.frame(do.call(rbind, pages))
  n_stores <- nrow(stores)

  if (n_stores == 0) {
    warning("get3375: no store rows were found on the requested pages",
            call. = FALSE)
  } else if (!all(required_cols %in% names(stores))) {
    # Fail with a readable message instead of data.frame()'s generic
    # "differing number of rows" error when the page layout has changed.
    stop("get3375: unexpected table layout, missing column(s): ",
         paste(setdiff(required_cols, names(stores)), collapse = ", "),
         call. = FALSE)
  }

  strip_spaces <- function(x) gsub(" ", "", x)

  # `[[` is used instead of `$` to avoid partial matching of column names.
  # The brand is repeated explicitly so a zero-row result stays a valid
  # (empty) data frame rather than an error.
  data.frame(
    brand_nm = rep('三商巧福', n_stores),
    store_nm = strip_spaces(stores[["V2"]]),
    tel_no   = strip_spaces(stores[["V4"]]),
    # Drops parenthesised remarks (closed or unclosed), tabs and spaces.
    addr     = gsub("\\(.+\\)|\t|\\(.+\\)| |\\(.+", "", stores[["V6"]]),
    tm       = strip_spaces(stores[["V16"]])
  )
}
# leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.