# File: draft/getWellcome.R

# Scrape the full Wellcome (頂好超市) Taiwan store list from wellcome.com.tw.
#
# Steps: read the city names from the <option> elements of the store page,
# POST one search per city to read its store count, then fetch the remaining
# result pages of that city and collect the store fields from every page.
#
# Expects GET, POST, content, htmlParse, cssApply and cssCharacter to be
# available in the calling environment (presumably httr, XML and a CSS
# selector helper package -- confirm against the package imports).
#
# Returns a data.frame of character columns brand_nm, city, store_nm, addr,
# tel, fax and time, one row per store (zero rows if nothing was found).
getWellcome <- function() {
  store_url <- "http://www.wellcome.com.tw/CHT/HOME/Store"
  search_url <- "http://www.wellcome.com.tw/CHT/Home/Search"
  # Page size used by the page-count arithmetic (stores shown per page).
  stores_per_page <- 12
  # Output column name -> CSS selector of the element holding that field.
  selectors <- c(store_nm = "dt", addr = "address", tel = ".tel",
                 fax = ".fax", time = "time")
  # Label prefixes stripped from the scraped text of each column.
  prefixes <- c(addr = "地址:", tel = "電話:", fax = "傳真:",
                time = "營業時間:")

  # Parse an HTTP response body as a UTF-8 HTML document.
  parse_html <- function(res) {
    htmlParse(content(res, "text", encoding = "utf8"), encoding = "utf8")
  }

  # Text of every node matching `selector`, always as a character vector
  # (also when nothing matches).
  css_text <- function(doc, selector) {
    as.character(unlist(cssApply(doc, selector, cssCharacter)))
  }

  # POST one store search for `city` / `page` and return the parsed result.
  fetch_page <- function(city, page) {
    res <- POST(search_url,
                body = list(strCity = city,
                            strArea = "",
                            strRoad = "",
                            strStoreName = "",
                            strRoadSearch = "",
                            strBusinessHr = "0",
                            page = as.character(page),
                            flag = "1",
                            strLanguag = "CHT"))
    parse_html(res)
  }

  # Number of result pages announced by a search result document.
  count_pages <- function(doc, city) {
    n_stores <- suppressWarnings(as.integer(css_text(doc, "h3 > span")))
    n_stores <- n_stores[!is.na(n_stores)]
    if (length(n_stores) == 0L) {
      # Do not fail the whole scrape, but do not skip the city silently.
      warning("No store count found for city: ", city, call. = FALSE)
      return(0L)
    }
    as.integer(ceiling(n_stores[[1L]] / stores_per_page))
  }

  # One data.frame row per store listed in a search result document.
  parse_stores <- function(doc, city) {
    cols <- lapply(selectors, function(sel) css_text(doc, sel))
    n_rows <- unique(lengths(cols))
    if (length(n_rows) != 1L) {
      stop("Store fields have differing lengths for city: ", city,
           call. = FALSE)
    }
    # rep() keeps the frame valid (zero rows) when the page lists no stores.
    data.frame(city = rep(city, n_rows), cols, stringsAsFactors = FALSE)
  }

  # All stores of one city; NULL when the city has no result pages.
  scrape_city <- function(city) {
    first_doc <- fetch_page(city, 1L)
    n_pages <- count_pages(first_doc, city)
    if (n_pages < 1L) {
      return(NULL)
    }
    # Page 1 is already in hand; only pages 2..n need another request.
    more_docs <- lapply(seq_len(n_pages)[-1L],
                        function(page) fetch_page(city, page))
    docs <- c(list(first_doc), more_docs)
    do.call(rbind, lapply(docs, parse_stores, city = city))
  }

  # Get City Names
  city_names <- css_text(parse_html(GET(store_url)), "option")
  # Drop the "選擇" placeholder entries and the "24" business-hour entry.
  # A logical mask is used because x[-grep(...)] empties x when nothing
  # matches.
  city_names <- city_names[!grepl("選擇|24", city_names, perl = TRUE)]

  # Get Store Information
  final <- do.call(rbind, lapply(city_names, scrape_city))
  if (is.null(final)) {
    final <- data.frame(city = character(0), store_nm = character(0),
                        addr = character(0), tel = character(0),
                        fax = character(0), time = character(0),
                        stringsAsFactors = FALSE)
  }
  final <- data.frame(brand_nm = rep("頂好超市", nrow(final)), final,
                      stringsAsFactors = FALSE)

  # Data Cleaning
  for (col in names(prefixes)) {
    final[[col]] <- gsub(prefixes[[col]], "", final[[col]], fixed = TRUE)
  }

  final
}
# Source: leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.