R/getHiLife.R

Defines functions getHiLife

Documented in getHiLife

#' HiLife
#' 
#' Basically it's provider is the same as FamilyMart......
#'  
#' @seealso
#' url: \url{http://www.hilife.com.tw/storeInquiry_name.aspx} \cr
#' data: \url{http://www.hilife.com.tw/storeInquiry_name.aspx}
#' 
#' @return data.table
#' @export
getHiLife <- function() {
  url = "http://www.hilife.com.tw/storeInquiry_name.aspx"
  res = POST(url,
             # add_headers(`Content-Type` = "application/x-www-form-urlencoded"),
             body = list(
              `__VIEWSTATE` = "/wEPDwUJNDc1NTA2MDgzD2QWAgIHD2QWAgIBD2QWAgIBDxYCHgRUZXh0BSwkKCcjc3RvcmVJbnF1aXJ5X25hbWUnKS5hdHRyKCdjbGFzcycsJ3NlbCcpO2RkO877KLt0PDfBkWyucvAEgbC+HFMuG0sjJMUX4lkdhuw=",
              `__EVENTVALIDATION` = "/wEdAAOt/Z7/oNOMvQ7eXvUiw2sGBfapguS6OYFwN7hoQk5FPegPUnnQ1NWw7q+TtnLeKzNBR1/+n0EQeoDi+kjWQ+5n8x9nh9SuQ86tRbIlGvyo/w==",
              `__VIEWSTATEGENERATOR` = "1E120BCF",
              btnQuery = "+搜++尋+",
              txtKeyWord = "%"),
            encode = "form")
  res_xml <- content(res)
  dt_hilife <- res_xml %>% 
    html_nodes("div.searchResults > table") %>% 
    html_table() %>% 
    as.data.table() %>% 
    .[, X4 := NULL]
  setnames(dt_hilife, c("store_nm", "addr", "tel_no"))
  
  ## data cleansing
  dt_hilife <- na.omit(dt_hilife, "store_nm") %>% # remove NA by store name
    unique(by = "store_nm")
  
  
  ## Get GPS from google
  nrow(dt_hilife)
  
  result_gps_list <- lapply(
    seq_along(dt_hilife[, store_nm]),
    function (x) {
      # x = 20
      res_google <- POST("http://www.hilife.com.tw/getGoogleSpot.ashx",
           # add_headers(`Content-Type` = "application/x-www-form-urlencoded"),
           body = list(city_name="", town_name="",shop_id = x),
           encode = "form")
      
      out <- data.table() # pre-allocate
      
      if (!http_error(res_google)) {
      out <- res_google %>% 
        content("text", encoding = "UTF-8") %>% 
        jsonlite::fromJSON() %>% 
        as.data.table()
      } else {
        return(out)
      }
      
      if (length(out)) {
        setnames(out, c("lon_x", "lat_y", "store_nm", "misc"))
        out[, store_nm := store_nm %>% str_replace("^萊爾富\\s*", "")]
        
        misc_html <- xml2::read_html(out[,misc])
        out[, addr := misc_html %>%
              html_text() %>%
              str_match("地址:(.+?)(?:電話:\\d+[-]\\d+)?\\s*$") %>%
              .[, 2]]
        out[, tel_no := misc_html %>%
              html_text() %>%
              str_match("電話:(\\d+[-]?\\d+)\\s*$") %>%
              .[, 2]]
        out[, misc := misc_html %>%
              html_nodes("img") %>%
              html_attr("title") %>%
              na.omit() %>%
              paste0(., collapse = ";")]
      }
    })
  
  result_gps <- result_gps_list %>% 
    rbindlist(use.names = TRUE, fill = TRUE) %>% 
    unique(by = "store_nm") %>% 
    na.omit("store_nm")
  
  dt_hilife <- result_gps
  # ## join
  # dt_hilife[result_gps, on = (store_nm = store_nm)]

  # add brand name
  dt_hilife[, `:=`(brand_nm = "萊爾富", keyword = "HiLife")]
  
  ## add url, time, full name
  dt_hilife[, store_url := "http://www.hilife.com.tw"][
    , data_time := format(Sys.time(), "%Y-%m-%dT%H:%M:%S%z")][
      , full_nm := paste0(brand_nm, store_nm)]
  
  key_var <- c("full_nm", "brand_nm", "keyword", "store_nm", "addr",
               "lon_x", "lat_y", "store_url")
               
  ## move brand_nm, key_word to first two colmuns
  setcolorder(dt_hilife, c(key_var, setdiff(names(dt_hilife), key_var)))
  
  dt_hilife
}
leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.