R/getOKmart.R

Defines functions getOKmart

Documented in getOKmart

#' OK Mart
#'
#' Scrape the OK Mart store list, fetch per-store details (food services and
#' telephone number), geocode every store address and return everything as a
#' single table.
#'
#' @param gps_api Geocoding service used to turn addresses into coordinates.
#'   \code{"google"} queries the Google Maps geocoding endpoint; any other
#'   value (including the default \code{"m"}) queries the map.com.tw endpoint.
#'
#' @seealso
#' url: \url{http://www.okmart.com.tw/convenient_shopSearch} \cr
#' data: \url{http://www.okmart.com.tw/convenient_shopSearch_Result.asp}
#'
#' @return data.table whose leading columns are \code{full_nm},
#'   \code{brand_nm}, \code{keyword}, \code{store_nm}, \code{addr},
#'   \code{lon_x}, \code{lat_y} and \code{store_url}, followed by the remaining
#'   scraped columns (\code{shop_no}, \code{food}, \code{tel_no},
#'   \code{data_time}). \code{lon_x}/\code{lat_y} are \code{NA} for addresses
#'   that could not be geocoded.
#' @export
getOKmart <- function(gps_api="m") {
  url <- "http://www.okmart.com.tw/convenient_shopSearch_Result.asp"
  res <- GET(url,
             query = list(
               city = "",
               zipcode = "",
               key = "%",
               service = "",
               `_` = "1470644458935"
             ))
  res_xml <- content(res)

  # Build the base table from the store names. The names are extracted first
  # and only then wrapped in a data.table: `out` does not exist yet, so it
  # cannot be assigned into while it is being created.
  store_nm <- res_xml %>%
    html_nodes("h2") %>%
    html_text() %>%
    str_trim()
  out <- data.table(store_nm = store_nm)

  out$addr <- res_xml %>%
    html_nodes("span") %>%
    html_text()
  # The shop id is the first argument of the showshop('...') call in each link.
  out$shop_no <- res_xml %>%
    html_nodes("a") %>%
    html_attr("href") %>%
    str_match("showshop\\('(.+)',") %>%
    .[, 2]

  # Fetch the detail page of every shop; each call yields exactly one row so
  # the stacked result lines up with the rows of `out`.
  out[, c("food", "tel_no") := lapply(shop_no,
         function(x) {
           res <- GET("http://www.okmart.com.tw/convenient_shopSearch_ShopResult.asp",
                      query = sprintf("id=%s&_=1470644998197", x))

           res_xml <- content(res)
           food <- res_xml %>%
             html_nodes(".food") %>%
             html_text() %>%
             paste0(collapse = ";")

           tel_no <- res_xml %>%
             html_nodes("li") %>%
             html_text() %>%
             grep("電話", ., value = TRUE, perl = TRUE) %>%
             str_extract("\\d{2,}[-]?\\d{5,}")

           # Keep only the first phone match (NA when there is none) so that a
           # shop never contributes zero or several rows.
           data.table(food, tel_no = tel_no[1L])
         }) %>% rbindlist()]

  # Geocode each distinct address once: repeated addresses would only repeat
  # requests and multiply rows in the merge below.
  addr_vec <- unique(out$addr)

  if (gps_api == "google") {
    ## Google Map API
    geo_list <- sapply(addr_vec,
                       function (x) {
                         res_geo <- GET("http://maps.googleapis.com/maps/api/geocode/json",
                                        add_headers(
                                          `Accept-Language`="zh-TW,zh;q=0.8,en-US;q=0.6,en;q=0.4,zh-CN;q=0.2"
                                        ),
                                        query = list(
                                          address = x
                                        ))
                         geo_res <- content(res_geo, "text") %>%
                           jsonlite::fromJSON()

                         if (geo_res$status == "OK") {
                           # lon_x is the longitude (lng) and lat_y the
                           # latitude (lat), matching the map.com.tw branch.
                           data.table(
                             lon_x = geo_res$results$geometry$location$lng,
                             lat_y = geo_res$results$geometry$location$lat
                           )
                         } else {
                           cat(x, "(Google Map address not found)", "\n")
                           return(NULL)
                         }
                       }, USE.NAMES = TRUE, simplify = FALSE)
  } else {
    ## map.com.tw API
    geo_list <- sapply(addr_vec,
                       function (x) {
                         url <- "http://api.map.com.tw/net/GraphicsXY.aspx"
                         res <- GET(url,
                                    add_headers(
                                      Referer = "http://www.map.com.tw/",
                                      `User-Agent` = "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36"
                                    ),
                                    query = list(
                                      search_class = "address",
                                      SearchWord = x,
                                      searchkey = "D43A19151569F32A449B7EDCB8555165B68B5F95"))
                         if (http_error(res)) {
                           return(NULL)
                         }
                         # The body is JSON wrapped in parentheses; strip them
                         # before parsing.
                         res %>%
                           content("text") %>%
                           stringr::str_replace_all('^\\(|\\)$', '') %>%
                           jsonlite::fromJSON() %>%
                           .[[1]] %>%
                           as.data.table %>%
                           .[,.(lon_x = lng, lat_y = lat)]
                       }, USE.NAMES = TRUE, simplify = FALSE)
  }

  ## Merge result
  # geo_list is named by address, so idcol restores the join key; all.x keeps
  # stores whose address could not be geocoded.
  out_dt <- merge(out, rbindlist(geo_list, idcol = "addr"),
        by = "addr", all.x = TRUE)

  # add brand name
  out_dt[, `:=`(brand_nm = "OK超商", keyword = "OK Mart")]

  ## add url, time, full name
  out_dt[, store_url := "http://www.okmart.com.tw/"][
    , data_time := format(Sys.time(), "%Y-%m-%dT%H:%M:%S%z")][
      , full_nm := paste0(brand_nm, store_nm)]

  key_var <- c("full_nm", "brand_nm", "keyword", "store_nm", "addr",
               "lon_x", "lat_y", "store_url")
  ## move the key columns to the front, keeping the rest in their current order
  setcolorder(out_dt, c(key_var, setdiff(names(out_dt), key_var)))

  out_dt
}
leoluyi/CRMaddress documentation built on May 21, 2019, 5:08 a.m.