#' HiLife
#'
#' Basically it's provider is the same as FamilyMart......
#'
#' @seealso
#' url: \url{http://www.hilife.com.tw/storeInquiry_name.aspx} \cr
#' data: \url{http://www.hilife.com.tw/storeInquiry_name.aspx}
#'
#' @return data.table
#' @import pbapply
#' @export
get_hilife <- function(max_iter = 5) {
url = "http://www.hilife.com.tw/storeInquiry_name.aspx"
doc_init <- GET(url, ua) %>% content()
VS <- doc_init %>% html_node("#__VIEWSTATE") %>% html_attr("value")
VS_GEN <- doc_init %>% html_node("#__VIEWSTATEGENERATOR") %>% html_attr("value")
EVENTVALIDATION <- doc_init %>% html_node("#__EVENTVALIDATION") %>% html_attr("value")
for (i in 1:max_iter) {
tryCatch({
res <- POST(url,
ua,
add_headers(
Referer = "http://www.hilife.com.tw/storeInquiry_name.aspx"
),
body = list(
`__VIEWSTATE` = VS,
`__EVENTVALIDATION` = EVENTVALIDATION,
`__VIEWSTATEGENERATOR` = VS_GEN,
btnQuery = "+搜++尋+",
txtKeyWord = "%"),
encode = "form")
break
}, error = function(e) {
futile.logger::flog.error(e)
if (i == max_iter) {
stop("Max try exceeded - ", e, call. = FALSE)
}
})
}
res_xml <- content(res)
out_dt <- res_xml %>%
html_nodes("div.searchResults > table") %>%
html_table() %>%
as.data.table() %>%
.[, c("X1", "X5") := NULL]
setnames(out_dt, c("store_nm", "addr", "tel_no"))
## data cleansing
out_dt <- na.omit(out_dt, "store_nm") %>% # remove NA by store name
unique(by = "store_nm")
## Get GIS from google
# nrow(out_dt)
hilife_gis <- function (x) {
# x = 20
res_google <- POST("http://www.hilife.com.tw/getGoogleSpot.ashx",
ua,
body = list(city_name="", town_name="",shop_id = x),
encode = "form")
out <- data.table() # pre-allocate
if (!http_error(res_google)) {
out <- res_google %>%
content("text", encoding = "UTF-8") %>%
jsonlite::fromJSON() %>%
as.data.table()
} else {
return(out)
}
if (length(out)) {
setnames(out,
c("經度", "緯度", "名稱", "說明"),
c("lon_x", "lat_y", "store_nm", "misc"))
out[, store_nm := store_nm %>%
str_replace("^萊爾富\\s*[A-Z0-9]+\\s+", "")]
misc_html <- xml2::read_html(out[,misc])
out[, addr := misc_html %>%
html_text() %>%
str_match("地址:(.+?)(?:電話:\\d+[-]\\d+)?\\s*$") %>%
.[, 2]]
out[, tel_no := misc_html %>%
html_text() %>%
str_match("電話:(\\d+[-]?\\d+)\\s*$") %>%
.[, 2]]
out[, misc := misc_html %>%
html_nodes("img") %>%
html_attr("title") %>%
na.omit() %>%
paste0(., collapse = ";")]
}
}
result_gps_list <- pblapply(
seq_along(out_dt[, store_nm]),
hilife_gis
)
result_gps <- result_gps_list %>%
rbindlist(use.names = TRUE, fill = TRUE) %>%
unique(by = "store_nm") %>%
na.omit("store_nm")
out_dt <- result_gps
# ## join
# out_dt[result_gps, on = (store_nm = store_nm)]
# add brand name
out_dt[, `:=`(brand_nm = "萊爾富", keyword = "HiLife")]
## add url, time, full name
out_dt[, store_url := "http://www.hilife.com.tw"][
, data_time := format(Sys.time(), "%Y-%m-%dT%H:%M:%S%z")][
, full_nm := paste0(brand_nm, store_nm)]
key_var <- c("full_nm", "brand_nm", "keyword", "store_nm", "addr",
"lon_x", "lat_y", "store_url")
## move brand_nm, key_word to first two colmuns
setcolorder(out_dt, c(key_var, setdiff(names(out_dt), key_var)))
out_dt[]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.