#' @title Get search results for 'satilik' listings
#' @description Get search results for 'satilik' ("for sale") listings, iterating over districts/quarters and following result-page pagination.
#' @param address_town Id of the town to search in. Ignored if \code{districts} is provided.
#' @param districts Data frame of districts/quarters to search. See \link{get_districts} for details.
#' @param sleep Seconds to wait before fetching the next page of results.
#' @param write_dir Directory under which HTML files and logs are written, inside a randomly named subdirectory. If NA, nothing is written to disk.
#' @return A list with two elements: \code{logs}, a tibble with one row of metadata per fetched page, and \code{contents}, a list of the parsed HTML documents.
#' @export
#' @importFrom utils "write.csv"
#' @examples
#' \dontrun{
#' d <- get_districts(address_town = 421)
#' s <- search_for_sale_batch(districts = d[1,], write_dir = NA, sleep = 5)
#' }
search_for_sale_batch <- function(
address_town = 421,
districts,
sleep = 20,
write_dir = getwd()
){
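# Each run writes into a randomly named subdirectory of write_dir so that
# repeated runs do not overwrite each other's HTML files and logs.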
temp_dir <- stringi::stri_rand_strings(n = 1, length = 10)
sts <- Sys.time()
if (missing(districts)) {
districts <- get_districts(address_town = address_town)
}
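# Loop state: next_url holds the pagination link within the current quarter,
# idx_row indexes the current quarter, idx_page counts all pages fetched so far.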
next_url <- NA
idx_row <- 1
idx_page <- 0
logs <- dplyr::tibble()
contents <- list()
cat(paste("Getting searches from", paste(unique(districts$town_city_name),unique(districts$town_name),sep = " ", collapse = ", "), "\n"))
while (!is.na(next_url) || idx_row <= nrow(districts)) {
idx_page <- idx_page + 1
if (is.na(next_url)) {
s <- search_for_sale(address_quarter = districts$quarter_id[idx_row])
} else {
s <- search_for_sale(page_url = next_url)
}
if (!is.na(write_dir)) {
if (!dir.exists(file.path(write_dir, temp_dir))) dir.create(file.path(write_dir, temp_dir))
xml2::write_html(x = s$content, file = file.path(write_dir, temp_dir, paste0(s$hashed_url,".html")))
}
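# Progress estimate: extrapolate the average pages-per-quarter seen so far
# over the remaining quarters, plus the pages still left in the current
# quarter, to derive a completion percentage and an ETA.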
avg_page_per_quarter <- idx_page / idx_row
remaining_pages <- avg_page_per_quarter * (nrow(districts) - idx_row) +
(max(s$meta$pages, 1, na.rm = TRUE) - max(s$meta$current_page, 1, na.rm = TRUE))
total_pages <- remaining_pages + (idx_row * avg_page_per_quarter)
avg_second_per_page <- as.numeric(difftime(Sys.time(), sts, units = "secs")) / idx_page
text_to_print <-
paste(
"Overal:",
paste0(round(idx_page / total_pages * 100, 2), "% ", "completed,", sep = ""),
"ETA: ", Sys.time() + (avg_second_per_page * remaining_pages + sleep),
">> Quarter", idx_row, "/", nrow(districts), "#",
districts$town_name[idx_row], "-", districts$quarter_name[idx_row],
"Page:",
max(s$meta$current_page, 1, na.rm = TRUE), "/", max(s$meta$pages, 1, na.rm = TRUE),
sep = " ")
# Console width via `tput cols` when available; fall back to getOption("width")
# on systems (e.g. Windows, non-interactive sessions) where the call fails.
width <- tryCatch(
max(as.integer(system("tput cols", intern = TRUE)), getOption("width")),
error = function(e) getOption("width")
) - 1
cat("\r", strrep(".", width))
cat("\r", substr(text_to_print, 1, width))
if (is.na(s$next_page_url)) {
idx_row <- idx_row + 1
next_url <- NA
} else {
next_url <- s$next_page_url
}
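# One log row per fetched page, capturing page URLs and result metadata.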
l <- dplyr::tibble(
timestamp = Sys.time(),
hashed_url = s$hashed_url,
url = s$url,
prev_page_url = s$prev_page_url,
next_page_url = s$next_page_url,
title = s$meta$title,
description = s$meta$description,
paging_size = s$meta$paging_size,
pages = s$meta$pages,
current_page = s$meta$current_page,
results = s$meta$results,
rank_last_result = s$meta$rank_last_result
)
logs <- dplyr::bind_rows(logs, l)
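# Persist the log after every page so an interrupted run still leaves a
# usable logs.csv on disk.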
if (!is.na(write_dir)) {
if (!dir.exists(file.path(write_dir, temp_dir))) dir.create(file.path(write_dir, temp_dir))
write.csv(x = logs, file = file.path(write_dir, temp_dir, "logs.csv"), na = "", row.names = FALSE)
}
rm(l)
contents[[length(contents) + 1]] <- s$content
Sys.sleep(sleep)
}
cat("\n")
if (!is.na(write_dir)) {
cat(paste0("HTML files and logs can be found in ", file.path(write_dir, temp_dir), "\n"))
}
return(list(logs = logs, contents = contents))
}