# R/search_for_sale.R

#' @title Get search results for 'satilik'
#' @description Requests one page of the 'satilik' (for sale) search results
#'   from sahibinden.com and returns the page together with paging
#'   information.
#' @param page_url Full url of the page to retrieve. When supplied it is used
#'   as is and the address, paging, view type and sorting parameters are
#'   ignored. By default (`NULL`) the url is built from the other parameters.
#' @param address_city int vector of city ids
#' @param address_town int vector of town ids
#' @param address_district int vector of district ids
#' @param address_quarter int vector of quarter ids
#' @param pagingSize one length int, number of results shown on each page,
#'   20 or 50
#' @param pagingOffset one length int, offset of the first result to show
#' @param viewType one length character, only 'Classic' is supported for now
#' @param sorting one length character, one of "date_desc", "date_asc",
#'   "price_desc", "price_asc", "m2-brut-nu_desc", "m2-brut-nu_asc",
#'   "address_desc", "address_asc" (case-insensitive). Any other value falls
#'   back to "date_desc".
#' @param print_steps Print out progress
#' @return `NULL` when the response status code is not 200, otherwise a list
#'   of the following elements:
#' \describe{
#'   \item{content}{parsed response body (the html document of the search
#'     results), can be written to disk via xml2::write_xml()}
#'   \item{meta}{list of meta data of the search, as returned by parse_meta()}
#'   \item{next_page_url}{full url to the next page in the search, NA if not
#'     any}
#'   \item{prev_page_url}{full url to the previous page in the search, NA if
#'     not any}
#'   \item{url}{url of the page (URL-decoded, trimmed and lower-cased)}
#'   \item{hashed_url}{md5 hash of `url` which might be used as key}
#' }
#' @importFrom magrittr "%>%"
#' @export
#' @examples 
#' # Get search results in Istanbul, Eyup
#' ex <- search_for_sale(address_city = 34, address_town = 421)
#' # Get search results in next page
#' ex2 <- search_for_sale(page_url = ex$next_page_url)
#' # Parse classifieds to data frame
#' classifieds <- parse_classifieds(ex$content)
search_for_sale <- function(
  page_url = NULL,
  address_city = NULL,
  address_town = NULL, 
  address_district = NULL, 
  address_quarter = NULL,
  pagingSize = 50,
  pagingOffset = 0,
  viewType = "Classic",
  sorting = "date_desc",
  print_steps = FALSE) {

  sorting_options <- c("date_desc", "date_asc", 
                       "price_desc", "price_asc", 
                       "m2-brut-nu_desc", "m2-brut-nu_asc",
                       "address_desc", "address_asc")

  # Fall back to the default ordering when `sorting` is not a single known
  # option. The comparison must be made against `sorting_options`; comparing
  # the value with itself would accept anything.
  if (length(sorting) != 1L || !tolower(sorting) %in% sorting_options) {
    sorting <- "date_desc"
  }

  # Turn a vector of ids into a list in which every element carries the same
  # name, so that the parameter is repeated in the query string, e.g.
  # address_quarter=22558&address_quarter=22557. NULL gives an empty list.
  repeated_query <- function(name, ids) {
    ids <- unique(ids)
    entries <- as.list(ids)
    names(entries) <- rep(name, length(ids))
    entries
  }

  if (is.null(page_url)) {
    # Target shape:
    # https://www.sahibinden.com/satilik?
    # address_quarter=22558&address_quarter=22557&address_town=421&
    # sorting=date_desc&address_city=34
    final_url <- httr::parse_url("https://www.sahibinden.com")
    final_url$path <- "satilik"
    final_url$query <- c(
      repeated_query("address_city", address_city),
      repeated_query("address_town", address_town),
      repeated_query("address_district", address_district),
      repeated_query("address_quarter", address_quarter),
      "pagingSize" = pagingSize,
      "pagingOffset" = pagingOffset,
      "viewType" = viewType,
      "sorting" = tolower(sorting)
    )
  } else {
    # An explicit page url takes precedence over every search parameter.
    final_url <- httr::parse_url(page_url)
  }

  request_url <- httr::build_url(final_url)

  # Normalised form of the url; its md5 hash is returned as a lookup key.
  u <- tolower(trimws(utils::URLdecode(request_url)))
  hashed_url <- digest::digest(u, algo = "md5", serialize = FALSE)

  response <- httr::GET(
    url = request_url,
    if (print_steps) httr::verbose() else NULL
  )

  if (httr::status_code(response) != 200) {
    if (print_steps) cat("Data not found!\n")
    return(NULL)
  }

  if (print_steps) cat("Search results are retrieved with success\n")
  content <- httr::content(response)
  meta <- parse_meta(content)

  # Build the url of a neighbouring page: same url as the current request,
  # with pagingOffset taken from the path reported in the page meta data.
  # A missing or NA path means there is no such page.
  neighbour_page_url <- function(path) {
    if (length(path) != 1L || is.na(path)) {
      return(NA_character_)
    }
    neighbour <- final_url
    neighbour$query$pagingOffset <- httr::parse_url(path)$query$pagingOffset
    httr::build_url(neighbour)
  }

  list(
    content = content,
    url = u,
    hashed_url = hashed_url,
    next_page_url = neighbour_page_url(meta$next_path),
    prev_page_url = neighbour_page_url(meta$prev_path),
    meta = meta
  )
}
# bhakyuz/sahibinden documentation built on June 12, 2019, 2:28 p.m.