#' @title Parse classifieds from a search results page into a data frame
#' @description Reads the table with id \code{searchResultsTable} from parsed
#'   HTML and returns one row per classified. Column names are derived from
#'   the table header cells: runs of non-word characters become underscores,
#'   nonspacing marks (accents) are removed, and the result is lower-cased and
#'   made unique. Three extra columns are appended to every row:
#'   \code{classified_url}, \code{store_url} and \code{id}.
#' @details To keep the text on either side of a \code{<br>} from running
#'   together, a \code{<p>} node holding a newline is inserted after every
#'   \code{<br>} inside the result rows before their text is extracted. This
#'   modifies the document that \code{x} refers to.
#' @param x html of classifieds, as accepted by \code{rvest::html_node()}
#' @return data frame of classifieds, one row per
#'   \code{tr.searchResultsItem} element that carries a \code{data-id}
#'   attribute
#' @importFrom magrittr "%>%"
#' @export
parse_classifieds <- function(x) {
  # The header and the rows live in the same table, so look it up once.
  results_table <- rvest::html_node(x, css = "#searchResultsTable")

  # Text of the header cells, used as the raw column labels.
  header <- results_table %>%
    rvest::html_node("thead") %>%
    rvest::html_nodes("td") %>%
    rvest::html_text() %>%
    stringi::stri_trim_both()

  # Turn the labels into syntactic, unique, lower snake_case column names.
  # NOTE(review): the toupper()/tolower() round trip is kept from the original;
  # presumably it normalises locale-dependent case variants -- confirm before
  # removing it.
  header_names <- header %>%
    toupper() %>%
    tolower() %>%
    stringi::stri_replace_all(replacement = " ", regex = "\\W+") %>%
    stringi::stri_trans_general("nfd; [:nonspacing mark:] remove; nfc") %>%
    stringi::stri_trim_both() %>%
    make.names(unique = TRUE) %>%
    stringi::stri_replace_all(replacement = "_", regex = "\\.") %>%
    tolower()

  rows <- rvest::html_nodes(results_table, "tr.searchResultsItem[data-id]")

  base_url <- "https://www.sahibinden.com"

  # Convert a single <tr> node into a one-row data frame.
  row_to_table <- function(row) {
    # One column per <td>, with whitespace runs collapsed to a single space.
    df <- row %>%
      rvest::html_nodes("td") %>%
      rvest::html_text(trim = TRUE) %>%
      stringi::stri_replace_all(replacement = " ", regex = "\\s+") %>%
      as.list() %>%
      as.data.frame(col.names = header_names, stringsAsFactors = FALSE)

    # The thumbnail link is a site-relative path; prefix it with the base URL.
    # A missing link stays a character NA so the column type is stable.
    classified_path <- row %>%
      rvest::html_node("td.searchResultsLargeThumbnail a") %>%
      rvest::html_attr("href")
    df$classified_url <- if (is.na(classified_path)) {
      NA_character_
    } else {
      paste0(base_url, classified_path)
    }

    # The store link is stored as found; html_attr() yields NA when absent.
    df$store_url <- row %>%
      rvest::html_node(".titleIcon.store-icon") %>%
      rvest::html_attr("href")

    df$id <- rvest::html_attr(row, "data-id")
    df
  }

  # <br> contributes no text of its own, so add a newline-bearing sibling after
  # each one; the "\\s+" collapse above then turns it into a single space.
  xml2::xml_find_all(rows, ".//br") %>% xml2::xml_add_sibling("p", "\n")

  dplyr::bind_rows(lapply(rows, row_to_table))
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.