#' Extract the cleaned text of the nodes matching a CSS selector
#'
#' Selects the nodes of \code{main_page} that match \code{css}, takes their
#' text, trims surrounding whitespace and passes the result through
#' \code{\link{clean_string}}. This is a best-effort lookup: any error raised
#' while querying the page is swallowed and reported as \code{NA}.
#'
#' @param main_page html object from rvest
#' @param css CSS selector identifying the node(s) to read
#'
#' @return A character vector with one cleaned string per matching node, or a
#'   single logical \code{NA} when nothing matches or the lookup fails.
#' @importFrom magrittr "%>%"
#' @export
#'
#' @examples
#' page <- xml2::read_html("inst/rawdata/webpages/gumtree-cat-1-1-2015-10-08.html")
#' get_attribute(page, "#ad-title")
get_attribute <- function(main_page, css) {
  # An error is mapped to the same empty result as "no nodes matched", so
  # both cases fall through to the single NA return below.
  node_text <- tryCatch(
    main_page %>%
      rvest::html_nodes(css) %>%
      rvest::html_text() %>%
      stringr::str_trim(),
    error = function(e) character(0)
  )
  if (length(node_text) == 0) {
    return(NA)
  }
  clean_string(node_text)
}
#' Get lat long from gumtree
#'
#' Reads the \code{data-lat} and \code{data-lng} attributes of the first node
#' matching the \code{.c-pointer} selector. Each attribute is looked up
#' independently and on a best-effort basis: an error during a lookup yields
#' \code{NA} for that coordinate instead of propagating.
#'
#' @param main_page html object
#'
#' @return A length-2 vector \code{c(lat, long)}. Values are returned as read
#'   from the attributes (not converted to numeric); a coordinate that cannot
#'   be read is \code{NA}.
#' @export
#' @importFrom magrittr "%>%"
#'
#' @examples
#' page <- xml2::read_html("inst/rawdata/webpages/gumtree-cat-1-1-2015-10-08.html")
#' get_lat_long(page)
get_lat_long <- function(main_page) {
  # Value of `attr_name` on the first ".c-pointer" node, or NA on error.
  # Indexing with [1] always yields a length-1 result (NA when no node
  # matched), so no separate empty-result check is needed.
  first_pointer_attr <- function(attr_name) {
    tryCatch(
      {
        values <- main_page %>%
          rvest::html_nodes(".c-pointer") %>%
          rvest::html_attr(attr_name)
        values[1]
      },
      error = function(e) NA
    )
  }

  c(first_pointer_attr("data-lat"), first_pointer_attr("data-lng"))
}
#' Get date listed from gumtree
#'
#' Returns the trimmed text of the first \code{dd} element found inside the
#' nodes matching \code{.ad-attribute}. On the Gumtree ad pages this was
#' written for, that element is presumed to hold the listing date (not
#' verified here). The lookup is best-effort: errors are swallowed and
#' reported as \code{NA}.
#'
#' @inheritParams get_lat_long
#'
#' @return A single string as it appears on the page (not parsed into a Date),
#'   or \code{NA} when no such element exists or the lookup fails.
#' @export
#'
#' @examples
#' page <- xml2::read_html("inst/rawdata/webpages/gumtree-cat-1-1-2015-10-08.html")
#' get_date_listed(page)
get_date_listed <- function(main_page) {
  tryCatch(
    {
      dd_text <- main_page %>%
        rvest::html_nodes(".ad-attribute") %>%
        rvest::html_nodes("dd") %>%
        rvest::html_text() %>%
        stringr::str_trim()
      # [1] on an empty vector gives NA, so "no match" needs no special case.
      dd_text[1]
    },
    error = function(e) NA
  )
}
#' Clean string
#'
#' Removes carriage returns and line feeds, collapses every run of two or more
#' spaces into a single space, and drops a trailing space. Leading spaces and
#' other whitespace characters (e.g. tabs) are left untouched. The function is
#' vectorised over \code{str}; \code{NA} elements stay \code{NA}.
#'
#' @param str a character string to be cleaned
#'
#' @return str without \\r and \\n or excess space
#' @export
#'
#' @examples
#' clean_string("bob\r\n")
clean_string <- function(str) {
  # Line breaks are removed outright (not replaced by a space).
  str <- stringr::str_replace_all(str, "[\r\n]", "")
  # Collapse runs of spaces; replacing a single space by itself would be a
  # no-op and leave the excess spaces in place.
  str <- stringr::str_replace_all(str, " {2,}", " ")
  # After collapsing, at most one trailing space can remain.
  stringr::str_replace_all(str, " $", "")
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.