# Turn raw page content into a parsed HTML document.
#
# The content is first passed through the "Latin-ASCII" transliteration of
# `stri_trans_general()` and the result is handed to `read_html()`.
#
# @param content page content accepted by `stri_trans_general()`
# @return the value returned by `read_html()`
.parse_content_to_page <-
  function(content) {
    ascii_content <- stri_trans_general(content, "Latin-ASCII")
    read_html(ascii_content)
  }
# Request the realtor.com landing page and return the cookies it sets.
#
# A fresh curl handle is created, the landing page is fetched with it, and
# the cookies collected on the handle are collapsed into a single
# "name=value; name=value" string suitable for a `cookie` header.
#
# @return a length-one character vector of cookie pairs
.generate_cookies <-
  function() {
    df_call <- generate_url_reference()
    h <-
      new_handle(
        accept_encoding = NULL,
        verbose = FALSE,
        # Use the generated user agent (as the other request builders in this
        # file do), not the referer URL.
        useragent = df_call$userAgent
      )
    # The response itself is not used; the request is made only so that the
    # handle records the cookies sent back by the server.
    curl_fetch_memory(url = "https://www.realtor.com/", handle = h)
    handle_cookies(h) %>%
      as_tibble() %>%
      unite(param, name, value, sep = "=") %>%
      pull(param) %>%
      str_c(collapse = "; ")
  }
#' Property type dictionary
#'
#' Lookup table of the property types
#' that can be used to isolate listings
#'
#' @return a \code{tibble} with the display name (\code{nameType})
#' and the search slug (\code{slugType}) of each property type
#' @export
#' @family dictionary
#' @family listing search
#'
#' @examples
#' dictionary_property_types()
dictionary_property_types <-
  function() {
    # Display name -> search slug; Condo and Townhouse share the same slug.
    type_slugs <- c(
      "House" = "single-family-home",
      "Condo" = "condo-townhome-row-home-co-op",
      "Townhouse" = "condo-townhome-row-home-co-op",
      "Multifamily" = "multi-family-home",
      "Mobile Home" = "mfd-mobile-home",
      "Farm" = "farms-ranches",
      "Land" = "land"
    )
    tibble(
      nameType = names(type_slugs),
      slugType = unname(type_slugs)
    )
  }
#' Property feature dictionary
#'
#' This function returns a dictionary
#' of searchable property features. These features
#' can be used as search parameters in the \link{listing_counts},
#' \link{listings} and \link{map_listings} functions.
#'
#' @return a \code{tibble} with the display name (\code{nameFeature})
#' and the search slug (\code{slugFeature}) of each feature
#' @export
#' @family dictionary
#' @family listing search
#' @examples
#' dictionary_listing_features()
dictionary_listing_features <-
  function() {
    # Display name -> search slug, kept in the original row order.
    feature_slugs <- c(
      "Basement" = "basement",
      "Garage" = "carport",
      "Central Air" = "central_air",
      "Central Heat" = "central_heat",
      "Boat Facilities" = "community_boat_facilities",
      "Community Clubhouse" = "community_clubhouse",
      "Community Golf" = "community_golf",
      "Community Security" = "community_security_features",
      "Community Spa" = "community_spa_or_hot_tub",
      "Community Pool" = "community_swimming_pool",
      "Community Tennis" = "community_tennis_court",
      "Corner Lot" = "corner_lot",
      "Cul De Sac" = "cul_de_sac",
      "Home Office" = "den_or_office",
      "Dining Room" = "dining_room",
      "Disability Features" = "disability_features",
      "Family Room" = "family_room",
      "Fireplace" = "fireplace",
      "Forced Air" = "forced_air",
      "2 Car Garage" = "garage_2_or_more",
      "Golf Course Frontage" = "golf_course_lot_or_frontage",
      "Hardwood Floors" = "hardwood_floors",
      "Mountain Community" = "hill_mountain",
      "Horse Facilities" = "horse_facilities",
      "Lake View" = "lake_view",
      "Laundry Room" = "laundry_room",
      "Ocean View" = "ocean_view",
      "River View" = "river_view",
      "RV Parking" = "rv_or_boat_parking",
      "Senior Community" = "senior_community",
      "Single Story" = "single_story",
      "Spa" = "spa_or_hot_tub",
      "Swimming Pool" = "swimming_pool",
      "Multi Stories" = "two_or_more_stories",
      "Waterfront" = "waterfront"
    )
    tibble(
      nameFeature = names(feature_slugs),
      slugFeature = unname(feature_slugs)
    )
  }
# Build the one-row header table used for search requests.
#
# Starts from `.headers_base()`, replaces the `user-agent` column with the
# value from `generate_url_reference()`, and, when requested, replaces the
# `cookie` column with the result of `.generate_cookies()`.
#
# @param generate_new_cookies if TRUE, overwrite the stored cookie
# @return the header table
.generate_headers <-
  function(generate_new_cookies = T) {
    base_headers <- .headers_base()
    reference <- generate_url_reference()
    headers <- mutate(base_headers, `user-agent` = reference$userAgent)
    if (!generate_new_cookies) {
      return(headers)
    }
    mutate(headers, cookie = .generate_cookies())
  }
# Flatten a list of raw property records into one table and rename its
# columns with the package dictionary.
#
# Each element of `data_properties` is processed by position: NULL fields
# are dropped, non-list fields and list fields are flattened separately and
# bound side by side, and the element's position is stored in
# `numberProperty`. The combined table is then cleaned, renamed through
# `dictionary_realtor_names()`, passed through `.munge_realtor()` and given
# a price-per-square-foot column when `areaPropertySF` is present.
#
# @param data_properties list of property records; each record is a named
#   list of fields
# @return the combined table, one row per record
.parse_data_properties <-
function(data_properties) {
all_results <-
seq_along(data_properties) %>%
map_df(function(x) {
# Progress output, one line per record.
glue("Parsing {x}") %>% cat(fill = T)
data_row <- data_properties[[x]]
# Class of every field of this record, as a column/type lookup table.
df_col_types <-
data_row %>% map(class) %>% as_tibble() %>%
gather(column, type)
# Fields whose value is NULL are removed from the record.
remove <-
df_col_types %>% filter(type == "NULL") %>% pull(column)
data_row <- data_row[!data_row %>% names() %in% remove]
# Split the remaining fields into plain values and nested lists.
df_base_cols <-
df_col_types %>%
filter(!type %in% c("NULL", "list"))
df_list_cols <-
df_col_types %>%
filter(type %in% 'list')
df_base <-
data_row[names(data_row) %in% df_base_cols$column] %>%
flatten_df()
# Nested lists are un-nested by one level before being inspected.
df_list <-
data_row[names(data_row) %in% df_list_cols$column] %>%
flatten()
df_list_class <-
df_list %>%
map(class) %>%
as_tibble() %>%
gather(column, class)
# Drop NULL entries that surfaced after un-nesting.
if (df_list_class %>% filter(class == "NULL") %>% nrow() > 0) {
null_cols <-
df_list_class %>% filter(class == "NULL") %>% pull(column)
df_list <-
df_list[!df_list %>% names() %in% null_cols]
}
# When an element named 'coordinates' is present, the first two flattened
# columns are renamed to lon/lat.
# NOTE(review): this assumes the coordinates come first after flattening.
if (df_list_class %>% filter(column == 'coordinates') %>% nrow() > 0) {
data_list <-
df_list %>%
flatten_df()
names(data_list)[1:2] <-
c('lon', 'lat')
} else {
data_list <-
df_list %>%
flatten_df()
}
df <-
df_base %>%
bind_cols(data_list) %>%
mutate(numberProperty = x) %>%
select(numberProperty, everything())
# Keep the raw bed value as text in groupBeds and coerce bed to integer.
if (df %>% has_name("bed")) {
df <-
df %>%
mutate(groupBeds = as.character(bed),
bed = as.integer(bed))
}
df
})
# Drop every column whose name matches "display" plus id, type and plot.
all_results <-
all_results %>%
dplyr::select(-dplyr::matches("display")) %>%
dplyr::select(-one_of(c("id", "type", "plot")))
df_names <-
dictionary_realtor_names()
# Translate each source column name to its dictionary name; names missing
# from the dictionary are reported and kept unchanged.
actual_names <-
names(all_results) %>%
map_chr(function(name) {
df_row <- df_names %>% filter(nameRealtor == name)
if (df_row %>% nrow() == 0) {
glue("Missing {name}") %>%
cat(fill = T)
return(name)
}
df_row %>% pull(nameActual)
})
# Columns whose translated name matches "remove" are discarded.
all_results <-
all_results %>%
set_names(actual_names) %>%
dplyr::select(-dplyr::matches("remove"))
# Replace the listing slug with an absolute listing URL.
if (all_results %>% has_name("slugLDP")) {
all_results <-
all_results %>%
mutate(urlListing = 'https://www.realtor.com' %>% str_c(slugLDP)) %>%
select(-one_of("slugLDP"))
}
all_results <-
all_results %>%
.munge_realtor()
if (all_results %>% has_name('areaPropertySF')) {
all_results <-
all_results %>%
mutate(priceListingPerSF = priceListing / areaPropertySF)
}
all_results
}
# Convert a search payload (as built by `.generate_data()`) into a one-row
# table of the search parameters, named with `dictionary_search()`.
#
# Non-list entries of the payload and the entries of its `facets` element
# are handled by two parallel pipelines: reshape to item/value pairs, turn
# empty strings into NA and drop them, translate the item names, and spread
# back to a wide row. The two results are bound side by side, columns whose
# name matches "remove_" are dropped, and `.munge_realtor()` is applied.
#
# @param data_param named list holding the search payload
# @return the parameter table
.parse_data_parameters <-
function(data_param) {
# Class of every payload entry, as a column/class lookup table.
df_class <-
data_param %>% map(class) %>% flatten_df() %>%
gather(column, class)
df_base_names <-
df_class %>%
filter(!class %in% c("NULL", "list")) %>%
pull(column)
# Plain (non-list, non-NULL) payload entries.
df_params <-
data_param[names(data_param) %in% df_base_names] %>% flatten_df() %>%
gather(item, value) %>%
mutate_all(funs(ifelse(. == '', NA_character_, .))) %>%
filter(!is.na(value)) %>%
left_join(dictionary_search() %>% rename(item = column)) %>%
select(nameActual, value) %>%
spread(nameActual, value) %>%
suppressMessages()
list_cols <-
df_class %>%
filter(class %in% c("list")) %>%
pull(column)
df_list <- data_param[names(data_param) %in% list_cols]
df_facets <- df_list$facets
# NOTE(review): the next statement computes a value that is never used.
df_facets[!names(df_facets) %in% c("features_hash")]
# Facet entries, excluding features_hash which is handled separately below.
df_list <-
df_facets[!names(df_facets) %in% c("features_hash")] %>%
flatten_df() %>%
gather(item, value) %>%
mutate_all(funs(ifelse(. == '', NA_character_, .))) %>%
filter(!is.na(value)) %>%
left_join(dictionary_search() %>% rename(item = column)) %>%
select(nameActual, value) %>%
spread(nameActual, value) %>%
suppressWarnings() %>%
suppressMessages()
# Requested features are stored as one comma-separated text column.
if (df_facets$features_hash %>% length() > 0) {
df_list <-
df_list %>%
mutate(listFeatures = df_facets$features_hash %>% str_c(collapse = ", "))
}
if (df_list %>% ncol() > 0) {
df_params <-
df_params %>%
bind_cols(df_list)
}
df_params %>%
select(-dplyr::matches("remove_")) %>%
.munge_realtor()
}
# Dictionary that maps the keys of the search payload (`column`) to the
# column names used in returned tables (`nameActual`). Names starting with
# "remove_" mark keys that callers drop after translation.
#
# @return a tibble with columns `column` and `nameActual`
dictionary_search <-
  function() {
    # Payload key -> output name, kept in the original row order.
    key_names <- c(
      "search_criteria" = "locationSearch",
      "city" = "citySearch",
      "county" = "countySearch",
      "discovery_mode" = "isDiscoveryMode",
      "state" = "stateSearch",
      "postal" = "zipcodeSearch",
      "sort" = "remove_sort",
      "position" = "remove_position",
      "facets" = "listFacets",
      "search_controller" = "remove_search_controller",
      "neighborhood" = "neighborhoodSearch",
      "street" = "streetSearch",
      "searchType" = "typeSearch",
      "school" = "schoolSearch",
      "types" = "typeProperty",
      "searchFacetsToDTM" = "remove_searchFacetsToDTM",
      "searchFeaturesToDTM" = "remove_searchFeaturesToDTM",
      "pos" = "remove_pos",
      "page_size" = "remove_zie_page",
      "viewport_height" = "remove_viewport_height",
      "pin_height" = "remove_pin_height",
      "page" = "numberPage",
      "beds_min" = "countBedsMin",
      "beds_max" = "countBedsMax",
      "baths_min" = "countBathsMin",
      "baths_max" = "countBathsMax",
      "price_min" = "priceMin",
      "price_max" = "priceMax",
      "prop_type" = "typeProperty",
      "sqft_min" = "areaSFMin",
      "sqft_max" = "areaSFMax",
      "acre_min" = "areaLandAcreMin",
      "acre_max" = "areaLandAcreMax",
      "lot_unit" = "lotUnit",
      "age_max" = "ageMax",
      "age_min" = "ageMin",
      "radius" = "radiusMiles",
      "pets" = "hasPets",
      "days_on_market" = "countDaysOnMarket",
      "open_house" = "hasOpenHouse",
      "show_listings" = "isShowListing",
      "pending" = "isPending",
      "foreclosure" = "isForeclosure",
      "new_construction" = "isNewConstruction",
      "multi_search" = "isMultiSearch",
      "include_pending_contingency" = "hasPendings",
      "features_hash" = "listFeatures"
    )
    tibble(
      column = names(key_names),
      nameActual = unname(key_names)
    )
  }
# Baseline request headers for the search endpoints, as a one-row table.
#
# The values (cookie, CSRF token, user agent, ...) are hard-coded strings;
# `.generate_headers()` overwrites `user-agent` and, optionally, `cookie`.
#
# The cookie is assembled with `paste0()` so that the value is a single
# line: a line break inside the string literal would end up inside the
# header value.
#
# @return a one-row object of class tbl_df / tbl / data.frame with one
#   column per header
.headers_base <-
  function() {
    structure(
      list(
        cookie = paste0(
          "threshold_value=56; automation=false; clstr=v; clstr_tcv=14; __vst=06687a89-26aa-424d-8d01-4b45dba07097; __ssn=358102aa-3f72-4ee6-855c-b5fcc80b4a25; __ssnstarttime=1520516724; basecamp=false; ajs_user_id=null; ajs_group_id=null; ajs_anonymous_id=%22f6e9aa90-48d4-4348-8ab9-c0bfa18c7593%22; AMCVS_8853394255142B6A0A4C98A4%40AdobeOrg=1; far_geo=%7B%22CityState%22%3A%7B%22City%22%3A%22Marietta%22%2C%22SearchAreaID%22%3A114%2C%22StateID%22%3A%22GA%22%7D%2C%22CityStateSearch%22%3Afalse%2C%22ConfidenceLevel%22%3A0%2C%22Country%22%3A%22USA%22%2C%22Intersection%22%3A%22%22%2C%22Latitude%22%3A33.927089%2C%22Longitude%22%3A-84.541084%2C%22MatchedMethod%22%3A3%2C%22PostalCode%22%3A%2230060%22%2C%22PostalCodeSearch%22%3Atrue%2C%22Street%22%3A%22%22%7D; _agent-profile_session=SGRBeHNiQ0gvWGx6b0F6VVdCSlF3aVFac0M4WkZiTlBHT3ZwQ0ptUXN4YktiS1BsL2NPckt6MVVZWU93YXBzZ1NHSGxyZFVqek5PU3JKbG1aeS9Vd2IrVlg0NEFVeGpUVTJPK0xPN1htWWk0Ylo2b2p0OTM2TURaci9pcko2dDM2MnBESVpOZjF2VHN1OGg3aU9ST2d3PT0tLWFMaFlGdndMR25JVWt4allBQjF5b3c9PQ%3D%3D--f9e08c793eeb963927612fce658aff9db51418fe; search_params=%7B%22geo_slug%22%3A%2230060%22%2C%22_pjax%22%3A%22%23pjax-container%22%2C%22agent_rating_min%22%3A%225%22%2C%22has_photo%22%3A%221%22%2C%22price_range%22%3A%22500000_7000000%22%7D; srp.viewType=map; criteria=loc%3DMarietta%2C+GA%26locSlug%3DMarietta_GA%26lat%3D33.967466%26long%3D-84.521937%26status%3D1%26pos%3D33.632572%2C-85.211782%2C34.246659%2C-84.071951%2C10%26pg%3D1%26pgsz%3D15%26sprefix%3D%2Frealestateandhomes-search%26city%3DMarietta%26state_id%3DGA%26county_fips%3D13067%26county_fips_multi%3D13067-13057-13121; srchID=b2e1aea3f17045a8afa891a6476b9d24; AMCV_8853394255142B6A0A4C98A4%40AdobeOrg=-179204249%7CMCIDTS%7C17599%7CMCMID%7C44784613493650775057191498471160544248%7CMCAID%7CNONE%7CMCOPTOUT-1520546418s%7CNONE; header_slugs=gs%3DMarietta_GA%26lo%3DMarietta%26st%3Dcity%2Cgs%3DCobb-County_GA%26lo%3DCobb%26st%3Dcity; ",
          "_rdc-next_session=SlJZdWxsdHhJdkY0WmVMVTF4SjBHVklJLzdQNTJyNjJ0dlNWK1J6TGo2ck1GVDNnNGRWbitrelZuQ010MDFXUk43WjkzTFhuMEVENkRvYWhwYW9zTnp2ZTF4a0pLbDdlTTFBSWpPOXJDRmh3ZmVGM0lVWFRDRmQ3K0VzdUFiV2hpMm81Vlo1TUhsUy9LdFJHd3RpUXZnPT0tLU9ZbW9xLy9LalB5UU5La0VpZDZFdGc9PQ%3D%3D--5e31ae7054e456e659789555eebafab0925c5602"
        ),
        origin = "https://www.realtor.com",
        `accept-encoding` = "gzip, deflate, br",
        `x-csrf-token` = "PDN4JVcUZeeXyI2vgIJfUx0mpgj65VfxL0fgtdF1neE/VgMi8U1V7cT6bfdkdgWAP77baGC49jAtiECyjuq2xw==",
        `accept-language` = "en-US,en;q=0.9",
        `x-requested-with` = "XMLHttpRequest",
        `x-newrelic-id` = "VwEPVF5XGwYEV1JaDwAD",
        `user-agent` = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.124 Safari/537.36",
        `content-type` = "application/json",
        accept = "application/json, text/javascript, */*; q=0.01",
        referer = "https://www.compass.com/",
        authority = "www.realtor.com",
        dnt = "0"
      ),
      # Column names come from the list itself; only the data-frame
      # attributes need to be supplied.
      row.names = c(NA, -1L),
      class = c("tbl_df", "tbl", "data.frame")
    )
  }
# Template of the search payload.
#
# Returns a named list with the default value of every top-level key and,
# under `facets`, of every search filter. Entries set to NULL are kept in
# the list as named placeholders.
#
# @return a named list
.data_base <-
  function() {
    # Search filters; `multi_search` is an empty *named* list and
    # `features_hash` an empty unnamed one.
    facets <- list(
      beds_min = NULL,
      beds_max = NULL,
      baths_min = NULL,
      baths_max = NULL,
      price_min = NULL,
      price_max = NULL,
      prop_type = "",
      sqft_min = NULL,
      sqft_max = NULL,
      acre_min = NULL,
      acre_max = NULL,
      lot_unit = NULL,
      age_max = NULL,
      age_min = NULL,
      radius = NULL,
      pets = NULL,
      days_on_market = NULL,
      open_house = NULL,
      show_listings = NULL,
      pending = NULL,
      foreclosure = NULL,
      new_construction = NULL,
      multi_search = structure(list(), .Names = character(0)),
      include_pending_contingency = TRUE,
      features_hash = list()
    )
    list(
      search_criteria = "location_slug",
      city = "",
      county = "",
      discovery_mode = TRUE,
      state = "",
      postal = NULL,
      sort = NULL,
      position = NULL,
      facets = facets,
      search_controller = "Search::PropertiesController",
      neighborhood = NULL,
      street = NULL,
      searchType = "city",
      school = NULL,
      types = "property",
      searchFacetsToDTM = "pf_not_visible",
      searchFeaturesToDTM = list(),
      pos = "",
      page_size = 50L, # 2500L
      viewport_height = 1000L,
      pin_height = 240L,
      page = 1L
    )
  }
# Build the search payload for one location and one result page.
#
# Starts from `.data_base()`, resolves the location slug through
# `validate_locations()`, and then overrides every payload entry for which
# a non-NULL argument was supplied.
#
# @param location_name location to search; coerced to character
# @param listing_type listing type; a value containing "rent" (any case)
#   switches the payload to the apartments search controller
# @param search_type accepted for interface compatibility.
#   NOTE(review): this argument is never written to the payload, so
#   `searchType` always keeps its template value -- confirm whether it
#   should be forwarded.
# @param page page number stored in the payload
# @param city_isolated,county_isolated,zipcode_isolated,state_isolated,street_isolated,neighborhood_isolated
#   if not NULL, stored in the matching top-level payload entry
# @param beds_min,beds_max,baths_min,baths_max,price_min,price_max
#   if not NULL, stored as text in the matching facet
# @param features if not NULL, feature names (matched case-insensitively
#   against `dictionary_listing_features()`); matching slugs are stored as
#   one comma-separated string
# @param property_type if not NULL, property type names (matched
#   case-insensitively against `dictionary_property_types()`)
# @param sqft_min,sqft_max,acre_min,acre_max,age_min,age_max,days_on_market,pending
#   if not NULL, stored unchanged in the matching facet
# @param only_open_houses if TRUE, adds the open-house marker to the payload
# @param is_new_construction if not NULL, stored in the `new_construction` facet
# @param include_pending_contingency if not NULL, stored in the facet of the
#   same name
# @return the payload as a named list
.generate_data <-
  function(location_name,
           listing_type = "sale",
           search_type = "city",
           page = 1,
           city_isolated = NULL,
           county_isolated = NULL,
           zipcode_isolated = NULL,
           state_isolated = NULL,
           street_isolated = NULL,
           neighborhood_isolated = NULL,
           beds_min = NULL,
           beds_max = NULL,
           baths_min = NULL,
           baths_max = NULL,
           features = NULL,
           price_min = NULL,
           price_max = NULL,
           property_type = NULL,
           sqft_min = NULL,
           sqft_max = NULL,
           acre_min = NULL,
           acre_max = NULL,
           age_min = NULL,
           age_max = NULL,
           days_on_market = NULL,
           pending = NULL,
           only_open_houses = NULL,
           is_new_construction = NULL,
           include_pending_contingency = TRUE) {
    listing_type_slug <- str_to_lower(listing_type)
    # Session-wide option, left in place as before: it keeps large numbers
    # from being rendered in scientific notation by as.character() below.
    options(scipen = 99999)
    location_name <- as.character(location_name)

    data <- .data_base()
    if (str_detect(listing_type_slug, "rent")) {
      data$search_controller <- "Search::ApartmentsController"
    }

    df_loc_val <-
      validate_locations(locations = location_name, return_message = FALSE)
    data$search_criteria <- df_loc_val$slugLocation

    # Top-level overrides: payload key -> supplied argument.
    top_level <- list(
      city = city_isolated,
      county = county_isolated,
      postal = zipcode_isolated,
      state = state_isolated,
      neighborhood = neighborhood_isolated,
      street = street_isolated
    )
    for (key in names(top_level)) {
      value <- top_level[[key]]
      if (!is.null(value)) {
        data[[key]] <- value
      }
    }

    # Only an explicit TRUE restricts the search to open houses; FALSE used
    # to enable the filter as well because only NULL was treated as "off".
    # NOTE(review): the marker is written at the top level of the payload
    # although the template lists `show_listings` under `facets` -- confirm.
    if (isTRUE(only_open_houses)) {
      data$show_listings <- "oh"
    }

    # Facets that are sent as text.
    facets_as_text <- list(
      beds_min = beds_min,
      beds_max = beds_max,
      baths_min = baths_min,
      baths_max = baths_max,
      price_min = price_min,
      price_max = price_max
    )
    for (key in names(facets_as_text)) {
      value <- facets_as_text[[key]]
      if (!is.null(value)) {
        data$facets[[key]] <- as.character(value)
      }
    }

    if (!is.null(features)) {
      wanted_features <- str_to_lower(features)
      feature_slugs <-
        dictionary_listing_features() %>%
        mutate(nameFeature = str_to_lower(nameFeature)) %>%
        filter(nameFeature %in% wanted_features) %>%
        pull(slugFeature) %>%
        unique() %>%
        str_c(collapse = ",")
      data$facets$features_hash <- feature_slugs
    }

    if (!is.null(property_type)) {
      wanted_types <- str_to_lower(property_type)
      property_slugs <-
        dictionary_property_types() %>%
        mutate(nameType = str_to_lower(nameType)) %>%
        filter(nameType %in% wanted_types) %>%
        pull(slugType) %>%
        unique() %>%
        str_c(collapse = ",")
      data$facets$prop_type <- property_slugs
    }

    # Facets that are passed through unchanged.
    facets_as_is <- list(
      sqft_min = sqft_min,
      sqft_max = sqft_max,
      acre_min = acre_min,
      acre_max = acre_max,
      days_on_market = days_on_market,
      pending = pending,
      new_construction = is_new_construction,
      age_max = age_max,
      age_min = age_min,
      include_pending_contingency = include_pending_contingency
    )
    for (key in names(facets_as_is)) {
      value <- facets_as_is[[key]]
      if (!is.null(value)) {
        data$facets[[key]] <- value
      }
    }

    data$page <- page
    data
  }
# Count the listings that match a search for a single location.
#
# Builds the search payload, POSTs it as JSON to the result-count endpoint
# and returns the validated location joined with the search parameters and
# the reported count (`countListings`).
#
# @param location_name location to search; a name containing "county"
#   (any case) switches `search_type` to "county"
# @param listing_type listing type, stored in `typeListing`
# @param search_type search type forwarded to `.generate_data()`
# @param only_open_houses if TRUE, restrict the search to open houses; any
#   other value (including the default FALSE) leaves the filter off
# @param generate_new_cookies if TRUE, request fresh cookies for the headers
# @param ... the remaining arguments are forwarded unchanged to
#   `.generate_data()`
# @return a table with one row per validated location
.get_location_counts <-
  function(location_name = 10016,
           listing_type = "sale",
           search_type = "city",
           features = NULL,
           city_isolated = NULL,
           county_isolated = NULL,
           zipcode_isolated = NULL,
           state_isolated = NULL,
           street_isolated = NULL,
           neighborhood_isolated = NULL,
           beds_min = NULL,
           only_open_houses = FALSE,
           beds_max = NULL,
           baths_min = NULL,
           baths_max = NULL,
           price_min = NULL,
           price_max = NULL,
           property_type = NULL,
           sqft_min = NULL,
           sqft_max = NULL,
           acre_min = NULL,
           acre_max = NULL,
           age_min = NULL,
           age_max = NULL,
           days_on_market = NULL,
           pending = NULL,
           is_new_construction = NULL,
           include_pending_contingency = TRUE,
           generate_new_cookies = FALSE) {
    if (str_detect(str_to_lower(location_name), "county")) {
      search_type <- "county"
    }
    url <- "https://www.realtor.com/search_result_count"
    headers <-
      .generate_headers(generate_new_cookies = generate_new_cookies)

    # The payload builder treats NULL as "filter off", so anything other
    # than an explicit TRUE (notably the default FALSE) is forwarded as NULL.
    open_house_filter <- if (isTRUE(only_open_houses)) TRUE else NULL

    data <-
      .generate_data(
        location_name = location_name,
        search_type = search_type,
        listing_type = listing_type,
        page = 1,
        city_isolated = city_isolated,
        county_isolated = county_isolated,
        zipcode_isolated = zipcode_isolated,
        state_isolated = state_isolated,
        only_open_houses = open_house_filter,
        street_isolated = street_isolated,
        neighborhood_isolated = neighborhood_isolated,
        beds_min = beds_min,
        beds_max = beds_max,
        baths_min = baths_min,
        baths_max = baths_max,
        price_min = price_min,
        price_max = price_max,
        features = features,
        property_type = property_type,
        sqft_min = sqft_min,
        sqft_max = sqft_max,
        acre_min = acre_min,
        acre_max = acre_max,
        age_min = age_min,
        age_max = age_max,
        days_on_market = days_on_market,
        pending = pending,
        is_new_construction = is_new_construction,
        include_pending_contingency = include_pending_contingency
      )
    df_params <-
      .parse_data_parameters(data_param = data)

    df_call <- generate_url_reference()
    h <-
      new_handle(verbose = FALSE,
                 useragent = df_call$userAgent) %>%
      handle_setopt(copypostfields = data %>% toJSON(auto_unbox = TRUE),
                    customrequest = "POST") %>%
      handle_setheaders(.list = headers %>% as.list())
    resp <-
      curl_fetch_memory(url = url, handle = h)
    json_data <-
      resp$content %>%
      rawToChar() %>%
      fromJSON(flatten = TRUE, simplifyVector = TRUE)
    count <-
      json_data$properties_count

    df_loc_val <-
      validate_locations(locations = location_name)
    df_loc_val <-
      df_loc_val %>%
      mutate(countListings = count)

    # Search parameters to attach to the location row; `id` is a temporary
    # join key and `locationSearch` is taken from the validated location.
    df_params_join <-
      df_params %>%
      mutate(id = 1) %>%
      select(-locationSearch) %>%
      mutate(typeListing = listing_type) %>%
      select(typeListing, everything())

    df_loc_val <-
      df_loc_val %>%
      mutate(id = 1) %>%
      left_join(df_params_join) %>%
      select(locationSearch, typeListing, one_of(names(df_params)), everything()) %>%
      select(-id) %>%
      suppressMessages()
    df_loc_val
  }
#' Listing count
#'
#' This function returns a summary of the
#' number of total listings for your specified locations
#' and parameters
#'
#' @param locations vector of locations
#' @param search_type search type options include \itemize{
#' \item city - \code{default}
#' \item county
#' }
#' @param property_type if not \code{NULL} type of property options
#' see \link{dictionary_property_types} for options
#' @param features if not \code{NULL} list of searchable features
#' see \link{dictionary_listing_features} for options
#' @param city_isolated if not \code{NULL} isolates city
#' @param county_isolated if not \code{NULL} isolates county
#' @param zipcode_isolated if not \code{NULL} isolates zipcode
#' @param state_isolated if not \code{NULL} isolates state
#' @param street_isolated if not \code{NULL} isolates street
#' @param neighborhood_isolated if not \code{NULL} isolates neighborhood
#' @param beds_min if not \code{NULL} minimum bedrooms
#' @param beds_max if not \code{NULL} maximum bedrooms
#' @param baths_min if not \code{NULL} minimum bathrooms
#' @param baths_max if not \code{NULL} maximum bathrooms
#' @param price_min if not \code{NULL} minimum price
#' @param price_max if not \code{NULL} maximum price
#' @param sqft_min if not \code{NULL} minimum square footage
#' @param sqft_max if not \code{NULL} maximum square footage
#' @param acre_min if not \code{NULL} minimum acres
#' @param acre_max if not \code{NULL} maximum acres
#' @param age_min if not \code{NULL} minimum age
#' @param age_max if not \code{NULL} maximum age
#' @param days_on_market if not \code{NULL} count of days on market
#' @param pending if \code{TRUE} include pending
#' @param is_new_construction if \code{TRUE} isolates to new construction
#' @param include_pending_contingency if \code{TRUE} also includes pending and contingent sales
#' @param only_open_houses if \code{TRUE} isolates open houses
#' @param listing_type type of listing \itemize{
#' \item sale
#' \item rent
#' }
#' @param generate_new_cookies if \code{TRUE} generates new cookies
#'
#' @return a \code{tibble}
#' @export
#' @family summary search
#' @family listing search
#'
#' @examples
#' library(dplyr)
#' library(realtR)
#' ## New Construction in selected markets
#' df_nc <-
#' listing_counts(
#' locations = c("Miami Beach, FL", "SOMA, San Francisco, CA", 10013,
#' "Bethesda, MD"),
#' is_new_construction = TRUE
#' )
#'
#' df_nc %>%
#' select(locationSearch, countListings)
listing_counts <-
  function(locations,
           listing_type = "sale",
           search_type = "city",
           features = NULL,
           city_isolated = NULL,
           county_isolated = NULL,
           zipcode_isolated = NULL,
           state_isolated = NULL,
           street_isolated = NULL,
           neighborhood_isolated = NULL,
           beds_min = NULL,
           beds_max = NULL,
           baths_min = NULL,
           baths_max = NULL,
           price_min = NULL,
           price_max = NULL,
           only_open_houses = NULL,
           property_type = NULL,
           sqft_min = NULL,
           sqft_max = NULL,
           acre_min = NULL,
           acre_max = NULL,
           age_min = NULL,
           age_max = NULL,
           days_on_market = NULL,
           generate_new_cookies = FALSE,
           pending = NULL,
           is_new_construction = NULL,
           include_pending_contingency = TRUE) {
    # Errors from a single location are deliberately not caught here:
    # the function has always propagated them to its caller.
    locations %>%
      map_dfr(function(location) {
        .get_location_counts(
          location_name = location,
          listing_type = listing_type,
          search_type = search_type,
          city_isolated = city_isolated,
          county_isolated = county_isolated,
          zipcode_isolated = zipcode_isolated,
          state_isolated = state_isolated,
          street_isolated = street_isolated,
          neighborhood_isolated = neighborhood_isolated,
          beds_min = beds_min,
          beds_max = beds_max,
          baths_min = baths_min,
          baths_max = baths_max,
          price_min = price_min,
          price_max = price_max,
          # Previously omitted, so the documented argument had no effect.
          only_open_houses = only_open_houses,
          property_type = property_type,
          sqft_min = sqft_min,
          sqft_max = sqft_max,
          acre_min = acre_min,
          acre_max = acre_max,
          age_min = age_min,
          age_max = age_max,
          days_on_market = days_on_market,
          pending = pending,
          is_new_construction = is_new_construction,
          include_pending_contingency = include_pending_contingency,
          features = features,
          generate_new_cookies = generate_new_cookies
        )
      }) %>%
      .add_date()
  }
# Download every result page of the JSON search for a single location.
#
# The number of matching listings is requested first; the search is then
# repeated once per result page (50 listings per page, the `page_size` of
# the payload template), each page is parsed with
# `.parse_data_properties()` and joined with the search parameters.
#
# @param location_name location to search
# @param listing_type listing type; forwarded to the count request and the
#   page requests and stored in `typeListing`
# @param search_type search type forwarded to the payload builder
# @param generate_new_cookies if TRUE, request fresh cookies for the headers
# @param ... the remaining arguments are search filters forwarded unchanged
#   to `.get_location_counts()` and `.generate_data()`
# @return a table of de-duplicated listings for the location
.get_location_listings_json <-
  function(location_name = 10016,
           listing_type = "sale",
           search_type = "city",
           city_isolated = NULL,
           county_isolated = NULL,
           zipcode_isolated = NULL,
           state_isolated = NULL,
           street_isolated = NULL,
           features = NULL,
           only_open_houses = NULL,
           neighborhood_isolated = NULL,
           beds_min = NULL,
           beds_max = NULL,
           baths_min = NULL,
           baths_max = NULL,
           price_min = NULL,
           price_max = NULL,
           property_type = NULL,
           sqft_min = NULL,
           sqft_max = NULL,
           acre_min = NULL,
           acre_max = NULL,
           age_min = NULL,
           age_max = NULL,
           days_on_market = NULL,
           pending = NULL,
           is_new_construction = NULL,
           generate_new_cookies = FALSE,
           include_pending_contingency = TRUE) {
    df_count <-
      .get_location_counts(
        location_name = location_name,
        # The count must describe the same search as the page requests, so
        # the listing type and cookie setting are forwarded as well.
        listing_type = listing_type,
        search_type = search_type,
        city_isolated = city_isolated,
        county_isolated = county_isolated,
        zipcode_isolated = zipcode_isolated,
        state_isolated = state_isolated,
        street_isolated = street_isolated,
        neighborhood_isolated = neighborhood_isolated,
        beds_min = beds_min,
        beds_max = beds_max,
        baths_min = baths_min,
        baths_max = baths_max,
        price_min = price_min,
        price_max = price_max,
        property_type = property_type,
        sqft_min = sqft_min,
        sqft_max = sqft_max,
        acre_min = acre_min,
        acre_max = acre_max,
        age_min = age_min,
        age_max = age_max,
        days_on_market = days_on_market,
        pending = pending,
        is_new_construction = is_new_construction,
        include_pending_contingency = include_pending_contingency,
        features = features,
        only_open_houses = only_open_houses,
        generate_new_cookies = generate_new_cookies
      )

    # Round up so that a final, partially filled page is fetched too
    # (integer division dropped it); always request at least one page,
    # also when the count is missing.
    page_size <- 50
    pages <-
      max(1, ceiling(df_count$countListings / page_size), na.rm = TRUE)

    all_properties <-
      seq_len(pages) %>%
      map_dfr(function(page) {
        glue("Parsing page {page} of {pages} for location {location_name}") %>% message()
        data <-
          .generate_data(
            location_name = location_name,
            listing_type = listing_type,
            search_type = search_type,
            page = page,
            city_isolated = city_isolated,
            county_isolated = county_isolated,
            zipcode_isolated = zipcode_isolated,
            state_isolated = state_isolated,
            street_isolated = street_isolated,
            neighborhood_isolated = neighborhood_isolated,
            beds_min = beds_min,
            beds_max = beds_max,
            baths_min = baths_min,
            baths_max = baths_max,
            price_min = price_min,
            price_max = price_max,
            property_type = property_type,
            sqft_min = sqft_min,
            sqft_max = sqft_max,
            acre_min = acre_min,
            acre_max = acre_max,
            age_min = age_min,
            age_max = age_max,
            days_on_market = days_on_market,
            pending = pending,
            is_new_construction = is_new_construction,
            include_pending_contingency = include_pending_contingency,
            features = features,
            only_open_houses = only_open_houses
          )
        df_params <-
          .parse_data_parameters(data_param = data)
        headers <-
          .generate_headers(generate_new_cookies = generate_new_cookies)
        df_call <- generate_url_reference()
        h <-
          new_handle(verbose = FALSE,
                     # Generated user agent, as in the count request, rather
                     # than the referer URL.
                     useragent = df_call$userAgent) %>%
          handle_setopt(copypostfields = data %>% toJSON(auto_unbox = TRUE),
                        customrequest = "POST") %>%
          handle_setheaders(.list = headers %>% as.list())
        resp <-
          curl_fetch_memory(url = "https://www.realtor.com/search_result.json", handle = h)
        json_data <-
          resp$content %>%
          rawToChar() %>%
          fromJSON(flatten = TRUE, simplifyVector = TRUE)
        data_properties <-
          json_data$results$property$items
        all_data <-
          data_properties %>%
          .parse_data_properties() %>%
          mutate(numberPage = page)
        # The property type of the search parameters replaces the parsed one.
        all_data %>%
          select(-one_of("typeProperty")) %>%
          left_join(df_params %>% mutate(numberPage = as.numeric(numberPage))) %>%
          select(names(df_params), everything()) %>%
          suppressMessages()
      })

    # Free the `typeListing` name for the requested listing type.
    if (all_properties %>% has_name("typeListing")) {
      all_properties <-
        all_properties %>%
        rename(typeListingAgency = typeListing)
    }
    all_properties %>%
      select(-numberPage) %>%
      distinct() %>%
      mutate(typeListing = listing_type) %>%
      select(locationSearch, typeListing, everything())
  }
#' Mapped listing data
#'
#' This function returns data
#' from an API that maps the most
#' pertinent matches to a user's input.
#'
#' This function is faster than \code{listings}
#' but returns less detailed information.
#'
#' @param locations vector of locations
#' @param property_type if not \code{NULL} type of property options
#' see \link{dictionary_property_types} for options
#' @param features if not \code{NULL} list of searchable features
#' see \link{dictionary_listing_features} for options
#' @param search_type search type options include \itemize{
#' \item city - \code{default}
#' \item county
#' }
#' @param city_isolated if not \code{NULL} isolates city
#' @param county_isolated if not \code{NULL} isolates county
#' @param zipcode_isolated if not \code{NULL} isolates zipcode
#' @param state_isolated if not \code{NULL} isolates state
#' @param street_isolated if not \code{NULL} isolates street
#' @param neighborhood_isolated if not \code{NULL} isolates neighborhood
#' @param beds_min if not \code{NULL} minimum bedrooms
#' @param beds_max if not \code{NULL} maximum bedrooms
#' @param baths_min if not \code{NULL} minimum bathrooms
#' @param baths_max if not \code{NULL} maximum bathrooms
#' @param price_min if not \code{NULL} minimum price
#' @param price_max if not \code{NULL} maximum price
#' @param sqft_min if not \code{NULL} minimum square footage
#' @param sqft_max if not \code{NULL} maximum square footage
#' @param acre_min if not \code{NULL} minimum acres
#' @param acre_max if not \code{NULL} maximum acres
#' @param age_min if not \code{NULL} minimum age
#' @param age_max if not \code{NULL} maximum age
#' @param days_on_market if not \code{NULL} count of days on market
#' @param pending if \code{TRUE} include pending
#' @param is_new_construction if \code{TRUE} isolates to new construction
#' @param include_pending_contingency if \code{TRUE} also includes pending and contingent sales
#' @param only_open_houses if \code{TRUE} isolates open houses
#' @param generate_new_cookies generate new cookies
#' @param listing_type Listing type \itemize{
#' \item rent
#' \item sale
#' }
#'
#' @return a \code{tibble}
#' @family listing search
#' @family detailed search
#' @export
#' @examples
#' ## New Construction Waterfront actual mapped listings
#' library(dplyr)
#' library(realtR)
#' df_new_water <-
#' map_listings( locations = c("Miami Beach, FL", "Naples, FL"),
#' features = "Waterfront", is_new_construction = TRUE )
#'
#' df_new_water %>%
#' glimpse()
#'
#' df_new_water %>%
#' group_by(cityProperty, stateProperty, typeProperty) %>%
#' summarise( meanPSF = mean(priceListingPerSF, na.rm = T),
#' meanPrice = mean(priceListing, na.rm = T), countListings = n()) %>%
#' ungroup()
#'
#'
map_listings <-
  function(locations = NULL,
           listing_type = "sale",
           search_type = "city",
           city_isolated = NULL,
           county_isolated = NULL,
           zipcode_isolated = NULL,
           state_isolated = NULL,
           street_isolated = NULL,
           features = NULL,
           only_open_houses = NULL,
           neighborhood_isolated = NULL,
           beds_min = NULL,
           beds_max = NULL,
           baths_min = NULL,
           baths_max = NULL,
           price_min = NULL,
           price_max = NULL,
           property_type = NULL,
           sqft_min = NULL,
           sqft_max = NULL,
           acre_min = NULL,
           acre_max = NULL,
           age_min = NULL,
           age_max = NULL,
           days_on_market = NULL,
           pending = NULL,
           is_new_construction = NULL,
           generate_new_cookies = F,
           include_pending_contingency = TRUE) {
    # A location whose download fails contributes an empty table instead of
    # aborting the whole request.
    fetch_listings <-
      possibly(.get_location_listings_json, tibble())

    # Run the search for one location with the filters given to map_listings().
    fetch_one <- function(single_location) {
      fetch_listings(
        location_name = single_location,
        listing_type = listing_type,
        search_type = search_type,
        features = features,
        property_type = property_type,
        only_open_houses = only_open_houses,
        generate_new_cookies = generate_new_cookies,
        city_isolated = city_isolated,
        county_isolated = county_isolated,
        zipcode_isolated = zipcode_isolated,
        state_isolated = state_isolated,
        street_isolated = street_isolated,
        neighborhood_isolated = neighborhood_isolated,
        beds_min = beds_min,
        beds_max = beds_max,
        baths_min = baths_min,
        baths_max = baths_max,
        price_min = price_min,
        price_max = price_max,
        sqft_min = sqft_min,
        sqft_max = sqft_max,
        acre_min = acre_min,
        acre_max = acre_max,
        age_min = age_min,
        age_max = age_max,
        days_on_market = days_on_market,
        pending = pending,
        is_new_construction = is_new_construction,
        include_pending_contingency = include_pending_contingency
      )
    }

    combined <- map_dfr(locations, fetch_one)
    cleaned <- remove_na(combined)
    .add_date(cleaned)
  }
# Baseline request headers for the JSON search, as a one-row table.
#
# All values are hard-coded strings; `.headers_search_json()` overwrites
# `user-agent` and, optionally, `cookie`.
#
# @return a one-row object of class tbl_df / tbl / data.frame with one
#   column per header
.headers_search_json_base <-
  function() {
    header_values <- list(
      cookie = "threshold_value=19; automation=false; clstr=n1; clstr_tcv=7; __vst=657f20bc-079c-4563-8471-adcfaa0e6610; __ssn=d53e9ebc-204b-4b57-a4bb-75a6a6b02731; __ssnstarttime=1520691388; basecamp=false; bcc=false; ajs_user_id=null; ajs_group_id=null; ajs_anonymous_id=%227aae3639-1cd2-4639-81ad-2ef60b8f42ff%22; gpl=v1; seen_ny_prop=true; buyer=false; AMCVS_8853394255142B6A0A4C98A4%40AdobeOrg=1; AMCV_8853394255142B6A0A4C98A4%40AdobeOrg=-179204249%7CMCIDTS%7C17601%7CMCMID%7C37907106676684272897760290470733580491%7CMCAID%7CNONE%7CMCOPTOUT-1520702593s%7CNONE; header_slugs=gs%3DQueens-County_NY%26lo%3DQueens%26st%3Dcounty; criteria=loc%3DQueens+County%2C+NY%26locSlug%3DQueens-County_NY%26lat%3D40.657513%26long%3D-73.838803%26status%3D1%26sl%3Dnc%26pg%3D1%26pgsz%3D15%26features%3Ds2%26sprefix%3D%2Frealestateandhomes-search%26city%3DQueens+County%26state_id%3DNY; srchID=c72a73c777bb485a8fee8f6a1ff4239b; _rdc-next_session=M3JvOFdNVnhuSE1iVmJkcXM5akZCZGk1QzRPUzdwdGhRSmc5ZUVpUitmYU5BUWFVckZFWWZ5NC9tWjVuNEdvaUxBN0dRRVM4SGI3dDJLL283Kzl4OGQ0OGF1RGZqZXlpTmF5ejROY3ZPL1lvQjQrOHE2eU42c3VVbWlhb3V3ZnRzWFM2SkhVUGRlU3pLK1FIUlFSOVZ3PT0tLU5QdlZVTmlzdkIvL3h2Rkw3Q0FMY3c9PQ%3D%3D--cfcc77729e49e8c6623ff7828c1ddf89b1edeae5",
      origin = "https://www.realtor.com",
      `accept-encoding` = "gzip, deflate, br",
      `x-csrf-token` = "Ni86Jtz9U1QtskRgX5dsT2TJuSi885v9nPQMpImiZrWrNN8lFa30jdz/FhqQL8znbXG+lNUbaScjD5zcM/P+zw==",
      `accept-language` = "en-US,en;q=0.9",
      `x-requested-with` = "XMLHttpRequest",
      `x-newrelic-id` = "VwEPVF5XGwYEV1JaDwAD",
      `user-agent` = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.124 Safari/537.36",
      `content-type` = "application/json",
      accept = "text/html, */*; q=0.01",
      referer = "https://nra.com/",
      authority = "www.realtor.com",
      dnt = "0"
    )
    # The list already carries the column names; add the data-frame
    # attributes that make it a one-row table.
    structure(
      header_values,
      row.names = c(NA, -1L),
      class = c("tbl_df", "tbl", "data.frame")
    )
  }
# Assembles the request headers used for JSON search calls.
#
# Takes the header tibble from `.headers_search_json_base()` and overwrites
# its `user-agent` column with the `userAgent` value returned by
# `generate_url_reference()`. When `generate_new_cookies` is true, the
# `cookie` column is additionally overwritten with the string produced by
# `.generate_cookies()`.
#
# Returns the resulting header tibble.
.headers_search_json <-
  function(generate_new_cookies = F) {
    reference <- generate_url_reference()
    base_headers <- .headers_search_json_base()
    headers <- mutate(base_headers, `user-agent` = reference$userAgent)

    if (generate_new_cookies) {
      fresh_cookie <- .generate_cookies()
      headers <- mutate(headers, cookie = fresh_cookie)
    }

    headers
  }
# Scrape every result page of a realtor.com listing search for one location.
#
# Workflow:
#   1. `listing_counts()` is called (wrapped in `possibly()`, so an error is
#      treated as "no results") to learn how many listings match.
#   2. The count is converted to a number of result pages and each page is
#      requested with a POST whose body comes from `.generate_data()`.
#   3. Every property card in the returned HTML is flattened to long
#      name/value rows; names are translated through `dictionary_css_page()`,
#      rows are spread to one row per listing and passed to
#      `.munge_realtor()`.
#   4. The remaining columns of the count result are joined onto the listings.
#
# Arguments are the search filters also accepted by `listings()`;
# `location_name` is a single location.
#
# Returns a tibble of listings, or `invisible(NULL)` when the count lookup
# yields no rows or no result page could be parsed.
.get_location_listings <-
  function(location_name = 10016,
           listing_type = "sale",
           search_type = "city",
           city_isolated = NULL,
           county_isolated = NULL,
           zipcode_isolated = NULL,
           state_isolated = NULL,
           street_isolated = NULL,
           features = NULL,
           only_open_houses = NULL,
           neighborhood_isolated = NULL,
           beds_min = NULL,
           beds_max = NULL,
           baths_min = NULL,
           baths_max = NULL,
           price_min = NULL,
           price_max = NULL,
           property_type = NULL,
           sqft_min = NULL,
           sqft_max = NULL,
           acre_min = NULL,
           acre_max = NULL,
           age_min = NULL,
           age_max = NULL,
           days_on_market = NULL,
           pending = NULL,
           is_new_construction = NULL,
           generate_new_cookies = TRUE,
           include_pending_contingency = TRUE) {
    # Page size assumed by the paging arithmetic (the original divided by 50).
    listings_per_page <- 50

    # Row names that are dropped from every card before translation.
    excluded_names <-
      c(
        "data-rank",
        "id",
        "brand",
        "productID",
        "image",
        "manufacturer",
        "URL",
        "category"
      )

    # Appends the <meta>, SEO span, broker and photo rows of one property card
    # to `df_base`, tags the rows with the card index `x` and drops the
    # excluded names. Shared by both card-parsing branches below.
    collect_card_rows <- function(df_base, fact_node, x) {
      meta_nodes <-
        fact_node %>% html_nodes("meta")
      df_meta <-
        tibble(
          name = meta_nodes %>% html_attr("itemprop"),
          value = meta_nodes %>% html_attr("content")
        )
      df_base <-
        df_base %>%
        bind_rows(df_meta) %>%
        distinct()

      property_nodes <-
        fact_node %>% html_nodes(".seo-wrap span")
      property_names <-
        property_nodes %>% html_attr("itemprop")
      property_values <-
        property_nodes %>% html_text() %>% str_trim() %>% gsub("\\s+", " ", .)
      df_property <-
        tibble(name = property_names, value = property_values) %>%
        filter(!value == "")
      df_base <-
        df_base %>% bind_rows(df_property) %>% distinct()

      broker_node <-
        fact_node %>% html_nodes(".broker-info span")
      broker_name <-
        broker_node %>% html_attr("data-label") %>% discard(is.na)
      broker_value <-
        broker_node %>%
        html_text() %>%
        str_trim() %>%
        str_c(collapse = " ") %>%
        str_remove_all("Brokered by") %>%
        str_trim()
      df_broker <-
        tibble(name = broker_name, value = broker_value)
      # Fix: the original assigned this result to a misspelled `bf_base`, so
      # the broker rows were computed and then silently discarded.
      df_base <-
        df_base %>% bind_rows(df_broker) %>% distinct()

      image_node <-
        fact_node %>% html_nodes(".photo-wrap img")
      if (length(image_node) > 0) {
        # Only the first image is used; its title attribute carries the
        # value stored as `addressPropertyFull`.
        image_url <-
          image_node %>% html_attr("src") %>% .[[1]]
        address <-
          image_node %>% html_attr("title") %>% .[[1]]
        df_base <-
          df_base %>%
          bind_rows(tibble(
            name = c("addressPropertyFull", "urlImage"),
            value = c(address, image_url)
          ))
      }

      df_base %>%
        mutate(numberListing = x) %>%
        filter(!name %in% excluded_names) %>%
        select(numberListing, everything())
    }

    if (location_name %>% str_to_lower() %>% str_detect("county")) {
      search_type <- "county"
    }

    listing_counts.safe <- possibly(listing_counts, tibble())
    df_count <-
      listing_counts.safe(
        locations = location_name,
        listing_type = listing_type,
        search_type = search_type,
        features = features,
        city_isolated = city_isolated,
        county_isolated = county_isolated,
        zipcode_isolated = zipcode_isolated,
        state_isolated = state_isolated,
        street_isolated = street_isolated,
        neighborhood_isolated = neighborhood_isolated,
        beds_min = beds_min,
        beds_max = beds_max,
        baths_min = baths_min,
        baths_max = baths_max,
        price_min = price_min,
        price_max = price_max,
        only_open_houses = only_open_houses,
        property_type = property_type,
        sqft_min = sqft_min,
        sqft_max = sqft_max,
        acre_min = acre_min,
        acre_max = acre_max,
        age_min = age_min,
        age_max = age_max,
        days_on_market = days_on_market,
        generate_new_cookies = generate_new_cookies,
        pending = pending,
        is_new_construction = is_new_construction,
        include_pending_contingency = include_pending_contingency
      )

    if (df_count %>% nrow() == 0) {
      return(invisible())
    }

    # Fix: round up so the final, partially filled page is requested too.
    # Integer division (`%/%`) dropped it, e.g. 120 listings gave 2 pages.
    pages <-
      ceiling(df_count$countListings / listings_per_page)
    pages <- max(1, pages, na.rm = TRUE)

    headers <-
      .headers_search_json(generate_new_cookies = generate_new_cookies)

    all_properties <-
      seq_len(pages) %>%
      map_dfr(possibly(function(page_no) {
        glue("Parsing page {page_no} of {pages} for location {location_name}") %>% cat(fill = TRUE)

        # The first page and the follow-up pages use different endpoints.
        if (page_no == 1) {
          url <- "https://www.realtor.com/search_result"
        } else {
          url <- "https://www.realtor.com/pagination_result"
        }

        data <-
          .generate_data(
            location_name = location_name,
            search_type = search_type,
            listing_type = listing_type,
            page = page_no,
            city_isolated = city_isolated,
            county_isolated = county_isolated,
            zipcode_isolated = zipcode_isolated,
            state_isolated = state_isolated,
            street_isolated = street_isolated,
            neighborhood_isolated = neighborhood_isolated,
            beds_min = beds_min,
            beds_max = beds_max,
            baths_min = baths_min,
            baths_max = baths_max,
            price_min = price_min,
            price_max = price_max,
            property_type = property_type,
            sqft_min = sqft_min,
            sqft_max = sqft_max,
            acre_min = acre_min,
            acre_max = acre_max,
            age_min = age_min,
            age_max = age_max,
            days_on_market = days_on_market,
            pending = pending,
            is_new_construction = is_new_construction,
            include_pending_contingency = include_pending_contingency,
            features = features,
            only_open_houses = only_open_houses
          )

        # NOTE(review): the result is not used below; the call is kept so an
        # error raised by the helper still skips the page as before.
        df_params <-
          .parse_data_parameters(data_param = data)

        df_call <- generate_url_reference()
        h <-
          new_handle(verbose = FALSE,
                     useragent = df_call$urlReferer) %>%
          handle_setopt(copypostfields = data %>% toJSON(auto_unbox = TRUE),
                        customrequest = "POST") %>%
          handle_setheaders(.list = headers %>% as.list())

        resp <-
          curl_fetch_memory(url = url, handle = h)

        # Strip newlines from the response body before parsing it as HTML.
        content <-
          resp$content %>%
          rawToChar() %>%
          str_split("\n") %>%
          flatten_chr() %>%
          str_c(collapse = "")

        page <-
          .parse_content_to_page(content = content)

        page_nodes <-
          page %>% html_nodes(".component_property-card")

        # NOTE(review): this tests `search_type`, while "rent" is documented
        # as a `listing_type` value. Left unchanged because rent searches
        # currently go through the first branch; confirm the intended
        # argument before switching it.
        if (search_type %>% str_to_lower() != "rent") {
          data_prop <-
            seq_along(page_nodes) %>%
            map_dfr(function(x) {
              fact_node <-
                page_nodes[[x]]

              # Advertisement cards carry no listing data.
              if (fact_node %>% html_attr("class") %>% str_detect("ads-wrapper")) {
                return(invisible())
              }

              page_node <-
                fact_node %>%
                html_nodes(".data-wrap") %>% html_attrs()

              if (length(page_node) == 0) {
                return(invisible())
              }

              wrap_nodes <-
                page_node %>% .[[1]]
              df_wrap <-
                tibble(name = names(wrap_nodes),
                       value = as.character(wrap_nodes)) %>%
                filter(!name == "class")

              data_atrs <-
                fact_node %>% html_attrs()

              # NOTE(review): the original also tried to expand the JSON held
              # in "data-lead_attributes"/"data-search_flags", but selected
              # those rows only after this filter had removed them, so that
              # step could never run. The dead step is dropped here; reviving
              # it means selecting the JSON rows before this filter.
              df_base <-
                tibble(name = names(data_atrs),
                       value = data_atrs %>% as.character()) %>%
                bind_rows(df_wrap) %>%
                filter(!name %in% c("class", "data-lead_attributes", "data-search_flags")) %>%
                distinct()

              collect_card_rows(df_base = df_base,
                                fact_node = fact_node,
                                x = x)
            })
        } else {
          data_prop <-
            seq_along(page_nodes) %>%
            map_dfr(function(x) {
              # Fix: `fact_node` was never defined in this branch, so every
              # card raised "object 'fact_node' not found".
              fact_node <-
                page_nodes[[x]]

              collect_card_rows(df_base = tibble(),
                                fact_node = fact_node,
                                x = x)
            })
        }

        # Translate scraped names to output column names.
        df_prop <-
          data_prop %>%
          left_join(dictionary_css_page() %>% rename(name = id)) %>%
          suppressMessages()

        if (df_prop %>% filter(nameActual %>% is.na()) %>% nrow() > 0) {
          missing_names <- df_prop %>% filter(nameActual %>% is.na()) %>%
            pull(name) %>%
            unique() %>%
            str_c(collapse = "\n")
          glue("Missing {missing_names}") %>% cat(fill = TRUE)
        }

        # Blank strings become NA, untranslated and "remove_" names are
        # dropped, and the first value per listing/name is kept before
        # spreading to one row per listing. A plain function replaces the
        # deprecated `funs()` wrapper.
        df_prop <-
          df_prop %>%
          select(numberListing, nameActual, value) %>%
          mutate_all(function(column) {
            ifelse(column == "", NA_character_, column)
          }) %>%
          filter(!is.na(value)) %>%
          filter(!nameActual %>% str_detect("remove_")) %>%
          distinct() %>%
          group_by(numberListing, nameActual) %>%
          mutate(id = 1:n()) %>%
          ungroup() %>%
          filter(id == min(id)) %>%
          select(-id) %>%
          spread(nameActual, value)

        df_prop <-
          df_prop %>%
          .munge_realtor() %>%
          suppressMessages() %>%
          mutate(numberPage = page_no) %>%
          select(numberPage, everything())

        df_prop
      }, tibble()))

    # Every page failed or was empty: return nothing instead of erroring on
    # the column operations below (mirrors the empty-count early return).
    if (all_properties %>% nrow() == 0) {
      return(invisible())
    }

    df_count_merge <-
      df_count %>%
      select(-one_of(
        c(
          "cityProperty",
          "urlListing",
          "countListings",
          "numberPage",
          "typeProperty",
          "zipcodeProperty"
        )
      ))

    # A constant `id` is added to both sides before the join so that the
    # search-level columns can be attached to the listing rows.
    all_data <-
      all_properties %>%
      mutate(id = 1) %>%
      left_join(df_count_merge %>% mutate(id = 1)) %>%
      select(-id) %>%
      select(one_of(names(df_count_merge)), everything()) %>%
      select(-numberPage) %>%
      distinct() %>%
      suppressMessages()

    all_data <-
      all_data %>%
      mutate(urlListing = urlListing %>% gsub("https://www.realtor.com//", "https://www.realtor.com/", .))

    all_data <-
      all_data %>%
      mutate(
        urlPropertyAPI = glue("https://www.realtor.com/property-overview/M{idProperty}") %>% as.character()
      )

    all_data
  }
#' MLS listing data
#'
#' Returns MLS data for
#' specified locations and parameters. Each location is searched in turn;
#' a location whose search fails contributes no rows. The combined result
#' keeps the first row found for each \code{urlListing}.
#'
#' @param locations vector of locations; each element is converted to
#' character before searching. Must not be \code{NULL}
#' @param property_type if not \code{NULL} type of property options
#' see \link{dictionary_property_types} for options
#' @param features if not \code{NULL} list of searchable features
#' see \link{dictionary_listing_features} for options
#' @param search_type search type options include \itemize{
#' \item city - \code{default}
#' \item county
#' }
#' @param city_isolated if not \code{NULL} isolates city
#' @param county_isolated if not \code{NULL} isolates county
#' @param zipcode_isolated if not \code{NULL} isolates zipcode
#' @param state_isolated if not \code{NULL} isolates state
#' @param street_isolated if not \code{NULL} isolates street
#' @param neighborhood_isolated if not \code{NULL} isolates neighborhood
#' @param beds_min if not \code{NULL} minimum bedrooms
#' @param beds_max if not \code{NULL} maximum bedrooms
#' @param baths_min if not \code{NULL} minimum bathrooms
#' @param baths_max if not \code{NULL} maximum bathrooms
#' @param price_min if not \code{NULL} minimum price
#' @param price_max if not \code{NULL} maximum price
#' @param sqft_min if not \code{NULL} minimum square footage
#' @param sqft_max if not \code{NULL} maximum square footage
#' @param acre_min if not \code{NULL} minimum acres
#' @param acre_max if not \code{NULL} maximum acres
#' @param age_min if not \code{NULL} minimum age
#' @param age_max if not \code{NULL} maximum age
#' @param days_on_market if not \code{NULL} count of days on market
#' @param pending if \code{TRUE} include pending
#' @param is_new_construction if \code{TRUE} isolates to new construction
#' @param include_pending_contingency if \code{TRUE} also includes pending and contingent sales
#' @param only_open_houses if \code{TRUE} isolates open houses
#' @param generate_new_cookies if \code{TRUE} generate new cookies
#' @param sleep_time seconds to pause after each location is processed;
#' \code{NULL} disables the pause
#' @param listing_type listing type \itemize{
#' \item rent
#' \item sale
#' }
#'
#' @return a \code{tibble}; an empty \code{tibble} when no location
#' returned any listing
#' @export
#' @family listing search
#' @examples
#' library(realtR)
#' library(dplyr)
#' df_large_houses_with_pools <-
#' listings(
#' locations = c(
#' "Buckhead, Atlanta, GA",
#' 90210,
#' "Greenwich, CT",
#' "Malibu, CA",
#' "Soho, New York, NY"
#' ),
#' beds_min = 4,
#' features = 'Swimming Pool',
#' sqft_min = 3000
#' )
listings <-
  function(locations = NULL,
           listing_type = "sale",
           search_type = "city",
           city_isolated = NULL,
           county_isolated = NULL,
           zipcode_isolated = NULL,
           state_isolated = NULL,
           street_isolated = NULL,
           features = NULL,
           only_open_houses = NULL,
           neighborhood_isolated = NULL,
           beds_min = NULL,
           beds_max = NULL,
           baths_min = NULL,
           baths_max = NULL,
           price_min = NULL,
           price_max = NULL,
           property_type = NULL,
           sqft_min = NULL,
           sqft_max = NULL,
           acre_min = NULL,
           acre_max = NULL,
           age_min = NULL,
           age_max = NULL,
           days_on_market = NULL,
           pending = NULL,
           is_new_construction = NULL,
           generate_new_cookies = FALSE,
           include_pending_contingency = TRUE,
           sleep_time = 5) {
    if (locations %>% is_null()) {
      stop("Enter locations")
    }

    # A failing location yields an empty tibble instead of aborting the
    # whole run.
    .get_location_listings_safe <-
      possibly(.get_location_listings,
               tibble())

    all_data <-
      locations %>%
      map_dfr(function(location) {
        data <-
          .get_location_listings_safe(
            location_name = as.character(location),
            listing_type = listing_type,
            search_type = search_type,
            city_isolated = city_isolated,
            county_isolated = county_isolated,
            zipcode_isolated = zipcode_isolated,
            state_isolated = state_isolated,
            street_isolated = street_isolated,
            generate_new_cookies = generate_new_cookies,
            neighborhood_isolated = neighborhood_isolated,
            beds_min = beds_min,
            beds_max = beds_max,
            baths_min = baths_min,
            baths_max = baths_max,
            price_min = price_min,
            price_max = price_max,
            property_type = property_type,
            sqft_min = sqft_min,
            sqft_max = sqft_max,
            acre_min = acre_min,
            acre_max = acre_max,
            age_min = age_min,
            age_max = age_max,
            days_on_market = days_on_market,
            pending = pending,
            is_new_construction = is_new_construction,
            include_pending_contingency = include_pending_contingency,
            features = features,
            only_open_houses = only_open_houses
          )

        # Pause between locations.
        if (!sleep_time %>% is_null()) {
          Sys.sleep(time = sleep_time)
        }

        data
      }) %>%
      suppressWarnings()

    # Nothing was found for any location: the tibble has no `urlListing`
    # column, so the de-duplication below would fail. Return it as is.
    if (all_data %>% nrow() == 0) {
      return(all_data)
    }

    # Keep the first row seen for each listing URL.
    all_data <-
      all_data %>%
      remove_columns() %>%
      group_by(urlListing) %>%
      mutate(idListing = 1:n()) %>%
      filter(idListing == min(idListing)) %>%
      ungroup() %>%
      select(-idListing) %>%
      .add_date()

    all_data
  }
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.