library(tidyverse) library(here) library(lubridate) library(janitor) library(zoo) library(ggthemes) library(forecast) library(stargazer) library(data.table) library(tableone) library(lattice) library(pwr) library(rcompanion) library(scales) library(plm) library(readxl) library(MatchIt) library(lfe) library(Synth) library(gsynth) library(panelView) library(CausalImpact) library(optmatch) library(zeallot)
# Load the package under development from the current working directory
# (presumably where get_property_data() and the other helpers used below are
# defined -- TODO confirm).
devtools::load_all()
# zeallot's %<-% unpacks the value returned by get_property_data() into two
# objects, in order: `metrics` first, `property` second.
c(metrics, property) %<-% get_property_data()
Features to match (these are the features that can potentially explain why a property was chosen for the online visit option):
avg_review_rating_num
: average rating of the property
number_of_reviews_on_the_property_num
: number of reviews a property has
instant_booking_enabled
: whether a property has the instant booking feature enabled
premier_partner_badge
: whether a property is from a premier partner
maximum_occupancy
: maximum occupancy of the property
bedrooms
: number of bedrooms
bathrooms
: number of bathrooms
number_of_photos_of_the_property_on_the_property_page
: number of photos on the property's page
bookings_per_day
: number of bookings per day (before 2019-01-01)
booking_amount_per_day
: booking amount in dollars per day (before 2019-01-01)
booking_requests_per_day
: number of booking requests per day (before 2019-01-01)
property_page_views_per_visit_per_day
: number of page views per visit per day (before 2019-01-01)
displays_in_searchpage_per_day
: number of times the property got displayed in the search page (before 2019-01-01)
# Pre-period baseline: one row per property holding the mean of each activity
# metric over all observations dated strictly before 2019-01-01.
# Missing values are ignored when averaging.
metrics_agg <- metrics %>%
  filter(date < as.Date('2019-01-01')) %>%
  group_by(property_id) %>%
  summarize(
    bookings_per_day = mean(bookings, na.rm = TRUE),
    booking_amount_per_day = mean(booking_amount, na.rm = TRUE),
    booking_requests_per_day = mean(booking_requests, na.rm = TRUE),
    property_page_views_per_visit_per_day = mean(
      property_page_views_visits,
      na.rm = TRUE
    ),
    displays_in_searchpage_per_day = mean(
      number_of_times_the_property_got_displayed_in_search_page,
      na.rm = TRUE
    )
  ) %>%
  ungroup()
# Matching input: property attributes joined with the pre-period metric
# averages, restricted to the identifier, location, treatment flag and the
# matching covariates.
property_to_match <- property %>%
  left_join(metrics_agg, by = 'property_id') %>%
  dplyr::select(
    property_id,
    location,
    # Static property attributes
    avg_review_rating_num,
    number_of_reviews_on_the_property_num,
    instant_booking_enabled,
    premier_partner_badge,
    maximum_occupancy,
    bedrooms,
    bathrooms,
    number_of_photos_of_the_property_on_the_property_page,
    # Treatment indicator
    treat,
    # Pre-period activity averages from metrics_agg
    bookings_per_day,
    booking_amount_per_day,
    booking_requests_per_day,
    property_page_views_per_visit_per_day,
    displays_in_searchpage_per_day
  ) %>%
  # Keep complete cases only: any row with a missing value is removed.
  drop_na() %>%
  # Coerce every column except `location` (including property_id and treat)
  # to numeric.
  mutate_at(vars(-location), as.numeric)
Visualizing common support between the online visit group and non-online visit group:
Logistic regression model (only two features are statistically significant):
avg_review_rating_num
: average ratings
property_page_views_per_visit_per_day
: number of page views per visit per day (before 2019-01-01)
# Propensity model: logistic regression of the treatment flag on all matching
# covariates, fitted on the full matching sample.
propensity_model <- glm(
  treat ~
    avg_review_rating_num +
    number_of_reviews_on_the_property_num +
    instant_booking_enabled +
    premier_partner_badge +
    maximum_occupancy +
    bedrooms +
    bathrooms +
    number_of_photos_of_the_property_on_the_property_page +
    bookings_per_day +
    booking_amount_per_day +
    booking_requests_per_day +
    property_page_views_per_visit_per_day +
    displays_in_searchpage_per_day,
  family = 'binomial',
  data = property_to_match
)
# Coefficient table for the fitted model.
summary(propensity_model)

# Project helper; presumably plots the fitted propensity scores by group so
# the overlap can be inspected -- TODO confirm against its definition.
visualize_common_support(propensity_model)
Matching with optimal algorithm:
# Match treated and control properties on the propensity score using optimal
# matching over the full sample.
#
# No `replace` argument is passed: optimal matching always matches without
# replacement, and matchit() does not use `replace` with method = 'optimal',
# so supplying it only suggested a behaviour that never happened.
property_match <- matchit(
  treat ~
    avg_review_rating_num +
    number_of_reviews_on_the_property_num +
    instant_booking_enabled +
    premier_partner_badge +
    maximum_occupancy +
    bedrooms +
    bathrooms +
    number_of_photos_of_the_property_on_the_property_page +
    bookings_per_day +
    booking_amount_per_day +
    booking_requests_per_day +
    property_page_views_per_visit_per_day +
    displays_in_searchpage_per_day,
  method = 'optimal',
  data = property_to_match,
  # Discard properties outside of common support region
  discard = 'both'
)
# Balance summary of the matched sample.
summary(property_match)

# Extract the matched rows and report how many rows/columns remain.
property_matched <- match.data(property_match)
dim(property_matched)
Perform a t-test on each variable used in the propensity score matching procedure (the expectation is that they do not differ much between the control and treatment groups):
# Covariates used in the propensity model; each one is balance-checked on the
# matched sample below.
# NOTE(review): instant_booking_enabled and premier_partner_badge are described
# as yes/no features, so despite the vector's name not every entry is
# continuous -- confirm a t-test is the intended check for those two.
continuous_vars <- c(
  'avg_review_rating_num',
  'number_of_reviews_on_the_property_num',
  'instant_booking_enabled',
  'premier_partner_badge',
  'maximum_occupancy',
  'bedrooms',
  'bathrooms',
  'number_of_photos_of_the_property_on_the_property_page',
  'bookings_per_day',
  'booking_amount_per_day',
  'booking_requests_per_day',
  'property_page_views_per_visit_per_day',
  'displays_in_searchpage_per_day'
)

# Call the project helper once per variable name, passing the matched data as
# its second argument; the list of results is printed.
map(
  continuous_vars,
  function(var_name) vectorize_ttest_for_property(var_name, property_matched)
)
# Outcome table for the overall comparison: attach the metric rows to the
# matched properties, keep observations dated after 2018-12-31, and average
# each metric per property (the treatment flag is carried along as a grouping
# key).
df_for_test <- property_matched %>%
  left_join(metrics, by = 'property_id') %>%
  filter(date > as.Date('2018-12-31')) %>%
  group_by(property_id, treat) %>%
  summarize(
    bookings_per_prop = mean(bookings, na.rm = TRUE),
    booking_amount_per_prop = mean(booking_amount, na.rm = TRUE),
    booking_requests_per_prop = mean(booking_requests, na.rm = TRUE),
    property_page_views_per_visit_per_prop = mean(
      property_page_views_visits,
      na.rm = TRUE
    ),
    displays_in_searchpage_per_prop = mean(
      number_of_times_the_property_got_displayed_in_search_page,
      na.rm = TRUE
    )
  ) %>%
  ungroup()
Result:
bookings_per_prop
: stat sig (-0.0059)
booking_amount_per_prop
: not stat sig
booking_requests_per_prop
: stat sig (-0.0075)
property_page_views_per_visit_per_prop
: (-1.27991)
displays_in_searchpage_per_prop
: not stat sig
# Outcome columns in df_for_test (presumably the left-hand side of the formula
# is swapped by hand to test each one in turn):
#   bookings_per_prop
#   booking_amount_per_prop
#   booking_requests_per_prop
#   property_page_views_per_visit_per_prop
#   displays_in_searchpage_per_prop
#
# One-sided Welch t-test (unequal variances) of the outcome by treatment group.
# NOTE(review): with the formula interface, 'greater' tests whether the first
# group level (the lower `treat` value) has the larger mean -- confirm this is
# the intended direction.
t.test(
  displays_in_searchpage_per_prop ~ treat,
  data = df_for_test,
  var.equal = FALSE,
  alternative = 'greater'
)
# Florida subset: properties whose location is exactly 'Destin, Florida'.
property_to_match_FL <- filter(
  property_to_match,
  location == 'Destin, Florida'
)

# Propensity model refitted on the Florida subset with the same covariates.
propensity_model_FL <- glm(
  treat ~
    avg_review_rating_num +
    number_of_reviews_on_the_property_num +
    instant_booking_enabled +
    premier_partner_badge +
    maximum_occupancy +
    bedrooms +
    bathrooms +
    number_of_photos_of_the_property_on_the_property_page +
    bookings_per_day +
    booking_amount_per_day +
    booking_requests_per_day +
    property_page_views_per_visit_per_day +
    displays_in_searchpage_per_day,
  family = 'binomial',
  data = property_to_match_FL
)
# Project helper, applied here to the Florida model.
visualize_common_support(propensity_model_FL)
# Optimal propensity-score matching within the Florida subset, discarding
# units outside the common support in both groups.
#
# No `replace` argument is passed: optimal matching always matches without
# replacement, and matchit() does not use `replace` with method = 'optimal'.
property_match_FL <- matchit(
  treat ~
    avg_review_rating_num +
    number_of_reviews_on_the_property_num +
    instant_booking_enabled +
    premier_partner_badge +
    maximum_occupancy +
    bedrooms +
    bathrooms +
    number_of_photos_of_the_property_on_the_property_page +
    bookings_per_day +
    booking_amount_per_day +
    booking_requests_per_day +
    property_page_views_per_visit_per_day +
    displays_in_searchpage_per_day,
  method = 'optimal',
  data = property_to_match_FL,
  discard = 'both'
)

# Matched Florida properties.
property_matched_FL <- match.data(property_match_FL)

# Outcome table for Florida: per-property means of each metric over
# observations dated after 2018-12-31.
df_for_test_FL <- property_matched_FL %>%
  left_join(metrics, by = 'property_id') %>%
  filter(date > as.Date('2018-12-31')) %>%
  group_by(property_id, treat) %>%
  summarize(
    bookings_per_prop = mean(bookings, na.rm = TRUE),
    booking_amount_per_prop = mean(booking_amount, na.rm = TRUE),
    booking_requests_per_prop = mean(booking_requests, na.rm = TRUE),
    property_page_views_per_visit_per_prop = mean(
      property_page_views_visits,
      na.rm = TRUE
    ),
    displays_in_searchpage_per_prop = mean(
      number_of_times_the_property_got_displayed_in_search_page,
      na.rm = TRUE
    )
  ) %>%
  ungroup()
Results for Florida:
bookings_per_prop
: not stat sig
booking_amount_per_prop
: not stat sig
booking_requests_per_prop
: not stat sig
property_page_views_per_visit_per_prop
: not stat sig
displays_in_searchpage_per_prop
: not stat sig
# Outcome columns in df_for_test_FL (presumably the left-hand side of the
# formula is swapped by hand to test each one in turn):
#   bookings_per_prop
#   booking_amount_per_prop
#   booking_requests_per_prop
#   property_page_views_per_visit_per_prop
#   displays_in_searchpage_per_prop
#
# One-sided Welch t-test (unequal variances) on the Florida outcome table.
# NOTE(review): with the formula interface, 'greater' tests whether the first
# group level (the lower `treat` value) has the larger mean -- confirm this is
# the intended direction.
t.test(
  displays_in_searchpage_per_prop ~ treat,
  data = df_for_test_FL,
  var.equal = FALSE,
  alternative = 'greater'
)
# Nevada subset: properties whose location is exactly 'Vegas, Nevada'.
property_to_match_NV <- filter(
  property_to_match,
  location == 'Vegas, Nevada'
)

# Propensity model refitted on the Nevada subset with the same covariates.
propensity_model_NV <- glm(
  treat ~
    avg_review_rating_num +
    number_of_reviews_on_the_property_num +
    instant_booking_enabled +
    premier_partner_badge +
    maximum_occupancy +
    bedrooms +
    bathrooms +
    number_of_photos_of_the_property_on_the_property_page +
    bookings_per_day +
    booking_amount_per_day +
    booking_requests_per_day +
    property_page_views_per_visit_per_day +
    displays_in_searchpage_per_day,
  family = 'binomial',
  data = property_to_match_NV
)
# Project helper, applied here to the Nevada model.
visualize_common_support(propensity_model_NV)
# Optimal propensity-score matching within the Nevada subset, discarding
# units outside the common support in both groups.
#
# No `replace` argument is passed: optimal matching always matches without
# replacement, and matchit() does not use `replace` with method = 'optimal'.
property_match_NV <- matchit(
  treat ~
    avg_review_rating_num +
    number_of_reviews_on_the_property_num +
    instant_booking_enabled +
    premier_partner_badge +
    maximum_occupancy +
    bedrooms +
    bathrooms +
    number_of_photos_of_the_property_on_the_property_page +
    bookings_per_day +
    booking_amount_per_day +
    booking_requests_per_day +
    property_page_views_per_visit_per_day +
    displays_in_searchpage_per_day,
  method = 'optimal',
  data = property_to_match_NV,
  discard = 'both'
)

# Matched Nevada properties. Stored under an NV-specific name so the Florida
# matched data (property_matched_FL) is not overwritten by this section.
property_matched_NV <- match.data(property_match_NV)

# Outcome table for Nevada: per-property means of each metric over
# observations dated after 2018-12-31.
df_for_test_NV <- property_matched_NV %>%
  left_join(metrics, by = 'property_id') %>%
  filter(date > as.Date('2018-12-31')) %>%
  group_by(property_id, treat) %>%
  summarize(
    bookings_per_prop = mean(bookings, na.rm = TRUE),
    booking_amount_per_prop = mean(booking_amount, na.rm = TRUE),
    booking_requests_per_prop = mean(booking_requests, na.rm = TRUE),
    property_page_views_per_visit_per_prop = mean(
      property_page_views_visits,
      na.rm = TRUE
    ),
    displays_in_searchpage_per_prop = mean(
      number_of_times_the_property_got_displayed_in_search_page,
      na.rm = TRUE
    )
  ) %>%
  ungroup()
Results for Nevada:
bookings_per_prop
: not stat sig
booking_amount_per_prop
: not stat sig
booking_requests_per_prop
: not stat sig
property_page_views_per_visit_per_prop
: not stat sig
displays_in_searchpage_per_prop
: not stat sig
# Outcome columns in df_for_test_NV (presumably the left-hand side of the
# formula is swapped by hand to test each one in turn):
#   bookings_per_prop
#   booking_amount_per_prop
#   booking_requests_per_prop
#   property_page_views_per_visit_per_prop
#   displays_in_searchpage_per_prop
#
# One-sided Welch t-test (unequal variances) on the Nevada outcome table.
# NOTE(review): with the formula interface, 'greater' tests whether the first
# group level (the lower `treat` value) has the larger mean -- confirm this is
# the intended direction.
t.test(
  displays_in_searchpage_per_prop ~ treat,
  data = df_for_test_NV,
  var.equal = FALSE,
  alternative = 'greater'
)
# Texas subset: properties whose location is exactly 'Austin, Texas'.
property_to_match_TX <- filter(
  property_to_match,
  location == 'Austin, Texas'
)

# Propensity model refitted on the Texas subset with the same covariates.
propensity_model_TX <- glm(
  treat ~
    avg_review_rating_num +
    number_of_reviews_on_the_property_num +
    instant_booking_enabled +
    premier_partner_badge +
    maximum_occupancy +
    bedrooms +
    bathrooms +
    number_of_photos_of_the_property_on_the_property_page +
    bookings_per_day +
    booking_amount_per_day +
    booking_requests_per_day +
    property_page_views_per_visit_per_day +
    displays_in_searchpage_per_day,
  family = 'binomial',
  data = property_to_match_TX
)
# Project helper, applied here to the Texas model.
visualize_common_support(propensity_model_TX)
# Optimal propensity-score matching within the Texas subset, discarding
# units outside the common support in both groups.
#
# No `replace` argument is passed: optimal matching always matches without
# replacement, and matchit() does not use `replace` with method = 'optimal'.
property_match_TX <- matchit(
  treat ~
    avg_review_rating_num +
    number_of_reviews_on_the_property_num +
    instant_booking_enabled +
    premier_partner_badge +
    maximum_occupancy +
    bedrooms +
    bathrooms +
    number_of_photos_of_the_property_on_the_property_page +
    bookings_per_day +
    booking_amount_per_day +
    booking_requests_per_day +
    property_page_views_per_visit_per_day +
    displays_in_searchpage_per_day,
  method = 'optimal',
  data = property_to_match_TX,
  discard = 'both'
)

# Matched Texas properties.
property_matched_TX <- match.data(property_match_TX)

# Outcome table for Texas: per-property means of each metric over
# observations dated after 2018-12-31.
df_for_test_TX <- property_matched_TX %>%
  left_join(metrics, by = 'property_id') %>%
  filter(date > as.Date('2018-12-31')) %>%
  group_by(property_id, treat) %>%
  summarize(
    bookings_per_prop = mean(bookings, na.rm = TRUE),
    booking_amount_per_prop = mean(booking_amount, na.rm = TRUE),
    booking_requests_per_prop = mean(booking_requests, na.rm = TRUE),
    property_page_views_per_visit_per_prop = mean(
      property_page_views_visits,
      na.rm = TRUE
    ),
    displays_in_searchpage_per_prop = mean(
      number_of_times_the_property_got_displayed_in_search_page,
      na.rm = TRUE
    )
  ) %>%
  ungroup()
Results for Texas:
bookings_per_prop
: stat sig (-0.00871577)
booking_amount_per_prop
: not stat sig
booking_requests_per_prop
: stat sig (-0.00996454)
property_page_views_per_visit_per_prop
: not stat sig
displays_in_searchpage_per_prop
: not stat sig
# Outcome columns in df_for_test_TX (presumably the left-hand side of the
# formula is swapped by hand to test each one in turn):
#   bookings_per_prop
#   booking_amount_per_prop
#   booking_requests_per_prop
#   property_page_views_per_visit_per_prop
#   displays_in_searchpage_per_prop
#
# One-sided Welch t-test (unequal variances) on the Texas outcome table.
# NOTE(review): with the formula interface, 'greater' tests whether the first
# group level (the lower `treat` value) has the larger mean -- confirm this is
# the intended direction.
t.test(
  displays_in_searchpage_per_prop ~ treat,
  data = df_for_test_TX,
  var.equal = FALSE,
  alternative = 'greater'
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.