1. Load Libraries

library(tidyverse)
library(here)
library(lubridate)
library(janitor)
library(zoo)
library(ggthemes)

library(forecast)
library(stargazer)
library(data.table)
library(tableone)
library(lattice)
library(pwr)
library(rcompanion)
library(scales)
library(plm)
library(readxl)
library(MatchIt)
library(lfe)
library(Synth)
library(gsynth)
library(panelView)
library(CausalImpact)
library(optmatch)

library(zeallot)
devtools::load_all()

2. Load and Process Data

# Load the project data in a single call; the zeallot `%<-%` operator unpacks
# the returned value positionally into `metrics` and `property`.
# NOTE(review): assumes get_property_data() returns exactly two elements in
# this order (metrics first, property second) -- confirm against its definition.
c(metrics, property) %<-% get_property_data()

3. Matching - Aggregated Data

Features to match (these are the features that can potentially explain why a property was chosen for the online visit option):

* avg_review_rating_num: average rating of the property
* number_of_reviews_on_the_property_num: number of reviews the property has
* instant_booking_enabled: whether the property has the instant booking feature enabled
* premier_partner_badge: whether the property is from a premier partner
* maximum_occupancy: maximum occupancy of the property
* bedrooms: number of bedrooms
* bathrooms: number of bathrooms
* number_of_photos_of_the_property_on_the_property_page: number of photos on the property's page
* bookings_per_day: number of bookings per day (before 2019-01-01)
* booking_amount_per_day: booking amount in dollars per day (before 2019-01-01)
* booking_requests_per_day: number of booking requests per day (before 2019-01-01)
* property_page_views_per_visit_per_day: number of page views per visit per day (before 2019-01-01)
* displays_in_searchpage_per_day: number of times per day the property was displayed on the search page (before 2019-01-01)

# Per-property daily averages over the pre-period (strictly before 2019-01-01).
# These become the behavioural covariates used for matching.
metrics_agg <- metrics %>%
  filter(date < as.Date('2019-01-01')) %>%
  group_by(property_id) %>%
  summarize(
    bookings_per_day = mean(bookings, na.rm = TRUE),
    booking_amount_per_day = mean(booking_amount, na.rm = TRUE),
    booking_requests_per_day = mean(booking_requests, na.rm = TRUE),
    property_page_views_per_visit_per_day = mean(property_page_views_visits, na.rm = TRUE),
    displays_in_searchpage_per_day = mean(number_of_times_the_property_got_displayed_in_search_page, na.rm = TRUE)
  ) %>%
  ungroup()

# Matching frame: one row per property with its static attributes, the
# treatment flag and the pre-period averages computed above.
property_to_match <- property %>%
  left_join(metrics_agg, by = 'property_id') %>%
  dplyr::select(
    property_id,
    location,
    avg_review_rating_num,
    number_of_reviews_on_the_property_num,
    instant_booking_enabled,
    premier_partner_badge,
    maximum_occupancy,
    bedrooms,
    bathrooms,
    number_of_photos_of_the_property_on_the_property_page,
    treat,
    bookings_per_day,
    booking_amount_per_day,
    booking_requests_per_day,
    property_page_views_per_visit_per_day,
    displays_in_searchpage_per_day
  ) %>%
  # Keep complete cases only; this also removes properties that have no
  # pre-period metrics (their averages are NA after the left join).
  drop_na() %>%
  # Coerce every column except `location` to numeric. `across()` replaces the
  # superseded `mutate_at(vars(...))` form and selects the same columns.
  # NOTE(review): this also coerces `property_id`; assumes ids are
  # numeric-like -- confirm.
  mutate(across(-location, as.numeric))

Visualizing common support between the online visit group and non-online visit group:

Logistic regression model (only two features are statistically significant):

* avg_review_rating_num: average rating of the property
* property_page_views_per_visit_per_day: number of page views per visit per day (before 2019-01-01)

# Logistic regression of the treatment flag on the property attributes and the
# pre-period activity averages (one term per line for readability).
propensity_model <- glm(
  formula = treat ~
    avg_review_rating_num +
    number_of_reviews_on_the_property_num +
    instant_booking_enabled +
    premier_partner_badge +
    maximum_occupancy +
    bedrooms +
    bathrooms +
    number_of_photos_of_the_property_on_the_property_page +
    bookings_per_day +
    booking_amount_per_day +
    booking_requests_per_day +
    property_page_views_per_visit_per_day +
    displays_in_searchpage_per_day,
  data = property_to_match,
  family = 'binomial'
)

# Coefficient table, then the project helper for the common-support plot.
summary(propensity_model)
visualize_common_support(propensity_model)

Matching with optimal algorithm:

# Optimal matching on the same covariates as the propensity model.
# `replace` is intentionally not passed: optimal matching is always without
# replacement, and MatchIt ignores the argument (with a warning) for
# method = 'optimal'.
property_match <- matchit(
  treat ~
    avg_review_rating_num +
    number_of_reviews_on_the_property_num +
    instant_booking_enabled +
    premier_partner_badge +
    maximum_occupancy +
    bedrooms +
    bathrooms +
    number_of_photos_of_the_property_on_the_property_page +
    bookings_per_day +
    booking_amount_per_day +
    booking_requests_per_day +
    property_page_views_per_visit_per_day +
    displays_in_searchpage_per_day,
  method = 'optimal',
  data = property_to_match,
  # Discard properties outside of common support region
  discard = 'both'
)

# Balance summary, then extract the matched sample and report its size.
summary(property_match)
property_matched <- match.data(property_match)
dim(property_matched)

Perform the t-test on variables selected for the propensity score matching procedure (the expectation is that they are not too different between the control and treatment group):

# Covariates that went into the propensity-score match.
# NOTE(review): despite the name, the vector also contains the flags
# instant_booking_enabled and premier_partner_badge -- confirm a t-test is the
# intended balance check for those.
continuous_vars <- c(
  'avg_review_rating_num', 'number_of_reviews_on_the_property_num',
  'instant_booking_enabled', 'premier_partner_badge',
  'maximum_occupancy', 'bedrooms', 'bathrooms',
  'number_of_photos_of_the_property_on_the_property_page',
  'bookings_per_day', 'booking_amount_per_day', 'booking_requests_per_day',
  'property_page_views_per_visit_per_day', 'displays_in_searchpage_per_day'
)

# Apply the project t-test helper to each covariate on the matched sample;
# the result is a list with one element per covariate.
continuous_vars %>%
  map(vectorize_ttest_for_property, property_matched)
# Post-period outcomes: attach the daily metrics to each matched property,
# keep only dates after 2018-12-31, and average every metric per property.
df_for_test <- property_matched %>%
  left_join(metrics, by = 'property_id') %>%
  filter(date > as.Date('2018-12-31')) %>%
  # `treat` is carried in the grouping so it survives the aggregation.
  group_by(property_id, treat) %>%
  summarise(
    bookings_per_prop = mean(bookings, na.rm = TRUE),
    booking_amount_per_prop = mean(booking_amount, na.rm = TRUE),
    booking_requests_per_prop = mean(booking_requests, na.rm = TRUE),
    property_page_views_per_visit_per_prop = mean(property_page_views_visits, na.rm = TRUE),
    displays_in_searchpage_per_prop = mean(number_of_times_the_property_got_displayed_in_search_page, na.rm = TRUE)
  ) %>%
  ungroup()

Result:

* bookings_per_prop: statistically significant (-0.0059)
* booking_amount_per_prop: not statistically significant
* booking_requests_per_prop: statistically significant (-0.0075)
* property_page_views_per_visit_per_prop: (-1.27991)
* displays_in_searchpage_per_prop: not statistically significant

# Outcome columns available in df_for_test; substitute any of them on the
# left-hand side of the formula below to test that outcome:
#   bookings_per_prop
#   booking_amount_per_prop
#   booking_requests_per_prop
#   property_page_views_per_visit_per_prop
#   displays_in_searchpage_per_prop

# One-sided two-sample t-test with unequal variances (var.equal = FALSE).
# NOTE(review): with the formula interface 'greater' refers to the first
# `treat` level minus the second -- confirm which group is coded first.
t.test(
  displays_in_searchpage_per_prop ~ treat,
  data = df_for_test,
  alternative = 'greater',
  var.equal = FALSE
)

4. Matching - By Location

4.1. Florida

# Restrict the matching frame to the Destin, Florida properties.
property_to_match_FL <- filter(property_to_match, location == 'Destin, Florida')

# Florida-only logistic regression of the treatment flag on the covariates.
propensity_model_FL <- glm(
  formula = treat ~
    avg_review_rating_num +
    number_of_reviews_on_the_property_num +
    instant_booking_enabled +
    premier_partner_badge +
    maximum_occupancy +
    bedrooms +
    bathrooms +
    number_of_photos_of_the_property_on_the_property_page +
    bookings_per_day +
    booking_amount_per_day +
    booking_requests_per_day +
    property_page_views_per_visit_per_day +
    displays_in_searchpage_per_day,
  data = property_to_match_FL,
  family = 'binomial'
)

# Project helper for the common-support plot.
visualize_common_support(propensity_model_FL)
# Optimal matching within Florida on the same covariates as the propensity
# model. `replace` is intentionally not passed: optimal matching is always
# without replacement, and MatchIt ignores the argument (with a warning) for
# method = 'optimal'.
property_match_FL <- matchit(
  treat ~
    avg_review_rating_num +
    number_of_reviews_on_the_property_num +
    instant_booking_enabled +
    premier_partner_badge +
    maximum_occupancy +
    bedrooms +
    bathrooms +
    number_of_photos_of_the_property_on_the_property_page +
    bookings_per_day +
    booking_amount_per_day +
    booking_requests_per_day +
    property_page_views_per_visit_per_day +
    displays_in_searchpage_per_day,
  method = 'optimal',
  data = property_to_match_FL,
  # Discard properties outside of the common support region
  discard = 'both'
)

# Matched Florida sample.
property_matched_FL <- match.data(property_match_FL)

# Post-period outcomes for the matched Florida properties: attach the daily
# metrics, keep only dates after 2018-12-31, and average per property.
df_for_test_FL <- property_matched_FL %>%
  left_join(metrics, by = 'property_id') %>%
  filter(date > as.Date('2018-12-31')) %>%
  # `treat` is carried in the grouping so it survives the aggregation.
  group_by(property_id, treat) %>%
  summarise(
    bookings_per_prop = mean(bookings, na.rm = TRUE),
    booking_amount_per_prop = mean(booking_amount, na.rm = TRUE),
    booking_requests_per_prop = mean(booking_requests, na.rm = TRUE),
    property_page_views_per_visit_per_prop = mean(property_page_views_visits, na.rm = TRUE),
    displays_in_searchpage_per_prop = mean(number_of_times_the_property_got_displayed_in_search_page, na.rm = TRUE)
  ) %>%
  ungroup()

Results for Florida:

* bookings_per_prop: not statistically significant
* booking_amount_per_prop: not statistically significant
* booking_requests_per_prop: not statistically significant
* property_page_views_per_visit_per_prop: not statistically significant
* displays_in_searchpage_per_prop: not statistically significant

# Outcome columns available in df_for_test_FL; substitute any of them on the
# left-hand side of the formula below to test that outcome:
#   bookings_per_prop
#   booking_amount_per_prop
#   booking_requests_per_prop
#   property_page_views_per_visit_per_prop
#   displays_in_searchpage_per_prop

# One-sided two-sample t-test with unequal variances (var.equal = FALSE).
# NOTE(review): with the formula interface 'greater' refers to the first
# `treat` level minus the second -- confirm which group is coded first.
t.test(
  displays_in_searchpage_per_prop ~ treat,
  data = df_for_test_FL,
  alternative = 'greater',
  var.equal = FALSE
)

4.2. Nevada

# Restrict the matching frame to the Vegas, Nevada properties.
property_to_match_NV <- filter(property_to_match, location == 'Vegas, Nevada')

# Nevada-only logistic regression of the treatment flag on the covariates.
propensity_model_NV <- glm(
  formula = treat ~
    avg_review_rating_num +
    number_of_reviews_on_the_property_num +
    instant_booking_enabled +
    premier_partner_badge +
    maximum_occupancy +
    bedrooms +
    bathrooms +
    number_of_photos_of_the_property_on_the_property_page +
    bookings_per_day +
    booking_amount_per_day +
    booking_requests_per_day +
    property_page_views_per_visit_per_day +
    displays_in_searchpage_per_day,
  data = property_to_match_NV,
  family = 'binomial'
)

# Project helper for the common-support plot.
visualize_common_support(propensity_model_NV)
# Optimal matching within Nevada on the same covariates as the propensity
# model. `replace` is intentionally not passed: optimal matching is always
# without replacement, and MatchIt ignores the argument (with a warning) for
# method = 'optimal'.
property_match_NV <- matchit(
  treat ~
    avg_review_rating_num +
    number_of_reviews_on_the_property_num +
    instant_booking_enabled +
    premier_partner_badge +
    maximum_occupancy +
    bedrooms +
    bathrooms +
    number_of_photos_of_the_property_on_the_property_page +
    bookings_per_day +
    booking_amount_per_day +
    booking_requests_per_day +
    property_page_views_per_visit_per_day +
    displays_in_searchpage_per_day,
  method = 'optimal',
  data = property_to_match_NV,
  # Discard properties outside of the common support region
  discard = 'both'
)

# Matched Nevada sample. Stored under its own name so that the Florida
# matched sample (property_matched_FL) is not overwritten.
property_matched_NV <- match.data(property_match_NV)

# Post-period outcomes for the matched Nevada properties: attach the daily
# metrics, keep only dates after 2018-12-31, and average per property.
df_for_test_NV <- property_matched_NV %>%
  left_join(metrics, by = 'property_id') %>%
  filter(date > as.Date('2018-12-31')) %>%
  # `treat` is carried in the grouping so it survives the aggregation.
  group_by(property_id, treat) %>%
  summarize(
    bookings_per_prop = mean(bookings, na.rm = TRUE),
    booking_amount_per_prop = mean(booking_amount, na.rm = TRUE),
    booking_requests_per_prop = mean(booking_requests, na.rm = TRUE),
    property_page_views_per_visit_per_prop = mean(property_page_views_visits, na.rm = TRUE),
    displays_in_searchpage_per_prop = mean(number_of_times_the_property_got_displayed_in_search_page, na.rm = TRUE)
  ) %>%
  ungroup()

Results for Nevada:

* bookings_per_prop: not statistically significant
* booking_amount_per_prop: not statistically significant
* booking_requests_per_prop: not statistically significant
* property_page_views_per_visit_per_prop: not statistically significant
* displays_in_searchpage_per_prop: not statistically significant

# Outcome columns available in df_for_test_NV; substitute any of them on the
# left-hand side of the formula below to test that outcome:
#   bookings_per_prop
#   booking_amount_per_prop
#   booking_requests_per_prop
#   property_page_views_per_visit_per_prop
#   displays_in_searchpage_per_prop

# One-sided two-sample t-test with unequal variances (var.equal = FALSE).
# NOTE(review): with the formula interface 'greater' refers to the first
# `treat` level minus the second -- confirm which group is coded first.
t.test(
  displays_in_searchpage_per_prop ~ treat,
  data = df_for_test_NV,
  alternative = 'greater',
  var.equal = FALSE
)

4.3. Texas

# Restrict the matching frame to the Austin, Texas properties.
property_to_match_TX <- filter(property_to_match, location == 'Austin, Texas')

# Texas-only logistic regression of the treatment flag on the covariates.
propensity_model_TX <- glm(
  formula = treat ~
    avg_review_rating_num +
    number_of_reviews_on_the_property_num +
    instant_booking_enabled +
    premier_partner_badge +
    maximum_occupancy +
    bedrooms +
    bathrooms +
    number_of_photos_of_the_property_on_the_property_page +
    bookings_per_day +
    booking_amount_per_day +
    booking_requests_per_day +
    property_page_views_per_visit_per_day +
    displays_in_searchpage_per_day,
  data = property_to_match_TX,
  family = 'binomial'
)

# Project helper for the common-support plot.
visualize_common_support(propensity_model_TX)
# Optimal matching within Texas on the same covariates as the propensity
# model. `replace` is intentionally not passed: optimal matching is always
# without replacement, and MatchIt ignores the argument (with a warning) for
# method = 'optimal'.
property_match_TX <- matchit(
  treat ~
    avg_review_rating_num +
    number_of_reviews_on_the_property_num +
    instant_booking_enabled +
    premier_partner_badge +
    maximum_occupancy +
    bedrooms +
    bathrooms +
    number_of_photos_of_the_property_on_the_property_page +
    bookings_per_day +
    booking_amount_per_day +
    booking_requests_per_day +
    property_page_views_per_visit_per_day +
    displays_in_searchpage_per_day,
  method = 'optimal',
  data = property_to_match_TX,
  # Discard properties outside of the common support region
  discard = 'both'
)

# Matched Texas sample.
property_matched_TX <- match.data(property_match_TX)

# Post-period outcomes for the matched Texas properties: attach the daily
# metrics, keep only dates after 2018-12-31, and average per property.
df_for_test_TX <- property_matched_TX %>%
  left_join(metrics, by = 'property_id') %>%
  filter(date > as.Date('2018-12-31')) %>%
  # `treat` is carried in the grouping so it survives the aggregation.
  group_by(property_id, treat) %>%
  summarise(
    bookings_per_prop = mean(bookings, na.rm = TRUE),
    booking_amount_per_prop = mean(booking_amount, na.rm = TRUE),
    booking_requests_per_prop = mean(booking_requests, na.rm = TRUE),
    property_page_views_per_visit_per_prop = mean(property_page_views_visits, na.rm = TRUE),
    displays_in_searchpage_per_prop = mean(number_of_times_the_property_got_displayed_in_search_page, na.rm = TRUE)
  ) %>%
  ungroup()

Results for Texas:

* bookings_per_prop: statistically significant (-0.00871577)
* booking_amount_per_prop: not statistically significant
* booking_requests_per_prop: statistically significant (-0.00996454)
* property_page_views_per_visit_per_prop: not statistically significant
* displays_in_searchpage_per_prop: not statistically significant

# Outcome columns available in df_for_test_TX; substitute any of them on the
# left-hand side of the formula below to test that outcome:
#   bookings_per_prop
#   booking_amount_per_prop
#   booking_requests_per_prop
#   property_page_views_per_visit_per_prop
#   displays_in_searchpage_per_prop

# One-sided two-sample t-test with unequal variances (var.equal = FALSE).
# NOTE(review): with the formula interface 'greater' refers to the first
# `treat` level minus the second -- confirm which group is coded first.
t.test(
  displays_in_searchpage_per_prop ~ treat,
  data = df_for_test_TX,
  alternative = 'greater',
  var.equal = FALSE
)


hnguyen1174/online_visits_assessment documentation built on Dec. 20, 2021, 4:46 p.m.