1. Load Libraries

library(tidyverse)
library(here)
library(lubridate)
library(janitor)
library(zoo)
library(ggthemes)

library(forecast)
library(stargazer)
library(data.table)
library(tableone)
library(lattice)
library(pwr)
library(rcompanion)
library(scales)
library(plm)
library(readxl)
library(MatchIt)
library(lfe)
library(Synth)
library(gsynth)
library(panelView)
library(CausalImpact)
devtools::load_all()

2. Load and Process Data

# Load the metrics and property tables through the package helper.
# The two direct `readr::read_csv()` calls that used to precede this line were
# dead stores: both `metrics` and `property` were overwritten by the
# destructuring assignment below before anything read them.
# NOTE(review): `%<-%` is not provided by any package attached above; it is
# assumed to come in through `devtools::load_all()` -- confirm.
c(metrics, property) %<-% get_property_data()

# One subset of `property` per location string, used by the
# location-specific tests further down.
property_FL <- property %>%
  filter(location == 'Destin, Florida')

property_TX <- property %>%
  filter(location == 'Austin, Texas')

property_NV <- property %>%
  filter(location == 'Vegas, Nevada')

3. Exploratory Data Analysis - Properties

3.1. Number of Properties by Location and Online Visit Status

# Dodged bar chart: row count of `property` per location, split by the
# `online_visits` column. Each bar is annotated with its count.
p <- property %>%
  group_by(location, online_visits) %>%
  tally() %>%
  ungroup() %>%
  dplyr::rename(`Online Visits` = online_visits) %>%
  ggplot(aes(x = location, y = n, fill = `Online Visits`, label = n)) +
  geom_col(position = 'dodge') +
  geom_text(position = position_dodge(width = 0.9), vjust = -1) +
  # Upper limit leaves headroom for the count labels drawn above the bars.
  ylim(0, 650) +
  # Fix: the axis titles were swapped. `location` is mapped to x and the
  # count `n` to y, and this plot is not flipped.
  xlab('Location') +
  ylab('Number of Properties') +
  theme_hc() +
  ggtitle('Number of Properties by Location')

print(p)
ggsave(file.path(here::here(), 'fig/01_num_prop_location.png'), p)

3.2. Mean Ratings and Mean Number of Reviews by Online Visit Status

# Dodged bar chart comparing, per `online_visits` group, the mean of
# `avg_review_rating_num` and the mean of
# `number_of_reviews_on_the_property_num`.
p <- property %>%
  group_by(online_visits) %>%
  summarize(mean_rating = mean(avg_review_rating_num),
            mean_num_reviews = mean(number_of_reviews_on_the_property_num)) %>%
  ungroup() %>%
  dplyr::rename(`Online Visits` = online_visits) %>%
  # Long format: one row per (group, metric) so both metrics share one axis.
  gather(key = 'metrics', value = 'value', -`Online Visits`) %>%
  # Fix: the second metric is the mean number of *reviews*, not of ratings;
  # the old label "Average Number of Rating" contradicted the plot title.
  mutate(metrics = if_else(metrics == 'mean_rating',
                           "Average Rating",
                           "Average Number of Reviews")) %>%
  ggplot(aes(x = metrics, y = value, fill = `Online Visits`, label = round(value, 2))) +
  geom_col(position = 'dodge') +
  geom_text(position = position_dodge(width = 0.9), vjust = -1) +
  xlab('Metrics') +
  ylab('Value') +
  theme_hc() +
  ggtitle('Mean Ratings and Mean Number of Reviews by Online Visit Status') +
  ylim(0, 15)

print(p)
ggsave(file.path(here::here(), 'fig/02_avg_rating_review_by_tour.png'), p)
# Same comparison as the previous figure, computed per location and drawn as
# horizontal bars with one facet per location.
p <- property %>%
  group_by(location, online_visits) %>%
  summarize(mean_rating = mean(avg_review_rating_num),
            mean_num_reviews = mean(number_of_reviews_on_the_property_num)) %>%
  ungroup() %>%
  dplyr::rename(`Online Visits` = online_visits) %>%
  # Keep `location` as an id column so it is available for faceting.
  gather(key = 'metrics', value = 'value', -`Online Visits`, -location) %>%
  # Fix: the second metric is the mean number of *reviews*, not of ratings;
  # the old label "Average Number of Rating" contradicted the plot title.
  mutate(metrics = if_else(metrics == 'mean_rating',
                           "Average Rating",
                           "Average Number of Reviews")) %>%
  ggplot(aes(x = metrics, y = value, fill = `Online Visits`, label = round(value, 2))) +
  geom_col(position = 'dodge') +
  # Bars are horizontal after coord_flip(), so labels are nudged with hjust.
  geom_text(position = position_dodge(width = 0.9), hjust = -0.15) +
  xlab('Metrics') +
  ylab('Value') +
  theme_hc() +
  ggtitle('Mean Ratings and Mean Number of Reviews by Online Visit Status') +
  ylim(0, 20) +
  coord_flip() +
  facet_wrap(~location)

print(p)
ggsave(file.path(here::here(), 'fig/02B_avg_rating_review_by_tour_by_loc.png'), p, width = 8, height = 4, dpi = 300)

There is weak evidence that the properties in the online visit group have higher average ratings than those in the non-online visit group.

# Apply the project helper `vectorize_ttest_for_property()` to each
# review-related column of the full `property` table.
# NOTE(review): the helper is defined outside this file; presumably it compares
# the online-visit groups with a t-test -- confirm.
continuous_vars <- c(
  'avg_review_rating_num',
  'number_of_reviews_on_the_property_num'
)
map(continuous_vars, ~ vectorize_ttest_for_property(.x, property))

When broken down by location: there is weak evidence that the properties in the online visit group have higher average ratings than those in the non-online visit group in Texas. There is also weak evidence that the properties in the online visit group have a lower average number of reviews than those in the non-online visit group in Nevada.

# Repeat the review-column tests separately for each location subset,
# printing a banner before each set of results.
continuous_vars <- c(
  'avg_review_rating_num',
  'number_of_reviews_on_the_property_num'
)

cat('================= FLORIDA ================= \n\n')
map(continuous_vars, ~ vectorize_ttest_for_property(.x, property_FL))

cat('================= TEXAS ================= \n\n')
map(continuous_vars, ~ vectorize_ttest_for_property(.x, property_TX))

cat('================= NEVADA ================= \n\n')
map(continuous_vars, ~ vectorize_ttest_for_property(.x, property_NV))

3.3. Property Size by Online Visit Status

# Dodged bar chart of the mean `bedrooms`, `bathrooms` and `maximum_occupancy`
# for each `online_visits` group, with each bar labelled by its rounded value.
p <- ggplot(
  data = property %>%
    group_by(online_visits) %>%
    summarize(
      `Avg # of Bedrooms` = mean(bedrooms),
      `Avg # of Bathrooms` = mean(bathrooms),
      `Avg Maximum Occupancy` = mean(maximum_occupancy)
    ) %>%
    ungroup() %>%
    dplyr::rename(`Online Visits` = online_visits) %>%
    # Long format: one row per (group, metric).
    gather(key = 'metrics', value = 'value', -`Online Visits`),
  mapping = aes(
    x = metrics,
    y = value,
    fill = `Online Visits`,
    label = round(value, 2)
  )
) +
  geom_col(position = 'dodge') +
  geom_text(position = position_dodge(width = 0.9), vjust = -1) +
  labs(
    x = 'Metrics',
    y = 'Value',
    title = 'Average Property Size by Online Visit Option'
  ) +
  theme_hc() +
  ylim(0, 10)

print(p)
ggsave(file.path(here::here(), 'fig/03_prop_size_by_online_visit.png'), p)
# Per-location version of the property-size comparison: horizontal dodged
# bars, one facet per location.
p <- ggplot(
  data = property %>%
    group_by(location, online_visits) %>%
    summarize(
      `Avg # of Bedrooms` = mean(bedrooms),
      `Avg # of Bathrooms` = mean(bathrooms),
      `Avg Maximum Occupancy` = mean(maximum_occupancy)
    ) %>%
    ungroup() %>%
    dplyr::rename(`Online Visits` = online_visits) %>%
    # `location` stays as an id column so it can drive the facets.
    gather(key = 'metrics', value = 'value', -`Online Visits`, -location),
  mapping = aes(
    x = metrics,
    y = value,
    fill = `Online Visits`,
    label = round(value, 2)
  )
) +
  geom_col(position = 'dodge') +
  # Bars are horizontal after coord_flip(), so labels are nudged with hjust.
  geom_text(position = position_dodge(width = 0.9), hjust = -0.15) +
  labs(
    x = 'Metrics',
    y = 'Value',
    title = 'Average Property Size by Online Visit Option'
  ) +
  theme_hc() +
  ylim(0, 15) +
  coord_flip() +
  facet_wrap(~location)

print(p)
ggsave(
  file.path(here::here(), 'fig/03B_prop_size_by_online_visit_by_loc.png'),
  p,
  width = 8,
  height = 4,
  dpi = 300
)

With p-values > 0.05, none of these variables differ significantly between the online visit group and the non-online visit group.

# Apply `vectorize_ttest_for_property()` to the three size columns, first on
# the full `property` table and then on each location subset.
# The column vector is defined once; the original repeated the identical
# assignment before the per-location runs.
continuous_vars <- c(
  'bedrooms',
  'bathrooms',
  'maximum_occupancy'
)
map(continuous_vars, ~ vectorize_ttest_for_property(.x, property))

cat('================= FLORIDA ================= \n\n')
map(continuous_vars, ~ vectorize_ttest_for_property(.x, property_FL))

cat('================= TEXAS ================= \n\n')
map(continuous_vars, ~ vectorize_ttest_for_property(.x, property_TX))

cat('================= NEVADA ================= \n\n')
map(continuous_vars, ~ vectorize_ttest_for_property(.x, property_NV))

3.4. Comparisons: Instant bookings and premier partner badge

# Dodged bar chart of the mean of `instant_booking_enabled` and
# `premier_partner_badge` per `online_visits` group, with bars labelled as
# percentages.
# NOTE(review): reading the means as shares assumes both columns are 0/1 or
# logical indicators -- confirm.
p <- ggplot(
  data = property %>%
    group_by(online_visits) %>%
    summarize(
      instant_booking_enabled = mean(instant_booking_enabled),
      premier_partner_badge = mean(premier_partner_badge)
    ) %>%
    ungroup() %>%
    dplyr::rename(`Online Visits` = online_visits) %>%
    gather(key = 'metrics', value = 'value', -`Online Visits`),
  mapping = aes(
    x = metrics,
    y = value,
    fill = `Online Visits`,
    label = scales::percent(value, 0.01)
  )
) +
  geom_col(position = 'dodge') +
  geom_text(position = position_dodge(width = 0.9), vjust = -1) +
  labs(
    x = 'Metrics',
    y = 'Value',
    title = 'Percentage of Properties with Instant Booking and Premier Partner Badge'
  ) +
  theme_hc() +
  ylim(0, 0.4)

print(p)
ggsave(file.path(here::here(), 'fig/04_IB_and PP_type_by_tour.png'), p)
# Per-location version of the instant-booking / premier-partner comparison:
# horizontal dodged bars, one facet per location.
p <- ggplot(
  data = property %>%
    group_by(location, online_visits) %>%
    summarize(
      instant_booking_enabled = mean(instant_booking_enabled),
      premier_partner_badge = mean(premier_partner_badge)
    ) %>%
    ungroup() %>%
    dplyr::rename(`Online Visits` = online_visits) %>%
    # `location` stays as an id column so it can drive the facets.
    gather(key = 'metrics', value = 'value', -`Online Visits`, -location),
  mapping = aes(
    x = metrics,
    y = value,
    fill = `Online Visits`,
    label = scales::percent(value, 0.01)
  )
) +
  geom_col(position = 'dodge') +
  # Bars are horizontal after coord_flip(), so labels are nudged with hjust.
  geom_text(position = position_dodge(width = 0.9), hjust = -0.15) +
  labs(
    x = 'Metrics',
    y = 'Value',
    title = 'Percentage of Properties with Instant Booking and Premier Partner Badge'
  ) +
  theme_hc() +
  ylim(0, 0.6) +
  coord_flip() +
  facet_wrap(~location)

print(p)
ggsave(
  file.path(here::here(), 'fig/04B_IB_and PP_type_by_tour_by_loc.png'),
  p,
  width = 10,
  height = 4,
  dpi = 300
)

With p-values > 0.05, instant_booking_enabled and premier_partner_badge are not that different between the online visit group and the non-online visit group.

# Apply the project helper `vectorize_propztest_for_property()` to each
# indicator column of the full `property` table.
# NOTE(review): the helper is defined outside this file; presumably a
# two-sample proportion z-test between the online-visit groups -- confirm.
indicator_vars <- c(
  'instant_booking_enabled',
  'premier_partner_badge'
)
map(indicator_vars, ~ vectorize_propztest_for_property(.x, property))

There is weak evidence (p-value < 0.1) that more properties have instant booking enabled in the online visit group than in the non-online visit group in Texas.

# Repeat the indicator-column tests separately for each location subset,
# printing a banner before each set of results.
indicator_vars <- c(
  'instant_booking_enabled',
  'premier_partner_badge'
)

cat('================= FLORIDA ================= \n\n')
map(indicator_vars, ~ vectorize_propztest_for_property(.x, property_FL))

cat('================= TEXAS ================= \n\n')
map(indicator_vars, ~ vectorize_propztest_for_property(.x, property_TX))

cat('================= NEVADA ================= \n\n')
map(indicator_vars, ~ vectorize_propztest_for_property(.x, property_NV))

3.5. Number of Property Types by Online Visit Option

# Horizontal dodged bar chart: row count of `property` per property type,
# split by the `online_visits` column. Types are ordered by count.
p <- ggplot(
  data = property %>%
    group_by(property_type, online_visits) %>%
    tally() %>%
    ungroup() %>%
    dplyr::rename(`Online Visits` = online_visits),
  mapping = aes(
    x = reorder(property_type, n),
    y = n,
    fill = `Online Visits`
  )
) +
  geom_col(position = 'dodge') +
  coord_flip() +
  theme_hc() +
  # x/y titles refer to the aesthetics before coord_flip(): x is the type.
  labs(
    x = 'Type',
    y = 'Number of Property',
    title = 'Number of Property by Type by Online Visit Option'
  )

print(p)
ggsave(file.path(here::here(), 'fig/05_number_prop_by_type.png'), p)
# Per-location version of the property-type counts: horizontal dodged bars,
# one facet per location.
p <- ggplot(
  data = property %>%
    group_by(location, property_type, online_visits) %>%
    tally() %>%
    ungroup() %>%
    dplyr::rename(`Online Visits` = online_visits),
  mapping = aes(
    x = reorder(property_type, n),
    y = n,
    fill = `Online Visits`
  )
) +
  geom_col(position = 'dodge') +
  coord_flip() +
  theme_hc() +
  # x/y titles refer to the aesthetics before coord_flip(): x is the type.
  labs(
    x = 'Type',
    y = 'Number of Property',
    title = 'Number of Property by Type by Online Visit Option'
  ) +
  facet_wrap(~location)

print(p)
ggsave(
  file.path(here::here(), 'fig/05B_number_prop_by_type_by_loc.png'),
  p,
  width = 8,
  height = 4,
  dpi = 300
)

4. Exploratory Data Analysis - Metrics

4.1. No Location (Aggregated)

# Join property attributes onto the metrics rows and average each metric
# (ignoring NAs) within every (date, online_visits) group.
# The former `mutate(year_month = as.yearmon(date))` step was removed: the
# column was never grouped on or referenced, and `summarize()` discarded it.
# NOTE(review): the table is keyed by `date`, so it is only truly monthly if
# `metrics$date` holds one value per month -- confirm against the data source.
metrics_by_months <- metrics %>%
  left_join(property, by = 'property_id') %>%
  group_by(date, online_visits) %>%
  summarize(bookings_per_prop = mean(bookings, na.rm = TRUE),
            booking_amount_per_prop = mean(booking_amount, na.rm = TRUE),
            booking_requests_per_prop = mean(booking_requests, na.rm = TRUE),
            property_page_views_per_visit_per_prop = mean(property_page_views_visits, na.rm = TRUE),
            displays_in_searchpage_per_prop = mean(number_of_times_the_property_got_displayed_in_search_page, na.rm = TRUE)) %>%
  ungroup()
# The five aggregated trend figures below were five copy-pasted pipelines that
# differed only in the y column, the y-axis label, the title and the output
# file. They are now produced by one helper driven by a spec list.

# Draw, print and save one trend chart from `data`.
#   data    : table with `date`, `online_visits` and the column named by `y_var`.
#   y_var   : name (string) of the column to plot on the y axis.
#   y_label : y-axis title.
#   title   : plot title.
#   file    : output path relative to the project root (here::here()).
# Returns the ggplot object invisibly.
plot_metric_trend_nolo <- function(data, y_var, y_label, title, file) {
  trend_plot <- data %>%
    dplyr::rename(`Online Visits` = online_visits) %>%
    # `.data[[y_var]]` looks the column up by its string name.
    ggplot(aes(x = date, y = .data[[y_var]], color = `Online Visits`)) +
    geom_line() +
    geom_smooth(method = 'loess', se = FALSE) +
    xlab('Month') +
    ylab(y_label) +
    ggtitle(title) +
    # Dashed reference line at 2019-01-01.
    geom_vline(xintercept = as.Date('2019-01-01'),
               linetype = 'longdash',
               color = 'black',
               size = 0.3) +
    theme_hc()

  print(trend_plot)
  ggsave(file.path(here::here(), file), trend_plot)
  invisible(trend_plot)
}

# One entry per figure, in the original order (06A-06E).
trend_specs_nolo <- list(
  list(y_var = 'booking_amount_per_prop',
       y_label = 'Average Booking Amount ($) Per Property',
       title = 'Average Booking Amount Per Property by Month',
       file = 'fig/06A_booking_amount_per_prop_nolo.png'),
  list(y_var = 'bookings_per_prop',
       y_label = 'Average Booking Per Property',
       title = 'Average Booking Per Property by Month',
       file = 'fig/06B_bookings_per_prop_nolo.png'),
  list(y_var = 'booking_requests_per_prop',
       y_label = 'Average Booking Requests Per Property',
       title = 'Average Booking Requests Per Property by Month',
       file = 'fig/06C_booking_requests_per_prop_nolo.png'),
  list(y_var = 'property_page_views_per_visit_per_prop',
       y_label = 'Average Page Views Per Visit Per Property',
       title = 'Average Page Views Per Visit Per Property by Month',
       file = 'fig/06D_page_views_per_visit_per_prop_nolo.png'),
  list(y_var = 'displays_in_searchpage_per_prop',
       y_label = 'Average Displays in Search Page Per Property',
       title = 'Average Displays in Search Page Per Property by Month',
       file = 'fig/06E_displays_in_searchpage_per_prop_nolo.png')
)

# `p` is reassigned on every iteration so that, as before, it ends up holding
# the last figure drawn.
for (spec in trend_specs_nolo) {
  p <- plot_metric_trend_nolo(metrics_by_months,
                              y_var = spec$y_var,
                              y_label = spec$y_label,
                              title = spec$title,
                              file = spec$file)
}

4.2. By Location

# Join property attributes onto the metrics rows and average each metric
# (ignoring NAs) within every (date, online_visits, location) group.
# The former `mutate(year_month = as.yearmon(date))` step was removed: the
# column was never grouped on or referenced, and `summarize()` discarded it.
# NOTE(review): the table is keyed by `date`, so it is only truly monthly if
# `metrics$date` holds one value per month -- confirm against the data source.
metrics_by_months_by_loc <- metrics %>%
  left_join(property, by = 'property_id') %>%
  group_by(date, online_visits, location) %>%
  summarize(bookings_per_prop = mean(bookings, na.rm = TRUE),
            booking_amount_per_prop = mean(booking_amount, na.rm = TRUE),
            booking_requests_per_prop = mean(booking_requests, na.rm = TRUE),
            property_page_views_per_visit_per_prop = mean(property_page_views_visits, na.rm = TRUE),
            displays_in_searchpage_per_prop = mean(number_of_times_the_property_got_displayed_in_search_page, na.rm = TRUE)) %>%
  ungroup()
# The five per-location trend figures below were five copy-pasted pipelines
# that differed only in the y column, the y-axis label, the title and the
# output file. They are now produced by one helper driven by a spec list.

# Draw, print and save one trend chart from `data`, with one facet row per
# location.
#   data    : table with `date`, `online_visits`, `location` and the column
#             named by `y_var`.
#   y_var   : name (string) of the column to plot on the y axis.
#   y_label : y-axis title.
#   title   : plot title.
#   file    : output path relative to the project root (here::here()).
# Returns the ggplot object invisibly.
plot_metric_trend_by_loc <- function(data, y_var, y_label, title, file) {
  trend_plot <- data %>%
    dplyr::rename(`Online Visits` = online_visits) %>%
    # `.data[[y_var]]` looks the column up by its string name.
    ggplot(aes(x = date, y = .data[[y_var]], color = `Online Visits`)) +
    geom_line() +
    geom_smooth(method = 'loess', se = FALSE) +
    xlab('Month') +
    ylab(y_label) +
    ggtitle(title) +
    # Dashed reference line at 2019-01-01.
    geom_vline(xintercept = as.Date('2019-01-01'),
               linetype = 'longdash',
               color = 'black',
               size = 0.3) +
    theme_hc() +
    facet_wrap(~location, nrow = 3)

  print(trend_plot)
  # Tall, narrow canvas to fit the three stacked facet rows.
  ggsave(file.path(here::here(), file), trend_plot, height = 7, width = 5, dpi = 300)
  invisible(trend_plot)
}

# One entry per figure, in the original order (06A-06E).
trend_specs_by_loc <- list(
  list(y_var = 'booking_amount_per_prop',
       y_label = 'Average Booking Amount ($) Per Property',
       title = 'Average Booking Amount Per Property by Month',
       file = 'fig/06A_booking_amount_per_prop_byloc.png'),
  list(y_var = 'bookings_per_prop',
       y_label = 'Average Booking Per Property',
       title = 'Average Booking Per Property by Month',
       file = 'fig/06B_bookings_per_prop_byloc.png'),
  list(y_var = 'booking_requests_per_prop',
       y_label = 'Average Booking Requests Per Property',
       title = 'Average Booking Requests Per Property by Month',
       file = 'fig/06C_booking_requests_per_prop_byloc.png'),
  list(y_var = 'property_page_views_per_visit_per_prop',
       y_label = 'Average Page Views Per Visit Per Property',
       title = 'Average Page Views Per Visit Per Property by Month',
       file = 'fig/06D_page_views_per_visit_per_prop_byloc.png'),
  list(y_var = 'displays_in_searchpage_per_prop',
       y_label = 'Average Displays in Search Page Per Property',
       title = 'Average Displays in Search Page Per Property by Month',
       file = 'fig/06E_displays_in_searchpage_per_prop_byloc.png')
)

# `p` is reassigned on every iteration so that, as before, it ends up holding
# the last figure drawn.
for (spec in trend_specs_by_loc) {
  p <- plot_metric_trend_by_loc(metrics_by_months_by_loc,
                                y_var = spec$y_var,
                                y_label = spec$y_label,
                                title = spec$title,
                                file = spec$file)
}


hnguyen1174/online_visits_assessment documentation built on Dec. 20, 2021, 4:46 p.m.