library(tidyverse) library(here) library(lubridate) library(janitor) library(zoo) library(ggthemes) library(forecast) library(stargazer) library(data.table) library(tableone) library(lattice) library(pwr) library(rcompanion) library(scales) library(plm) library(readxl) library(MatchIt) library(lfe) library(Synth) library(gsynth) library(panelView) library(CausalImpact)
# Load the package under development so its helper functions are available.
devtools::load_all()

# Read the raw metrics and property tables from the project's data folder.
# NOTE(review): both objects are reassigned right below from
# get_property_data(); confirm whether these direct reads are still needed.
metrics <- readr::read_csv(file.path(here::here(), "data/data.csv"))
property <- readr::read_csv(file.path(here::here(), "data/property_info.csv"))

# Unpack the result of get_property_data() into `metrics` and `property`
# (presumably a two-element list in that order -- verify against the helper).
c(metrics, property) %<-% get_property_data()

# One subset of the property table per location.
property_FL <- filter(property, location == "Destin, Florida")
property_TX <- filter(property, location == "Austin, Texas")
property_NV <- filter(property, location == "Vegas, Nevada")
# Dodged bar chart of the number of properties per location, split by
# `online_visits`. The chart is printed and saved under fig/.
p <- property %>%
  group_by(location, online_visits) %>%
  tally() %>%
  ungroup() %>%
  dplyr::rename(`Online Visits` = online_visits) %>%
  ggplot(aes(x = location, y = n, fill = `Online Visits`, label = n)) +
  geom_col(position = "dodge") +
  # Dodge the labels too so each count is drawn over its own bar.
  geom_text(position = position_dodge(width = 0.9), vjust = -1) +
  ylim(0, 650) +
  # Locations are mapped to x and counts to y (there is no coord_flip()),
  # so the axis titles have to follow that orientation.
  xlab("Location") +
  ylab("Number of Property") +
  theme_hc() +
  ggtitle("Number of Properties by Location")
print(p)
ggsave(file.path(here::here(), "fig/01_num_prop_location.png"), p)
# Compare the mean review rating and the mean number of reviews between the
# two `online_visits` groups in a dodged bar chart, then print and save it.
p <- property %>%
  group_by(online_visits) %>%
  summarize(
    mean_rating = mean(avg_review_rating_num),
    mean_num_reviews = mean(number_of_reviews_on_the_property_num)
  ) %>%
  ungroup() %>%
  dplyr::rename(`Online Visits` = online_visits) %>%
  # Long format: one row per (group, metric) so both metrics share one chart.
  gather(key = "metrics", value = "value", -`Online Visits`) %>%
  # The second metric is the mean number of reviews, so label it that way.
  mutate(
    metrics = if_else(
      metrics == "mean_rating",
      "Average Rating",
      "Average Number of Reviews"
    )
  ) %>%
  ggplot(
    aes(x = metrics, y = value, fill = `Online Visits`, label = round(value, 2))
  ) +
  geom_col(position = "dodge") +
  geom_text(position = position_dodge(width = 0.9), vjust = -1) +
  xlab("Metrics") +
  ylab("Value") +
  theme_hc() +
  ggtitle("Mean Ratings and Mean Number of Reviews by Online Visit Status") +
  ylim(0, 15)
print(p)
ggsave(file.path(here::here(), "fig/02_avg_rating_review_by_tour.png"), p)
# Same comparison of mean rating and mean number of reviews by
# `online_visits`, with one facet per location and horizontal bars.
p <- property %>%
  group_by(location, online_visits) %>%
  summarize(
    mean_rating = mean(avg_review_rating_num),
    mean_num_reviews = mean(number_of_reviews_on_the_property_num)
  ) %>%
  ungroup() %>%
  dplyr::rename(`Online Visits` = online_visits) %>%
  # Keep `location` and the group as identifiers; stack the two metrics.
  gather(key = "metrics", value = "value", -`Online Visits`, -location) %>%
  # The second metric is the mean number of reviews, so label it that way.
  mutate(
    metrics = if_else(
      metrics == "mean_rating",
      "Average Rating",
      "Average Number of Reviews"
    )
  ) %>%
  ggplot(
    aes(x = metrics, y = value, fill = `Online Visits`, label = round(value, 2))
  ) +
  geom_col(position = "dodge") +
  # Bars are flipped below, so the labels are nudged horizontally.
  geom_text(position = position_dodge(width = 0.9), hjust = -0.15) +
  xlab("Metrics") +
  ylab("Value") +
  theme_hc() +
  ggtitle("Mean Ratings and Mean Number of Reviews by Online Visit Status") +
  ylim(0, 20) +
  coord_flip() +
  facet_wrap(~location)
print(p)
ggsave(
  file.path(here::here(), "fig/02B_avg_rating_review_by_tour_by_loc.png"),
  p,
  width = 8,
  height = 4,
  dpi = 300
)
There is weak evidence that the properties in the online visit group have higher average ratings than those in the non-online visit group.
# Apply vectorize_ttest_for_property() to each review variable, passing the
# full `property` table as its second argument; the resulting list is
# auto-printed.
continuous_vars <- c(
  "avg_review_rating_num",
  "number_of_reviews_on_the_property_num"
)
map(
  continuous_vars,
  function(var_name) vectorize_ttest_for_property(var_name, property)
)
When breaking into specific locations: There is weak evidence that the properties in the online visit group have higher average ratings than the non-online visit group in Texas. There is weak evidence that the properties in the online visit group have a lower average number of reviews than the non-online visit group in Nevada.
# Repeat the review-variable tests separately on each location subset,
# printing a banner before each location's results.
continuous_vars <- c(
  "avg_review_rating_num",
  "number_of_reviews_on_the_property_num"
)

cat("================= FLORIDA ================= \n\n")
map(
  continuous_vars,
  function(var_name) vectorize_ttest_for_property(var_name, property_FL)
)

cat("================= TEXAS ================= \n\n")
map(
  continuous_vars,
  function(var_name) vectorize_ttest_for_property(var_name, property_TX)
)

cat("================= NEVADA ================= \n\n")
map(
  continuous_vars,
  function(var_name) vectorize_ttest_for_property(var_name, property_NV)
)
# Mean bedrooms, bathrooms and maximum occupancy for each `online_visits`
# group, drawn as dodged bars with the rounded value above each bar.
p <- property %>%
  group_by(online_visits) %>%
  dplyr::summarise(
    `Avg # of Bedrooms` = mean(bedrooms),
    `Avg # of Bathrooms` = mean(bathrooms),
    `Avg Maximum Occupancy` = mean(maximum_occupancy)
  ) %>%
  ungroup() %>%
  dplyr::rename(`Online Visits` = online_visits) %>%
  # Stack the three averages into metric/value pairs for plotting.
  gather(key = "metrics", value = "value", -`Online Visits`) %>%
  ggplot(
    aes(x = metrics, y = value, fill = `Online Visits`, label = round(value, 2))
  ) +
  geom_col(position = "dodge") +
  geom_text(position = position_dodge(width = 0.9), vjust = -1) +
  ylim(0, 10) +
  labs(
    x = "Metrics",
    y = "Value",
    title = "Average Property Size by Online Visit Option"
  ) +
  theme_hc()
print(p)
ggsave(file.path(here::here(), "fig/03_prop_size_by_online_visit.png"), p)
# Mean bedrooms, bathrooms and maximum occupancy by `online_visits`, with
# one facet per location and horizontal bars.
p <- property %>%
  group_by(location, online_visits) %>%
  dplyr::summarise(
    `Avg # of Bedrooms` = mean(bedrooms),
    `Avg # of Bathrooms` = mean(bathrooms),
    `Avg Maximum Occupancy` = mean(maximum_occupancy)
  ) %>%
  ungroup() %>%
  dplyr::rename(`Online Visits` = online_visits) %>%
  # Keep `location` and the group as identifiers; stack the three averages.
  gather(key = "metrics", value = "value", -`Online Visits`, -location) %>%
  ggplot(
    aes(x = metrics, y = value, fill = `Online Visits`, label = round(value, 2))
  ) +
  geom_col(position = "dodge") +
  # Bars are flipped below, so the labels are nudged horizontally.
  geom_text(position = position_dodge(width = 0.9), hjust = -0.15) +
  ylim(0, 15) +
  coord_flip() +
  facet_wrap(~location) +
  labs(
    x = "Metrics",
    y = "Value",
    title = "Average Property Size by Online Visit Option"
  ) +
  theme_hc()
print(p)
ggsave(
  file.path(here::here(), "fig/03B_prop_size_by_online_visit_by_loc.png"),
  p,
  width = 8,
  height = 4,
  dpi = 300
)
With p-values > 0.05, none of these variables differ significantly between the online visit group and the non-online visit group.
# Apply vectorize_ttest_for_property() to each property-size variable,
# passing the full `property` table as its second argument; the resulting
# list is auto-printed.
continuous_vars <- c("bedrooms", "bathrooms", "maximum_occupancy")
map(
  continuous_vars,
  function(var_name) vectorize_ttest_for_property(var_name, property)
)
# Repeat the property-size tests separately on each location subset,
# printing a banner before each location's results.
continuous_vars <- c("bedrooms", "bathrooms", "maximum_occupancy")

cat("================= FLORIDA ================= \n\n")
map(
  continuous_vars,
  function(var_name) vectorize_ttest_for_property(var_name, property_FL)
)

cat("================= TEXAS ================= \n\n")
map(
  continuous_vars,
  function(var_name) vectorize_ttest_for_property(var_name, property_TX)
)

cat("================= NEVADA ================= \n\n")
map(
  continuous_vars,
  function(var_name) vectorize_ttest_for_property(var_name, property_NV)
)
# Mean of the two indicator columns (instant booking, premier partner badge)
# for each `online_visits` group, labelled as percentages on dodged bars.
p <- property %>%
  group_by(online_visits) %>%
  dplyr::summarise(
    instant_booking_enabled = mean(instant_booking_enabled),
    premier_partner_badge = mean(premier_partner_badge)
  ) %>%
  ungroup() %>%
  dplyr::rename(`Online Visits` = online_visits) %>%
  # Stack the two shares into metric/value pairs for plotting.
  gather(key = "metrics", value = "value", -`Online Visits`) %>%
  ggplot(
    aes(
      x = metrics,
      y = value,
      fill = `Online Visits`,
      label = scales::percent(value, accuracy = 0.01)
    )
  ) +
  geom_col(position = "dodge") +
  geom_text(position = position_dodge(width = 0.9), vjust = -1) +
  ylim(0, 0.4) +
  labs(
    x = "Metrics",
    y = "Value",
    title = "Percentage of Properties with Instant Booking and Premier Partner Badge"
  ) +
  theme_hc()
print(p)
ggsave(file.path(here::here(), "fig/04_IB_and PP_type_by_tour.png"), p)
# Mean of the two indicator columns by `online_visits`, with one facet per
# location, horizontal bars and percentage labels.
p <- property %>%
  group_by(location, online_visits) %>%
  dplyr::summarise(
    instant_booking_enabled = mean(instant_booking_enabled),
    premier_partner_badge = mean(premier_partner_badge)
  ) %>%
  ungroup() %>%
  dplyr::rename(`Online Visits` = online_visits) %>%
  # Keep `location` and the group as identifiers; stack the two shares.
  gather(key = "metrics", value = "value", -`Online Visits`, -location) %>%
  ggplot(
    aes(
      x = metrics,
      y = value,
      fill = `Online Visits`,
      label = scales::percent(value, accuracy = 0.01)
    )
  ) +
  geom_col(position = "dodge") +
  # Bars are flipped below, so the labels are nudged horizontally.
  geom_text(position = position_dodge(width = 0.9), hjust = -0.15) +
  ylim(0, 0.6) +
  coord_flip() +
  facet_wrap(~location) +
  labs(
    x = "Metrics",
    y = "Value",
    title = "Percentage of Properties with Instant Booking and Premier Partner Badge"
  ) +
  theme_hc()
print(p)
ggsave(
  file.path(here::here(), "fig/04B_IB_and PP_type_by_tour_by_loc.png"),
  p,
  width = 10,
  height = 4,
  dpi = 300
)
With p-values > 0.05, instant_booking_enabled and premier_partner_badge are not significantly different between the online visit group and the non-online visit group.
# Apply vectorize_propztest_for_property() to each indicator variable,
# passing the full `property` table as its second argument; the resulting
# list is auto-printed.
indicator_vars <- c("instant_booking_enabled", "premier_partner_badge")
map(
  indicator_vars,
  function(var_name) vectorize_propztest_for_property(var_name, property)
)
There is weak evidence (p-value < 0.1) that, in Texas, more properties have instant booking enabled in the online visit group than in the non-online visit group.
# Repeat the indicator-variable tests separately on each location subset,
# printing a banner before each location's results.
indicator_vars <- c("instant_booking_enabled", "premier_partner_badge")

cat("================= FLORIDA ================= \n\n")
map(
  indicator_vars,
  function(var_name) vectorize_propztest_for_property(var_name, property_FL)
)

cat("================= TEXAS ================= \n\n")
map(
  indicator_vars,
  function(var_name) vectorize_propztest_for_property(var_name, property_TX)
)

cat("================= NEVADA ================= \n\n")
map(
  indicator_vars,
  function(var_name) vectorize_propztest_for_property(var_name, property_NV)
)
# Horizontal dodged bars with the number of properties of each type, split
# by `online_visits`; types are ordered by their counts via reorder().
p <- property %>%
  dplyr::count(property_type, online_visits) %>%
  dplyr::rename(`Online Visits` = online_visits) %>%
  ggplot(aes(x = reorder(property_type, n), y = n, fill = `Online Visits`)) +
  geom_col(position = "dodge") +
  coord_flip() +
  # Titles follow the aesthetics, not the flipped screen axes.
  labs(
    x = "Type",
    y = "Number of Property",
    title = "Number of Property by Type by Online Visit Option"
  ) +
  theme_hc()
print(p)
ggsave(file.path(here::here(), "fig/05_number_prop_by_type.png"), p)
# Number of properties of each type by `online_visits`, drawn as horizontal
# dodged bars with one facet per location.
p <- property %>%
  dplyr::count(location, property_type, online_visits) %>%
  dplyr::rename(`Online Visits` = online_visits) %>%
  ggplot(aes(x = reorder(property_type, n), y = n, fill = `Online Visits`)) +
  geom_col(position = "dodge") +
  coord_flip() +
  facet_wrap(~location) +
  # Titles follow the aesthetics, not the flipped screen axes.
  labs(
    x = "Type",
    y = "Number of Property",
    title = "Number of Property by Type by Online Visit Option"
  ) +
  theme_hc()
print(p)
ggsave(
  file.path(here::here(), "fig/05B_number_prop_by_type_by_loc.png"),
  p,
  width = 8,
  height = 4,
  dpi = 300
)
# Attach property attributes to the metrics rows, then average each metric
# (ignoring NAs) over the rows of every (date, online_visits) group.
metrics_by_months <- left_join(metrics, property, by = "property_id") %>%
  # NOTE(review): year_month is not a grouping key, so the summary below
  # drops it; confirm whether grouping by month was intended instead.
  mutate(year_month = as.yearmon(date)) %>%
  group_by(date, online_visits) %>%
  dplyr::summarise(
    bookings_per_prop = mean(bookings, na.rm = TRUE),
    booking_amount_per_prop = mean(booking_amount, na.rm = TRUE),
    booking_requests_per_prop = mean(booking_requests, na.rm = TRUE),
    property_page_views_per_visit_per_prop = mean(
      property_page_views_visits,
      na.rm = TRUE
    ),
    displays_in_searchpage_per_prop = mean(
      number_of_times_the_property_got_displayed_in_search_page,
      na.rm = TRUE
    )
  ) %>%
  ungroup()
# Draw one trend chart: the raw series plus a loess smoother for each
# `online_visits` group, with a dashed vertical marker at 2019-01-01.
# The chart is printed, saved under the project root and returned invisibly.
#
# data      : data frame with `date`, `online_visits` and the column `y_var`.
# y_var     : name (string) of the column mapped to the y axis.
# y_label   : y-axis title.
# title     : plot title.
# file_name : output path relative to the project root.
plot_metric_by_month <- function(data, y_var, y_label, title, file_name) {
  trend_plot <- data %>%
    dplyr::rename(`Online Visits` = online_visits) %>%
    # .data[[y_var]] looks the column up by its string name.
    ggplot(aes(x = date, y = .data[[y_var]], color = `Online Visits`)) +
    geom_line() +
    geom_smooth(method = "loess", se = FALSE) +
    xlab("Month") +
    ylab(y_label) +
    ggtitle(title) +
    geom_vline(
      xintercept = as.Date("2019-01-01"),
      linetype = "longdash",
      color = "black",
      size = 0.3
    ) +
    theme_hc()
  print(trend_plot)
  ggsave(file.path(here::here(), file_name), trend_plot)
  invisible(trend_plot)
}

# The five charts differ only in the plotted column, labels and output file.
# `p` keeps holding the most recent plot, as before.
p <- plot_metric_by_month(
  metrics_by_months,
  y_var = "booking_amount_per_prop",
  y_label = "Average Booking Amount ($) Per Property",
  title = "Average Booking Amount Per Property by Month",
  file_name = "fig/06A_booking_amount_per_prop_nolo.png"
)
p <- plot_metric_by_month(
  metrics_by_months,
  y_var = "bookings_per_prop",
  y_label = "Average Booking Per Property",
  title = "Average Booking Per Property by Month",
  file_name = "fig/06B_bookings_per_prop_nolo.png"
)
p <- plot_metric_by_month(
  metrics_by_months,
  y_var = "booking_requests_per_prop",
  y_label = "Average Booking Requests Per Property",
  title = "Average Booking Requests Per Property by Month",
  file_name = "fig/06C_booking_requests_per_prop_nolo.png"
)
p <- plot_metric_by_month(
  metrics_by_months,
  y_var = "property_page_views_per_visit_per_prop",
  y_label = "Average Page Views Per Visit Per Property",
  title = "Average Page Views Per Visit Per Property by Month",
  file_name = "fig/06D_page_views_per_visit_per_prop_nolo.png"
)
p <- plot_metric_by_month(
  metrics_by_months,
  y_var = "displays_in_searchpage_per_prop",
  y_label = "Average Displays in Search Page Per Property",
  title = "Average Displays in Search Page Per Property by Month",
  file_name = "fig/06E_displays_in_searchpage_per_prop_nolo.png"
)
# Attach property attributes to the metrics rows, then average each metric
# (ignoring NAs) over the rows of every (date, online_visits, location)
# group.
metrics_by_months_by_loc <- left_join(metrics, property, by = "property_id") %>%
  # NOTE(review): year_month is not a grouping key, so the summary below
  # drops it; confirm whether grouping by month was intended instead.
  mutate(year_month = as.yearmon(date)) %>%
  group_by(date, online_visits, location) %>%
  dplyr::summarise(
    bookings_per_prop = mean(bookings, na.rm = TRUE),
    booking_amount_per_prop = mean(booking_amount, na.rm = TRUE),
    booking_requests_per_prop = mean(booking_requests, na.rm = TRUE),
    property_page_views_per_visit_per_prop = mean(
      property_page_views_visits,
      na.rm = TRUE
    ),
    displays_in_searchpage_per_prop = mean(
      number_of_times_the_property_got_displayed_in_search_page,
      na.rm = TRUE
    )
  ) %>%
  ungroup()
# Draw one trend chart with a facet row per location: the raw series plus a
# loess smoother for each `online_visits` group, with a dashed vertical
# marker at 2019-01-01. The chart is printed, saved as a 5 x 7 image at
# 300 dpi under the project root, and returned invisibly.
#
# data      : data frame with `date`, `online_visits`, `location` and the
#             column `y_var`.
# y_var     : name (string) of the column mapped to the y axis.
# y_label   : y-axis title.
# title     : plot title.
# file_name : output path relative to the project root.
plot_metric_by_month_by_loc <- function(data, y_var, y_label, title,
                                        file_name) {
  trend_plot <- data %>%
    dplyr::rename(`Online Visits` = online_visits) %>%
    # .data[[y_var]] looks the column up by its string name.
    ggplot(aes(x = date, y = .data[[y_var]], color = `Online Visits`)) +
    geom_line() +
    geom_smooth(method = "loess", se = FALSE) +
    xlab("Month") +
    ylab(y_label) +
    ggtitle(title) +
    geom_vline(
      xintercept = as.Date("2019-01-01"),
      linetype = "longdash",
      color = "black",
      size = 0.3
    ) +
    theme_hc() +
    facet_wrap(~location, nrow = 3)
  print(trend_plot)
  ggsave(
    file.path(here::here(), file_name),
    trend_plot,
    height = 7,
    width = 5,
    dpi = 300
  )
  invisible(trend_plot)
}

# The five charts differ only in the plotted column, labels and output file.
# `p` keeps holding the most recent plot, as before.
p <- plot_metric_by_month_by_loc(
  metrics_by_months_by_loc,
  y_var = "booking_amount_per_prop",
  y_label = "Average Booking Amount ($) Per Property",
  title = "Average Booking Amount Per Property by Month",
  file_name = "fig/06A_booking_amount_per_prop_byloc.png"
)
p <- plot_metric_by_month_by_loc(
  metrics_by_months_by_loc,
  y_var = "bookings_per_prop",
  y_label = "Average Booking Per Property",
  title = "Average Booking Per Property by Month",
  file_name = "fig/06B_bookings_per_prop_byloc.png"
)
p <- plot_metric_by_month_by_loc(
  metrics_by_months_by_loc,
  y_var = "booking_requests_per_prop",
  y_label = "Average Booking Requests Per Property",
  title = "Average Booking Requests Per Property by Month",
  file_name = "fig/06C_booking_requests_per_prop_byloc.png"
)
p <- plot_metric_by_month_by_loc(
  metrics_by_months_by_loc,
  y_var = "property_page_views_per_visit_per_prop",
  y_label = "Average Page Views Per Visit Per Property",
  title = "Average Page Views Per Visit Per Property by Month",
  file_name = "fig/06D_page_views_per_visit_per_prop_byloc.png"
)
p <- plot_metric_by_month_by_loc(
  metrics_by_months_by_loc,
  y_var = "displays_in_searchpage_per_prop",
  y_label = "Average Displays in Search Page Per Property",
  title = "Average Displays in Search Page Per Property by Month",
  file_name = "fig/06E_displays_in_searchpage_per_prop_byloc.png"
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.