# Tools for translating extracted data into plottable formats.
#' Calculate the number of active users in a set of date ranges.
#'
#' @param sesh_dur_data A dataframe of user_ids and the dates when they took a
#' platform action.
#' @param range_beginning A data.table with a single column of dates which
#' denote the first days of each date range.
#' @return A data frame showing how many active users there were in each date
#' range.
#' @import data.table
calculate_active_users <- function(sesh_dur_data
, range_beginning){
copy_sesh_dur_data <- data.table::copy(sesh_dur_data)
copy_sesh_dur_data[, rollDate:= platform_action_date]
data.table::setkey(copy_sesh_dur_data, rollDate)
range_beginning[, rollDate:= range_beginning_date]
data.table::setkey(range_beginning, rollDate)
range_beginning[
copy_sesh_dur_data
, .(user_id, platform_action_date, range_beginning_date)
, roll = T
][
, .(number_of_active_users = length(unique(user_id)))
, by = range_beginning_date
]
}
#' Loop calculate_active_users over a list of date ranges.
#'
#' @param date_ranges A list of date ranges, of the form generated by
#' standardmetrics::define_date_ranges.
#' @return A list of data frames that show the number of active users that fall
#' within each date range.
#' @import data.table
#' @export
loop_calculate_active_users <- function(date_ranges
, ...){
lapply(date_ranges
, FUN = function(r){
calculate_active_users(..., r)
})
}
#' Calculate DAU to MAU ratio by month.
#'
#' @param MAU_count_by_month A data.table: (month_beginning MAU_count)
#' @param DAU_count_by_day A data.table: (day DAU_count)
#' @return A data frame showing the average DAU to MAU ratio for each month
#' specified by months_to_calc.
#' @import data.table
#' @export
calculate_DAU_MAU_ratio <- function(MAU_count_by_month
, DAU_count_by_day){
copy_MAU_count_by_month <- data.table::copy(MAU_count_by_month)
copy_DAU_count_by_day <- data.table::copy(DAU_count_by_day)
copy_MAU_count_by_month[, c('MAU_count'
, 'month_beginning_date'
, "rollMonth")
:=
.(number_of_active_users
, range_beginning_date
, range_beginning_date)]
data.table::setkey(copy_MAU_count_by_month, rollMonth)
copy_DAU_count_by_day[, c('DAU_count'
, 'day_beginning_date'
, "rollDate")
:=
.(number_of_active_users
, range_beginning_date
, range_beginning_date)]
data.table::setkey(copy_DAU_count_by_day, rollDate)
copy_MAU_count_by_month[
copy_DAU_count_by_day
, .(month_beginning_date, day_beginning_date, MAU_count, DAU_count)
, roll = T
][
, .(DAU_to_MAU_ratio = mean(DAU_count)/mean(MAU_count))
, by = month_beginning_date
]
}
#' Calculate overall DAU to MAU ratio for a time interval.
#'
#' @param months_beginning A data.table (range_beginning_date) of months to
#' include in the calculation.
#' @param max_date The largest date to use in the calculation. Any of the
#' range_beginning_dates in months_beginning which is less than one month
#' before max_date is dropped.
#' @param sesh_dur_data A data.table (user_id, platform_action_date).
#' @return A number representing the overall DAU/MAU ratio for the time
#' interval (min_date, max_date). The ratio is only calculated for complete
#' months within the time interval. So for example, (jan 3, march 20) gets
#' rounded to (feb 1, feb 28).
#' @import data.table
#' @export
calculate_overall_ratio <- function(months_beginning
, max_date = Sys.Date()
, sesh_dur_data = sesh_dur_date){
monthdiff <- function(date1,date2){
length(seq(from=date1, to=date2, by='month')) - 1
}
monthdiff2 <- Vectorize(FUN = monthdiff, vectorize.args = "date1")
months_beginning_copy <- data.table::copy(months_beginning)[
, ':='(rollDate = range_beginning_date
, month_length = c(diff(range_beginning_date), 0)
)
]
data.table::setkey(months_beginning_copy, rollDate)
sesh_dur_data_copy <- data.table::copy(sesh_dur_data)[
, rollDate := platform_action_date
]
data.table::setkey(sesh_dur_data_copy, rollDate)
months_beginning_copy[
sesh_dur_data_copy
, .(month_length
, range_beginning_date
, platform_action_date
, user_id
)
, roll = T
][
!is.na(month_length)
, .(days_active = length(unique(platform_action_date))
, month_days_possible = mean(month_length)
)
, by = c('user_id', 'range_beginning_date')
][
, .(overall_ratio = sum(days_active)/sum(as.numeric(month_days_possible)))
]
}
#' Count signups per week (as total, and as perentage of signups).
#'
#' @param relative_pa_datetimes A data frame: (user_id, datetime) giving the
#' moments when a user took a platform action.
#' @param user_oneD7 A data frame: (user_id oneD7) that tells us which users
#' are 1D7s.
#' @param range_beginning A data.table with a single column of dates which
#' denote the first days of each date range.
#' @param user_set A subset of users to restrict the calculation to. If not set
#' (ie NULL) then all users available in relative_pa_datetimes are used.
#' @return A data frame: (week_beginning, number_of_signups)
#' @import data.table
#' @export
count_signups <- function(relative_pa_datetimes
, user_oneD7
, range_beginning
, user_set = NULL){
rpd <- data.table::copy(relative_pa_datetimes)
u1 <- data.table::copy(user_oneD7)
if(!is.null(user_set)){
rpd <- rpd[user_id %in% user_set,,]
u1 <- u1[user_id %in% user_set,,]
}
user_signup_date <- rpd[
, .(signup_date = min(signup_date))
, by = user_id
]
data.table::setkey(user_signup_date, user_id)
data.table::setkey(u1, user_id)
range_beginning[, rollDate := range_beginning_date]
data.table::setkey(range_beginning, rollDate)
user_signup_week <- u1[user_signup_date][, rollDate := signup_date]
data.table::setkey(user_signup_week, rollDate)
range_beginning[user_signup_week, roll = T][
, .(
number_of_signups = length(unique(user_id))
, number_of_1d7_signups = length(unique(user_id[oneD7]))
)
, by = range_beginning_date
][
, c('range_beginning_date'
, 'number_of_signups'
, 'number_of_1d7_signups'
, 'percent_of_1d7_signups'
) :=
.(
range_beginning_date
, number_of_signups
, number_of_1d7_signups
, number_of_1d7_signups/number_of_signups
)
]
}
#' Count signups per week (user classes > 2)
#'
#' @param relative_pa_datetimes A data frame: (user_id, datetime) giving the
#' moments when a user took a platform action.
#' @param user_classes A data frame: (user_id, user_class) that groups users into
#' classes.
#' @param range_beginning A data.table with a single column of dates which
#' denote the first days of each date range.
#' @param user_set A subset of users to restrict the calculation to. If not set
#' (ie NULL) then all users available in relative_pa_datetimes are used.
#' @return A data frame: (week_beginning, number_of_signups)
#' @import data.table
#' @export
count_signups_multiclass <- function(relative_pa_datetimes
, user_classes
, range_beginning
, user_set = NULL){
rpd <- data.table::copy(relative_pa_datetimes)
u1 <- data.table::copy(user_classes)
if(!is.null(user_set)){
rpd <- rpd[user_id %in% user_set,,]
u1 <- u1[user_id %in% user_set,,]
}
user_signup_date <- rpd[
, .(signup_date = min(signup_date))
, by = user_id
]
data.table::setkey(user_signup_date, user_id)
data.table::setkey(u1, user_id)
range_beginning[, rollDate := range_beginning_date]
data.table::setkey(range_beginning, rollDate)
user_signup_week <- u1[user_signup_date][, rollDate := signup_date]
data.table::setkey(user_signup_week, rollDate)
range_beginning[user_signup_week, roll = T][
, .(
number_of_signups = length(unique(user_id))
)
, by = .(range_beginning_date, user_class)
]
}
#' Get user retention data (1d7s vs all).
#'
#' @param relative_pa_datetimes A data frame: (user_id, datetime) giving the
#' moments when a user took a platform action.
#' @param user_oneD7 A data frame: (user_id oneD7) that tells us which users
#' are 1D7s.
#' @param max_date The most recent date of activity that appears in
#' sesh_dur_date.
#' @return A data frame: (weeks_since_signup, pct_users_active)
#' @import data.table
#' @export
get_user_retention_data <- function(relative_pa_datetimes
, user_oneD7
, max_date = max(sesh_dur_date$platform_action_date)){
rpd <- data.table::copy(relative_pa_datetimes)
u1 <- data.table::copy(user_oneD7)
data.table::setkey(rpd, user_id)
data.table::setkey(u1, user_id)
x <- rpd[
, c('weeks_since_signup'
, 'weeks_eligible'
) :=
.(floor(as.numeric(relative_date)/7)
, floor(as.numeric(max_date - signup_date)/7)
)
][u1]
active_user_count <- x[
, .(number_of_active_users = length(unique(user_id)) )
, by = .(weeks_since_signup, oneD7)
]
eligible_user_count <- x[
, .(number_of_eligible_users = length(unique(user_id)) )
, by = .(weeks_eligible, oneD7)
][
order(desc(weeks_eligible))
, cumulative_eligible_users := cumsum(number_of_eligible_users)
, by = oneD7
]
data.table::setkey(active_user_count, weeks_since_signup, oneD7)
data.table::setkey(eligible_user_count, weeks_eligible, oneD7)
active_user_count[
eligible_user_count
, .(percent_active = number_of_active_users/cumulative_eligible_users
, number_of_active_users
, cumulative_eligible_users
, oneD7
, weeks_since_signup
)
]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.