# R/snackapp_usage_p_sum.R

# Defines functions snackapp_usage_p_sum

# Documented in snackapp_usage_p_sum

#' snackapp_usage_p_sum
#'
#' @description A function to generate a summary of SnackApp usage data for
#'   each participant. Rows whose `Metric` is `"unknown"` are discarded first.
#'   The result combines three parts, all joined on `id`:
#'   \itemize{
#'     \item time between consecutive `"app-state-changed"` events (total,
#'       mean and standard deviation, in seconds), with the time that follows
#'       an `"inactive"` state subtracted from the total;
#'     \item per-section time (total, average per bout, standard deviation)
#'       for the stats, goals, forum, case studies, planner, FAQ, dashboard,
#'       profile and onboarding sections, plus a notification count;
#'     \item the output of `participant_snack_count()`.
#'   }
#'
#' @param data A dataframe of SnackApp usage data. The function reads the
#'   columns `id`, `date`, `Event`, `Metric` and `date_lag`. The inactive-time
#'   step also evaluates `diff` against `data`, so `data` presumably already
#'   carries a `diff` column (to be confirmed against the caller). The
#'   zero-to-`NA` step addresses columns 8 to 29 by position, so `data` needs
#'   at least 10 columns.
#'
#' @return A dataframe summarising SnackApp usage data per participant (one
#'   row per participant). Columns produced here are `id`, `total_time`,
#'   `average_time`, `std_dev_time`, `inactive_total_time`, `n_onboarding`,
#'   the `total_*`, `average_*` and `stdev_*` section columns and
#'   `notificaition_count` (spelling kept for backward compatibility),
#'   followed by whatever columns `participant_snack_count()` returns.
#'   Numeric values are rounded to 2 decimal places; negative, infinite and
#'   missing section values are reported as 0.
#' @export
#'
#' @examples
#' \dontrun{
#' participant_summary <- snackapp_usage_p_sum(usage_data)
#' }
snackapp_usage_p_sum <- function(data) {
  # Number of separate bouts in a 1/NA flag vector: every run of consecutive
  # 1s counts once. rle() puts each NA in its own run, so NAs split bouts.
  count_bouts <- function(flag) {
    sum(rle(flag)$values == 1, na.rm = TRUE)
  }

  # filter out app-state-changed unknown (rows with a missing Metric are
  # dropped as well, because NA != "unknown" evaluates to NA)
  df <- data %>%
    dplyr::filter(Metric != "unknown")

  ## time between consecutive app-state-changed events, per participant ##

  state_changes <- df %>%
    dplyr::select(id, date, Event, Metric) %>%
    dplyr::group_by(id) %>%
    dplyr::filter(Event == "app-state-changed") %>%
    dplyr::mutate(
      date = as.POSIXct(date),
      date_lag = dplyr::lag(date),
      diff = difftime(date, date_lag, units = "sec")
    ) %>%
    # keep only the rows that end an interval (the state left "active")
    dplyr::filter(Metric != "active")

  ## inactive flag sum - iphone ##

  # NOTE(review): this runs on `df`, not on `state_changes`, so `diff` must be
  # a column of the input data - confirm against the caller.
  inactive <- df %>%
    dplyr::group_by(id) %>%
    dplyr::mutate(
      inactive_flag = dplyr::lag(dplyr::if_else(Metric == "inactive", 1, 0))
    ) %>%
    dplyr::filter(inactive_flag == 1) %>%
    dplyr::summarise(
      inactive_total_time = sum(as.numeric(diff), na.rm = TRUE)
    )

  state_change_summary <- state_changes %>%
    dplyr::group_by(id) %>%
    dplyr::summarise(
      total_time = as.numeric(sum(diff, na.rm = TRUE)),
      average_time = as.numeric(mean(diff, na.rm = TRUE)),
      std_dev_time = as.numeric(stats::sd(diff, na.rm = TRUE))
    )

  # Participants without any inactive time get 0 so the subtraction is a no-op.
  state_change_summary <- state_change_summary %>%
    dplyr::left_join(inactive, by = "id") %>%
    replace(is.na(.), 0) %>%
    dplyr::mutate(
      total_time = total_time - inactive_total_time
    )

  state_change_summary$id <- as.numeric(state_change_summary$id)
  state_change_summary <- round(state_change_summary, digits = 2)

  ## time spent in each app section ##

  # One entry per section: flag column, time column, and the pattern looked
  # for in Metric. The order fixes the column order of `df`, which the
  # positional 8:29 index below depends on - do not reorder.
  sections <- list(
    list(flag = "my_stat", diff = "stat_diff", pattern = "my-stat"),
    list(flag = "my_goals", diff = "goal_diff", pattern = "my-goal"),
    list(flag = "forum", diff = "forum_diff", pattern = "forum"),
    list(
      flag = "case_studies", diff = "case_studies_diff",
      pattern = "case-studies"
    ),
    list(flag = "planner", diff = "planner_diff", pattern = "planner"),
    list(flag = "faq", diff = "faq_diff", pattern = "faq"),
    list(flag = "dashboard", diff = "dashboard_diff", pattern = "dashboard"),
    list(flag = "profile", diff = "profile_diff", pattern = "profile"),
    list(
      flag = "onboarding", diff = "onboarding_diff",
      pattern = "onboarding"
    )
  )

  # Seconds between each row and its predecessor, taken from the input's own
  # date / date_lag columns.
  elapsed <- as.numeric(difftime(df$date, df$date_lag, units = "sec"))

  # The flag marks rows whose *previous* row was in the section. The lead on
  # the _diff column moves the elapsed time back onto the section's own row -
  # this should not change calculations but makes intermediary products more
  # accurate.
  # NOTE(review): lag()/lead() run over the whole frame here, not per
  # participant, so values can carry across participant boundaries.
  for (section in sections) {
    in_section <- dplyr::if_else(
      stringr::str_detect(df$Metric, section$pattern), 1, 0
    )
    df[[section$flag]] <- dplyr::lag(in_section)
    df[[section$diff]] <- dplyr::lead(
      dplyr::if_else(df[[section$flag]] == 1, elapsed, 0), 1
    )
  }

  df$notifications <- dplyr::if_else(
    stringr::str_detect(df$Event, "notification"), 1, 0
  )

  # Zeros mean "not in this section"; blank them so they do not enter the
  # sd() calculations or the bout counts.
  # NOTE(review): positional index - assumes the columns created above land in
  # positions 8 to 29; confirm against the shape of the input data.
  df[, 8:29][df[, 8:29] == 0] <- NA

  usage_summary <- df %>%
    dplyr::group_by(id) %>%
    dplyr::summarise(
      # Bouts are counted on the participant's own rows. Indexing `df$...`
      # here would count bouts over every participant at once.
      n_onboarding = count_bouts(onboarding),
      total_stat = sum(stat_diff, na.rm = TRUE),
      total_goal = sum(goal_diff, na.rm = TRUE),
      total_forum = sum(forum_diff, na.rm = TRUE),
      total_case_studies = sum(case_studies_diff, na.rm = TRUE),
      total_planner = sum(planner_diff, na.rm = TRUE),
      total_faq = sum(faq_diff, na.rm = TRUE),
      total_dashboard = sum(dashboard_diff, na.rm = TRUE),
      total_profile = sum(profile_diff, na.rm = TRUE),
      total_onboarding = sum(onboarding_diff, na.rm = TRUE),
      # average time per bout; x / 0 and 0 / 0 are mapped to 0 further down
      average_stat = total_stat / count_bouts(my_stat),
      average_goal = total_goal / count_bouts(my_goals),
      average_forum = total_forum / count_bouts(forum),
      average_case_studies = total_case_studies / count_bouts(case_studies),
      average_planner = total_planner / count_bouts(planner),
      average_faq = total_faq / count_bouts(faq),
      average_dashboard = total_dashboard / count_bouts(dashboard),
      average_profile = total_profile / count_bouts(profile),
      average_onboarding = total_onboarding / count_bouts(onboarding),
      stdev_stat = stats::sd(stat_diff, na.rm = TRUE),
      stdev_goal = stats::sd(goal_diff, na.rm = TRUE),
      stdev_forum = stats::sd(forum_diff, na.rm = TRUE),
      stdev_case_studies = stats::sd(case_studies_diff, na.rm = TRUE),
      stdev_planner = stats::sd(planner_diff, na.rm = TRUE),
      stdev_faq = stats::sd(faq_diff, na.rm = TRUE),
      stdev_dashboard = stats::sd(dashboard_diff, na.rm = TRUE),
      stdev_profile = stats::sd(profile_diff, na.rm = TRUE),
      stdev_onboarding = stats::sd(onboarding_diff, na.rm = TRUE),
      # na.rm is required: the zero-to-NA step above can blank the
      # non-notification rows, which would otherwise turn the count into NA.
      # The misspelled column name is kept because callers may rely on it.
      notificaition_count = sum(notifications, na.rm = TRUE)
    )

  # Negative, infinite (division by zero bouts) and missing values all end up
  # reported as 0.
  usage_summary[usage_summary < 0] <- NA
  usage_summary <- do.call(
    data.frame,
    lapply(usage_summary, function(x) replace(x, is.infinite(x), NA))
  )
  usage_summary[is.na(usage_summary)] <- 0
  usage_summary$id <- as.numeric(usage_summary$id)
  usage_summary <- round(usage_summary, digits = 2)

  bouts <- participant_snack_count(df)

  participant_summary <- dplyr::full_join(
    state_change_summary, usage_summary,
    by = "id"
  )
  participant_summary <- dplyr::full_join(
    participant_summary, bouts,
    by = c("id" = "id")
  )

  participant_summary
}
# lboro-climb/snackapp.usage documentation built on March 22, 2023, 4:17 a.m.