# File: R/general_activity_last_event.R

#' Summarise the time of the last screen event per user and night
#'
#' Calls \code{findLastEvent()} on \code{data} and applies \code{fun} to the
#' \code{time.in.sec} column of the events found (divided by 3600 when
#' \code{convert_to_hour} is TRUE). The events returned by
#' \code{findLastEvent()} are still grouped by \code{userId} and
#' \code{night.of}, so \code{fun} is evaluated within each of these groups.
#'
#' @details \code{ids} and \code{colname} are only used when no event is found
#' at all: the function then returns one row per entry of \code{ids} with a
#' single \code{NA} column named \code{colname}. \code{ids} defaults to all
#' user ids present in \code{data}; \code{colname} defaults to \code{"n"}, the
#' name of the value column in the regular result.
#'
#' @template param_ids
#' @template param_data
#' @template param_colname
#' @template param_fun
#' @param first_event Logical parameter. Currently not used: the last event
#' is always computed via \code{findLastEvent()}. Kept in the signature for
#' backward compatibility.
#' @param convert_to_hour Logical parameter. If TRUE, the time will be converted to hours for easier readability.
#' @family general functions
#' @return A data frame holding the grouping columns of the detected events
#' plus a column \code{n} with the value of \code{fun}. If no event was found,
#' a data frame with the columns \code{userId} and \code{colname} (all
#' \code{NA}).
#' @export
#' @importFrom magrittr "%>%"
#' @import dplyr
general_activity_last_event = function(data, fun = "mean", first_event = TRUE,
  convert_to_hour = TRUE, ids = unique(data$userId), colname = "n") {
  event.df = findLastEvent(data)

  # No event at all: return an NA placeholder per requested user id.
  # rep() keeps this valid even when 'ids' is empty.
  if (nrow(event.df) == 0) {
    res = data.frame(userId = ids, value = rep(NA, length(ids)))
    colnames(res)[2] = colname
    return(res)
  }

  # event.df is grouped, so 'fun' is applied per group; distinct() then
  # collapses each group to its (single) summary value.
  if (convert_to_hour) {
    res = event.df %>%
      mutate(n = do.call(fun, list(time.in.sec/60/60))) %>%
      distinct(n)
  } else {
    res = event.df %>%
      mutate(n = do.call(fun, list(time.in.sec))) %>%
      distinct(n)
  }

  res
}

# Find, for every user and study day, the first screen event that starts a
# usage or checking session at or after 'from.time'.
#
# data:      event data; the columns userId, source, time, weekday, studyDay
#            are read here, further columns are needed by labelScreenEvents().
# from.time: earliest time of day to consider, as "HH:MM:SS".
# ids:       user ids to keep; defaults to all ids present in 'data'.
#
# Returns a data frame grouped by userId and studyDay with one row per group
# and the columns weekday, time, userId, time.in.sec and studyDay.
#' @import dplyr
findFirstEvent = function(data, from.time = "05:00:00", ids = unique(data$userId)) {
  # Anchored so that strings merely containing a time (e.g. "x05:00:00y")
  # are rejected instead of being passed on to timeToSec().
  if (!grepl(pattern = "^[012][[:digit:]]:[012345][[:digit:]]:[012345][[:digit:]]$", x = from.time))
    stop("from.time must be a time format like '15:21:30'")
  threshhold = timeToSec(strsplit(from.time, ":")[[1]])

  # Evaluate the default before 'ids' is looked up inside filter().
  force(ids)

  res = data %>%
    filter(userId %in% ids) %>%
    labelScreenEvents() %>%
    filter(source == "SCREEN") %>%
    # vapply() guarantees a numeric column even when no rows are left
    mutate(time.in.sec = vapply(strsplit(time, ":"), timeToSec, numeric(1))) %>%
    filter(time.in.sec >= threshhold, general.usageType %in% c("USING_START", "CHECKING_START")) %>%
    group_by(userId, studyDay) %>%
    slice(1) %>%
    select(weekday, time, userId, time.in.sec, studyDay)

  res
}

# Find, for every user and night, the last screen event that ends a usage or
# checking session. Events before 'until.time' are attributed to the previous
# study day (column night.of) and get 24 hours added to time.in.sec.
#
# data:       event data; the columns userId, source, time, weekday, studyDay
#             are read here, further columns are needed by labelScreenEvents().
# until.time: time of day up to which events still belong to the previous
#             night, as "HH:MM:SS".
#
# Returns a data frame that is still grouped by userId and night.of, with one
# row per group and the columns weekday, time, userId, time.in.sec, night.of.
#' @import dplyr
findLastEvent = function(data, until.time = "04:00:00") {
  # Anchored so that strings merely containing a time (e.g. "x04:00:00y")
  # are rejected instead of being passed on to timeToSec().
  if (!grepl(pattern = "^[012][[:digit:]]:[012345][[:digit:]]:[012345][[:digit:]]$", x = until.time))
    stop("until.time must be a time format like '15:21:30'")
  max.time = timeToSec(strsplit("24:00:00", ":")[[1]])
  threshhold = timeToSec(strsplit(until.time, ":")[[1]])

  res = data %>%
    labelScreenEvents() %>%
    filter(source == "SCREEN") %>%
    # vapply() guarantees a numeric column even when no rows are left
    mutate(time.in.sec = vapply(strsplit(time, ":"), timeToSec, numeric(1))) %>%
    # events after midnight but before the threshold count for the day before
    mutate(night.of = ifelse(time.in.sec < threshhold,
      as.character(shiftStudyDay(studyDay, increment = -1)),
      as.character(studyDay))) %>%
    filter(general.usageType %in% c("USING_END", "CHECKING_END")) %>%
    group_by(userId, night.of) %>%
    slice(n()) %>%
    # shift early-morning times past 24:00:00 so they sort after the evening
    mutate(time.in.sec = ifelse(time.in.sec < threshhold, max.time + time.in.sec, time.in.sec)) %>%
    select(weekday, time, userId, time.in.sec, night.of)

  res
}


# Label the screen events of every user in 'data' with a usage type.
#
# The data is processed separately for each user id (rows whose userId is NA
# match no id and are therefore not part of the result). Within a user the
# rows are ordered by timestamp and a character column 'general.usageType' is
# added; rows that match none of the patterns below keep NA.
#
# data:            event data with at least the columns userId, timestamp,
#                  source and screen.event.
# checkingTimeMax: maximum number of seconds between ON_LOCKED and OFF_LOCKED
#                  for the pair to be labelled as CHECKING (timestamps are
#                  divided by 1000 before the comparison).
#
# Returns the rows of 'data', user by user, with the additional column.
#' @import dplyr
labelScreenEvents = function(data, checkingTimeMax = 60) {
  ids = unique(data$userId)

  # Nothing to label: return an empty frame that already has the new column.
  if (length(ids) == 0) {
    empty = data[0, , drop = FALSE]
    empty$general.usageType = character(0)
    return(empty)
  }

  labelled = lapply(ids, function(current.id) {
    .labelScreenEventsForUser(dplyr::filter(data, userId == current.id), checkingTimeMax)
  })

  # Single user: hand back the labelled frame as is.
  if (length(labelled) == 1) {
    return(labelled[[1]])
  }
  dplyr::bind_rows(labelled)
}

# Label the screen events of a single user (helper of labelScreenEvents()).
# NOTE(review): occur() is assumed to return the start positions of every
# match of a pattern, and my.rle() an index of the first element of each run
# of identical values -- both are defined elsewhere; confirm.
.labelScreenEventsForUser = function(df1, checkingTimeMax) {
  df1 = df1 %>% dplyr::arrange(timestamp)
  is_screen = df1$source == "SCREEN"
  screen_events = df1[is_screen, ]$screen.event

  # add a new variable "usage" for labeling screen events
  df1$general.usageType = rep(NA_character_, nrow(df1))

  # if there is at most one screen event for this ID, no labeling is needed
  if (length(screen_events) <= 1) {
    return(df1)
  }

  # collapse screen events and source into a single column
  source_with_screen.event = as.character(df1$source)
  source_with_screen.event[source_with_screen.event == "SCREEN"] = df1$screen.event[source_with_screen.event == "SCREEN"]
  run_idx = my.rle(source_with_screen.event)

  ### label ON_LOCKED, ..., ON_UNLOCKED, .... OFF_LOCKED/OFF_UNLOCKED as USING starting with ON_LOCKED

  if (length(screen_events) >= 3) {
    # look for patterns c("ON_LOCKED", "ON_UNLOCKED", "OFF_LOCKED") and c("ON_LOCKED", "ON_UNLOCKED", "OFF_UNLOCKED")
    # in screen_events
    using1 = occur(c("ON_LOCKED", "ON_UNLOCKED", "OFF_LOCKED"), screen_events)
    using2 = occur(c("ON_LOCKED", "ON_UNLOCKED", "OFF_UNLOCKED"), screen_events)
    df1$general.usageType[is_screen][using1] = "USING_START"
    df1$general.usageType[is_screen][using1 + 1] = "USING_ONGOING"
    df1$general.usageType[is_screen][using1 + 2] = "USING_END"

    df1$general.usageType[is_screen][using2] = "USING_START"
    df1$general.usageType[is_screen][using2 + 1] = "USING_ONGOING"
    df1$general.usageType[is_screen][using2 + 2] = "USING_END"

    ### label OFF_(UN)LOCKED, ... , ON_UNLOCKED, ... , OFF_(UN)LOCKED as USING starting with ON_UNLOCKED
    using3 = occur(c("OFF_LOCKED", "ON_UNLOCKED", "OFF_LOCKED"), screen_events)
    using4 = occur(c("OFF_LOCKED", "ON_UNLOCKED", "OFF_UNLOCKED"), screen_events)
    using5 = occur(c("OFF_UNLOCKED", "ON_UNLOCKED", "OFF_LOCKED"), screen_events)
    using6 = occur(c("OFF_UNLOCKED", "ON_UNLOCKED", "OFF_UNLOCKED"), screen_events)

    df1$general.usageType[is_screen][using3 + 1] = "USING_START"
    df1$general.usageType[is_screen][using3 + 2] = "USING_END"
    df1$general.usageType[is_screen][using4 + 1] = "USING_START"
    df1$general.usageType[is_screen][using4 + 2] = "USING_END"
    df1$general.usageType[is_screen][using5 + 1] = "USING_START"
    df1$general.usageType[is_screen][using5 + 2] = "USING_END"
    df1$general.usageType[is_screen][using6 + 1] = "USING_START"
    df1$general.usageType[is_screen][using6 + 2] = "USING_END"
  }

  ### if ON_UNLOCKED screen event is the first screen.event in data for this participant ID,
  ### followed by OFF_(UN)LOCKED, label this as USING
  if (screen_events[1] == "ON_UNLOCKED" && (screen_events[2] == "OFF_UNLOCKED" || screen_events[2] == "OFF_LOCKED")) {
    df1$general.usageType[is_screen][1] = "USING_START"
    df1$general.usageType[is_screen][2] = "USING_END"
  }

  ### label "ON_LOCKED", "NOTIFICATION", "NOTIFICATION", ..., "OFF_LOCKED" as
  ### "NOTIFICATION_START" and "NOTIFICATION_END"

  if (length(source_with_screen.event[run_idx]) >= 3) {
    # look for pattern c("ON_LOCKED", "NOTIFICATION", "OFF_LOCKED") in the run-collapsed sequence
    notifications = occur(c("ON_LOCKED", "NOTIFICATION", "OFF_LOCKED"), source_with_screen.event[run_idx])

    df1$general.usageType[run_idx][notifications] = "NOTIFICATION_START"
    df1$general.usageType[run_idx][notifications + 2] = "NOTIFICATION_END"
  } else {
    notifications = integer(0)
  }

  ### label "NOTIFICATION", "ON_LOCKED", "OFF_LOCKED" as "NOTIFICATION_START" and "NOTIFICATION_END"

  if (length(source_with_screen.event) >= 3) {
    # look for pattern c("NOTIFICATION", "ON_LOCKED", "OFF_LOCKED") in source_with_screen.event
    notifications2 = occur(c("NOTIFICATION", "ON_LOCKED", "OFF_LOCKED"), source_with_screen.event)

    df1$general.usageType[notifications2 + 1] = "NOTIFICATION_START"
    df1$general.usageType[notifications2 + 2] = "NOTIFICATION_END"
  } else {
    notifications2 = integer(0)
  }

  ### label ON_LOCKED OFF_LOCKED events immediately following each other (without source in between) as "CHECKING"

  checking_pattern = occur(c("ON_LOCKED", "OFF_LOCKED"), source_with_screen.event)
  checking = rep(FALSE, nrow(df1))
  checking[checking_pattern] = TRUE
  # pairs already labelled as notification (pattern 2) are not checking events
  checking[notifications2 + 1] = FALSE

  # label only ON_LOCKED OFF_LOCKED events which are at most checkingTimeMax seconds apart
  checking_timeLimit = (df1$timestamp[which(checking) + 1] - df1$timestamp[checking])/1000 <= checkingTimeMax
  df1$general.usageType[checking][checking_timeLimit] = "CHECKING_START"
  df1$general.usageType[which(checking) + 1][checking_timeLimit] = "CHECKING_END"

  ### label "ON_LOCKED", ... "OFF_LOCKED" as USING if there is usage in between (not only notifications)
  pattern = occur(c("ON_LOCKED", "OFF_LOCKED"), screen_events)
  potential_usage = rep(FALSE, nrow(df1))
  potential_usage[is_screen][pattern] = TRUE

  # exclude everything that was already identified as notification or
  # as an ON_LOCKED/OFF_LOCKED pair without anything in between
  potential_usage[run_idx][notifications] = FALSE
  potential_usage[notifications2 + 1] = FALSE
  potential_usage[checking] = FALSE

  df1$general.usageType[potential_usage] = "USING_START"

  # the screen event following each remaining ON_LOCKED closes the usage
  potential_usage_screen_events = potential_usage[is_screen]
  df1$general.usageType[is_screen][which(potential_usage_screen_events) + 1] = "USING_END"

  df1
}
# QuayAu/Phonestudy_feature_functions documentation built on May 16, 2019, 4:03 a.m.