# R/display.r
#
# Defines functions: squashRetentionList, squashPADistList
#
# Documented in: squashPADistList, squashRetentionList

#' Convert a retentionList to a single data frame that's ready for plotting.
#'
#' Every element of \code{retentionList} is expected to carry a \code{result}
#' (the retention data for one group of users) and a \code{varCombo} (the
#' grouping values identifying that group). The grouping values are attached
#' to every row of the corresponding \code{result}, and the per-group pieces
#' are then stacked with \code{plyr::ldply}. Elements whose \code{result} is
#' \code{NULL} or has no rows contribute no rows to the output.
#'
#' @param retentionList The result of calling
#' clustApply(FUN=calculateWeeklyRetention) on a userClust object.
#' @return A data.frame of the form  (cluster, <additional variables>
#' , relative_session_week, pct_active). Here, <additional variables>
#' represents the variables which appear in the extraGroupings parameter of the
#' clustApply call. The grouping columns are taken from the names of each
#' element's \code{varCombo}. An empty data.frame is returned when there is
#' nothing to combine.
#' @importFrom dplyr mutate
#' @importFrom dplyr select
#' @importFrom dplyr left_join
#' @importFrom plyr ldply
#' @export
squashRetentionList <- function(retentionList){
  # No entries at all: return an empty data frame without invoking plyr.
  if(length(retentionList) == 0){
    return(data.frame())
  }
  plyr::ldply(
    .data = retentionList
    , .fun = function(clusterEntry){
      retentionData <- clusterEntry$result
      # Groups without retention data contribute nothing.
      if(is.null(retentionData) || NROW(retentionData) == 0){
        return(data.frame())
      }
      # One-row data frame holding this group's identifying values.
      userGroup <- data.frame(as.list(clusterEntry$varCombo))
      groupCols <- names(userGroup)
      # Joining on a constant column attaches the single userGroup row to
      # every retention row (a cross join).
      userGroup <- dplyr::mutate(userGroup, dummy = TRUE)
      retentionData <- dplyr::mutate(retentionData, dummy = TRUE)
      out <- dplyr::left_join(retentionData, userGroup, by = 'dummy')
      # Keep the grouping columns (previously dropped, which made rows from
      # different groups indistinguishable) ahead of the retention columns.
      keepCols <- unique(c(
        intersect(groupCols, names(out))
        , 'relative_session_week'
        , 'pct_active'
      ))
      out[, keepCols, drop = FALSE]
  })
}

#' Convert an aggPADistList to a single data frame that's ready to be
#' displayed.
#'
#' The \code{result} of every list element is tagged with the element's
#' position in \code{aggPADistList} (reported as the \code{cluster} column)
#' and the pieces are stacked into one long data frame. Elements whose
#' \code{result} is \code{NULL} are skipped; the remaining elements keep their
#' original positions as cluster ids.
#'
#' @param aggPADistList The result of calling
#' clustApply(FUN = function(u){
#'       dplyr::select(calculatePADist(u, agg = T)
#'                     , flash_report_category, pct_platform_actions)
#'     }
#' )
#' on a userClust object.
#' @param long Logical; should data be returned in long format (TRUE) or wide
#' format (FALSE)? Wide format is produced by \code{spreadPADistData}.
#' @param clustVariables Character vector. Variables that were used in 
#' the clustering process. All variables but these will be dropped, and
#' percentages will be re-computed based on these variables only. If left null,
#' then no variables will be dropped.
#' @return A data frame showing each cluster's aggregate platform 
#' action distribution. An empty data frame is returned when
#' \code{aggPADistList} is empty or contains no non-NULL results.
#' @importFrom dplyr mutate
#' @importFrom dplyr rename
#' @importFrom dplyr filter
#' @importFrom dplyr group_by
#' @importFrom dplyr ungroup
#' @importFrom dplyr %>%
#' @export
squashPADistList <- function(aggPADistList
                             , long = FALSE
                             , clustVariables = NULL){
  results <- lapply(aggPADistList, function(x) x$result)

  # Tag each cluster's rows with its position in the input list. seq_along()
  # (rather than 1:length()) keeps this a no-op for an empty list.
  tagged <- lapply(seq_along(results), function(i){
    clusterData <- results[[i]]
    if(is.null(clusterData)){
      return(NULL)
    }
    dplyr::mutate(clusterData, user_id = i)
  })
  tagged <- Filter(Negate(is.null), tagged)

  # Nothing to stack: there is no user_id column to rename or spread.
  if(length(tagged) == 0){
    return(data.frame(stringsAsFactors = FALSE))
  }

  # Bind once instead of growing the data frame inside a loop.
  longData <- do.call(rbind, tagged)

  if(!is.null(clustVariables)){
    # Keep only the clustering variables and rescale so that each cluster's
    # remaining percentages sum to one.
    longData <- dplyr::filter(
      longData
      , flash_report_category %in% clustVariables
    )
    longData <- dplyr::group_by(longData, user_id)
    longData <- dplyr::mutate(
      longData
      , pct_platform_actions = pct_platform_actions/sum(pct_platform_actions)
    )
    longData <- dplyr::ungroup(longData)
  }

  if(long){
    out <- dplyr::rename(longData, cluster = user_id)
  } else {
    wideData <- spreadPADistData(longData)
    out <- dplyr::rename(wideData, cluster = user_id)
  }
  out
}
# johnchower/oneD7 documentation built on May 19, 2019, 4:21 p.m.