#' Calculate Persistence
#'
#' @description The persistence rate is defined as the percentage of units (ID's)
#' in a given period that are also found in the subsequent period. This
#' function will either:
#'
#' 1) Calculate the persitence rate for each period
#'
#' 2) Create an indicator variable on the original dataframe that
#' identifies whether the ID persisted (1) or not (0)
#'
#'@param df A dataframe
#'@param id Unique ID variable
#'@param rank Numeric or Ordered Factor variable that indicates the sequence of periods
#'@param period Optional pretty name of the rank variable
#'@param ... Optional grouping variables
#'@param overall Logical variable to include overall persistence rate
#'@param calculate Logical variable to indicate calculating persistence
#' rates or indicator variable
#'
#'@return Returns either a summarised dataframe or the original dataframe
#' with an extra column.
#'
#'@details If \code{calculate==TRUE} \code{persistence()} removes the last
#' period from the output,
#'
#'@examples
#'
#'dataFrame <- data.frame(ID = c("A", "B", "C", "A", "B", "D", "A", "D"),
#' RANK = c(1, 1, 1, 2, 2, 2, 3, 3),
#' PERIOD = c("P1", "P1", "P1", "P2", "P2", "P2", "P3", "P3"),
#' GROUP = c("G1", "G2", "G1", "G1", "G2", "G3", "G1", "G3"),
#' stringsAsFactors = FALSE)
#'# Calculate == TRUE
#'persistence(dataFrame, ID, RANK, PERIOD)
#'
#'# Calculate == FALSE
#'dataFrame <- persistence(dataFrame, ID, RANK, PERIOD, calculate = FALSE)
#'
#'@export
persistence <- function(df, id, rank, period, ..., overall = TRUE, calculate = TRUE){
stopifnot(!missing(df), !missing(id), !missing(rank))
period_missing <- missing(period)
enq_id <- enquo(id)
enq_rank <- enquo(rank)
enq_period <- enquo(period)
enq_group_var <- quos(...)
valid_rank_type <- is.numeric(rlang::eval_tidy(enq_rank, df)) | is.ordered(rlang::eval_tidy(enq_rank, df))
if(!valid_rank_type){
stop("Argument \"rank\" must be numeric or ordered factor")
}
if(is.logical(calculate)){
calculate <- calculate
} else {
stop("Argument \"calculate\" must be logical (TRUE/FALSE)")
}
if(is.logical(overall)){
overall <- overall
} else{
stop("Argument \"overall\" must be logical (TRUE/FALSE)")
}
df <- df %>%
ungroup() %>%
mutate(denseRank = dense_rank(UQ(enq_rank)))%>%
group_by(UQ(enq_id))%>%
arrange(denseRank)%>%
mutate(nextrank = lead(denseRank))
if(calculate == FALSE){
out <- df %>%
mutate(persistence_indicator = case_when(nextrank == denseRank + 1 ~ 1,
TRUE ~ 0))%>%
ungroup()%>%
select(-nextrank, -denseRank)
return(out)
} else if (calculate == TRUE) {
if(period_missing){
out <- df %>%
group_by(UQS(enq_group_var), UQ(enq_rank), denseRank)
} else if(!period_missing){
out <- df %>%
group_by(UQS(enq_group_var), UQ(enq_rank), UQ(enq_period), denseRank)
}
out <- out %>%
summarize(persistence_rate = sum(nextrank == (denseRank+1), na.rm = TRUE)/n(),
count = n()) %>%
ungroup()%>%
filter(denseRank != max(denseRank))%>%
arrange(denseRank) %>%
select(-denseRank)
if(overall == TRUE){
total <- df %>%
ungroup()%>%
filter(denseRank != max(denseRank))%>%
summarize(persistence_rate=sum(nextrank == (denseRank + 1), na.rm = TRUE)/n())%>%
as.numeric()
out <- out %>%
mutate(overall = total)
}
return(out)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.