R/processLimeSurveyDropouts.R

#' processLimeSurveyDropouts
#' 
#' This function makes it easy to parse the dropouts from a LimeSurvey
#' questionnaire.
#' 
#' This will be described more in detail in a forthcoming publications.
#' 
#' @param lastpage A vector with the 'lastpage' variable as LimeSurvey stores
#' it (an integer denoting the last page a participant visited, in other words,
#' where they dropped out).
#' @param pagenames Optional: names for each page.
#' @param relevantPagenames Optional: the names of those pages that should be
#' included.
#' @return A list with information about the dropout, including
#' \code{\link{ggplot}}s.
#' @author Gjalt-Jorn Peters
#' 
#' Maintainer: Gjalt-Jorn Peters <gjalt-jorn@@userfriendlyscience.com>
#' @keywords untilities
#' @examples
#' 
#' processLimeSurveyDropouts(c(1,2,1,1,2,3,2,2,3,2,1));
#' 
#' @export processLimeSurveyDropouts
processLimeSurveyDropouts <- function(lastpage, pagenames = NULL,
                                      relevantPagenames = NULL) {

  if (!is.numeric(lastpage)) {
    stop("Argument 'lastpage' is not a numeric vector but has class ",
         class(lastpage), ". The first nonmissing values are: ",
         vecTxtQ(head(complete.cases(lastpage))), ".");
  }
  
  res <- list();
  res$specificDropout <- data.frame(lastpage = 0:max(lastpage));

  if (is.null(pagenames)) pagenames <-
      paste('Dropped out at page', seq(from=1, to=max(lastpage + 1)));

  if (is.null(relevantPagenames)) relevantPagenames <-
      paste('Page', seq(from=1, to=max(lastpage + 1)));

  if (length(pagenames) != nrow(res$specificDropout)) {
    stop("The vector 'pagenames' must have the same length as the number of pages ",
         "in the 'lastpage' vector - but ", length(pagenames), " pagenames were ",
         "provided, for ", nrow(res$specificDropout), " lastpages.");
  }

  totalParticipants <- length(lastpage);

  res$specificDropout <- merge(res$specificDropout,
                               as.data.frame(table(lastpage),
                                             responseName='frequency'),
                               by='lastpage',
                               all=TRUE);
  res$specificDropout$frequency[is.na(res$specificDropout$frequency)] <- 0;
  res$specificDropout <- res$specificDropout[order(as.numeric(res$specificDropout$lastpage)), ];

  res$specificDropout$comments <- pagenames;

  res$progressiveDropout <- data.frame(frequency = totalParticipants -
                                         head(c(0, tail(cumsum(res$specificDropout$frequency), -1)), -1));
  res$progressiveDropout$percentage <- 100 * res$progressiveDropout$frequency /
    totalParticipants;
  res$progressiveDropout$page <- 1:nrow(res$progressiveDropout);
  res$progressiveDropout$prettyPercentage <- paste0(round(res$progressiveDropout$percentage), "%");

  res$plots -> list;

  res$plots$absoluteDropout <-
    ggplot(res$progressiveDropout, aes_string(x='page', y='frequency')) +
    geom_point(size=4) + geom_line(size=1) + ylab('Number of participants') +
    xlab('Page in the questionnaire') + theme_bw() +
    geom_text_repel(aes_string(label='frequency'),
                    point.padding = unit(1, 'lines'),
                    min.segment.length = unit(0.05, "lines"),
                    segment.color="#2A5581", color="#2A5581",
                    size=5, nudge_x=1) +
    scale_x_continuous(breaks=res$progressiveDropout$page);
  res$plots$relativeDropout <-
    ggplot(res$progressiveDropout, aes_string(x="page", y="percentage")) +
    geom_point(size=4) + geom_line(size=1) + ylab('Percentage of participants') +
    xlab('Page in the questionnaire') + theme_bw() +
    geom_text_repel(aes_string(label='prettyPercentage'),
                    point.padding = unit(1, 'lines'),
                    min.segment.length = unit(0.05, "lines"),
                    segment.color="#2A5581", color="#2A5581",
                    size=5, nudge_x=1) +
    scale_x_continuous(breaks=res$progressiveDropout$page);

  class(res) <- 'limeSurveyDropouts';

  return(res);

}
Matherion/userfriendlyscience documentation built on May 7, 2019, 3:41 p.m.