R/htwt_check.r

Defines functions htwt_check

Documented in htwt_check

#' Check for data entry errors in participant heights and weights.
#' 
#' This function generates a list containing a plot of particpant heights vs.
#' weights; a data.frame of the most extreme height/weight observations;
#' and a data.frame of participants missing height and/or weight.
#' 
#' This function generates a list with three elements: 
#' \enumerate{
#'   \item A plot (using ggplot2) of participant heights vs. 
#'         weights, with the most extreme observations labeled with study IDs.
#'   \item A table of the most extreme observations.
#'   \item A table of participants missing either height or weight
#' }
#' 
#' "Most extreme" is currently defined by fitting a linear model
#' (\code{lm(log(Weight) ~ Height}) and flagging participants whose weights
#' fall outside the 99.9% prediction interval.
#' 
#' 
#' @return
#' A list containing three elements: \code{plot}, \code{outlierdf}, and
#' \code{missingdf}, each corresponding to the elements listed above.
#' 
#' @param cleanlist The list of cleaned TO1 data generated by 
#'   \code{\link{clean_to1}}
#' 
#' @export



# This function compares height and weight for patients and reports out
# those with unusual results.
# Output is a list of objects:
#  - a plot of height vs weight with potential
#  - a data.frame of outliers
#  - a data.frame of participants with missing height or weight



htwt_check <- function(cleanlist) {

    # Ensure that these variables are defined within the function's environment
    HeightInch <- WeightPound <- outlier <- lwr <- upr <- NULL


    # Extract the height and weight data
    htwt <- cleanlist$medicalhistory[ , c("StudyID", 
                                          "HeightInch", "HeightInchIdk", 
                                          "WeightPound", "WeightPoundIdk")]


    # Fit a model to the heights and weights
    htwt.mod <- lm(log(WeightPound) ~ HeightInch,
                   data = htwt)

    htwt.pred <- data.frame(
        na.omit(htwt[c("StudyID", "HeightInch", "WeightPound")]),
        predict(htwt.mod, interval = "prediction", level = 0.999)
    )


    # Flag anyone whose weight falls outside the prediction interval
    htwt.pred$outlier <- with(htwt.pred,
        WeightPound < exp(lwr) |
        WeightPound > exp(upr)
    )

    htwt.pred$label <- ifelse(htwt.pred$outlier,
                              yes = htwt.pred$StudyID,
                              no = NA)








    # Set up the output list
    output <- list()



    # Create the plot
    output$plot <- ggplot(htwt.pred, aes(x = HeightInch, y = WeightPound)) +
                       geom_point(aes(color = outlier), alpha = 0.7) +
                       geom_ribbon(aes(ymin = exp(lwr), ymax = exp(upr)), 
                                   alpha = 0.05,
                                   color = "#43A2CA") +
                       geom_text(aes(label = label), size = 4, hjust = -0.05) +
                       coord_cartesian(ylim = c(0, 
                                       max(htwt.pred$WeightPound) * 1.1)) + 
                       labs(x = "Height (inches)", y = "Weight (pounds)") +
                       scale_color_manual(values = c("#43A2CA", "red"),
                                          guide = FALSE) +
                       scale_x_continuous(expand = c(.2, 1)) +
                       scale_y_continuous(expand = c(.2, 1)) +
                       theme_bw()



    # Create the table of outliers
    output$outlierdf <- htwt.pred[htwt.pred$outlier,
                                  c("StudyID", "HeightInch", "WeightPound")]


    # Create the table of participants with missing height or weight
    # To be truly missing, both the measure and its "I Don't Know" indicator
    # need to be NA
    htwt$heightmissing <- is.na(htwt$HeightInch) & is.na(htwt$HeightInchIdk)

    htwt$weightmissing <- is.na(htwt$WeightPound) & is.na(htwt$WeightPoundIdk)


    output$missingdf <- htwt[(htwt$heightmissing | htwt$weightmissing),
                             c("StudyID", "HeightInch", "WeightPound")]



    # Return the output list
    output

}
mmparker/to1check documentation built on May 23, 2019, 5:05 a.m.