#' Check for data entry errors in participant heights and weights.
#'
#' This function generates a list containing a plot of particpant heights vs.
#' weights; a data.frame of the most extreme height/weight observations;
#' and a data.frame of participants missing height and/or weight.
#'
#' This function generates a list with three elements:
#' \enumerate{
#' \item A plot (using ggplot2) of participant heights vs.
#' weights, with the most extreme observations labeled with study IDs.
#' \item A table of the most extreme observations.
#' \item A table of participants missing either height or weight
#' }
#'
#' "Most extreme" is currently defined by fitting a linear model
#' (\code{lm(log(Weight) ~ Height}) and flagging participants whose weights
#' fall outside the 99.9% prediction interval.
#'
#'
#' @return
#' A list containing three elements: \code{plot}, \code{outlierdf}, and
#' \code{missingdf}, each corresponding to the elements listed above.
#'
#' @param cleanlist The list of cleaned TO1 data generated by
#' \code{\link{clean_to1}}
#'
#' @export
# This function compares height and weight for patients and reports out
# those with unusual results.
# Output is a list of objects:
# - a plot of height vs weight with potential
# - a data.frame of outliers
# - a data.frame of participants with missing height or weight
htwt_check <- function(cleanlist) {
# Ensure that these variables are defined within the function's environment
HeightInch <- WeightPound <- outlier <- lwr <- upr <- NULL
# Extract the height and weight data
htwt <- cleanlist$medicalhistory[ , c("StudyID",
"HeightInch", "HeightInchIdk",
"WeightPound", "WeightPoundIdk")]
# Fit a model to the heights and weights
htwt.mod <- lm(log(WeightPound) ~ HeightInch,
data = htwt)
htwt.pred <- data.frame(
na.omit(htwt[c("StudyID", "HeightInch", "WeightPound")]),
predict(htwt.mod, interval = "prediction", level = 0.999)
)
# Flag anyone whose weight falls outside the prediction interval
htwt.pred$outlier <- with(htwt.pred,
WeightPound < exp(lwr) |
WeightPound > exp(upr)
)
htwt.pred$label <- ifelse(htwt.pred$outlier,
yes = htwt.pred$StudyID,
no = NA)
# Set up the output list
output <- list()
# Create the plot
output$plot <- ggplot(htwt.pred, aes(x = HeightInch, y = WeightPound)) +
geom_point(aes(color = outlier), alpha = 0.7) +
geom_ribbon(aes(ymin = exp(lwr), ymax = exp(upr)),
alpha = 0.05,
color = "#43A2CA") +
geom_text(aes(label = label), size = 4, hjust = -0.05) +
coord_cartesian(ylim = c(0,
max(htwt.pred$WeightPound) * 1.1)) +
labs(x = "Height (inches)", y = "Weight (pounds)") +
scale_color_manual(values = c("#43A2CA", "red"),
guide = FALSE) +
scale_x_continuous(expand = c(.2, 1)) +
scale_y_continuous(expand = c(.2, 1)) +
theme_bw()
# Create the table of outliers
output$outlierdf <- htwt.pred[htwt.pred$outlier,
c("StudyID", "HeightInch", "WeightPound")]
# Create the table of participants with missing height or weight
# To be truly missing, both the measure and its "I Don't Know" indicator
# need to be NA
htwt$heightmissing <- is.na(htwt$HeightInch) & is.na(htwt$HeightInchIdk)
htwt$weightmissing <- is.na(htwt$WeightPound) & is.na(htwt$WeightPoundIdk)
output$missingdf <- htwt[(htwt$heightmissing | htwt$weightmissing),
c("StudyID", "HeightInch", "WeightPound")]
# Return the output list
output
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.