#' Plot manifest and fitted raw scores
#'
#' The function plots the raw data against the fitted scores from
#' the regression model per group. This helps to inspect the precision
#' of the modeling process. The scores should not deviate too far from
#' regression line.
#' @param data The raw data within a data.frame or cnorm object
#' @param model The regression model (optional)
#' @param group The grouping variable
#' @param raw The raw score variable
#' @param type Type of display: 0 = plot manifest against fitted values, 1 = plot
#' manifest against difference values
#' @examples
#' # Compute model with example dataset and plot results
#' result <- cnorm(raw = elfe$raw, group = elfe$group)
#' plotRaw(result)
#' @export
#' @family plot
plotRaw <- function(data, model, group = NULL, raw = NULL, type = 0) {
if(inherits(data, "cnorm")){
model <- data$model
data <- data$data
}
#if (!attr(data, "useAge")){
# stop("Age or group variable explicitely set to FALSE in dataset. No plotting available.")
#}
if (is.null(raw)) {
raw <- attr(data, "raw")
}
if (group != "" && !is.null(group) && !(group %in% colnames(data))) {
warning(paste(c("Grouping variable '", group, "' does not exist in data object. Please check variable names and fix 'group' parameter in function call."), collapse = ""))
group <- NULL
}
if (!(raw %in% colnames(data))) {
stop(paste(c("ERROR: Raw variable '", raw, "' does not exist in data object."), collapse = ""))
}
if(is.null(model$covariate))
cov <- NULL
else
cov <- data[[attr(data, "covariate")]]
d <- data
d$raw <- data[[raw]]
d$fitted <- model$fitted.values
d$diff <- d$fitted - d$raw
mse <- round(model$rmse, digits=4)
r <- round(cor(d$fitted, d$raw,
use = "pairwise.complete.obs"), digits = 4)
d <- as.data.frame(d)
if (group != "" && !is.null(group)) {
d$group <- data[[group]]
d$group <- as.factor(d$group)
if (type == 0) {
xyplot(fitted ~ raw | group, d,
main = paste("Observed vs. Fitted Raw Scores by ", group, "\nr = ", r, ", RMSE = ", mse),
ylab = "Fitted Scores",
xlab = "Observed Score",
grid = TRUE,
auto.key = TRUE,
group = cov,
abline = c(0, 1), lwd = 1
)
} else {
xyplot(diff ~ raw | group, d,
main = paste("Observed Raw Scores vs. Difference Scores by ", group, "\nr = ", r, ", RMSE = ", mse),
ylab = "Difference Scores",
xlab = "Observed Score",
grid = TRUE,
auto.key = TRUE,
group = cov,
panel = function(...) {
panel.xyplot(...)
panel.abline(h = .0, col = 2, lty = 2)
}
)
}
} else {
if (type == 0) {
xyplot(fitted ~ raw, d,
main = paste("Observed vs. Fitted Raw Scores\nr = ", r, ", RMSE = ", mse),
ylab = "Fitted Scores",
xlab = "Observed Score",
grid = TRUE,
auto.key = TRUE,
group = cov,
abline = c(0, 1), lwd = 1
)
} else {
xyplot(diff ~ raw, d,
main = paste("Observed Raw Scores vs. Difference Scores\nr = ", r, ", RMSE = ", mse),
ylab = "Difference",
xlab = "Observed Score",
grid = TRUE,
auto.key = TRUE,
group = cov,
panel = function(...) {
panel.xyplot(...)
panel.abline(h = .0, col = 2, lty = 2)
}
)
}
}
}
#' Plot manifest and fitted norm scores
#'
#' The function plots the manifest norm score against the fitted norm score from
#' the inverse regression model per group. This helps to inspect the precision
#' of the modeling process. The scores should not deviate too far from
#' regression line.
#' @param data The raw data within a data.frame or a cnorm object
#' @param model The regression model (optional)
#' @param group The grouping variable, use empty string for no group
#' @param minNorm lower bound of fitted norm scores
#' @param maxNorm upper bound of fitted norm scores
#' @param type Type of display: 0 = plot manifest against fitted values, 1 = plot
#' manifest against difference values
#' @examples
#' # Load example data set, compute model and plot results
#' \dontrun{
#' result <- cnorm(raw = elfe$raw, group = elfe$group)
#' plotNorm(result, group="group", minNorm=25, maxNorm=75)
#' }
#' @export
#' @family plot
plotNorm <- function(data, model, group = "", minNorm = NULL, maxNorm = NULL, type = 0) {
if(inherits(data, "cnorm")){
model <- data$model
data <- data$data
}
#if (!attr(data, "useAge")){
# stop("Age or group variable explicitely set to FALSE in dataset. No plotting available.")
#}
if (is.null(minNorm)) {
# warning("minNorm not specified, taking absolute minimum norm score from modeling...")
minNorm <- model$minL1
}
if (is.null(maxNorm)) {
# warning("maxNorm not specified, taking absolute maximum norm score from modeling...")
maxNorm <- model$maxL1
}
if (group != "" && !is.null(group) && !(group %in% colnames(data))) {
warning(paste(c("Grouping variable '", group, "' does not exist in data object. Please check variable names and fix 'group' parameter in function call."), collapse = ""))
group <- NULL
}
d <- data
raw <- data[[model$raw]]
if (attr(data, "useAge"))
age <- data[[model$age]]
else
age <- rep(0, length=nrow(data))
if(!is.null(model$covariate))
covariate <- data[[attr(data, "covariate")]]
else
covariate <- NULL
d$fitted <- predictNorm(raw, age, model, minNorm = minNorm, maxNorm = maxNorm, covariate = covariate)
d$diff <- d$fitted - data$normValue
d <- d[!is.na(d$fitted), ]
d <- d[!is.na(d$diff), ]
rmse <- round(sqrt(mean(d$diff^2)), digits = 4)
r <- round(cor(d$fitted, d$normValue, use = "pairwise.complete.obs"), digits = 4)
if (group != "" && !is.null(group)) {
d$group <- d[[group]]
d$group <- as.factor(d$group)
if (type == 0) {
xyplot(fitted ~ normValue | group, d,
main = paste("Observed vs. Fitted Norm Scores by ", group, "\nr = ",
r, ", RMSE = ", rmse),
ylab = "Fitted Scores",
xlab = "Observed Scores",
grid = TRUE,
auto.key = TRUE,
group = covariate,
abline = c(0, 1), lwd = 1
)
} else {
xyplot(diff ~ normValue | group, d,
main = paste("Observed Norm Scores vs. Difference Scores by ", group, "\nr = ",
r, ", RMSE = ", rmse),
ylab = "Difference",
xlab = "Observed Scores",
grid = TRUE,
auto.key = TRUE,
group = covariate,
abline = c(0, 1), lwd = 1,
panel = function(...) {
panel.xyplot(...)
panel.abline(h = .0, col = 2, lty = 2)
}
)
}
} else {
if (type == 0) {
xyplot(fitted ~ normValue, d,
main = paste("Observed vs. Fitted Norm Scores\nr = ",
r, ", RMSE = ", rmse),
ylab = "Fitted Scores",
xlab = "Observed Scores",
grid = TRUE,
auto.key = TRUE,
group = covariate,
abline = c(0, 1), lwd = 1
)
} else {
xyplot(diff ~ normValue, d,
main = paste("Observed Norm Scores vs. Difference Scores\nr = ",
r, ", RMSE = ", rmse),
ylab = "Difference",
xlab = "Observed Scores",
grid = TRUE,
auto.key = TRUE,
group = covariate,
abline = c(0, 1), lwd = 1,
panel = function(...) {
panel.xyplot(...)
panel.abline(h = .0, col = 2, lty = 2)
}
)
}
}
}
#' Plot norm curves
#'
#' The function plots the norm curves based on the regression model.
#' Please check the function for inconsistent curves: The different
#' curves should not intersect. Violations of this assumption are a strong
#' indication for violations of model assumptions in modeling the relationship between raw
#' and norm scores. There are several reasons, why this might occur:
#' \enumerate{
#' \item Vertical extrapolation: Choosing extreme norm scores, e. g. scores
#' -3 <= x and x >= 3 In order to model these extreme scores, a large sample
#' dataset is necessary.
#' \item Horizontal extrapolation: Taylor polynomials converge in a certain
#' radius. Using the model scores outside the original dataset may
#' lead to inconsistent results.
#' \item The data cannot be modeled with Taylor polynomials, or you need
#' another power parameter (k) or R2 for the model.
#' }
#' In general, extrapolation (point 1 and 2) can carefully be done to a
#' certain degree outside the original sample, but it should in general
#' be handled with caution.
#' checkConsistency and derivationPlot can be used to further inspect the model.
#' @param model The model from the bestModel function or a cnorm object
#' @param normList Vector with norm scores to display
#' @param minAge Age to start with checking
#' @param maxAge Upper end of the age check
#' @param step Stepping parameter for the age check, usually 1 or 0.1; lower
#' scores indicate higher precision / closer checks
#' @param minRaw Lower end of the raw score range, used for clipping implausible results
#' (default = 0)
#' @param maxRaw Upper end of the raw score range, used for clipping implausible results
#' @param covariate In case, a covariate has been used, please specify the degree of the covariate /
#' the specific value here.
#' @seealso checkConsistency, derivationPlot, plotPercentiles
#' @examples
#' # Load example data set, compute model and plot results
#' normData <- prepareData(elfe)
#' m <- bestModel(data = normData)
#' plotNormCurves(m, minAge=2, maxAge=5)
#' @export
#' @family plot
plotNormCurves <- function(model, normList = NULL,
minAge = NULL,
maxAge = NULL,
step = 0.1,
minRaw = NULL,
maxRaw = NULL,
covariate = NULL) {
if(inherits(model, "cnorm")){
model <- model$model
}
if(is.null(normList)){
normList <- c(-2, -1, 0, 1, 2)
normList <- normList*model$scaleSD + model$scaleM
}
if(!is.null(covariate)&&is.null(model$covariate)){
warning("Covariate specified but no covariate available in the model. Setting covariate to NULL.")
covariate = NULL
}else if(is.null(covariate)&&!is.null(model$covariate)){
stop("Covariate specified in the model, but no function parameter available.")
}
## TODO plot all degrees of the covariate when providing a list
if (!model$useAge){
stop("Age or group variable explicitely set to FALSE in dataset. No plotting available.")
}
if (is.null(minAge)) {
minAge <- model$minA1
}
if (is.null(maxAge)) {
maxAge <- model$maxA1
}
if (is.null(minRaw)) {
minRaw <- model$minRaw
}
if (is.null(maxRaw)) {
maxRaw <- model$maxRaw
}
valueList <- data.frame(n = factor(), raw = double(), age = double())
n <- length(normList)
for (i in 1:n) {
normCurve <-
getNormCurve(
normList[[i]],
model,
minAge = minAge,
maxAge = maxAge,
step = step,
minRaw = minRaw,
maxRaw = maxRaw,
covariate = covariate
)
currentDataFrame <- data.frame(n = normCurve$norm, raw = normCurve$raw, age = normCurve$age)
valueList <- rbind(valueList, currentDataFrame)
}
# generate variable names
NAMES <- paste("Norm ", normList, sep = "")
# lattice display options
COL <- rainbow(length(normList))
panelfun <- function(..., type, group.number) {
panel.lines(...)
}
xyplot(raw ~ age,
data = valueList, groups = n,
panel = function(...)
panel.superpose(..., panel.groups = panelfun),
main = "Norm Curves",
ylab = "Raw Score", xlab = "Explanatory Variable",
col = COL, lwd = 1.5, grid = TRUE,
key = list(
corner = c(0.99, 0.1),
lines = list(col = COL, lwd = 1.5),
text = list(NAMES)
)
)
}
#' Plot norm curves against actual percentiles
#'
#' The function plots the norm curves based on the regression model against
#' the actual percentiles from the raw data. As in 'plotNormCurves',
#' please check for inconsistent curves, especially intersections.
#' Violations of this assumption are a strong
#' indication for problems
#' in modeling the relationship between raw and norm scores.
#' In general, extrapolation (point 1 and 2) can carefully be done to a
#' certain degree outside the original sample, but it should in general
#' be handled with caution.
#' The original percentiles are displayed as distinct points in the according
#' color, the model based projection of percentiles are drawn as lines.
#' Please note, that the estimation of the percentiles of the raw data is done with
#' the quantile function with the default settings. Please consult help(quantile)
#' and change the 'type' parameter accordingly.
#' In case, you get 'jagged' or disorganized percentile curve, try to reduce the 'k'
#' parameter in modeling.
#' @param data The raw data including the percentiles and norm scores or a cnorm object
#' @param model The model from the bestModel function (optional)
#' @param minRaw Lower bound of the raw score (default = 0)
#' @param maxRaw Upper bound of the raw score
#' @param minAge Variable to restrict the lower bound of the plot to a specific age
#' @param maxAge Variable to restrict the upper bound of the plot to a specific age
#' @param raw The name of the raw variable
#' @param group The name of the grouping variable; the distinct groups are automatically
#' determined
#' @param percentiles Vector with percentile scores, ranging from 0 to 1 (exclusive)
#' @param scale The norm scale, either 'T', 'IQ', 'z', 'percentile' or
#' self defined with a double vector with the mean and standard deviation,
#' f. e. c(10, 3) for Wechsler scale index points; if NULL, scale information from the
#' data preparation is used (default)
#' @param type The type parameter of the quantile function to estimate the percentiles
#' of the raw data (default 7)
#' @param title custom title for plot
#' @param covariate In case, a covariate has been used, please specify the degree of the covariate /
#' the specific value here. If no covariate is specified, both degrees will be plotted.
#' @seealso plotNormCurves, plotPercentileSeries
#' @examples
#' # Load example data set, compute model and plot results
#' result <- cnorm(raw = elfe$raw, group = elfe$group)
#' plotPercentiles(result)
#' @export
#' @family plot
plotPercentiles <- function(data,
model,
minRaw = NULL,
maxRaw = NULL,
minAge = NULL,
maxAge = NULL,
raw = NULL,
group = NULL,
percentiles = c(0.025, 0.1, 0.25, 0.5, 0.75, 0.9, 0.975),
scale = NULL,
type = 7,
title = NULL, covariate = NULL) {
if(inherits(data, "cnorm")){
model <- data$model
data <- data$data
}
if (!model$useAge){
# plot
data1 <- unique(data)
data1 <- data1[order(data1$raw),]
step = (model$maxRaw - model$minRaw)/100
rt <- rawTable(0, model, minRaw = model$minRaw, maxRaw = model$maxRaw)
plot(normValue ~ raw, data = data1, ylab = "Norm Score", xlab = "Raw Score", col="black",
main = "Norm Score Plot",
sub = paste0("Solution: ", model$ideal.model , ", RMSE = ", round(model$rmse, digits = 4)))
legend(x = "bottomright", legend=c("Manifest Scores", "Regression Model"),
col=c("black", "blue"), lty=1:2, cex=0.8)
lines(norm ~ raw, data = rt, col = "blue")
cat("\nRegresion-based norm table:\n")
print(rt)
return()
}
if(!is.null(covariate)&&is.null(model$covariate)){
warning("Covariate specified but no covariate available in the model. Setting covariate to NULL.")
covariate = NULL
}else if(is.null(covariate)&&!is.null(model$covariate)){
degree <- unique(data[, attr(data, "covariate")])
if (is.null(title)) {
title <- paste0("Observed and Predicted Percentile Curves\nModel: ", model$ideal.model, ", R2 = ", round(model$subsets$adjr2[[model$ideal.model]], digits = 4), ", Covariates: ", degree[[1]], " versus ", degree[[2]])
}
trel <- c(plotPercentiles(data, model, covariate = degree[[1]],
minRaw = minRaw, maxRaw = maxRaw,
minAge = minAge, maxAge = maxAge,
raw = raw, group = group, percentiles = percentiles,
scale = scale, title = title),
plotPercentiles(data, model, covariate = degree[[2]],
minRaw = minRaw, maxRaw = maxRaw,
minAge = minAge, maxAge = maxAge,
raw = raw, group = group, percentiles = percentiles,
scale = scale, title = title))
return(base::print(trel))
}
if(!is.null(model$covariate)){
d <- data[data[[model$covariate]] == covariate, ]
model$fitted.values <- model$fitted.values[data[[model$covariate]] == covariate]
data <- d
}
if (is.null(group)) {
group <- attr(data, "group")
}
if(is.null(data[[group]])){
data$group <- getGroups(data[, attributes(data)$age])
group <- "group"
}
if (is.null(minAge)) {
minAge <- model$minA1
}
if (is.null(maxAge)) {
maxAge <- model$maxA1
}
if (is.null(minRaw)) {
minRaw <- model$minRaw
}
if (is.null(maxRaw)) {
maxRaw <- model$maxRaw
}
if (is.null(raw)) {
raw <- model$raw
}
if (!(raw %in% colnames(data))) {
stop(paste(c("ERROR: Raw score variable '", raw, "' does not exist in data object."), collapse = ""))
}
if (!(group %in% colnames(data))) {
stop(paste(c("ERROR: Grouping variable '", group, "' does not exist in data object."), collapse = ""))
}
if (typeof(group) == "logical" && !group) {
stop("The plotPercentiles-function does not work without a grouping variable.")
}
# compute norm scores from percentile vector
if (is.null(scale)) {
# fetch scale information from model
T <- qnorm(percentiles, model$scaleM, model$scaleSD)
} else if ((typeof(scale) == "double" && length(scale) == 2)) {
T <- qnorm(percentiles, scale[1], scale[2])
} else if (scale == "IQ") {
T <- qnorm(percentiles, 100, 15)
} else if (scale == "z") {
T <- qnorm(percentiles)
} else if (scale == "T") {
T <- qnorm(percentiles, 50, 10)
} else {
# no transformation
T <- percentiles
}
# generate variable names
NAMES <- paste("PR", percentiles * 100, sep = "")
NAMESP <- paste("PredPR", percentiles * 100, sep = "")
# build function for xyplot and aggregate actual percentiles per group
xyFunction <- paste(paste(NAMES, collapse = " + "),
paste(NAMESP, collapse = " + "),
sep = " + ", collapse = " + "
)
xyFunction <- paste(xyFunction, group, sep = " ~ ")
w <- attributes(data)$weights
data[, group] <- round(data[, group], digits=3)
AGEP <- unique(data[, group])
# get actual percentiles
if(!is.null(attr(data, "descend"))&&attr(data, "descend")){
percentile.actual <- as.data.frame(do.call("rbind", lapply(split(data, data[, group]), function(df){weighted.quantile(df[, raw], probs = 1 - percentiles, weights = df$w)})))
}else{
percentile.actual <- as.data.frame(do.call("rbind", lapply(split(data, data[, group]), function(df){weighted.quantile(df[, raw], probs = percentiles, weights = df$w)})))
}
percentile.actual$group <- as.numeric(rownames(percentile.actual))
colnames(percentile.actual) <- c(NAMES, c(group))
rownames(percentile.actual) <- AGEP
# build finer grained grouping variable for prediction and fit predicted percentiles
share <- seq(from = model$minA1, to = model$maxA1, length.out = 100)
AGEP <- c(AGEP, share)
percentile.fitted <- data.frame(matrix(NA,
nrow = length(AGEP),
ncol = length(T)
))
for(i in 1:length(AGEP)){
percentile.fitted[i, ] <- predictRaw(T, AGEP[[i]], model$coefficients, minRaw = minRaw, maxRaw = maxRaw)
}
percentile.fitted$group <- AGEP
percentile.fitted <- percentile.fitted[!duplicated(percentile.fitted$group), ]
colnames(percentile.fitted) <- c(NAMESP, c(group))
rownames(percentile.fitted) <- percentile.fitted$group
# Merge actual and predicted scores and plot them show lines
# for predicted scores and dots for actual scores
percentile <- merge(percentile.actual, percentile.fitted,
by = group, all = TRUE
)
END <- .8
COL1 <- rainbow(length(percentiles), end = END)
COL2 <- c(rainbow(length(percentiles), end = END), rainbow(length(percentiles), end = END))
panelfun <- function(..., type, group.number) {
if (group.number > length(T)) {
panel.lines(...)
} else {
panel.points(..., type = "p")
}
}
if (is.null(title)) {
title <- paste0("Observed and Predicted Percentile Curves\nModel: ", model$ideal.model, ", R2 = ", round(model$subsets$adjr2[[model$ideal.model]], digits = 4))
}
chart <- xyplot(formula(xyFunction), percentile,
panel = function(...)
panel.superpose(..., panel.groups = panelfun),
main = title,
ylab = paste0("Raw Score (", raw, ")"), xlab = paste0("Explanatory Variable (", group, ")"),
col = COL2, lwd = 1.5, grid = TRUE,
key = list(
corner = c(0.99, 0.01),
lines = list(col = COL1, lwd = 1.5),
text = list(NAMES)
)
)
base::print(chart)
return(chart)
}
#' Plot the density function per group by raw score
#'
#' The function plots the density curves based on the regression model against
#' the actual percentiles from the raw data. As in 'plotNormCurves',
#' please check for inconsistent curves, especially curves showing implausible shapes as f. e.
#' violations of biuniqueness.
#' @param model The model from the bestModel function or a cnorm object
#' @param minRaw Lower bound of the raw score
#' @param maxRaw Upper bound of the raw score
#' @param minNorm Lower bound of the norm score
#' @param maxNorm Upper bound of the norm score
#' @param group Column of groups to plot
#' @param covariate In case, a covariate has been used, please specify the degree of the covariate /
#' the specific value here.
#' @seealso plotNormCurves, plotPercentiles
#' @examples
#' # Load example data set, compute model and plot results for age values 2, 4 and 6
#' result <- cnorm(raw = elfe$raw, group = elfe$group)
#' plotDensity(result, group = c (2, 4, 6))
#' @export
#' @family plot
plotDensity <- function(model,
minRaw = NULL,
maxRaw = NULL,
minNorm = NULL,
maxNorm = NULL,
group = NULL, covariate = NULL) {
if(inherits(model, "cnorm")){
model <- model$model
}
if(!is.null(covariate)&&is.null(model$covariate)){
warning("Covariate specified but no covariate available in the model. Setting covariate to NULL.")
covariate = NULL
}else if(is.null(covariate)&&!is.null(model$covariate)){
stop("Covariate specified in the model, but no function parameter available.")
}
if (is.null(minNorm)) {
minNorm <- model$minL1
}
if (is.null(maxNorm)) {
maxNorm <- model$maxL1
}
if (is.null(minRaw)) {
minRaw <- model$minRaw
}
if (is.null(maxRaw)) {
maxRaw <- model$maxRaw
}
if (is.null(group)&&model$useAge) {
group <- round(seq(from = model$minA1, to = model$maxA1, length.out = 4), digits = 3)
}else if(!model$useAge){
group <- c(1)
}
step <- (maxNorm - minNorm) / 100
i <- 1
while (i <= length(group)) {
norm <- normTable(group[[i]], model = model, minNorm = minNorm, maxNorm = maxNorm, minRaw = minRaw, maxRaw = maxRaw, step = step, covariate = covariate, pretty = F)
norm$group <- rep(group[[i]], length.out = nrow(norm))
if (i == 1) {
matrix <- norm
} else {
matrix <- rbind(matrix, norm)
}
i <- i + 1
}
matrix <- matrix[matrix$norm > minNorm & matrix$norm < maxNorm, ]
matrix <- matrix[matrix$raw > minRaw & matrix$raw < maxRaw, ]
matrix$density <- dnorm(matrix$norm, mean = model$scaleM, sd = model$scaleSD)
# lattice display options
COL <- rainbow(length(group))
NAMES <- paste("Group ", group, sep = "")
panelfun <- function(..., type, group.number) {
panel.lines(...)
}
plot <- xyplot(density ~ raw,
data = matrix, groups = group,
panel = function(...)
panel.superpose(..., panel.groups = panelfun),
main = "Density functions",
ylab = "Density", xlab = "Raw Score",
col = COL, lwd = 1.5, grid = TRUE,
key = list(
corner = c(0, 1),
lines = list(col = COL, lwd = 1.5),
text = list(NAMES)
)
)
base::print(plot)
return(matrix)
}
#' Generates a series of plots with number curves by percentile for different models
#'
#' This functions makes use of 'plotPercentiles' to generate a series of plots
#' with different number of predictors. It draws on the information provided by the model object
#' to determine the bounds of the modeling (age and standard score range). It can be used as an
#' additional model check to determine the best fitting model. Please have a look at the
#'' plotPercentiles' function for further information.
#' @param data The raw data including the percentiles and norm scores or a cnorm object
#' @param model The model from the bestModel function (optional)
#' @param start Number of predictors to start with
#' @param end Number of predictors to end with
#' @param group The name of the grouping variable; the distinct groups are automatically
#' determined
#' @param percentiles Vector with percentile scores, ranging from 0 to 1 (exclusive)
#' @param type The type parameter of the quantile function to estimate the percentiles
#' of the raw data (default 7)
#' @param filename Prefix of the filename. If specified, the plots are saves as
#' png files in the directory of the workspace, instead of displaying them
#' @seealso plotPercentiles
#' @return the complete list of plots
#' @export
#'
#' @examples
#' # Load example data set, compute model and plot results
#' result <- cnorm(raw = elfe$raw, group = elfe$group)
#' plotPercentileSeries(result, start=1, end=5, group="group")
#' @family plot
plotPercentileSeries <- function(data, model, start = 1, end = NULL, group = NULL,
percentiles = c(0.025, 0.1, 0.25, 0.5, 0.75, 0.9, 0.975),
type = 7,
filename = NULL) {
if(inherits(data, "cnorm")){
model <- data$model
d <- data$data
}else{
d <- as.data.frame(data)
}
if(!is.null(model$covariate)){
stop("This function us currently not able to handle models with covariates.")
}
if (!attr(d, "useAge")){
stop("Age or group variable explicitely set to FALSE in dataset. No plotting available.")
}
if ((is.null(end)) || (end > length(model$subsets$rss))) {
end <- length(model$subsets$rss)
}
if (start < 1) {
start <- 1
}
if (start > end) {
start <- end
}
minR <- min(d[, model$raw])
maxR <- max(d[, model$raw])
l <- list()
while (start <= end) {
message(paste0("Plotting model ", start))
# compute model
text <- paste0(model$raw, " ~ ")
names <- colnames(model$subsets$outmat)
j <- 1
nr <- 0
while (j <= length(names)) {
if (model$subsets$outmat[start, j] == "*") {
text1 <- names[j]
if (nr == 0) {
text <- paste(text, text1, sep = "")
} else {
text <- paste(text, text1, sep = " + ")
}
nr <- nr + 1
}
j <- j + 1
}
bestformula <- lm(text, d)
bestformula$ideal.model <- model$ideal.model
bestformula$cutoff <- model$cutoff
bestformula$subsets <- model$subsets
bestformula$useAge <- model$useAge
bestformula$maxA1 <- model$maxA1
bestformula$minA1 <- model$minA1
bestformula$minL1 <- model$minL1
bestformula$maxL1 <- model$maxL1
bestformula$minRaw <- minR
bestformula$maxRaw <- maxR
bestformula$raw <- model$raw
bestformula$scaleSD <- attributes(d)$scaleSD
bestformula$scaleM <- attributes(d)$scaleM
bestformula$descend <- attributes(d)$descend
bestformula$group <- attributes(d)$group
bestformula$age <- attributes(d)$age
bestformula$k <- attributes(d)$k
l[[length(l) + 1]] <- plotPercentiles(d, bestformula,
minAge = model$minA1, maxAge = model$maxA1,
minRaw = minR,
maxRaw = maxR,
percentiles = percentiles,
scale = NULL,
group = group,
title = paste0("Observed and Predicted Percentiles\nModel with ", bestformula$subsets$numberOfTerms[[start]], " predictors, R2=", round(bestformula$subsets$adjr2[[start]], digits = 4))
)
if (!is.null(filename)) {
trellis.device(device = "png", filename = paste0(filename, start, ".png"))
base::print(l[[length(l)]])
dev.off()
}
start <- start + 1
}
return(l)
}
#' Evaluate information criteria for regression model
#'
#' Plots the information criterion - either Cp (default) or BIC - against
#' the adjusted R square of the feature selection in the modeling process.
#' Both BIC and Mallow's Cp are measures to avoid over-fitting. Please
#' choose the model that has a high information criterion, while modeling
#' the original data as close as possible. R2 adjusted values of ~ .99 might
#' work well, depending on your scenario. In other words: Look out for the
#' elbow in the curve and choose th model where the information criterion
#' begins to drop. Nonetheless, inspect the according model with \code{plotPercentiles(data, group)}
#' to visually inspect the course of the percentiles.
#' In the plot, Mallow's Cp is log transformed and the BIC is always highly
#' negative. The R2 cutoff that was specified in the bestModel function is
#' displayed as a dashed line.
#' @param model The regression model from the bestModel function or a cnorm object
#' @param type Type of chart with 0 = adjusted R2 by number of predictors,
#' 1 = log transformed Mallow's Cp by adjusted R2, 2 = Bayesian Information
#' Criterion (BIC) by adjusted R2, 3 = Root Mean Square Error (RMSE),
#' 4 = Residual Sum of Squares by number, 5 = F-test statistic for consecutive models
#' and 6 = p-value for model tests
#' of predictors
#' @param index add index labels to data points
#' @seealso bestModel, plotPercentiles, printSubset
#' @examples
#' # Compute model with example data and plot information function
#' cnorm.model <- cnorm(raw = elfe$raw, group = elfe$group)
#' plotSubset(cnorm.model)
#' @export
#' @family plot
plotSubset <- function(model, type = 0, index = FALSE) {
if(inherits(model, "cnorm")){
model <- model$model
}
# compute F and significance
RSS1 <- c(NA, model$subsets$rss)
RSS2 <- c(model$subsets$rss, NA)
k1 <- seq(from = 1, to = length(RSS1))
k2 <- seq(from = 2, to = length(RSS1) + 1)
df1 <- k2 - k1
df2 <- length(model$fitted.values) - k2
F <- ((RSS1-RSS2)/df1)/(RSS2/df2)
p <- 1 - pf(F, df1, df2)
dataFrameTMP <- data.frame(adjr2 = model$subsets$adjr2, bic = model$subsets$bic,
cp = model$subsets$cp, RSS = model$subsets$rss,
RMSE = sqrt(model$subsets$rss / length(model$fitted.values)),
F = head(F, -1), p = head(p, -1),
nr = seq(1, length(model$subsets$adjr2), by = 1))
indexLabel <- seq(from = 1, to = nrow(dataFrameTMP))
if (type == 1) {
xyplot(cp ~ adjr2,
data = dataFrameTMP, type = "b",
col.line = "lightblue", lwd = 1,
grid = TRUE, scales = list(y = list(log = 10)),
main = "Information Function",
ylab = "log-transformed Mallows's Cp",
xlab = "Adjusted R2",
key = list(
corner = c(
0.1,
0.1
), lines = list(
col = c("lightblue", "#9933FF"),
lty = c(1, 2), lwd = 2
),
text = list(c(
"Model in Ascending Order",
"Cutoff Value"
))
), panel = function(x, y, ...) {
panel.abline(
v = model$cutoff,
lwd = 2, lty = "longdash",
col = "#9933FF", label = model$cutoff
)
panel.xyplot(x, y, ...)
# add index value to data points
if(index)
ltext(x = x, y = y, labels = indexLabel, cex=.7)
}
)
} else if (type == 2) {
xyplot(bic ~ adjr2,
data = dataFrameTMP, type = "b",
col.line = "lightblue", lwd = 1,
grid = TRUE,
main = "Information Function",
ylab = "BIC",
xlab = "Adjusted R2",
key = list(
corner = c(
0.1,
0.1
), lines = list(
col = c("lightblue", "#9933FF"),
lty = c(1, 2), lwd = 2
),
text = list(c(
"Model in Ascending Order",
"cutoff Value"
))
), panel = function(x, y, ...) {
panel.abline(
v = model$cutoff,
lwd = 2, lty = "longdash",
col = "#9933FF", label = model$cutoff
)
panel.xyplot(x, y, ...)
# add index value to data points
if(index)
ltext(x = x, y = y, labels = indexLabel, cex=.7)
}
)
} else if(type == 3){
xyplot(RMSE ~ nr,
data = dataFrameTMP, type = "b",
col.line = "lightblue", lwd = 1,
grid = TRUE,
main = "Information Function",
ylab = "Root Means Square Error (Raw Score)",
xlab = "Number of Predictors", panel = function(x, y, ...) {
panel.xyplot(x, y, ...)
# add index value to data points
if(index)
ltext(x = x, y = y, labels = indexLabel, cex=.7)
} )
} else if(type == 4){
xyplot(RSS ~ nr,
data = dataFrameTMP, type = "b",
col.line = "lightblue", lwd = 1,
grid = TRUE,
main = "Information Function",
ylab = "Residual Sum of Squares (RSS from Raw Score)",
xlab = "Number of Predictors", panel = function(x, y, ...) {
panel.xyplot(x, y, ...)
# add index value to data points
if(index)
ltext(x = x, y = y, labels = indexLabel, cex=.7)
} )
} else if(type == 5){
xyplot(F ~ nr,
data = dataFrameTMP, type = "b",
col.line = "lightblue", lwd = 1,
grid = TRUE,
main = "Information Function",
ylab = "F-test statistics for consecutive models",
xlab = "Number of Predictors", panel = function(x, y, ...) {
panel.xyplot(x, y, ...)
# add index value to data points
if(index)
ltext(x = x, y = y, labels = indexLabel, cex=.7)
} )
} else if(type == 6){
xyplot(p ~ nr,
data = dataFrameTMP, type = "b",
col.line = "lightblue", lwd = 1,
grid = TRUE,
main = "Information Function",
ylab = "p-values for tests on R2 adj. of consecutive models",
ylim = c(-0.005, 0.11),
xlab = "Number of Predictors",
key = list(
corner = c(
0.1,
0.9
), lines = list(
col = c("#9933FF"),
lty = c(2), lwd = 2
),
text = list(c("p = .05"))
), panel = function(x, y, ...) {
panel.abline(
h = 0.05,
lwd = 2, lty = "longdash",
col = "#9933FF", label = model$cutoff
)
panel.xyplot(x, y, ...)
# add index value to data points
if(index)
ltext(x = x, y = y, labels = indexLabel, cex=.7)
}
)
} else {
xyplot(adjr2 ~ nr,
data = dataFrameTMP, type = "b",
col.line = "lightblue", lwd = 1,
grid = TRUE,
main = "Information Function",
ylab = "Adjusted R2",
xlab = "Number of Predictors",
key = list(
corner = c(
0.9,
0.1
), lines = list(
col = c("#9933FF"),
lty = c(2), lwd = 2
),
text = list(c("Cutoff Value"))
), panel = function(x, y, ...) {
panel.abline(
h = model$cutoff,
lwd = 2, lty = "longdash",
col = "#9933FF", label = model$cutoff
)
panel.xyplot(x, y, ...)
# add index value to data points
if(index)
ltext(x = x, y = y, labels = indexLabel, cex=.7)
}
)
}
}
#' Plot first order derivative of regression model
#'
#' Plots the scores obtained via the first order derivative of the regression model
#' in dependence of the norm score. The results indicate the progression of the
#' norm scores within each age group. The regression based modeling approach
#' relies on the assumption of a linear progression of the norm scores.
#' Negative scores in the first order derivative indicate a violation of this
#' assumption. Scores near zero are typical for bottom and ceiling effects in the raw data.
#' The regression models usually converge within the range of the original
#' values. In case of vertical and horizontal extrapolation, with increasing
#' distance to the original data, the risk of assumption violation increases
#' as well.
#' ATTENTION: plotDerivative is currently still incompatible with reversed raw
#' score scales ('descent' option)
#' @param model The model from the bestModel function or a cnorm object
#' @param minAge Age to start with checking
#' @param maxAge Upper end of the age check
#' @param stepAge Stepping parameter for the age check, usually 1 or 0.1; lower
#' values indicate higher precision / closer checks
#' @param minNorm Lower end of the norm score range, in case of T scores, 25 might be good
#' @param maxNorm Upper end of the norm score range, in case of T scores, 25 might be good
#' @param stepNorm Stepping parameter for norm scores
#' @param order Degree of the derivative (default = 1)
#' @seealso checkConsistency, bestModel, derive
#' @examples
#' # Load example data set, compute model and plot results
#' result <- cnorm(raw = elfe$raw, group = elfe$group)
#' plotDerivative(result, minAge=2, maxAge=5, step=.2, minNorm=25, maxNorm=75, stepNorm=1)
#' @export
#' @family plot
plotDerivative <- function(model,
minAge = NULL,
maxAge = NULL,
minNorm = NULL,
maxNorm = NULL,
stepAge = 0.2,
stepNorm = 1,
order = 1) {
if(inherits(model, "cnorm")){
model <- model$model
}
if (!model$useAge){
stop("Age or group variable explicitely set to FALSE in dataset. No plotting available.")
}
if (is.null(minAge)) {
minAge <- model$minA1
}
if (is.null(maxAge)) {
maxAge <- model$maxA1
}
if (is.null(minNorm)) {
minNorm <- model$minL1
}
if (is.null(maxNorm)) {
maxNorm <- model$maxL1
}
rowS <- c(seq(minNorm, maxNorm, length.out = 1 + (maxNorm - minNorm) / stepNorm))
colS <- c(seq(minAge, maxAge, length.out = 1 + (maxAge - minAge) / stepAge))
coeff <- derive(model, order)
cat(paste0(rangeCheck(model, minAge, maxAge, minNorm, maxNorm), " Coefficients from the ", order, " order derivative function:\n\n"))
base::print(coeff)
devFrame <- data.frame(matrix(NA, nrow = length(rowS), ncol = length(colS)))
dev2 <- data.frame()
colnames(devFrame) <- colS
rownames(devFrame) <- rowS
i <- 1
while (i <= ncol(devFrame)) {
j <- 1
while (j <= nrow(devFrame)) {
devFrame[j, i] <- predictRaw(rowS[[j]], colS[[i]], coeff)
colList <- c(rowS[[j]], colS[[i]], devFrame[j, i])
dev2 <- rbind(dev2, colList)
j <- j + 1
}
i <- i + 1
}
colnames(dev2) <- c("X", "Y", "Z")
# define range and colors
min <- min(dev2$Z)
max <- max(dev2$Z)
diff <- (max - min) / 10
min <- min - diff
max <- max + diff
step <- (max - min) / 1000
regions <- rainbow(1000, end = .8)
key <- list(at = seq(min, max, by = step))
sequence <- seq(min, max, by = step)
desc <- "(1st Order Derivative)"
if (order == 2) {
desc <- "(2nd Order Derivative)"
} else if (order == 3) {
desc <- "(3rd Order Derivative)"
} else if (order > 2) {
desc <- paste0(order, "th Order Derivative)")
}
if (requireNamespace("latticeExtra", quietly = TRUE)) {
p1 <- levelplot(Z ~ Y * X,
data = dev2,
at = sequence,
colorkey = key,
region = T,
col.regions = regions,
panel = panel.2dsmoother,
main = paste0("Slope of the Regression Function\n", desc),
ylab = "Norm Score",
xlab = "Explanatory Variable"
)
} else {
p1 <- levelplot(Z ~ Y * X,
data = dev2,
at = sequence, region = T,
colorkey = key,
col.regions = regions,
main = paste0("Slope of the Regression Function\n", desc),
ylab = "Norm Score",
xlab = "Explanatory Variable"
)
}
p2 <- contourplot(Z ~ Y * X, data = dev2)
p3 <- p1 + p2
p3
}
#' General convencience plotting function
#'
#' @param x a cnorm object
#' @param y the type of plot as a string, can be one of
#' 'raw' (1), 'norm' (2), 'curves' (3), 'percentiles' (4), 'series' (5), 'subset' (6),
#' or 'derivative' (7), either as a string or the according index
#' @param ... additional parameters for the specific plotting function
#'
#' @export
plotCnorm <- function(x, y, ...){
if(!inherits(x, "cnorm")||!is.character(y)){
message("Please provide a cnorm object as parameter x and the type of plot as a string for parameter y, which can be 'raw', 'norm', 'curves', 'percentiles', 'series', 'subset', or 'derivative'.")
return()
}
if(y == "raw" || y == 1){
plotRaw(x, ...)
}else if(y == "norm" || y == 2){
plotNorm(x, ...)
}else if(y == "curves" || y == 3){
plotNormCurves(x, ...)
}else if(y == "percentiles" || y == 4){
plotPercentiles(x, ...)
}else if(y == "density" || y == 5){
plotDensity(x, ...)
}else if(y == "series" || y == 6){
plotPercentileSeries(x, ...)
}else if(y == "subset" || y == 7){
plotSubset(x, ...)
}else if(y == "derivative" || y == 8){
plotDerivative(x, ...)
}else{
plotPercentiles(x, ...)
message("Please provide the type of plot as a string for parameter y, which can be 'raw', 'norm', 'curves', 'percentiles', 'series', 'subset', 'derivative' or the according index.")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.