Nothing
#' Prepare distance scores on data in preparation for composite scoring
#'
#' @param object An object of class \sQuote{CompositeData}.
#' @param winsorize Whether to winsorize the data or not. Defaults to \code{FALSE}.
#' If not \code{FALSE}, the percentile to winsorize at. For example, .01 would be
#' the .01 and the 1 - .01 percentiles.
#' @param values The values to use for winsorization. Optional. If specified, preempts
#' the percentiles given by winsorize.
#' @param better Logical indicating whether \dQuote{better} values than the threshold
#' are allowed. Defaults to \code{TRUE}.
#' #' @param object An DistanceScores class object
#' @param covmat The covariance matrix to use. If missing,
#' austomatically calculated from the data.
#' @param standardize A logical value whether to standardize the data or not.
#' Defaults to \code{TRUE}.
#' @param use.prethreshold A logical value whether to calculate covariance matrix
#' based on the data after winsorizing, but before applying the threshold.
#' Defaults to \code{FALSE}, so that covariances are calculated after thresholds
#' (if any) are applied.
#' @return An S4 object of class \dQuote{CompositeReady}.
#' @family prepare
#' @importFrom JWileymisc winsorizor
#' @importFrom stats cov
#' @export
#' @examples
#' # this example creates distances for the built in mtcars data
#' # see ?mtcars for more details
#' # The distances are calculated from the "best" in the dataset
#' # First we create an appropriate CompositeData class object
#' # higher mpg & hp are better and lower wt & qsec are better
#' d <- CompositeData(mtcars[, c("mpg", "hp", "wt", "qsec")],
#' thresholds = list(one = with(mtcars, c(
#' mpg = max(mpg),
#' hp = max(hp),
#' wt = min(wt),
#' qsec = min(qsec)))
#' ),
#' higherisbetter = c(TRUE, TRUE, FALSE, FALSE),
#' rawtrans = list(
#' mpg = function(x) x,
#' hp = function(x) x,
#' wt = function(x) x,
#' qsec = sqrt))
#'
#'
#' # create the distance scores
#' dres <- prepareComposite(d)
#'
#' # see a density plot of the distance scores
#' dres@@distanceDensity
#' # regular summary of distance scores
#' summary(dres@@distances)
#'
#' # examine covariance matrix
#' round(dres@@covmat,2)
#' # cleanup
#' rm(d, dres)
#'
prepareComposite <- function(object, winsorize = 0, values, better = TRUE,
covmat, standardize = TRUE, use.prethreshold = FALSE) {
# data and input checks
n <- nrow(object@rawdata)
ng <- length(unique(object@groups))
## transform thresholds to match raw data transformations
thresholds <- object@thresholds
thresholds <- lapply(thresholds, function(x) {
sapply(1:object@k, function(i) {
object@rawtrans[[i]](x[i])
})
})
## create the threshold matrix using the (transformed) thresholds
thresholdmatrix <- t(sapply(object@groups, function(i) thresholds[[as.character(i)]]))
rownames(thresholdmatrix) <- NULL
colnames(thresholdmatrix) <- colnames(object@rawdata)
## transform raw data, prior to windsorizing
d <- object@rawdata
d[] <- lapply(1:object@k, function(i) {
object@rawtrans[[i]](d[, i])
})
d <- winsorizor(d, percentile = winsorize, values = values, na.rm = TRUE)
winsorizedValues <- attr(d, "winsorizedValues")
if (use.prethreshold & missing(covmat)) {
covmat <- cov(d, use = "pairwise.complete.obs")
}
d <- as.data.frame(sapply(1:object@k, function(i) {
if (object@higherisbetter[i] == 0) {
d[, i, drop = FALSE] - thresholdmatrix[, i, drop = FALSE]
} else if (object@higherisbetter[i] == 1) {
thresholdmatrix[, i, drop = FALSE] - d[, i, drop = FALSE]
}
}))
colnames(d) <- colnames(thresholdmatrix)
# if "better" than threshold are not allowed
# than truncate at zero, otherwise leave as is
if (!better) {
d <- as.data.frame(apply(d, 2, pmax, 0))
}
if (!use.prethreshold & missing(covmat)) {
covmat <- cov(d, use = "pairwise.complete.obs")
}
if (n > 2) {
dplot <- ldensity(cbind(d, Group = object@groups), melt = TRUE, g = "Group")
} else {
dplot <- NA
}
stopifnot(identical(object@k, ncol(covmat)))
sigma <- sqrt(diag(covmat))
if (standardize) {
compdata <- sweep(d, 2, sigma, "/")
} else {
compdata <- d
}
CompositeReady(
data = as.data.frame(compdata),
covmat = covmat,
sigma = sigma,
standardize = standardize,
use.prethreshold = use.prethreshold,
distances = d,
distanceDensity = dplot,
winsorizedValues = winsorizedValues,
better = better,
rawdata = object@rawdata,
groups = object@groups,
thresholds = object@thresholds,
higherisbetter = object@higherisbetter,
k = object@k,
rawtrans = object@rawtrans
)
}
#' Score Data Using the Mahalanobis Distance
#'
#' Create a composite using the Mahalanobis Distance
#'
#' @param object An object of class \code{CompositeReady}
#' @param ncomponents the number of components to use from the
#' principal component analysis. If missing, defaults to the
#' number of columns in the data.
#' @param pca An optional PCA object from princomp to use.
#' If not passed, will be calculated from the data.
#' @return An S4 object of class \code{MahalanobisScores}.
#' @export
#' @importFrom stats princomp
#' @family composite
#' @examples
#' # this example creates distances for the built in mtcars data
#' # see ?mtcars for more details
#' # The distances are calculated from the "best" in the dataset
#' # First we create an appropriate CompositeData class object
#' # higher mpg & hp are better and lower wt & qsec are better
#' d <- CompositeData(mtcars[, c("mpg", "hp", "wt", "qsec")],
#' thresholds = list(one = with(mtcars, c(
#' mpg = max(mpg),
#' hp = max(hp),
#' wt = min(wt),
#' qsec = min(qsec)))
#' ),
#' higherisbetter = c(TRUE, TRUE, FALSE, FALSE))
#'
#' # create the distance scores
#' # and the composite
#' # covariance matrix will be calculated from the data
#' # and data will be standardized to unit variance by default
#' dres <- prepareComposite(d)
#'
#' # see a density plot of the distance scores
#' dres@@distanceDensity
#' # regular summary of distance scores
#' summary(dres@@distances)
#'
#' # examine covariance matrix
#' round(dres@@covmat,2)
#'
#' # now we can create the composite based on mahalanobis distances
#' # from our defined thresholds
#' mcomp <- mahalanobisComposite(dres, 1)
#'
#' # view a histogram of the composite scores
#' mcomp@@scoreHistogram
#'
#' # summarize the composite scores
#' summary(mcomp@@scores)
#'
#' # check the screeplot and loadings
#' mcomp@@screePlot
#' mcomp@@loadingGraph
#' # examine the loadings as a table
#' mcomp@@loadingTable
#'
#' # one component is adequate to explain these data
#' # to be safe can pick first two and re-run model
#'
#' # use only first two components
#' mcomp2 <- mahalanobisComposite(dres, ncomponents = 2)
#'
#' # view a histogram of the updated composite scores
#' mcomp2@@scoreHistogram
#'
#' # summarize the composite scores
#' summary(mcomp2@@scores)
#'
#' # compare using all versus two components
#' plot(mcomp@@scores, mcomp2@@scores)
#'
#' # cleanup
#' rm(d, dres, mcomp, mcomp2)
mahalanobisComposite <- function(object, ncomponents, pca) {
stopifnot(is(object, "CompositeReady"))
if (missing(ncomponents)) {
ncomponents <- object@k
}
stopifnot(ncomponents <= object@k)
if (missing(pca)) {
pca <- princomp(scores = FALSE, cor = object@standardize, covmat = object@covmat)
}
screeplot <- ggplot(data.frame(Component = 1:object@k, Eigenvalue = pca$sdev^2),
aes(Component, Eigenvalue)) +
geom_line() + geom_point() +
geom_hline(aes(yintercept = 1)) +
scale_x_continuous(breaks = 1:object@k) +
theme_classic()
Lbase <- as.matrix(pca$loadings[])
ltab <- Lbase
colnames(ltab) <- paste0("C", 1:object@k)
ltab[] <- format(round(ltab, 2), nsmall=2, digits=2)
ltab <- ltab[, 1:ncomponents, drop = FALSE]
L <- as.data.frame(pca$loadings[, 1:ncomponents, drop = FALSE])
colnames(L) <- paste0("Comp", 1:ncomponents)
L$Variable <- factor(rownames(L), levels = colnames(object@data)[1:object@k])
L <- melt(L, id.vars = "Variable")
loadingsplot <- ggplot(L, aes(Variable, abs(value), fill = ifelse(value > 0, "positive", "negative"))) +
geom_bar(stat = "identity") +
scale_fill_manual("Direction", values = c("positive" = "black", "negative" = "grey80")) +
facet_grid(variable ~ .) +
ylab("Absolute Loading") +
xlab("") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, vjust=.5))
c.scores <- as.matrix(object@data) %*% Lbase %*% diag(1/pca$sdev)
c.scores <- as.data.frame(c.scores)
colnames(c.scores) <- paste0("C", 1:object@k)
finalScores <- sqrt(rowSums(c.scores[, 1:ncomponents, drop = FALSE]^2))
# alternate way
# finalScores <- sqrt(rowSums((data %*% solve(cov2cor(covmat))) * data))
class(pca) <- "list"
new("MahalanobisScores",
scores = finalScores,
scoreHistogram = ldensity(data.frame(Scores = finalScores), x = "Scores", hist = TRUE),
screePlot = screeplot,
loadingGraph = loadingsplot,
loadingTable = ltab,
pca = pca,
ncomponents = ncomponents,
CompositeReady = object)
}
# clear R CMD CHECK notes
if (getRversion() >= "2.15.1") {
utils::globalVariables(c("Component", "Eigenvalue", "Variable", "value"))
}
#' Score Data Using a simple sum
#'
#' Create a composite using summation
#'
#' @param object An object of class \code{CompositeReady}
#' @param transform A character string indicating the type of transformation to use.
#' One of \dQuote{square}, \dQuote{abs}, or \dQuote{none}, which either sums the raw data,
#' sums the squared data and then takes the square root, or sums the absolute values of the
#' data.
#' @param type A character string indicating the type of aggregation to use.
#' One of \dQuote{sum} or \dQuote{mean}.
#' @param systems An optional list where each element is a character vector of the
#' variable names within a particular system. If given, scores are first averaged
#' within a system, before being aggregated across systems.
#' @return An S4 object of class \code{SumScores}.
#' @export
#' @family composite
#' @examples
#' # this example creates distances for the built in mtcars data
#' # see ?mtcars for more details
#' # The distances are calculated from the "best" in the dataset
#' # First we create an appropriate CompositeData class object
#' # higher mpg & hp are better and lower wt & qsec are better
#' d <- CompositeData(mtcars[, c("mpg", "hp", "wt", "qsec")],
#' thresholds = list(one = with(mtcars, c(
#' mpg = max(mpg),
#' hp = max(hp),
#' wt = min(wt),
#' qsec = min(qsec)))
#' ),
#' higherisbetter = c(TRUE, TRUE, FALSE, FALSE))
#'
#' # create the distance scores
#' # and the composite
#' # covariance matrix will be calculated from the data
#' # and data will be standardized to unit variance by default
#' dres <- prepareComposite(d)
#'
#' # see a density plot of the distance scores
#' dres@@distanceDensity
#' # regular summary of distance scores
#' summary(dres@@distances)
#'
#' # examine covariance matrix
#' round(dres@@covmat,2)
#'
#' # now we can create the composite based on summing the (standardized)
#' # distances from our defined thresholds
#' # by default, distances are squared, then summed, and then square rooted
#' # to be back on the original scale
#' scomp <- sumComposite(dres, "square", "sum")
#'
#' # view a histogram and summary of the composite scores
#' scomp@@scoreHistogram
#' summary(scomp@@scores)
#'
#' # calculate average (mean) instead of sum
#' scomp2 <- sumComposite(dres, "square", "mean")
#'
#' # view a histogram and summary of the composite scores
#' scomp2@@scoreHistogram
#' summary(scomp2@@scores)
#'
#' # scores are still the same (just different scaling)
#' plot(scomp@@scores, scomp2@@scores)
#'
#' # first average scores within a system, then sum
#' # within a system, scores are always averaged, never summed
#' scomp3 <- sumComposite(dres, "square", "sum",
#' systems = list(
#' environment = c("mpg"),
#' performance = c("hp", "qsec", "wt")))
#'
#' # view a histogram and summary of the composite scores
#' scomp3@@scoreHistogram
#' summary(scomp3@@scores)
#'
#' # compare all three scores
#' # because of the different number of indicators within each system
#' # there is a re-weighting for S3
#' plot(data.frame(S1 = scomp@@scores, S2 = scomp2@@scores, S3 = scomp3@@scores))
#'
#' # cleanup
#' rm(d, dres, scomp, scomp2, scomp3)
sumComposite <- function(object, transform = c("square", "abs", "none"), type = c("sum", "mean"), systems) {
stopifnot(is(object, "CompositeReady"))
transform <- match.arg(transform)
type <- match.arg(type)
aggregator <- switch(type,
sum = rowSums,
mean = rowMeans)
## switch function to transform data (to) and backtransform (back)
trans <- switch(transform,
square = list(to = function(x) x^2, back = sqrt),
abs = list(to = abs, back = I),
none = list(to = I, back = I))
if (!missing(systems)) {
x <- do.call(cbind, lapply(systems, function(v) {
rowMeans(trans$to(object@data[, v, drop = FALSE]))
}))
finalScores <- trans$back(aggregator(x))
} else {
finalScores <- trans$back(aggregator(trans$to(object@data)))
systems <- list(NA_character_)
}
new("SumScores",
scores = as.numeric(finalScores),
scoreHistogram = ldensity(data.frame(Scores = finalScores), x = "Scores", hist = TRUE),
transform = transform,
type = type,
trans = trans,
systems = systems,
CompositeReady = object)
}
#' Score Data Using a Factor Model
#'
#' Create a composite using a Factor Model
#'
#' @param object An object of class \code{CompositeReady}
#' @param type A character string indicating the type of factor model to use
#' @param factors A named list where names are the factor names and each
#' element is a character string of the indicator names.
#' @return An S4 object of class \code{FactorScores}.
#' @import lavaan
#' @export
#' @family composite
#' @examples
#' # this example creates distances for the built in mtcars data
#' # see ?mtcars for more details
#' # The distances are calculated from the "best" in the dataset
#' # First we create an appropriate CompositeData class object
#' # higher mpg & hp are better and lower wt & qsec are better
#' d <- CompositeData(mtcars[, c("mpg", "hp", "wt", "qsec")],
#' thresholds = list(one = with(mtcars, c(
#' mpg = max(mpg),
#' hp = max(hp),
#' wt = min(wt),
#' qsec = min(qsec)))
#' ),
#' higherisbetter = c(TRUE, TRUE, FALSE, FALSE))
#'
#' # create the distance scores
#' # and the composite
#' # covariance matrix will be calculated from the data
#' # and data will be standardized to unit variance by default
#' dres <- prepareComposite(d)
#'
#' # see a density plot of the distance scores
#' dres@@distanceDensity
#' # regular summary of distance scores
#' summary(dres@@distances)
#'
#' # examine covariance matrix
#' round(dres@@covmat,2)
#'
#' # now we can create the composite based on summing the (standardized)
#' # distances from our defined thresholds
#' # by default, distances are squared, then summed, and then square rooted
#' # to be back on the original scale
#' fcomp <- factorComposite(dres, type = "onefactor")
#'
#' # view a histogram of the composite scores
#' fcomp@@scoreHistogram
#'
#' # summarize the composite scores
#' summary(fcomp@@scores)
#'
#' \dontrun{
#' # we can also fit a second-order factor model
#' # there are not enough indicators to identify the factor
#' # and so lavaan gives us warning messages
#' fcomp2 <- factorComposite(dres, type = "secondorderfactor",
#' factors = list(speed = c("hp", "qsec")))
#'
#' # view a histogram of the composite scores
#' fcomp2@@scoreHistogram
#'
#' # summarize the composite scores
#' summary(fcomp2@@scores)
#'
#' # compare one and second-order factor model scores
#' plot(fcomp@@scores, fcomp2@@scores)
#'
#' # cleanup
#' rm(d, dres, fcomp, fcomp2)
#' }
factorComposite <- function(object, type = c("onefactor", "secondorderfactor", "bifactor"), factors = list(NA_character_)) {
stopifnot(is(object, "CompositeReady"))
type <- match.arg(type)
vars <- colnames(object@data)
unused <- setdiff(vars, unlist(factors))
onefactor <- paste0("Composite =~ ", paste(vars, collapse = " + "))
m.factors <- paste(sapply(1:length(factors), function(i) {
sprintf("%s =~ %s", names(factors)[i],
ifelse(length(factors[[i]]) < 3,
paste(paste0("b", i, "1", " * ", factors[[i]]), collapse = " + "),
paste(paste0("b", i, 1:length(factors[[i]]), " * ", factors[[i]]), collapse = " + ")))
}), collapse = "\n")
if (length(factors) > 1) {
m.factors.covariances <- sapply(1:(length(factors) - 1), function(i) {
sapply((i + 1):(length(factors)), function(j) {
sprintf("%s ~~ 0 * %s", names(factors)[i], names(factors)[j])
})
})
} else {
m.factors.covariances <- ""
}
m.overall <- paste0("Composite =~ ", paste(c(names(factors), unused), collapse = " + "))
secondorderfactor <- paste(m.factors, m.overall, sep = "\n")
bifactor <- paste(
m.factors,
onefactor,
paste(paste("Composite ~~ 0 * ", names(factors)), collapse = "\n"),
paste(unlist(m.factors.covariances), collapse = "\n"),
sep = "\n")
fit <- switch(type,
onefactor = sem(onefactor, data = object@data),
secondorderfactor = sem(secondorderfactor, data = object@data),
bifactor = sem(bifactor, data = object@data))
factorScores <- as.data.frame(predict(fit))
finalScores <- factorScores$Composite
new("FactorScores",
scores = finalScores,
scoreHistogram = ldensity(data.frame(Scores = finalScores), x = "Scores", hist = TRUE),
factorScores = factorScores,
type = type,
factors = factors,
Fit = fit,
CompositeReady = object)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.