Nothing
#' @title EdSurvey Dataset Vectorization
#'
#' @description The \code{edsurvey.data.frame.list} function creates an
#' \code{edsurvey.data.frame.list} object from a series of
#' \code{edsurvey.data.frame} objects.
#' \code{append.edsurvey.data.frame.list} creates an
#' \code{edsurvey.data.frame.list} from two
#' \code{edsurvey.data.frame} or \code{edsurvey.data.frame.list} objects.
#'
#' An \code{edsurvey.data.frame.list} is useful for looking at
#' data, for example, across time or graphically, and reduces
#' repetition in function calls.
#' The user may specify a variable that varies across the
#' \code{edsurvey.data.frame} objects that is
#' then included in further output.
#'
#' @param datalist a list of \code{edsurvey.data.frame}s to be combined
#' @param cov a character vector that indicates what varies across
#' the \code{edsurvey.data.frame} objects.
#' Guessed if not supplied. For example,
#' if several \code{edsurvey.data.frame}s for several
#' different countries are supplied, then \code{cov} would
#' be set to the country.
#' @param labels a character vector that specifies labels. Must be the
#' same length
#' as \code{datalist}. Not needed if \code{cov} exists or can be
#' guessed. See Examples.
#' @param sdfA an \code{edsurvey.data.frame} or an \code{edsurvey.data.frame.list} to be combined
#' @param sdfB an \code{edsurvey.data.frame} or an \code{edsurvey.data.frame.list} to be combined
#' @param labelsA a character vector that specifies \code{labels} for \code{sdfA} when creating
#' the new \code{edsurvey.data.frame.list}.
#' @param labelsB a character vector that specifies \code{labels} for \code{sdfB} when creating
#' the new \code{edsurvey.data.frame.list}.
#'
#' @details
#' The \code{edsurvey.data.frame.list} can be used in place of an
#' \code{edsurvey.data.frame} in function calls, and results are returned
#' for each of the component \code{edsurvey.data.frame}s, with the
#' organization of the results varying by the particular method.
#'
#' An \code{edsurvey.data.frame.list} can be created from several
#' \code{edsurvey.data.frame} objects that are related;
#' for example, all are NAEP mathematics assessments but have one or more
#' differences (e.g., they are all from different years).
#' Another example could be data from multiple countries for an
#' international assessment.
#'
#' When \code{cov} and \code{labels} are both missing, \code{edsurvey.data.frame.list}
#' attempts to guess what variables may be varying and uses those. When there are no
#' varying covariates, generic labels are automatically generated.
#'
#' @return
#' \code{edsurvey.data.frame.list} returns an \code{edsurvey.data.frame.list} with
#' elements
#' \item{datalist}{a list of \code{edsurvey.data.frame} objects}
#' \item{covs}{a character vector of key variables that vary within
#' the \code{edsurvey.data.frame.list}.
#' When labels are included, they will be included in
#' \code{covs}. In the unusual circumstance that \code{sdfA} or \code{sdfB}
#' is an \code{edsurvey.data.frame.list}
#' has \code{covs}, and labels are not supplied, the \code{covs}
#' are simply pasted together with colons between them.}
#'
#' \code{append.edsurvey.data.frame.list} returns an \code{edsurvey.data.frame.list} with
#' elements
#' \item{datalist}{a list of \code{edsurvey.data.frame} objects}
#' \item{covs}{a character vector of key variables that vary within
#' the \code{edsurvey.data.frame.list}.
#' When labels are included, they will be included in
#' \code{covs}.}
#'
#' @author Paul Bailey, Huade Huo
#'
#' @example man/examples/edsurvey.data.frame.list.R
#' @aliases append.edsurvey.data.frame.list
#' @export
edsurvey.data.frame.list <- function(datalist, cov = NULL, labels = NULL) {
# Search for covariates if no labels or covariates are provided
searching <- ifelse(is.null(cov) & is.null(labels), TRUE, FALSE)
if (searching) {
# these are the attributes that might vary
cov <- c(
"subject", "year", "assessmentCode",
"dataType", "gradeLevel", "survey", "achievementLevels",
"country"
)
}
# the eventual covs result
covs <- NULL
# if the user did not provide the cov (no "s") matrix
if (!is.null(cov)) {
# for each column of cov (c)
covs <- extractCovs(datalist, cov, searching)
}
if (!is.null(labels)) {
if (length(labels) != length(datalist)) {
makeError <- FALSE
if (!inherits(labels, "data.frame")) {
makeError <- TRUE
}
if (nrow(labels) != length(datalist)) {
makeError <- TRUE
}
if (makeError) {
stop(paste0("Length of argument ", sQuote("labels"), " must be the same as the length of the ", sQuote("datalist"), " argument."))
}
}
if (is.null(covs)) {
if (is.data.frame(labels)) {
covs <- labels
} else {
covs <- data.frame(stringsAsFactors = FALSE, labels = labels)
}
} else {
covs$labels <- labels
}
}
# final results
res <- list(datalist = datalist, covs = covs)
class(res) <- c("edsurvey.data.frame.list", "edsurvey.data")
return(res)
}
extractCovs <- function(dataList, cov, searching) {
covs <- sapply(cov, function(c) {
# for each edsurvey.data.frame (z)
sapply(dataList, function(z) {
# grab attribute c from edsurvey.data.frame z
thisAttr <- getAttributes(z, c)
# if this attribute is a character or numeric
if (inherits(thisAttr, "character") | inherits(thisAttr, "numeric")) {
# if the attribute is a vector with more than one element we need to reduce it to a single element.
if (length(thisAttr) > 1) {
thisAttr <- paste(thisAttr, collapse = "; ")
}
} else {
thisAttr <- "" # return a blank character value in case of a missing cov value otherwise its returned as a list and won't load into the data.frame
}
thisAttr
}, simplify = TRUE)
}, simplify = FALSE)
# make proposed covs
covs <- data.frame(stringsAsFactors = FALSE, covs)
if (searching) {
for (i in ncol(covs):1) {
if (length(unique(covs[ , i])) == 1) {
if (i == 1 & ncol(covs) == 1) {
# we have removed all of the columns.
# So, the user asked us to automatically identify attributes that varried and we could not.
# warn them and just use LETTERS as labels
warning("Cannot identify attributes that vary across elements in datalist. Using generated labels instead.")
let <- LETTERS
while (length(let) < nrow(covs)) {
let <- paste0(rep(let, each = 26), rep(LETTERS, length(let)))
}
covs$labels <- let[1:nrow(covs)]
}
# if there is no variation in the column, get rid of it
covs <- covs[ , -i, drop = FALSE]
}
}
}
return(covs)
}
# @author Huade Huo and Paul Bailey
#
#' @rdname edsurvey.data.frame.list
#' @export
append.edsurvey.data.frame.list <- function(sdfA, sdfB, labelsA = NULL, labelsB = NULL) {
# return a list of sdfs from either an edsurvey.data.frame.list or a single edsurvey.data.frame
getDataList <- function(sdf) {
if (inherits(sdf, c("edsurvey.data.frame.list"))) {
return(unlist(lapply(sdf[[1]], list), recursive = FALSE))
} else {
return(list(sdf))
}
}
getDataLabel <- function(sdf, labels, name) {
if (!is.null(labels)) {
labels <- as.data.frame(labels)
if (inherits(sdf, c("edsurvey.data.frame.list"))) {
if (nrow(labels) != length(sdf[[1]])) {
stop(paste0("Provided labels", name, " has ", nrow(labels), " labels, while sdf", name, " has ", length(sdf[[1]]), " elements."))
}
return(labels)
} else {
# single item
if (nrow(labels) != 1) {
stop(paste0("Provided labels", name, " has ", nrow(labels), " labels, while sdf", name, " has 1 element."))
}
return(labels)
}
} else {
# no labels provided
if (inherits(sdf, c("edsurvey.data.frame.list"))) {
return(sdf[[2]])
} else {
return(data.frame(labels = paste0("label ", name)))
}
}
}
cov <- c(
"subject", "year", "assessmentCode",
"dataType", "gradeLevel", "survey", "achievementLevels",
"country"
)
# get data list
dataList <- c(
getDataList(sdfA),
getDataList(sdfB)
)
# if there are no provided labels and the existing labels appear to be autogenerated
# simply autogenerate new labels.
if (is.null(labelsA) & is.null(labelsB)) {
if (inherits(sdfA, "edsurvey.data.frame.list") & inherits(sdfB, "edsurvey.data.frame")) {
if (all(colnames(sdfA$covs) %in% cov)) {
labs <- extractCovs(dataList = dataList, cov = cov, searching = TRUE)
return(edsurvey.data.frame.list(
datalist = dataList,
labels = labs
))
}
# sdfA is an esdfl, sdfB is an esdf
}
if (inherits(sdfB, "edsurvey.data.frame.list") & inherits(sdfA, "edsurvey.data.frame")) {
if (all(colnames(sdfB$covs) %in% cov)) {
# sdfB is an esdfl, sdfA is an esdf
labs <- extractCovs(dataList = dataList, cov = cov, searching = TRUE)
return(edsurvey.data.frame.list(
datalist = dataList,
labels = labs
))
}
}
}
# get current labels
labA <- getDataLabel(sdfA, labelsA, "A")
labB <- getDataLabel(sdfB, labelsB, "B")
cnA <- colnames(labA)
cnB <- colnames(labB)
for (i in seq_along(sdA <- setdiff(cnA, cnB))) {
labB[ , sdA[i]] <- NA
}
for (i in seq_along(sdB <- setdiff(cnB, cnA))) {
labA[ , sdB[i]] <- NA
}
labs <- rbind(labA, labB)
# labels may not be unique, if so, start over
if (nrow(unique(labs)) < nrow(labs)) {
labs <- extractCovs(dataList, cov, searching = TRUE)
}
# bring it all together.
return(edsurvey.data.frame.list(
datalist = dataList,
labels = labs
))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.