#' Merge a supplementary dataset into a primary dataset
#'
#' @param primary the data.frame of the primary dataset
#' @param supplementary the data.frame of the supplementary dataset
#' @param remove_attributes If \code{TRUE}, remove all attributes from all
#' columns (this will break many classes); if \code{FALSE}, remove no
#' attributes from any columns; if a character, remove those attributes from
#' the columns.
#' @inheritParams supp_reformat
#' @return \code{primary} merged with \code{supplementary} where new column
#' names come from the \code{QNAM} column in \code{supplementary}
#' @seealso \code{\link{supp_reformat}}
#' @export
#' @importFrom dplyr anti_join
#' @importFrom rlang abort inform
merge_supp <- function(primary, supplementary, remove_attributes = c("label"), auto_convert=FALSE) {
if (length(unique(supplementary$RDOMAIN)) != 1) {
stop("Only direct relationships with supplementary domains are currently supported.")
}
if (is.logical(remove_attributes) && remove_attributes) {
primary <- strip_attributes(primary, specific = NULL)
supplementary <- strip_attributes(supplementary, specific = NULL)
} else if (is.character(remove_attributes)) {
primary <- strip_attributes(primary, specific = remove_attributes)
supplementary <- strip_attributes(supplementary, specific = remove_attributes)
}
ret <- primary
supp_prep <- supp_reformat(supplementary, auto_convert=auto_convert)
for (current_supp_idx in seq_along(supp_prep)) {
current_supp <- supp_prep[[current_supp_idx]]
current_idvar <- names(supp_prep)[current_supp_idx]
if (current_idvar != "") {
# Check for a class mismatch between the original and the supp domain idvar
if (any(class(current_supp[[current_idvar]]) != class(ret[[current_idvar]]))) {
orig <- current_supp[[current_idvar]]
current_supp[[current_idvar]] <-
methods::as(current_supp[[current_idvar]], Class=class(ret[[current_idvar]]))
if (!all(is.na(orig) == is.na(current_supp[[current_idvar]]))) {
# Introduction of an NA is a problem
rlang::abort(
message=sprintf("NA introduced by coercion for supplemental merge in column %s", current_idvar),
class="Rsdtm_merge_supp_na_idvar"
)
} else {
rlang::inform(
message=
sprintf(
"Supplemental merge column %s type converted to class: %s",
current_idvar,
class(ret[[current_idvar]])[1]
),
class="Rsdtm_merge_supp_convert_idvar"
)
}
}
}
current_join_vars <- setdiff(c("STUDYID", "DOMAIN", "USUBJID", current_idvar), "")
missed_rows <- dplyr::anti_join(current_supp, ret, by = current_join_vars)
if (nrow(missed_rows)) {
stop(
nrow(missed_rows),
" rows from the ",
current_idvar,
" IDVAR in the SUPP domain do not match rows in the primary ",
primary$DOMAIN[1],
" dataset."
)
}
ret <- dplyr::left_join(ret, current_supp, by = current_join_vars)
}
ret
}
#' Reformat a --SUPP SDTM domain into a list of data.frames ready for merging
#' into the primary domain.
#'
#' @param x a --SUPP SDTM domain object
#' @param auto_convert should the data be automatically converted using
#' `type_convert()`?
#' @return A list with length the same as \code{unique(x$IDVAR)} with
#' data.frames ready for merging into the primary dataset.
#' @seealso \code{\link{merge_supp}}
#' @export
supp_reformat <- function(x, auto_convert=FALSE) {
ret <- list()
for (current_idvar in unique(x$IDVAR)) {
ret <- append(
ret,
list(
supp_reformat_single(
x[x$IDVAR %in% current_idvar, ],
auto_convert=auto_convert
)
)
)
}
names(ret) <- unique(x$IDVAR)
ret
}
#' @importFrom dplyr rename_at recode
#' @importFrom tidyr spread
#' @importFrom readr type_convert
supp_reformat_single <- function(x, auto_convert=FALSE) {
idvar <- unique(x$IDVAR)
if (length(unique(x$RDOMAIN)) != 1) {
stop("RDOMAIN column in x must have a single value.")
} else if (length(idvar) != 1) {
stop("IDVAR column in x must have a single value.")
} else if (any(c("APID", "POOLID") %in% names(x))) {
stop("APID and POOLID are not yet supported.")
}
# Columns to drop
if (all(x$IDVAR %in% "" & x$IDVARVAL %in% "")) {
message(
"No IDVAR or IDVARVAL in SUPP", unique(x$RDOMAIN),
" data; assuming USUBJID is sufficient for merge."
)
ret <- x[, setdiff(names(x), c("IDVAR", "IDVARVAL", "QLABEL", "QORIG", "QEVAL")), drop=FALSE]
ret <-
rename_at(
.tbl=ret,
.vars="RDOMAIN",
.funs=dplyr::recode,
RDOMAIN="DOMAIN"
)
} else {
if (any(x$IDVAR %in% "")) {
stop("Some IDVAR values are missing (when some IDVAR or IDVARVAL are present) in SUPP", unique(x$RDOMAIN))
} else if (any(x$IDVARVAL %in% "")) {
stop("Some IDVARVAL values are missing (when some IDVAR or IDVARVAL are present) in SUPP", unique(x$RDOMAIN))
}
ret <- x[, setdiff(names(x), c("IDVAR", "QLABEL", "QORIG", "QEVAL")), drop=FALSE]
ret <-
rename_at(
.tbl=ret,
.vars=c("RDOMAIN", "IDVARVAL"),
.funs=recode,
RDOMAIN="DOMAIN",
IDVARVAL=idvar
)
}
ret <-
tidyr::spread(
ret,
key="QNAM",
value="QVAL"
)
if (auto_convert) {
ret <- type_convert(df=ret)
}
ret
}
#' Remove attributes from an object
#'
#' @param x The object to remove attributes from
#' @param specific If \code{NULL}, all attributes are removed. If a character
#' vector, only the named attributes are removed.
#' @param columns_only Do not strip attributes from the data.frame; only strip
#' them from the columns of the data.frame.
#' @param ... Passed to other `strip_attributes()` methods.
#' @return \code{x} with fewer attributes.
#' @export
strip_attributes <- function(x, specific=NULL, ...) {
UseMethod("strip_attributes")
}
#' @rdname strip_attributes
strip_attributes.data.frame <- function(x, specific=NULL, columns_only=TRUE, ...) {
if (columns_only) {
for (nm in seq_along(x)) {
x[[nm]] <- strip_attributes(x[[nm]], specific=specific, columns_only=columns_only, ...)
}
x
} else {
strip_attributes.default(x, specific=specific, ...)
}
}
#' @rdname strip_attributes
strip_attributes.default <- function(x, specific=NULL, ...) {
if (is.null(specific)) {
attributes(x) <- NULL
x
} else {
for (current in specific) {
attr(x, current) <- NULL
}
x
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.