#' Checks if any column contains exclusively NAs and returns colnames if any.
#'
#' @param data An \code{array}, a \code{dataframe} or a \code{matrix}.
#' @return A \code{list} with 'fullNA.col' containing the column names that
#' contain exclusively NAs, and 'is.fullNA' a logical TRUE if the array
#' has at least 1 column contaning exclusively NAs, FALSE if it has
#' none.
#' @author Yoann Pageaud.
#' @export
allNA.col <- function(data){
is.fullNA <-
apply(X = apply(X = data, MARGIN = 2, FUN = is.na), MARGIN = 2, FUN = all)
fullNA <- names(is.fullNA[is.fullNA == TRUE])
if(length(unique(is.fullNA)) == 2){
warning(paste("Columns", paste(
paste0("'", fullNA, "'"), collapse = ", "), "are empty!"))
res <- list("fullNA.col" = fullNA, "is.fullNA" = TRUE)
} else if(length(unique(is.fullNA)) > 2){
stop("Some columns could not be analyzed correctly")
} else if(length(unique(is.fullNA)) == 1){
cat("No columns containing exclusively NAs could be found.\n")
res <- list("fullNA.col" = fullNA, "is.fullNA" = FALSE)
}
return(res)
}
#' Looks for the best merging operation(s) between two data.tables trying a set
#' of columns from the second one.
#'
#' @param dt.x A \code{data.table}.
#' @param dt.y Another \code{data.table}.
#' @param by.x A \code{character} specifying a single column name from 'dt.x'.
#' @param try.y A \code{character} vector specifying multiple
#' column names from 'dt.y' to be tried for the
#' merging. If NULL, all columns from 'dt.y' will
#' be tried for the merging
#' (Default: try.y = NULL). Columns with a
#' different type from the one specified in 'by.x'
#' will raise an error by default
#' (See 'skip.incompatible.type').
#' @param skip.incompatible.type A \code{logical} specifying whether potential
#' 'dt.y' columns of incompatible type specified
#' in 'try.y' should be automatically skipped
#' (skip.incompatible.type = TRUE) or not
#' (skip.incompatible.type = FALSE).
#' @return A \code{list} containing:
#' \itemize{
#' \item{'best.merged.dt': a \code{data.table} resulting of the best
#' merging operation if a single merging operation performed
#' the best. If multiple merging operations gave best results,
#' operation names are given as a \code{character} vector.}
#' \item{'merging.results': a \code{list} of the merging operation
#' results. Each result contains 2 elements:
#' \itemize{
#' \item{'merge.res': the \code{data.table} resulting from the
#' merging operation.}
#' \item{'NA.count': an \code{integer} vector giving the number
#' of NAs contained in each columns from 'dt.y' after the
#' merging.}
#' }
#' }
#' }
#' @author Yoann Pageaud.
#' @export
best.merged.dt <- function(
dt.x, dt.y, by.x, try.y = NULL, skip.incompatible.type = FALSE){
#If no columns specified to for the try, take all columns from dt.y
if(is.null(try.y)){ try.y <- colnames(dt.y) }
#Get type of by.x column
type.by.x <- typeof(dt.x[[by.x]])
#Check that all columns in try.y have the same type
dty.type <- dt.y[, .(lapply(X = .SD, FUN = typeof), try.y), .SDcols = try.y]
if(nrow(dty.type) != nrow(dty.type[V1 == type.by.x])){
if(skip.incompatible.type){
warning("Incompatible type columns will be skipped for merging.")
try.y <- dty.type[V1 == type.by.x]$try.y
} else {
stop(paste(
"Some columns specified in 'try.y' have a different type than the",
"'by.x' column one. To automatically skip these columns set",
"'skip.incompatible.type' to TRUE."))
}
}
#Try different merging operations
list.merged.dt <- lapply(X = try.y, FUN = function(i){
merged.dt <- merge(x = dt.x, y = dt.y, by.x = by.x, by.y = i, all.x = TRUE)
count.na.col <- apply(X = apply(
X = merged.dt[, -colnames(dt.x), with = FALSE],
MARGIN = 2, FUN = is.na), MARGIN = 2, FUN = sum, na.rm = TRUE)
list("merge.res" = merged.dt, "NA.count" = count.na.col)
})
names(list.merged.dt) <- paste0("Merged by '", by.x, "' & '", try.y,"'")
ls.minNA <- lapply(X = list.merged.dt, FUN = function(i){
min(i$NA.count, na.rm = TRUE)
})
minNA <- unlist(ls.minNA)
bst.mtch <- minNA[minNA == min(minNA)]
#If there is a best match, returns best match.
if (length(bst.mtch) > 1) {
best.match <- names(bst.mtch)
warning("More than one match performed the best for the selected columns.")
} else { #Else, returns names of ex aequo matches
best.match <- list.merged.dt[[names(bst.mtch)]]$merge.res
}
res <- list("best.merged.dt" = best.match, "merging.results" = list.merged.dt)
return(res)
}
#' Applies the all.equal function to each column between 2 data.tables.
#'
#' @param dt.target A target \code{data.table}.
#' @param dt.current Another \code{data.table} to be compared with target.
#' @return Either TRUE or a \code{list} describing the differences between
#' target and current.
#' @author Yoann Pageaud.
#' @export
all_equal_bycol <- function(dt.target, dt.current){
ls_res <- lapply(X = colnames(dt.target), FUN = function(i){
all.equal(target = dt.target[[i]], current = dt.current[[i]])
})
names(ls_res) <- colnames(dt.target)
ls_res <- lapply(X = ls_res, FUN = function(m){
if(length(m) == 1){
if(!isTRUE(m)){ m }
} else { paste(m, collapse = " - ") }
})
ls_res <- ls_res[!sapply(ls_res, is.null)]
if(length(ls_res) == 0){ res <- TRUE } else { res <- ls_res }
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.