#' @importFrom dplyr bind_rows
#' @importFrom tibble tibble enframe
# Removes [size] values
# from [x] at random
punch_holes <- function(
x, size,
seed){
id <- 1:length(x)
missing <- as.integer(is.na(x))
id_obs <- id[missing==0]
x_obs <- x[missing==0]
set.seed(seed)
draw <- sample(id_obs,size=size)
x[id %in% draw] <- NA
return(x)
}
# Metrics
pfc_stat <- function(x,y){
1-(sum(x==y)/length(x))
}
smd_stat <- function(x,y){
if(!is.numeric(x)){
x <- unclass(x)
}
if(!is.numeric(y)){
y <- unclass(y)
}
abs(mean(x)-mean(y))/sqrt((var(x)+var(y))/2)
}
rmse_stat <- function(x,y){
sqrt(mean((x-y)^2))
}
ks_stat <- function(x,y){
if(!is.numeric(x)){
x <- unclass(x)
}
if(!is.numeric(y)){
y <- unclass(y)
}
as.numeric(
suppressWarnings(ks.test(x,y)$statistic) )
}
# Sort children data.frame
# by the same ID of a parent data.frame
sort_df <- function(children,parent,id){
if( !(id %in% colnames(parent)) ) stop(paste(id, "not in parent."))
if( !(id %in% colnames(children)) ) stop(paste(id, "not in children."))
return(
children[order(
match(
children[[id]],
parent[[id]]
)
),]
)
}
# Value that occurs most often
# (disregarding missings)
qmode <- function (x,na.rm=FALSE) {
if(na.rm) x <- na.omit(x)
ux <- unique(x)
findmax <- which.max(tabulate(match(x, ux)))
return(ux[findmax])
}
is_cat <- function(x){
is.character(x) |
is.factor(x) |
is.logical(x) |
is.ordered(x)
}
is_num <- function(x){
!(is.character(x) |
is.factor(x) |
is.logical(x) |
is.ordered(x))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.