## {{{ multiple inputation }}}
## {{{ docs }}}
#' Multiple imputation
#'
#' This is a wrap function for the Multiple Imputation package mice.
#'
#' @param data a data.frame
#' @param RHS.formula a string with the RHS of the regression model
#' @param dep.vars a string vector with the names of the dependent variables.
#' @param ind.vars a string vector with the names of the independent variables.
#' @param digits integer, number of significant digits to return in the table
#' @inheritParams mice::mice
#'
#' @return The function returns a list with the summary output of the models estimated after the multiple imputation is performed
#'
#' @details See help(mice)
#'
#' @examples
#' library(magrittr)
#'
#' data = tibble::data_frame(x1 = rnorm(200,3,1),
#' x2 = rexp(200),
#' cat.var = sample(c(0,1), 200, replace=TRUE),
#' cat.var2 = sample(letters[1:4], 200, replace=TRUE),
#' y1 = 10*x1*cat.var+rnorm(200,0,10) +
#' 3*x2*(6*(cat.var2=='a') -3*(cat.var2=='b') +
#' 1*(cat.var2=='c') +1*(cat.var2=='d')),
#' y2 = -10*x1*cat.var+rnorm(200,0,10) +
#' 10*x2*(3*(cat.var2=='a') -3*(cat.var2=='b') +
#' 1*(cat.var2=='c') -1*(cat.var2=='d'))
#' ) %>%
#' dplyr::mutate(cat.var=as.factor(cat.var))
#' data$x1[sample(1:nrow(data), 10)] = NA
#'
#'
#' formula = "x1*cat.var+x2*cat.var2"
#' imp = emultimputation(data, formula, dep.vars = c("y1", "y2"),
#' ind.vars=c("x1", "x2", "cat.var", "cat.var2"))
#' imp
#' @export
## }}}
emultimputation <- function(data, RHS.formula, dep.vars, ind.vars, m=5, maxit=50, method='pmm', seed=500, digits=4)
{
formula = paste0("y ~ ", formula)
## Debug/Monitoring message --------------------------
msg <- paste0('\n','Computing multiple inputation for variable ', paste0(dep.vars, collapse=", ") , '(it may take a while) ... ', '\n'); cat(msg)
## ---------------------------------------------------
models.imp.pooled.final <- list()
i=1
for (i in 1:length(dep.vars)){
dat = data[,c(dep.vars[i], ind.vars)]
names(dat)[1]='y'
utils::capture.output(imputed_dat <- mice::mice(dat, m=m, maxit = maxit, method = method, seed = seed))
if (length(unique(dat$y))==2 & all(unique(dat$y) %in% c(0,1))) {
models.imp = with(imputed_dat, expr=stats::glm(formula=stats::formula(formula), family='binomial'))
}else{
models.imp = with(imputed_dat, expr=stats::lm(formula=stats::formula(formula)))
}
models.imp.pooled = mice::pool(models.imp)
models.imp.pooled.final[[i]] = base::summary(models.imp.pooled) %>%
broom::tidy(.) %>%
dplyr::rename(term=.rownames, estimate=est, p.value="Pr...t..", low.95='lo.95', high.95='hi.95') %>%
dplyr::mutate_if(is.numeric, round, digits=digits)
}
names(models.imp.pooled.final) = dep.vars
return(models.imp.pooled.final)
}
## }}}
## {{{ post-stratification weights }}}
## {{{ docs }}}
#' Wrap function for post-stratification weights
#'
#' This is a wrap function to compute post-stratification weights based on simple sample design (probability sampling)
#'
#'
#' @param data a data frame with the data. The name of the (post) stratification variables and their categories must match the data provided in \code{pop.prop}.
#' @param pop.prop a data frame with the population frequencies. Each column must be a (post) stratification variable. It must include a column \code{Freq} with the percentage of the population in each strata. The name of the stratification variables and their categories must match those provided in the data
#' @param strata a formula with the name of the variables for stratification. See \code{\link[survey]{postStratify}} for details
#'
#' @return It returns a list with two elements. The first element is the weight, the second is the trimmed weights. See \code{\link[survey]{trimWeights}} for details
#'
#'
#' @export
## }}}
epoststrat <- function(data, pop.prop, strata)
{
options(warn=-1)
on.exit(options(warn=0))
## survey object
data.svy = survey::svydesign(ids= ~ 1, data=data)
data.svy.rake = survey::postStratify(design = data.svy,
strata = strata ,
population = pop.prop, partial = T)
data.svy.rake.trim <- survey::trimWeights(data.svy.rake, lower=0, upper=5, strict=TRUE)
return(
list(weights = stats::weights(data.svy.rake),
weights.trimmed = stats::weights(data.svy.rake.trim))
)
}
## }}}
## {{{ power and sample size }}}
## {{{ docs }}}
#' Compute power of a test
#'
#' This function compute power of a test or ideal sample size given desired power of a test
#'
#' @param mu1 a numeric vector with expected observed proportion of the first group. It must be between 0 and 1
#' @param mu2 a number between 0 and 1 with expected observed proportion of the second group.
#' @param power_ideal the desired power of the test of difference between proportions
#' @param n.current the current/planned sample size
#' @param n1.current the current/planned sample size for the first group
#' @param n2.current the current/planned sample size for the second group
#' @param lines boolean, if \code{TRUE} the plot generated by the function will display lines at current and ideal values of the power of the test
#' @param title string, title of the plot
#'
#' @return It returns a table with the power and sample size as well as a plot with the information
#'
#' @examples
#'
#' epower(c(.2),.4, .8, 10)
#' epower(c(.2,.5),.4, .8, 10,30)
#'
#' @export
## }}}
epower <- function(mu1=NULL,mu2=NULL, power_ideal=.8, n.current=NULL, n1.current=NULL, n2.current=NULL, title = NULL, lines=T){
ESs = pwr::ES.h(p1 = mu1, p2 = mu2) ## d: expected effect size over the pooled expected standard deviation
power=list()
n_ideal=c()
power_current=c()
for (i in 1:length(ESs)){
power_current[i] = pwr::pwr.2p.test(h=ESs[i], n=n.current, alternative='two.sided')$power
n_ideal[i] = pwr::pwr.2p.test(h=ESs[i], power=power_ideal, alternative='two.sided')$n
n_max = ifelse(power_current[i] >1, n.current, pwr::pwr.2p.test(h=ESs[i], power=.99, alternative='two.sided')$n)
N = seq(1,n_max, length=20)
power[[i]] = cbind(power=pwr::pwr.2p.test(h=ESs[i], n=N, alternative='two.sided')$power, n = N)
}
ES = tibble::data_frame(mu1 = mu1, mu2=mu2, ES = ESs, power_ideal=power_ideal, n_ideal=n_ideal, power_current=power_current, n_current=n.current, n_and_power=power)
layout = .edar_get_layout(length(ES$ES))
graphics::par(mfrow=c(layout))
if (!is.null(title)){
graphics::par(mar=c(4, 4, 6, 1))
}else{
graphics::par(mar=c(4, 4, 3, 1) )
}
graphics::par(las=1,cex.axis=.7,bty='l', pch=20, cex.main=.9, mgp = c(2,.6,0))
for (i in 1:length(ESs)){
graphics::plot(x=ES$n_and_power[[i]][,'n'], ES$n_and_power[[i]][,'power'], type='l', col="#00000044", pch=20, cex=2, xlab='Sample Size', ylab='Power', ylim=c(0,1))
graphics::points(x=n.current, y=power_current[i], col='red', pch=20, cex=3)
graphics::points(x=ES$n_ideal[i], y=ES$power_ideal[i], col='blue', pch=20, cex=3)
text_current = paste0('Current Design\nN=',n.current,'\nPower=',round(power_current[i],2))
text_ideal = paste0('Ideal Design\nN=',ceiling(ES$n_ideal[i]),'\nPower=',round(ES$power_ideal[i],2))
## text(x=ES$n_ideal[i], y=ES$power_ideal[i],col='blue', pch=1, cex=.8, labels=text_ideal, pos=4,offset=2)
## text(x=n.current, y=power_current[i], col='red', pch=1, cex=.8, labels=text_current, pos=4,offset=2)
title(paste0('Effect size : ', round(ES$ES[i],2), sep=''), cex.main=1.2)
if (lines){
## abline(v=ES$n_ideal[i], lty=2, col='blue')
## abline(v=n.current, lty=2, col='red')
graphics::segments(x0=ES$n_ideal[i], y0=0,y1=ES$power_ideal[i], lty=2, col="blue")
graphics::segments(x0=0,x1=ES$n_ideal[i], y0=ES$power_ideal[i], lty=2, col="blue")
graphics::segments(x0=n.current, y0=0,y1=power_current[i], lty=2, col="red")
graphics::segments(x0=0,x1=n.current, y0=power_current[i], lty=2, col="red")
}
text_current = paste0('Current Design: N=',n.current,' Power=',round(power_current[i],2))
text_ideal = paste0('Ideal Design : N=',ceiling(ES$n_ideal[i]),' Power=',round(ES$power_ideal[i],2))
graphics::legend('bottomright', legend=c(text_ideal, text_current), pch=c(20,20), col=c('blue','red'), bty='n', cex=1.2)
}
if (!is.null(title)) graphics::mtext(title, line=-2, outer=T)
return(ES)
}
## }}}
## =====================================================
## Recodings
## =====================================================
## {{{ docs }}}
#'
#' Recode variable that uses likert scale
#'
#' This function can be used to quickly recode variables that use 5 points likert scale
#'
#' @param df a data.frame
#' @param vars.to.recode a string vector with the names of the variables to recode
#' @param invert boolean, indicates if the values of the new variable should be inverted
#' @param new.levels a string vector with the name of the new categories
#'
#' @return The function returns the data frame with 6 new variables named <vars.to.recode>5 (numerical variable with 5 levels from 1 to 5),<vars.to.recode>5c (factor with 5 levels), <vars.to.recode>3 (numerical variable with 3 levels, -1, 0, and 1), <vars.to.recode>3c (3 levels categorical variable), <vars.to.recode>2 (2 levels numerical variable), <vars.to.recode>2c (two levels categorical variable).
#'
#' @export
## }}}
likert5.recode <- function(df, vars.to.recode, invert=FALSE, new.levels=NULL)
{
new.vars.names = names(vars.to.recode) %>% paste0(., c("5", "5c", "3", "3c", "2", "2c"))
if (any(new.vars.names %in% names(df))) {
df[, which(names(df) %in% new.vars.names)] %>% print()
stop("\n\n Some var names already exists \n\n")
}
if (is.null(new.levels)) {new.levels = c("Strongly disagree", "Disagree", "Neutral", "Agree", "Strongly agree")}
for (i in 1:length(vars.to.recode))
{
var = vars.to.recode[i]
names(var) = NULL
new.var.name = names(vars.to.recode)[i]
df[,"old.var"] = df %>% dplyr::select(var)
if (invert) {
df = df %>%
dplyr::mutate(new.var5 = dplyr::case_when(old.var == 1 ~ 5,
old.var == 2 ~ 4,
old.var == 3 ~ 3,
old.var == 4 ~ 2,
old.var == 5 ~ 1)) %>%
dplyr::mutate(new.var5c = dplyr::case_when(new.var5 == 1 ~ new.levels[1], #"Strongly disagree",
new.var5 == 2 ~ new.levels[2], #"Disagree",
new.var5 == 3 ~ new.levels[3], #"Neither agree nor disagree",
new.var5 == 4 ~ new.levels[4], #"Agree",
new.var5 == 5 ~ new.levels[5], #"Strongly agree"
),
new.var3 = dplyr::case_when(new.var5 %in% 1:2 ~ -1,
new.var5 %in% 3 ~ 0,
new.var5 %in% 4:5 ~ 1),
new.var3c = dplyr::case_when(new.var3 == -1 ~ new.levels[2], #"Disagree",
new.var3 == 0 ~ new.levels[3], #"Neutral",
new.var3 == 1 ~ new.levels[4], #"Agree"
),
new.var2 = dplyr::case_when(new.var5 %in% 1:3 ~ 0,
new.var5 %in% 4:5 ~ 1),
new.var2c = dplyr::case_when(new.var5 %in% 1:3 ~ new.levels[2], #"Disagree",
new.var5 %in% 4:5 ~ new.levels[4], #"Agree"
),
)
}else{
df = df %>%
dplyr::mutate(new.var5 = dplyr::case_when(old.var %in% 1:5 ~ old.var)) %>%
dplyr::mutate(new.var5c = dplyr::case_when(new.var5 == 5 ~ new.levels[1], #"Strongly disagree",
new.var5 == 4 ~ new.levels[2], #"Disagree",
new.var5 == 3 ~ new.levels[3], #"Neutral",
new.var5 == 2 ~ new.levels[4], #"Agree",
new.var5 == 1 ~ new.levels[5], #"Strongly agree"
),
new.var3 = dplyr::case_when(new.var5 %in% 1:2 ~ -1,
new.var5 %in% 3 ~ 0,
new.var5 %in% 4:5 ~ 1),
new.var3c = dplyr::case_when(new.var3 == 1 ~ new.levels[2], #"Agree",
new.var3 == 0 ~ new.levels[3], #"Neutral",
new.var3 == -1 ~ new.levels[4], #"Disagree"
),
new.var2 = dplyr::case_when(new.var5 %in% 1:2 ~ 1,
new.var5 %in% 3:5 ~ 0),
new.var2c = dplyr::case_when(new.var5 %in% 3:5 ~ new.levels[2], #"Agree",
new.var5 %in% 1:2 ~ new.levels[4], #"Disagree"
),
)
}
df = df %>% dplyr::rename_at(vars(dplyr::contains("new.var")), dplyr::funs(stringr::str_replace(string=., pattern="new.var", replacement=new.var.name)) )
}
df = df %>% dplyr::select(-old.var)
return(df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.