Nothing
#' Extracts the test statistics
#'
#' Extracts the test statistics from the output of the \code{permute} function.
#' Returns the largest value from a specified subset of rows, each row is the test
#' statistic from a different null hypothesis.
#' @param out Array output by the \code{permute} function
#' @param positions Vector indicating which rows of out to use
#' @return Vector of numeric values of length \code{ncol(out)}
perm_dist <- function(out,
positions){
out <- out[positions,]
out <- abs(out)
if(length(positions)>1){
vals <- apply(out,2,max)
} else {
vals <- out
}
return(vals)
}
#' Conduct the randomisation-based stepdown procedure
#'
#' For a set of models fit with lme4, base R, or glmmrBase, the function will conduct the randomisation tests
#' and generate p-values for the null hypotheses of no treatment effect that controls the
#' family-wise error rate, and generates a 100(1-alpha)% confidence set for the
#' treatment effect model parameters.
#'
#' @param fitlist A list of models fitted with lme4, base R (lm or glm), or glmmrBase. All models should be fit using the
#' same data frame.
#' @param tr_var String indicating the name of the column in data that is a binary indicator
#' for whether the observation was under the treatment (1=treatment, 0=control)
#' @param cl_var String specifying the name of the column identifying the clusters/cluster-time
#' @param data A data frame containing the data used to fit the models in fitlist
#' @param alpha Numeric. 100(1-alpha)% confidence intervals are calculated. Default it 0.05
#' @param plots Logical indicating whether to plot permutational distributions and confidence
#' interval search during running of function. Default is TRUE
#' @param n_permute Number of permutations of the randomisation test to run
#' @param nsteps Number of steps of the confidence interval search process
#' @param type Method of correction: options are "rw" = Romano-Wolf randomisation test based stepdown, "h"
#' = Holm standard stepdown, "h" = Holm stepdown using randomisation test, "b" = standard Bonferroni, "br" =
#' Bonerroni using randomisation test, or "none" = randomisation test with no correction.
#' @param rand_func String of the name of a function that re-randomises the clusters. The function
#' must take the arguments `nJ` for the number of clusters and `nT` for the number of time periods.
#' The function should produce a data frame that identifies the clusters in the treatment group under the new
#' randomisation scheme. The data frame can either have a single column with name cl_var or
#' two columns of cl_var and t identifying the cluster ID and time period a cluster joins
#' the treatment group. If NULL then clusters are randomised in a 1:1 ratio to treatment and control
#' @param confint Logical indicating whether to run the confidence interval search process
#' @param sigma optional, list of estimated covariance matrices of the observations from the models in fitlist.
#' If provided then the weighted q-score statistic is used.
#' @param ci_start_values Optional list. The list should contain named vectors "upper" and/or "lower" that provide
#' a set of starting values for the upper and/or lower confidence interval searches, respectively. Alternatively,
#' a named scalar `scale` can be provided such that the starting values of the confidence interval search procedure
#' are est +/- `scale`*SE.
#' @param verbose Logical indicating whether to provide detailed output
#' @return A data frame with the point estimates, p-values, and confidence intervals
#' @importFrom methods is
#' @examples
#' \dontshow{
#' setParallelCRT(FALSE) # for the CRAN check
#' }
#' out <- twoarm_sim()
#' data <- out[[1]]
#' fit1 <- lme4::glmer(y1 ~ treat + (1|cl) ,
#' data=data,
#' family="poisson")
#'
#' fit2 <- lme4::glmer(y2 ~ treat + (1|cl),
#' data=data,
#' family="poisson")
#' stepdown(fitlist=list(fit1,fit2),
#' data=data,
#' n_permute = 100,
#' nsteps=100,
#' plots=FALSE,
#' verbose=TRUE)
#' @export
stepdown <- function(fitlist,
tr_var = "treat",
cl_var = "cl",
data,
alpha=0.05,
plots=TRUE,
n_permute=1000,
nsteps=1000,
type="rw",
rand_func=NULL,
confint=TRUE,
sigma = NULL,
ci_start_values = NULL,
verbose=TRUE){
if(!is(data,"data.frame"))stop("Data should be a data frame")
if(!tr_var%in%colnames(data))stop("tr_var not in colnames(data)")
if(!cl_var%in%colnames(data))stop("cl_var not in colnames(data)")
if(!all(unlist(lapply(fitlist,function(x)I(is(x,"glmerMod")|is(x,"lmerMod")|is(x,"glm")|is(x,"lm")|is(x,"mcml"))))))stop("All elements of fitlist should be glm, lm, glmer (lme4), lmer (lme4), or mcml (glmmrBase) model objects")
if(alpha<=0|alpha>=1)stop("alpha should be between 0 and 1")
if(!type%in%c("rw","b","h","br","hr","none"))stop("type should be one of rw, b, br, h, hr, or none.")
if(verbose&!is.null(sigma))cat("Covariance matrix provided, weighted quasi-score statistic will be used.\n")
if(verbose)cat("Extracting treatment effect estimates.\n")
tr_eff <- rep(NA,length(fitlist))
tr_sd <- rep(NA,length(fitlist))
tr_p <- rep(NA,length(fitlist))
for(i in 1:length(fitlist)){
if(!is(fitlist[[i]],"mcml")){
res <- summary(fitlist[[i]])
tr_eff[i] <- res$coefficients[tr_var,'Estimate']
tr_sd[i] <- res$coefficients[tr_var,'Std. Error']
if(ncol(res$coefficients)<4){
tr_p[i] <- res$coefficients[tr_var,3]
tr_p[i] <- 2*(1-stats::pnorm(abs(tr_p[i])))
} else {
tr_p[i] <- res$coefficients[tr_var,4]
}
} else {
res <- fitlist[[i]]
tr_eff[i] <- res$coefficients[res$coefficients$par == paste0("b_",tr_var),'est']
tr_sd[i] <- res$coefficients[res$coefficients$par == paste0("b_",tr_var),'SE']
tr_p[i] <- res$coefficients[res$coefficients$par == paste0("b_",tr_var),'p']
}
}
tr_st <- rep(NA,length(fitlist))
tr_icc <- rep(NA,length(fitlist))
if(verbose)cat("Point estimates: ",round(tr_eff,2),"\n")
if(verbose)cat("Uncorrected SE: ",round(tr_sd,2),"\n")
form.cl <- paste0("~factor(",cl_var,")-1")
Z <- model.matrix(as.formula(form.cl),data=data)
inv_sigma <- list()
if(!is.null(sigma)){
w.opt <- TRUE
for(i in 1:length(sigma)){
if(is(sigma[[i]],"list")){
inv_sigma[[i]] <- list()
for(j in 1:length(sigma[[i]])){
inv_sigma[[i]][[j]] <- solve(sigma[[i]][[j]])
}
} else {
inv_sigma[[i]] <- solve(sigma[[i]])
}
}
} else {
w.opt <- FALSE
for(i in 1:length(fitlist)){
inv_sigma[[i]] <- as.matrix(1)
}
}
new_rand <- function(){
if(is.null(rand_func)){
df_tr <- sample(unique(data[,cl_var]),ceiling(length(unique(data[,cl_var]))/2))
trnew <- rep(0,nrow(data))
trnew[data[,cl_var] %in% df_tr] <- 1
} else {
df_tr <- do.call(rand_func,list(nJ=length(unique(data[,cl_var])),
nT=length(unique(data$t))))
trnew <- rep(0,nrow(data))
if(!is(df_tr,"data.frame")){
stop("rand_func must produce a data frame")
} else {
if("t" %in% colnames(df_tr)){
for(i in 1:nrow(df_tr)){
trnew[data[,cl_var]==df_tr$cl[i]&data$t>=df_tr$t[i]] <- 1
}
} else {
trnew[data[,cl_var] %in% df_tr[,cl_var]] <- 1
}
}
}
return(trnew)
}
if(verbose)cat("Starting permutations...\n")
trmat <- sapply(1:n_permute,function(i)new_rand())
dtr <- data[,tr_var]
dtr[dtr==0] <- -1
if(type%in%c("rw","br","hr","none")){
Xnull <- list()
familylist <- list()
xb <- list()
ypred <- list()
y <- list()
resids <- list()
family2list <- list()
trlist <- list()
qtest <- list()
for(i in 1:length(fitlist)){
out <- est_null_model(fitlist[[i]],
data=data,
tr_var = tr_var,
null_par = 0)
Xnull[[i]] <- out$X
familylist[[i]] <- out$family
if(out$family[[1]] == "gaussian"){
xb[[i]] <- out$fit$fitted.values
} else {
xb[[i]] <- out$fit$linear.predictors
}
y[[i]] <- out$y
ypred[[i]] <- out$family$linkinv(xb[[i]])
resids[[i]] <- out$y - ypred[[i]]
family2list[[i]] <- out$family[[2]]
tr_st[i] <- qscore_impl(as.vector(resids[[i]]),
as.vector(dtr),
as.vector(xb[[i]]),
as.matrix(inv_sigma[[i]]),
as.character(family2list[[i]]),
Z,
w.opt)
qtest[[i]] <- as.vector(permutation_test_impl(as.vector(resids[[i]]),
as.matrix(trmat),
as.vector(xb[[i]]),
as.matrix(inv_sigma[[i]]),
as.character(family2list[[i]]),
Z,
w.opt,
as.numeric(n_permute),
verbose))
}
out <- matrix(Reduce(rbind,qtest), nrow=length(fitlist))
out <- abs(out)
tr_st <- abs(tr_st)
if(verbose)cat("Test statistics: ",round(tr_st,2),"\n")
#first determine corrected p-values
#test statistics
if(verbose)cat("Calculating p-values\n")
tr_p <- rep(NA,length(fitlist))
#order of statistics
ord_t <- order(tr_st)
if(plots)plist <- list()
if(type=="rw"){
for(i in 1:length(ord_t)){
test_stat <- max(tr_st[ord_t[1:(length(ord_t) - (i-1))]])
vals <- perm_dist(out,ord_t[1:(length(ord_t) - (i-1))])
tr_p[ord_t[(length(ord_t) - (i-1))]] <- (1+length(vals[vals>test_stat]))/(length(vals)+1)
if(plots){
plist[[i]] <- ggplot2::qplot(vals,bins=30) +
ggplot2::geom_vline(xintercept = test_stat,color="red")+
ggplot2::labs(x=paste0(i,"th largest statistic"))+
ggplot2::annotate("text",x = Inf, y= Inf,hjust=1,vjust=1,color="red",
label=paste0("p = ",round(tr_p[which(ord_t==(length(ord_t) - (i-1)))],2)))+
ggplot2::theme_bw()+
ggplot2::theme(panel.grid = ggplot2::element_blank())
}
}
}
if(type=="br"){
for(i in 1:length(ord_t)){
vals <- out[i,]
tr_p[i] <- (1+length(vals[vals>tr_st[i]]))/(length(vals)+1)
tr_p[i] <- min(tr_p[i]*length(fitlist),1)
if(plots){
plist[[i]] <- ggplot2::qplot(vals,bins=30) +
ggplot2::geom_vline(xintercept = tr_st[i],color="red")+
ggplot2::annotate("text",x = Inf, y= Inf,hjust=1,vjust=1,color="red",
label=paste0("p = ",round(tr_p[i],2)))+
ggplot2::theme_bw()+
ggplot2::theme(panel.grid = ggplot2::element_blank())
}
}
}
if(type=="hr"){
for(i in 1:length(ord_t)){
vals <- out[ord_t[(length(ord_t) - (i-1))],]
tr_p[ord_t[(length(ord_t) - (i-1))]] <- (1+length(vals[vals>tr_st[ord_t[(length(ord_t) - (i-1))]]]))/(length(vals)+1)
tr_p[ord_t[(length(ord_t) - (i-1))]] <- min(tr_p[ord_t[(length(ord_t) - (i-1))]]*(length(fitlist)+1-i),1)
if(plots){
plist[[i]] <- ggplot2::qplot(vals,bins=30) +
ggplot2::geom_vline(xintercept = tr_st[ord_t[i]],color="red")+
ggplot2::labs(x=paste0(i,"th largest statistic"))+
ggplot2::annotate("text",x = Inf, y= Inf,hjust=1,vjust=1,color="red",
label=paste0("p = ",round(tr_p[ord_t[i]],2)))+
ggplot2::theme_bw()+
ggplot2::theme(panel.grid = ggplot2::element_blank())
}
}
}
if(type=="none"){
for(i in 1:length(ord_t)){
vals <- out[i,]
tr_p[i] <- (1+length(vals[vals>tr_st[i]]))/(length(vals)+1)
if(plots){
plist[[i]] <- ggplot2::qplot(vals,bins=30) +
ggplot2::geom_vline(xintercept = tr_st[i],color="red")+
ggplot2::annotate("text",x = Inf, y= Inf,hjust=1,vjust=1,color="red",
label=paste0("p = ",round(tr_p[i],2)))+
ggplot2::theme_bw()+
ggplot2::theme(panel.grid = ggplot2::element_blank())
}
}
}
if(plots) print(ggpubr::ggarrange(plotlist = plist))
}
if(type=="b"){
tr_p <- ifelse(tr_p * length(fitlist) > 1, 1, tr_p * length(fitlist) )
}
if(type=="h"){
ord_p <- order(tr_p)
for(i in 1:length(fitlist)){
tr_p[ord_p[(length(ord_p) - (i-1))]] <- tr_p[ord_p[(length(ord_p) - (i-1))]]*(length(fitlist)-i+1)
}
tr_p <- ifelse(tr_p>1,1,tr_p)
}
if(verbose)cat("P-values: ",round(tr_p,2),"\n")
# next use search algorithm to determine 95% confidence interval
if(confint){
if(verbose)cat("Searching for confidence intervals...\n")
trmat <- sapply(1:nsteps,function(i)new_rand())
if(verbose)cat("Lower\n")
if(!is.null(ci_start_values)){
if(c("lower")%in%tolower(names(ci_start_values))){
ci_lower_start <- ci_start_values[[which(tolower(names(ci_start_values))=="lower")]]
if(length(ci_lower_start)!=length(tr_eff))stop("Wrong length start values for lower interval bound")
} else if(c("scale")%in%tolower(names(ci_start_values))) {
se_scale <- ci_start_values[[which(tolower(names(ci_start_values))=="scale")]]
ci_lower_start <- tr_eff-se_scale*tr_sd
}
} else {
ci_lower_start <- tr_eff-3*tr_sd
}
ci_lower <- confint_search(start = ci_lower_start,
b = tr_eff,
n = nrow(data),
nmodel = length(fitlist),
Xnull_= Xnull,
y = y,
tr_ = as.vector(data[,tr_var]),
new_tr_mat = as.matrix(trmat),
invS = inv_sigma,
family = familylist,
family2 = family2list,
Z = Z,
type = type,
nsteps=nsteps,
weight = w.opt,
alpha = alpha,
verbose = verbose)
#out <<- ci_lower
if(verbose)cat("\nUpper\n")
if(!is.null(ci_start_values)){
if(c("upper")%in%tolower(names(ci_start_values))){
ci_upper_start <- ci_start_values[[which(tolower(names(ci_start_values))=="upper")]]
if(length(ci_upper_start)!=length(tr_eff))stop("Wrong length start values for upper interval bound")
} else if(c("scale")%in%tolower(names(ci_start_values))) {
ci_upper_start <- tr_eff+se_scale*tr_sd
}
} else {
ci_upper_start <- tr_eff+3*tr_sd
}
ci_upper <- confint_search(start = ci_upper_start,
b = tr_eff,
n = nrow(data),
nmodel = length(fitlist),
Xnull_= Xnull,
y = y,
tr_ = as.vector(data[,tr_var]),
new_tr_mat = as.matrix(trmat),
invS = inv_sigma,
family = familylist,
family2 = family2list,
Z = Z,
type = type,
nsteps=nsteps,
weight = w.opt,
alpha = alpha,
verbose = verbose)
if(plots){
dfl <- as.data.frame(ci_lower$values)
dfu <- as.data.frame(ci_upper$values)
dfl <- reshape2::melt(dfl, id.vars=NULL)
dfu <- reshape2::melt(dfu, id.vars=NULL)
dfl$iter <- dfu$iter <- 1:nsteps
dfl$interval <- "lower"
dfu$interval <- "upper"
iter <- value <- NULL
print(ggplot2::ggplot(data=rbind(dfu,dfl),ggplot2::aes(x=iter,y=value))+
ggplot2::geom_line()+
ggplot2::facet_grid(interval~variable))+
ggplot2::theme_bw()+
ggplot2::theme(panel.grid = ggplot2::element_blank())
}
} else {
ci_upper <- rep(NA,length(tr_p))
ci_lower <- rep(NA,length(tr_p))
}
if(verbose)cat("\nCompleted!\n")
results <- data.frame(model=1:length(tr_eff),
mean = round(tr_eff,3),
lower_ci = round(ci_lower$bound,3),
upper_ci = round(ci_upper$bound,3),
p_value = round(tr_p,3),
icc = round(tr_icc,3))
return(results)
}
#' Extracts the dependent variable name from glm, lm, or mer model
#'
#' @param fit A fitted model object of class glm, lm, or *merMod
#' @return A string with the name of the dependent variable from the model
#' @importFrom methods is
#' @examples
#' out <- twoarm_sim()
#' data <- out[[1]]
#' fit1 <- lme4::glmer(y1 ~ treat + (1|cl) ,
#' data=data,
#' family="poisson")
#' outname_fit(fit1)
#' @export
outname_fit <- function(fit){
if(!(is(fit,"glm")|is(fit,"lm")|is(fit,"glmerMod")|is(fit,"lmerMod")))
stop("Model class should be glm, lm, or merMod")
if(is(fit,"glm")){
outv <- strsplit(as.character(fit$formula), " ")[[2]][1]
} else if(is(fit,"lm")){
outv <- strsplit(as.character(fit$call[[2]])," ")[[2]][1]
} else if(grepl("mer",class(fit))){
outv <- strsplit(as.character(fit@call[[2]])," ")[[2]]
} else if(is(fit,"fastglm")|is(fit,"fastLm")){
outv <- fit$outv
}
return(outv)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.