#' Student's t-test with a Moulton correction for clustered observations
#'
#' Follows the layout of the default `stats::t.test()` method, but scales the
#' standard error by the factor returned by `moulton_factor()` and bases the
#' degrees of freedom on the clusters instead of the individual observations.
#'
#' @param x Numeric vector of data values.
#' @param y Optional second numeric vector of data values.
#' @param cluster_x Cluster label for every element of `x`. Defaults to one
#'   cluster per observation. If `NULL` (and `cluster_y` is `NULL` or `y` is
#'   `NULL`) the call is handed to `t.test()` unchanged.
#' @param cluster_y Cluster label for every element of `y`. If `NULL` while
#'   `y` is given, every `y` observation gets its own fresh label.
#' @param method_df How the degrees of freedom are obtained. `"ICC"` calls
#'   `df_t_cluster()`, `"IK"` calls `moultonTools:::dfadjustSE()` on an `lm`
#'   fit; any other value (default `"cluster"`) uses the number of clusters.
#' @param do_sample_variance_correction If `TRUE`, the variance estimate is
#'   rescaled with the factor returned by `C_t_cluster()`.
#' @param alternative,mu,paired,var.equal,conf.level Same meaning as in
#'   `stats::t.test()`.
#' @param ... Forwarded to `t.test()` (plain fallback), `df_t_cluster()` and
#'   `C_t_cluster()`. An entry named `method` is additionally passed to
#'   `moulton_factor()`. Because `method_df` precedes `...` in the signature,
#'   R's partial argument matching binds a bare `method = ` to `method_df`
#'   unless `method_df` is also supplied by its full name.
#' @return A list of class `"htest"` with the elements `statistic`,
#'   `parameter`, `p.value`, `conf.int`, `estimate`, `null.value`,
#'   `alternative`, `method`, `data.name` and `moulton_factor`.
moulton.t.test <- function(x, y = NULL, cluster_x = seq_along(x),
                           cluster_y = NULL, method_df = "cluster",
                           do_sample_variance_correction = FALSE,
                           alternative = c("two.sided", "less", "greater"),
                           mu = 0, paired = FALSE, var.equal = FALSE,
                           conf.level = 0.95, ...) {
  # Without any usable clustering information this is a standard t-test.
  if (is.null(cluster_x) && (is.null(cluster_y) || is.null(y))) {
    return(t.test(x, y,
      alternative = alternative, mu = mu, paired = paired,
      var.equal = var.equal, conf.level = conf.level, ...
    ))
  }

  # If clustering is given for only one sample, the other one is treated as
  # unclustered: every observation becomes its own cluster.
  if (is.null(cluster_x)) {
    cluster_x <- seq_along(x)
  }
  if (is.null(cluster_y) && !is.null(y)) {
    # Start right after length(cluster_x) (the historical choice) and shift
    # the window until none of the fresh labels is already used by cluster_x;
    # otherwise x and y observations would silently share a cluster.
    offset <- length(cluster_x)
    cluster_y <- offset + seq_along(y)
    while (any(cluster_y %in% cluster_x)) {
      offset <- offset + length(y)
      cluster_y <- offset + seq_along(y)
    }
  }

  # Argument checks taken over from the standard R implementation.
  alternative <- match.arg(alternative)
  if (!missing(mu) && (length(mu) != 1 || is.na(mu))) {
    stop("'mu' must be a single number")
  }
  if (!missing(conf.level) &&
    (length(conf.level) != 1 || !is.finite(conf.level) ||
      conf.level < 0 || conf.level > 1)) {
    stop("'conf.level' must be a single number between 0 and 1")
  }

  # dname must be built before x / y are reassigned: substitute() only
  # recovers the call-site expression while the argument is still a promise.
  if (!is.null(y)) {
    dname <- paste(deparse(substitute(x)), "and", deparse(substitute(y)))
    if (paired) {
      xok <- yok <- complete.cases(x, y)
    } else {
      yok <- !is.na(y)
      xok <- !is.na(x)
    }
    # Keep the cluster labels aligned with the observations that survive the
    # NA filter. Labels whose length does not match the data are left alone.
    if (!all(yok) && length(cluster_y) == length(yok)) {
      cluster_y <- cluster_y[yok]
    }
    y <- y[yok]
  } else {
    dname <- deparse(substitute(x))
    if (paired) {
      stop("'y' is missing for paired test")
    }
    xok <- !is.na(x)
    yok <- NULL
  }
  if (!all(xok) && length(cluster_x) == length(xok)) {
    cluster_x <- cluster_x[xok]
  }
  x <- x[xok]

  # A paired test is a one-sample test on the differences, clustered by
  # cluster_x.
  if (paired) {
    x <- x - y
    y <- NULL
  }

  nx <- length(x)
  cluster_nx <- length(unique(cluster_x))
  mx <- mean(x)
  vx <- var(x)
  mf <- NA
  dots <- list(...) # may carry a specific ICC `method` for moulton_factor()

  if (is.null(y)) {
    if (nx < 2) {
      stop("not enough 'x' observations")
    }
    if (cluster_nx == 1) {
      stop("All observations in a single cluster, cannot perform testing")
    }

    # Conservative degrees of freedom: number of clusters minus one.
    df <- cluster_nx - 1
    if (method_df == "ICC") {
      df_args <- list(...)
      df_args[["method"]] <- "unbiased" # preserve expectation values
      df_args[["x"]] <- x
      df_args[["cluster_x"]] <- cluster_x
      df <- do.call(df_t_cluster, df_args)
    }
    if (method_df == "IK") {
      df <- moultonTools:::dfadjustSE(
        lm(x ~ 1),
        clustervar = as.factor(cluster_x)
      )$coefficients["(Intercept)", "df"]
    }

    # `[[` extracts the method value itself; `[` would hand over a list.
    mf <- if ("method" %in% names(dots)) {
      moulton_factor(x, cluster_x, method = dots[["method"]])
    } else {
      moulton_factor(x, cluster_x)
    }

    # Correct the variance by multiplying with the Moulton factor squared.
    vx <- vx * mf^2

    if (do_sample_variance_correction) {
      # var() already divides by n - 1; undo that and apply the clustered
      # correction factor instead.
      c_args <- list(...)
      c_args[["method"]] <- "unbiased" # preserve expectation values
      c_args[["x"]] <- x
      c_args[["cluster_x"]] <- cluster_x
      C <- do.call(C_t_cluster, c_args)
      vx <- vx * (nx - 1) / nx * C
    }

    stderr <- sqrt(vx / nx)
    if (stderr < 10 * .Machine$double.eps * abs(mx)) {
      stop("data are essentially constant")
    }
    tstat <- (mx - mu) / stderr
    method <- if (paired) {
      "Paired t-test"
    } else {
      "One Sample t-test, with Moulton correction"
    }
    estimate <- setNames(mx, if (paired) {
      "mean of the differences, with Moulton correction"
    } else {
      "mean of x, with Moulton correction"
    })
  } else {
    ny <- length(y)
    cluster_ny <- length(unique(cluster_y))
    if (nx < 1 || (!var.equal && nx < 2)) {
      stop("not enough 'x' observations")
    }
    if (ny < 1 || (!var.equal && ny < 2)) {
      stop("not enough 'y' observations")
    }
    if (var.equal && nx + ny < 3) {
      stop("not enough observations")
    }
    if (var.equal && cluster_ny + cluster_nx < 3) {
      stop("Too few clusters in x and y")
    }

    my <- mean(y)
    vy <- var(y)
    method <- paste(if (!var.equal) "Welch", "Two Sample t-test")
    estimate <- c(mx, my)
    names(estimate) <- c("mean of x", "mean of y")

    # One Moulton factor for the pooled data; the third argument marks which
    # sample (1 = x, 2 = y) each observation belongs to.
    pooled_values <- c(x, y)
    pooled_clusters <- c(cluster_x, cluster_y)
    sample_id <- c(rep(1, nx), rep(2, ny))
    mf <- if ("method" %in% names(dots)) {
      moulton_factor(pooled_values, pooled_clusters, sample_id,
        method = dots[["method"]]
      )
    } else {
      moulton_factor(pooled_values, pooled_clusters, sample_id)
    }

    if (var.equal) {
      df <- nx + ny - 2
      v <- 0
      if (nx > 1) {
        v <- v + (nx - 1) * vx
      }
      if (ny > 1) {
        v <- v + (ny - 1) * vy
      }
      if (do_sample_variance_correction) {
        # Replace the usual n - 2 denominator by the clustered correction.
        c_args <- list(...)
        c_args[["method"]] <- "unbiased" # preserve expectation values
        c_args[["x"]] <- x
        c_args[["cluster_x"]] <- cluster_x
        c_args[["y"]] <- y
        c_args[["cluster_y"]] <- cluster_y
        c_args[["var.equal"]] <- TRUE
        C <- do.call(C_t_cluster, c_args)
        v <- v / (nx + ny) * C
      } else {
        v <- v / df
      }
      v <- v * mf^2
      stderr <- sqrt(v * (1 / nx + 1 / ny))

      # Conservative degrees of freedom based on the clusters only.
      df <- cluster_nx + cluster_ny - 2
      if (method_df == "ICC") {
        df_args <- list(...)
        df_args[["method"]] <- "unbiased" # preserve expectation values
        df_args[["x"]] <- x
        df_args[["cluster_x"]] <- cluster_x
        df_args[["y"]] <- y
        df_args[["cluster_y"]] <- cluster_y
        df_args[["var.equal"]] <- TRUE
        df <- do.call(df_t_cluster, df_args)
      }
    } else {
      stderrx <- sqrt(vx / nx)
      stderry <- sqrt(vy / ny)
      stderr <- sqrt(stderrx^2 + stderry^2)
      # NOTE(review): unlike the other branches, the two helper calls below
      # forward `...` untouched and do not force method = "unbiased" --
      # confirm whether that difference is intended.
      if (do_sample_variance_correction) {
        C <- C_t_cluster(
          x = x, cluster_x = cluster_x, y = y, cluster_y = cluster_y,
          var.equal = FALSE, ...
        )
        stderr <- stderr * sqrt(C)
      }
      # Welch-Satterthwaite formula with cluster counts in place of the
      # sample sizes.
      df <- stderr^4 /
        (stderrx^4 / (cluster_nx - 1) + stderry^4 / (cluster_ny - 1))
      if (method_df == "ICC") {
        df <- df_t_cluster(
          x = x, cluster_x = cluster_x, y = y, cluster_y = cluster_y,
          var.equal = FALSE, ...
        )
      }
      stderr <- stderr * mf
    }

    # The "IK" degrees of freedom are computed identically for the pooled and
    # the Welch variant and take precedence over the values set above.
    if (method_df == "IK") {
      all_x <- c(x, y)
      all_cluster <- c(cluster_x, cluster_y)
      treg <- c(rep(0, length(x)), rep(1, length(y)))
      df <- moultonTools:::dfadjustSE(
        lm(all_x ~ treg),
        clustervar = as.factor(all_cluster)
      )$coefficients["treg", "df"]
    }

    if (stderr < 10 * .Machine$double.eps * max(abs(mx), abs(my))) {
      stop("data are essentially constant")
    }
    tstat <- (mx - my - mu) / stderr
  }

  # p-value and confidence interval (first on the t scale, then mapped back
  # to the scale of the data).
  if (alternative == "less") {
    pval <- pt(tstat, df)
    cint <- c(-Inf, tstat + qt(conf.level, df))
  } else if (alternative == "greater") {
    pval <- pt(tstat, df, lower.tail = FALSE)
    cint <- c(tstat - qt(conf.level, df), Inf)
  } else {
    pval <- 2 * pt(-abs(tstat), df)
    alpha <- 1 - conf.level
    cint <- qt(1 - alpha / 2, df)
    cint <- tstat + c(-cint, cint)
  }
  cint <- mu + cint * stderr

  names(tstat) <- "t"
  names(df) <- "df"
  names(mu) <- if (paired || !is.null(y)) {
    "difference in means"
  } else {
    "mean"
  }
  attr(cint, "conf.level") <- conf.level

  rval <- list(
    statistic = tstat, parameter = df, p.value = pval,
    conf.int = cint, estimate = estimate, null.value = mu,
    alternative = alternative, method = method, data.name = dname,
    moulton_factor = mf
  )
  class(rval) <- "htest"
  rval
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.