#' Calculate de regression discontinuity p-value
#'
#' @param dataset The dataset with the variables
#' @param covariate One or more covariate
#' @param CT control and treatment variable
#' @param Nsim Number of simulations
#' @param low_lim lowest limit of each of the covariate
#' @param high_lim lowest limit of each of the covariate
#' @param cutvar number to divide the treatment
#' @return pvalues and other properties
#' @export
rd_pvalue <- function(dataset = last, covariate = "AGE", CT = vec, Nsim = 1000, cutvar = 30, low_lim = NULL, high_lim = NULL) {
# if(!is.null(low_lim) & is.null(high_lim)) { dataset <- } else if((is.null(low_lim)*1+is.null(high_lim)*1)==1) { }
# ds.ct <- dataset[,CT]
ds.ct <- CT
# only with one covariate
if (length(covariate) == 1) {
ds <- dataset[, covariate]
X.bart <- mean(ds[ds.ct == 1], na.rm = TRUE)
X.barc <- mean(ds[ds.ct == 0], na.rm = TRUE)
# X.bart-X.barc
T.obs <- abs(X.bart - X.barc)
T.obs2 <- (X.bart - X.barc)
# Nsim<-10000
p.dif <- 0
for (k in 1:Nsim) {
# print(k)
Wrep <- sample(ds.ct, replace = FALSE)
Trep <- abs(mean(ds[Wrep == 1], na.rm = TRUE) - mean(ds[Wrep == 0], na.rm = TRUE))
# print(Trep) print(T.obs)
p.dif <- p.dif + 1 * (Trep >= T.obs)
}
p.dif
pvalue <- round(p.dif/Nsim, 4)
pvalue
}
lista <- list(pvalue, len = dim(dataset)[1], covariate, T.obs2)
# with more than one covariate else if (length(covariate)>1) { }
return(lista)
# MAB #cases, bandwidth, variable names, mean diference, p-value (taula osoa 4 baliorekin var bakoitzeko)
}
#' Filtering the datasaet with ranges
#'
#' @param dataset The dataset with the variables
#' @param covariate One or more covariate
#' @param CT control and treatment variable
#' @param Nsim Number of simulations
#' @param low_lim lowest limit of each of the covariate
#' @param high_lim lowest limit of each of the covariate
#' @param cutvar number to divide the treatment
#' @return pvalues and other properties
#' @export
rd_filtering <- function(dataset = last, covariate = "AGE", num_out = 4, CT = "W", Nsim = 1000, cutvar = 30, low_lim = NULL, high_lim = NULL) {
r1 <- dataset$BMI
min.r <- min(r1)
max.r <- max(r1)
r1.m <- r1[r1 < cutvar]
r1.M <- r1[r1 > cutvar]
pr <- seq(min.r, cutvar, length = 5)/min.r - 1
qrm <- quantile(r1, prob = pr)[-(num_out + 1)]
pr2 <- (seq(cutvar, max.r, length = 5))
qrM <- quantile(r1, prob = pr2)[-(num_out + 1)]
}
#' Fisher p-values adjusted
#'
#' @param dataset The dataset with the variables
#' @param forcing_bin_var_name forcing_bin_var_name
#' @param forcing_var_name forcing_var_name
#' @param Y_name Y_name
#' @param niter niter
#' @param bandwidth bandwidth
#' @param cut_value cut_value
#' @return data frame with variable and value and bandwidth
#' @export
#'
rand_pajd <- function(dataset = data, forcing_var_name = "S", covariates = c("sex", "HSHumanity", "HSTech", "HSOther", "hsgrade", "Y2004"), niter = 1000, bandwidth = 1000,
cut_value = 15000, whichunder = 1) {
s0 <- cut_value
S <- dataset[, forcing_var_name] #Forcing variable
if (whichunder == 0) {
Z <- ifelse(S >= s0, 0, 1)
} else if (whichunder == 1) {
Z <- ifelse(S >= s0, 1, 0)
}
print(covariates)
print(dim(dataset))
X <- data.frame(dataset[, covariates])
# if(length(covariates)==1) names(X) <- covariates
print(dim(X))
h <- bandwidth
### NOT RUN: IT TAKES A WHILE
K <- niter
# We can using, e.g. K=100 K<-100
M <- K
dataset.h <- dataset[S >= s0 - h & S <= s0 + h, ]
print(dim(dataset.h))
Nh <- nrow(dataset.h)
Sh <- dataset.h[, forcing_var_name]
if (whichunder == 0) {
Zh <- ifelse(Sh >= s0, 0, 1)
} else if (whichunder == 1) {
Zh <- ifelse(Sh >= s0, 1, 0)
}
Xh <- data.frame(dataset.h[, covariates])
# if(length(covariates)==1) names(Xh) <- covariates
Th.obs <- apply(Xh, 2, Tave, Zh)
p.values.obs <- matrix(0, K, ncol(Xh))
Th.HYP <- matrix(0, K, ncol(Xh))
for (j in 1:K) {
Zh.hyp <- sample(Zh, Nh, replace = TRUE)
Th.hyp <- apply(Xh, 2, Tave, Zh.hyp)
p.values.obs[j, ] <- as.numeric(Th.hyp >= Th.obs)
Th.HYP[j, ] <- Th.hyp
rm(Zh.hyp, Th.hyp)
}
Pvalues.obs <- apply(p.values.obs, 2, mean)
print(paste0("Observerd: ", Pvalues.obs))
Pvalues.hyp.obs <- rep(0, M)
Adj.pvalues <- matrix(0, M, ncol(Xh))
for (j in 1:M) {
Th.hyp.obs <- matrix(Th.HYP[j, ], (K - 1), ncol(Xh), byrow = T)
Pvalues.hyp <- apply(Th.hyp.obs >= Th.HYP[-j, ], 2, mean)
Pvalues.hyp.obs[j] <- min(Pvalues.hyp)
rm(Pvalues.hyp)
Adj.pvalues[j, ] <- 1 * (Pvalues.hyp.obs[j] <= Pvalues.obs)
}
adj.pvalues <- apply(Adj.pvalues, 2, mean)
print(paste0("Ajustado: ", adj.pvalues))
names(adj.pvalues) <- names(X)
dff <- data.frame(names(adj.pvalues), Pvalues.obs, adj.pvalues)
names(dff)[1] <- c("Variables")
rownames(dff) <- NULL
return(dff)
} # function end
#' Fisher p-values adjusted bw
#'
#' @param dataset The dataset with the variables
#' @param forcing_bin_var_name forcing_bin_var_name
#' @param forcing_var_name forcing_var_name
#' @param Y_name Y_name
#' @param niter niter
#' @param bandwidth bandwidth
#' @param cut_value cut_value
#' @return data frame with variable and value and bandwidth
#' @export
#'
rand_pajd_bw <- function(dataset = data, forcing_var_name = "S", covariates = c("sex", "HSHumanity", "HSTech", "HSOther", "hsgrade", "Y2004"), niter = 1000, bandwidth = c(500,
1000, 5000), cut_value = 15000, whichunder = 1) {
s0 <- cut_value
S <- dataset[, forcing_var_name] #Forcing variable
if (whichunder == 0) {
Z <- ifelse(S >= s0, 0, 1)
} else if (whichunder == 1) {
Z <- ifelse(S >= s0, 1, 0)
}
print(covariates)
print(dim(dataset))
X <- dataset[, covariates]
print(dim(X))
lenbw <- length(bandwidth)
ss2 <- NULL
namesbuffer <- NULL
for (h in 1:lenbw) {
ss <- rand_pajd(dataset = dataset, forcing_var_name = forcing_var_name, covariates = covariates, niter = niter, bandwidth = bandwidth[h], cut_value = cut_value,
whichunder = whichunder)
ss <- as.vector(ss[, 3])
ss2 <- c(ss2, ss)
namesbuffer <- c(namesbuffer, paste0("p-value[Bandwidth=", bandwidth[h], "]"))
}
df <- data.frame(matrix(ss2, length(covariates), lenbw))
df2 <- data.frame(covariates, df)
names(df2) <- c("Names", namesbuffer)
return(df2)
}
#' Calculating Fisher Exact p-value
#'
#' @param dataset The dataset with the variables
#' @param forcing_bin_var_name forcing_bin_var_name
#' @param Y_name Y_name
#' @param niter niter
#' @return data frame with variable and value
#' @export
#'
sharp_fep <- function(dataset, forcing_bin_var_name = "Z", Y_name = "dropout", niter = 100) {
print(paste0("forcing_bin_var_name-", forcing_bin_var_name))
M <- niter
print(paste0("M-", M))
Y <- as.numeric(dataset[, Y_name])
print(summary(Y))
print(head(paste0(Y)))
Z <- dataset[, forcing_bin_var_name]
print(summary(Z))
print(head(paste0(Z)))
m1 <- mean(Y[dataset[, forcing_bin_var_name] == 1])
print(paste0("m1-", m1))
m0 <- mean(Y[dataset[, forcing_bin_var_name] == 0])
print(paste0("m0-", m0))
tave <- m1 - m0
print(paste0("tave-", tave))
tobs <- abs(tave)
print(paste0("tobs-", tobs))
Nh <- length(Y)
thyp.m <- 0
p.value <- 0
for (m in 1:M) {
Z.m <- sample(Z, Nh, replace = TRUE)
thyp.m <- c(thyp.m, abs(mean(Y[Z.m == 1]) - mean(Y[Z.m == 0])))
p.value <- p.value + as.numeric(thyp.m[m + 1] >= tobs)
# print(m)
}
p.value <- p.value/M
thyp.m <- thyp.m[-1]
# Results - Fisher Exact P-value for h0: Yi(0)=Yi(1)
res1 <- c(Nh, tave, tobs, p.value)
res1 <- data.frame(res1)
res1 <- data.frame(c("Nh", "tave", "tobs", "p.value"), res1)
names(res1) <- c("Variable", "Value")
return(list(res1 = res1, thyp.m = thyp.m))
}
#' Fisher p-values
#'
#' @param dataset The dataset with the variables
#' @param forcing_bin_var_name forcing_bin_var_name
#' @param forcing_var_name forcing_var_name
#' @param Y_name Y_name
#' @param niter niter
#' @param bandwidth bandwidth
#' @param cut_value cut_value
#' @return data frame with variable and value and bandwidth
#' @export
#'
sharp_fep_bw <- function(dataset = data, forcing_var_name = "S", Y_name = "dropout", niter = 1000, bandwidth = c(500, 1000, 1500), cut_value = 15000, whichunder = 1) {
# for each bandwidth
len_bw <- length(bandwidth)
# Cut value
s0 <- cut_value
if (whichunder == 1) {
dataset$assigVar <- ifelse(dataset[, forcing_var_name] <= cut_value, 1, 0)
} else if (whichunder == 0) {
dataset$assigVar <- ifelse(dataset[, forcing_var_name] <= cut_value, 0, 1)
}
Sh <- dataset[, forcing_var_name]
pbalioak <- c()
nak <- c()
hist_data <- list()
for (b in 1:len_bw) {
# bandwidth
h <- bandwidth[b]/2
# Filter dataste with bw
dat_bw <- dataset[Sh >= s0 - h & Sh <= s0 + h, ]
# number of records N
N <- dim(dat_bw)[1]
print(N)
ft <- sharp_fep(dataset = dat_bw, forcing_bin_var_name = "assigVar", Y_name = Y_name, niter = 1000)
pbalioak <- c(pbalioak, ft$res1[, 2])
hist_data[[b]] <- ft$thyp.m
} # for b
dfp <- data.frame(matrix(pbalioak, ncol = 4, byrow = T))
dfp[, 2:3] <- round(dfp[, 2:3], 4)
df <- cbind(bandwidth, dfp)
names(df) <- c("Bandwidth", "N", "Difference in average outcomes by treatment status
Statistic", "Absolute value of difference in average outcomes", "p-value")
return(list(df = df, hist_data = hist_data))
} # function end
#' SHARP RDD: RANDOMIZATION-BASED INFERENCE - NEYMAN APPROACH
#'
#' @param Y Outcome variable
#' @param Z forcing binary variable
#' @param cin Confidence interval
#' @return a vector with values
#' @export
#'
sharp_neyman <- function(Y, Z, cin) {
# Y=yg;Z=zg
Y = as.numeric(Y)
Z = as.numeric(Z)
m1 <- mean(Y[Z == 1])
m0 <- mean(Y[Z == 0])
### Estimate of tau.Us0
tau <- m1 - m0
tau
### Estimate of the Variance
# binomial or # continuous
if (length(table(Y)) == 2) {
Vneyman <- (m1 * (1 - m1))/sum(Z == 1) + (m0 * (1 - m0))/sum(Z == 0)
} else if (length(table(Y)) > 2) {
Vneyman <- (var(Y[Z == 1]))/sum(Z == 1) + (var(Y[Z == 0]))/sum(Z == 0)
}
Vneyman
### 95% Confidence interval
cig <- 1 - cin/2
# zz<- qnorm(0.975)
zz <- qnorm(cig)
tau - zz * sqrt(Vneyman)
tau + zz * sqrt(Vneyman)
vector <- c(length(Y), tau, sqrt(Vneyman), tau - zz * sqrt(Vneyman), tau + zz * sqrt(Vneyman))
return(vector)
}
#' Sharp - neyman bandwidth
#'
#' @param dataset The dataset with the variables
#' @param forcing_bin_var_name forcing_bin_var_name
#' @param forcing_var_name forcing_var_name
#' @param Y_name Y_name
#' @param niter niter
#' @param bandwidth bandwidth
#' @param cut_value cut_value
#' @return data frame with variable and value and bandwidth
#' @export
#'
sharp_neyman_bw <- function(dataset = data, forcing_var_name = "S", Y_name = "dropout", niter = 1000, bandwidth = c(500, 1000, 1500), cut_value = 15000, whichunder = 1,
cin = 95) {
# for each bandwidth
len_bw <- length(bandwidth)
# Cut value
s0 <- cut_value
if (whichunder == 1) {
dataset$assigVar <- ifelse(dataset[, forcing_var_name] <= cut_value, 1, 0)
} else if (whichunder == 0) {
dataset$assigVar <- ifelse(dataset[, forcing_var_name] <= cut_value, 0, 1)
}
Sh <- dataset[, forcing_var_name]
pbalioak <- c()
nak <- c()
for (b in 1:len_bw) {
# bandwidth
h <- bandwidth[b]/2
# Filter dataste with bw
dat_bw <- dataset[Sh >= s0 - h & Sh <= s0 + h, ]
# number of records N
N <- dim(dat_bw)[1]
print(N)
zg <- as.numeric(dat_bw[, "assigVar"])
yg <- as.numeric(dat_bw[, Y_name])
ft <- sharp_neyman(Y = yg, Z = zg, cin = cin)
pbalioak <- c(pbalioak, ft)
} # for b
dfp <- data.frame(matrix(pbalioak, ncol = 5, byrow = T))
dfp[, 2:5] <- round(dfp[, 2:5], 4)
df <- cbind(bandwidth, dfp)
names(df) <- c("Bandwidth", "N", "Average causal effect", "Neyman SE", paste0(cin, "% CI: Lower bound"), paste0(cin, "% CI: upper bound"))
return(df)
} # function end
#' Fuzzy - neyman
#'
#' @param Y Outcome variable
#' @param Z forcing binary variable
#' @param Wh selected records
#' @return data frame with variable and value and bandwidth
#' @export
#'
fuzzy_neyman <- function(Y, Wh, Z, cin = 95) {
m.y1 <- mean(Y[Z == 1])
m.y0 <- mean(Y[Z == 0])
tau.y <- m.y1 - m.y0
tau.y
m.w1 <- mean(Wh[Z == 1])
m.w0 <- mean(Wh[Z == 0]) #zero by design
tau.w <- m.w1 - m.w0
tau.w
tau.c <- tau.y/tau.w
tau.c
# binomial or # continuous
if (length(table(Y)) == 2) {
Vneyman.Y <- (m.y1 * (1 - m.y1))/sum(Z == 1) + (m.y0 * (1 - m.y0))/sum(Z == 0)
} else if (length(table(Y)) > 2) {
Vneyman.Y <- (var(Y[Z == 1]))/sum(Z == 1) + (var(Y[Z == 0]))/sum(Z == 0)
}
# Vneyman.Y <- { m.y1 * (1 - m.y1) }/sum(Z == 1) + { m.y0 * (1 - m.y0) }/sum(Z == 0)
Vneyman.Y
sqrt(Vneyman.Y)
if (length(table(Y)) == 2) {
Vneyman.W <- (m.w1 * (1 - m.w1))/sum(Z == 1) + (m.w0 * (1 - m.w0))/sum(Z == 0)
} else if (length(table(Y)) > 2) {
Vneyman.W <- (var(Y[Z == 1]))/sum(Z == 1) + (var(Y[Z == 0]))/sum(Z == 0)
}
# Vneyman.W <- { m.w1 * (1 - m.w1) }/sum(Z == 1) + { m.w0 * (1 - m.w0) }/sum(Z == 0)
Vneyman.W
sqrt(Vneyman.W)
cov.tau.w.tau.y <- {
sum(Y[Z == 1] == 1 & Wh[Z == 1] == 1)/sum(Z == 1) - m.w1 * m.y1
}/sum(Z == 1)
Vtau.c <- {
1/tau.w^2
} * Vneyman.Y + {
tau.y^2/tau.w^4
} * Vneyman.W - 2 * {
tau.y/tau.w^3
} * cov.tau.w.tau.y
Vtau.c
sqrt(Vtau.c)
### 95% Confidence interval
cig <- 1 - (100 - cin)/2/100
zz <- qnorm(cig)
# zz<- qnorm(0.975)
tau.c - zz * sqrt(Vtau.c)
tau.c + zz * sqrt(Vtau.c)
# Using 2SLS library(sem) tsls(Y ~ Wh, ~ Z)
vector <- c(tau.w, tau.y, tau.c, sqrt(Vneyman.W), sqrt(Vneyman.Y), sqrt(Vtau.c), tau.w - zz * sqrt(Vneyman.W), tau.y - zz * sqrt(Vneyman.Y), tau.c - zz *
sqrt(Vtau.c), tau.w + zz * sqrt(Vneyman.W), tau.y + zz * sqrt(Vneyman.Y), tau.c + zz * sqrt(Vtau.c))
return(vector)
}
#' Fuzzy - neyman bandwidth
#'
#' @param dataset The dataset with the variables
#' @param forcing_bin_var_name forcing_bin_var_name
#' @param forcing_var_name forcing_var_name
#' @param Y_name Y_name
#' @param niter niter
#' @param bandwidth bandwidth
#' @param cut_value cut_value
#' @param W selected
#' @return data frame with variable and value and bandwidth
#' @export
#'
fuzzy_neyman_bw <- function(dataset = data, forcing_var_name = "S", Y_name = "dropout", niter = 1000, W = "W", bandwidth = c(500, 1000, 1500), cut_value = 15000,
whichunder = 1, cin = 95) {
# for each bandwidth
len_bw <- length(bandwidth)
# Cut value
s0 <- cut_value
if (whichunder == 1) {
dataset$assigVar <- ifelse(dataset[, forcing_var_name] <= cut_value, 1, 0)
} else if (whichunder == 0) {
dataset$assigVar <- ifelse(dataset[, forcing_var_name] <= cut_value, 0, 1)
}
Sh <- dataset[, forcing_var_name]
pbalioak <- c()
nak <- c()
df2 <- data.frame(matrix(888, 1, 6))
names(df2) <- c("Bandwidth", "Estimand", "Average causal effect", "Neyman SE", paste0(cin, "% CI: Lower bound"), paste0(cin, "% CI: upper bound"))
for (b in 1:len_bw) {
# bandwidth
h <- bandwidth[b]/2
# Filter dataste with bw
dat_bw <- dataset[Sh >= s0 - h & Sh <= s0 + h, ]
# number of records N
N <- dim(dat_bw)[1]
print(N)
zg <- as.numeric(dat_bw[, "assigVar"])
yg <- as.numeric(dat_bw[, Y_name])
wg <- as.numeric(dat_bw[, W])
ft <- fuzzy_neyman(Y = yg, Wh = wg, Z = zg, cin = cin)
dfp <- data.frame(matrix(ft, ncol = 4))
dfp[, 1:4] <- round(dfp[, 1:4], 4)
df <- cbind(rep(bandwidth[b], 3), c("ITT.W", "ITT.Y", "CACE"), dfp)
names(df) <- c("Bandwidth", "Estimand", "Average causal effect", "Neyman SE", paste0(cin, "% CI: Lower bound"), paste0(cin, "% CI: upper bound"))
# names(df) <- c('Bandwidth','N','Average causal effect','Neyman SE',paste0(cin,'% CI: Lower bound'),paste0(cin,'% CI: upper bound'))
df2 <- rbind(df2, df)
} # for b
df2 <- df2[-1, ]
return(df2)
} # function end
#' Fuzzy - FEP binary one side
#'
#' @param dataset The dataset with the variables
#' @param forcing_bin_var_name forcing_bin_var_name
#' @param forcing_var_name forcing_var_name
#' @param Y_name Y_name
#' @param niter niter
#' @param bandwidth bandwidth
#' @param cut_value cut_value
#' @param W selected
#' @param M2 number of iterations
#' @return data frame with variable and value and bandwidth
#' @export
#'
fuzzy_fep1sided <- function(dataset, Y, W, Z, Y_name, M2 = 10) {
Nh <- nrow(dataset)
## IV-ESTIMATOR
ITT.Y <- mean(Y[Z == 1]) - mean(Y[Z == 0])
ITT.W <- mean(W[Z == 1]) - mean(W[Z == 0])
CACE.IV.obs <- ITT.Y/ITT.W
CACE.IV.obs
## MLE
names(dataset)[which(names(dataset) == Y_name)] <- "Y"
CACE.MLE.obs <- EM.bin(dat = dataset)
# Posterior Mean
CACE.PM.obs <- mcmc.bin(n.iter = 1000, n.burn = 500, dat = dataset)
Tobs <- c(CACE.IV.obs, CACE.MLE.obs, CACE.PM.obs)
M <- as.numeric(as.character(M2))
nit <- 2 * M
nburn <- nit - M
## Impute missing compliance statuses
G <- mcmc.bin.h0(n.iter = nit, n.burn = nburn, dat = dataset)$Gstatus
## Impute missing potential outcomes under the null
Y1 <- Y0 <- Y
Tsim <- matrix(0, M, 3)
colnames(Tsim) <- c("CACE.IV", "CACE.MLE", "CACE.PM")
for (i in 1:M) {
## Draw a random hypothetical assignment
Zh.sim <- sample(Z, Nh, replace = TRUE)
## Re-observe the data
dataset.sim <- data.frame(Z = Zh.sim, W = 0 * (1 - Zh.sim) + G[i, ] * Zh.sim, Y = Y0 * (1 - Zh.sim) + Y1 * Zh.sim)
## Calculate the test statistic on these data
ITT.Y <- mean(dataset.sim$Y[dataset.sim$Z == 1]) - mean(dataset.sim$Y[dataset.sim$Z == 0])
ITT.W <- mean(dataset.sim$W[dataset.sim$Z == 1]) - mean(dataset.sim$W[dataset.sim$Z == 0])
CACE.IV.sim <- ITT.Y/ITT.W
CACE.MLE.sim <- EM.bin(dat = dataset.sim)
CACE.PM.sim <- mcmc.bin(n.iter = 1000, n.burn = 500, dat = dataset.sim)
Tsim[i, ] <- c(CACE.IV.sim, CACE.MLE.sim, CACE.PM.sim)
print(i)
} ##End loop over M
## Calculate the posterior predictive p-value
pppv <- apply(abs(Tsim) >= abs(Tobs), 2, mean)
return(list(pppv, Tsim, Tobs))
}
#' Fuzzy - FEP binary two sided
#'
#' @param dataset The dataset with the variables
#' @param forcing_bin_var_name forcing_bin_var_name
#' @param forcing_var_name forcing_var_name
#' @param Y_name Y_name
#' @param niter niter
#' @param bandwidth bandwidth
#' @param cut_value cut_value
#' @param W selected
#' @param M2 number of iterations
#' @return data frame with variable and value and bandwidth
#' @export
#'
fuzzy_fep2sided <- function(dataset, Y, W, Z, Y_name, M2 = 10) {
Nh <- nrow(dataset)
## IV-ESTIMATOR
ITT.Y <- mean(Y[Z == 1]) - mean(Y[Z == 0])
ITT.W <- mean(W[Z == 1]) - mean(W[Z == 0])
CACE.IV.obs <- ITT.Y/ITT.W
CACE.IV.obs
## MLE
names(dataset)[which(names(dataset) == Y_name)] <- "Y"
CACE.MLE.obs <- EM.bin2(dat = dataset)
# Posterior Mean
CACE.PM.obs <- mcmc.bin2(n.iter = 1000, n.burn = 500, dat = dataset)
Tobs <- c(CACE.IV.obs, CACE.MLE.obs, CACE.PM.obs)
M <- as.numeric(as.character(M2))
nit <- 2 * M
nburn <- nit - M
## Impute missing compliance statuses
G <- mcmc.bin.h02(n.iter = nit, n.burn = nburn, dat = dataset)$Gstatus
## Impute missing potential outcomes under the null
Y1 <- Y0 <- Y
Tsim <- matrix(0, M, 3)
colnames(Tsim) <- c("CACE.IV", "CACE.MLE", "CACE.PM")
for (i in 1:M) {
## Draw a random hypothetical assignment
Zh.sim <- sample(dataset$Z, Nh, replace = TRUE)
Wh.sim <- 0 * {
(1 - Zh.sim) * (G[i, ] == 1 | G[i, ] == 3) + Zh.sim * (G[i, ] == 1)
} + 1 * {
(1 - Zh.sim) * (G[i, ] == 2) + Zh.sim * (G[i, ] == 2 | G[i, ] == 3)
}
## Re-observe the data
dath.sim <- data.frame(Z = Zh.sim, W = Wh.sim, Y = Y0 * (1 - Zh.sim) + Y1 * Zh.sim)
## Calculate the test statistic on these data
ITT.Y <- mean(dath.sim$Y[dath.sim$Z == 1]) - mean(dath.sim$Y[dath.sim$Z == 0])
ITT.W <- mean(dath.sim$W[dath.sim$Z == 1]) - mean(dath.sim$W[dath.sim$Z == 0])
CACE.IV.sim <- ITT.Y/ITT.W
CACE.MLE.sim <- EM.bin2(dat = dath.sim)
CACE.PM.sim <- mcmc.bin2(n.iter = 1000, n.burn = 500, dat = dath.sim)
Tsim[i, ] <- c(CACE.IV.sim, CACE.MLE.sim, CACE.PM.sim)
} ##End loop over M
## Calculate the posterior predictive p-value
pppv <- apply(abs(Tsim) >= abs(Tobs), 2, mean)
return(list(pppv, Tsim, Tobs))
}
#' Fuzzy - FEP 1 sided
#'
#' @param dataset The dataset with the variables
#' @param forcing_bin_var_name forcing_bin_var_name
#' @param forcing_var_name forcing_var_name
#' @param Y_name Y_name
#' @param niter niter
#' @param bandwidth bandwidth
#' @param cut_value cut_value
#' @param W selected
#' @param M2 number of iterations
#' @return data frame with variable and value and bandwidth
#' @export
#'
fuzzy_fep_numeric1sided <- function(dataset, Y, W, Z, Y_name, M2 = 10) {
G <- NULL
G[Z == 1 & W == 1] <- 1
G[Z == 1 & W == 0] <- 0
G[Z == 0] <- rbinom(sum(Z == 0), 1, sum(Z == 1 & W == 1)/sum(Z == 1))
Y <- NULL
Y[Z == 0 & G == 1] <- rnorm(sum(Z == 0 & G == 1), 40, 9)
Y[Z == 1 & G == 1] <- rnorm(sum(Z == 1 & G == 1), 60, 4)
Y[G == 0] <- rnorm(sum(G == 0), 50, 12)
rm(G)
dataset <- data.frame(Z = Z, W = W, Y = Y)
Nh <- nrow(dataset)
# Nh<- nrow(dataset) IV-ESTIMATOR
ITT.Y <- mean(Y[Z == 1]) - mean(Y[Z == 0])
ITT.W <- mean(W[Z == 1]) - mean(W[Z == 0])
CACE.IV.obs <- ITT.Y/ITT.W
CACE.IV.obs
## MLE
names(dataset)[which(names(dataset) == Y_name)] <- "Y"
CACE.MLE.obs <- EM.gauss(dat = dataset)
# Posterior Mean
CACE.PM.obs <- mcmc.gauss(n.iter = 1000, n.burn = 500, dat = dataset)
Tobs <- c(CACE.IV.obs, CACE.MLE.obs, CACE.PM.obs)
M <- as.numeric(as.character(M2))
nit <- 2 * M
nburn <- nit - M
## Impute missing compliance statuses
G <- mcmc.gauss.h0(n.iter = nit, n.burn = nburn, dat = dataset)$Gstatus
## Impute missing potential outcomes under the null
Y1 <- Y0 <- Y
Tsim <- matrix(0, M, 3)
colnames(Tsim) <- c("CACE.IV", "CACE.MLE", "CACE.PM")
for (i in 1:M) {
## Draw a random hypothetical assignment
Zh.sim <- sample(Z, Nh, replace = TRUE)
## Re-observe the data
dataset.sim <- data.frame(Z = Zh.sim, W = 0 * (1 - Zh.sim) + G[i, ] * Zh.sim, Y = Y0 * (1 - Zh.sim) + Y1 * Zh.sim)
## Calculate the test statistic on these data
ITT.Y <- mean(dataset.sim$Y[dataset.sim$Z == 1]) - mean(dataset.sim$Y[dataset.sim$Z == 0])
ITT.W <- mean(dataset.sim$W[dataset.sim$Z == 1]) - mean(dataset.sim$W[dataset.sim$Z == 0])
CACE.IV.sim <- ITT.Y/ITT.W
CACE.MLE.sim <- EM.gauss(dat = dataset.sim)
CACE.PM.sim <- mcmc.gauss(n.iter = 1000, n.burn = 500, dat = dataset.sim)
Tsim[i, ] <- c(CACE.IV.sim, CACE.MLE.sim, CACE.PM.sim)
print(i)
} ##End loop over M
## Calculate the posterior predictive p-value
pppv <- apply(abs(Tsim) >= abs(Tobs), 2, mean)
return(list(pppv, Tsim, Tobs))
}
#' Fuzzy - FEP 2 sided
#'
#' @param dataset The dataset with the variables
#' @param forcing_bin_var_name forcing_bin_var_name
#' @param forcing_var_name forcing_var_name
#' @param Y_name Y_name
#' @param niter niter
#' @param bandwidth bandwidth
#' @param cut_value cut_value
#' @param W selected
#' @param M2 number of iterations
#' @return data frame with variable and value and bandwidth
#' @export
#'
fuzzy_fep_numeric2sided <- function(dataset, Y, W, Z, Y_name, M2 = 10) {
G <- NULL
G[Z == 0 & W == 1] <- 2
G[Z == 1 & W == 0] <- 1
pnt <- sum(Z == 1 & W == 0)/sum(Z == 1)
pat <- sum(Z == 0 & W == 1)/sum(Z == 0)
pc <- 1 - pnt - pat
u <- rbinom(sum(Z == 0 & W == 0), 1, pc/(pc + pnt))
G[(Z == 0 & W == 0)] <- 3 * (u == 1) + 1 * (u == 0)
u <- rbinom(sum(Z == 1 & W == 1), 1, pc/(pc + pat))
G[(Z == 1 & W == 1)] <- 3 * (u == 1) + 2 * (u == 0)
Yh <- NULL
Yh[Z == 0 & G == 3] <- rnorm(sum(Z == 0 & G == 3), 40, 9)
Yh[Z == 1 & G == 3] <- rnorm(sum(Z == 1 & G == 3), 60, 4)
Yh[G == 1] <- rnorm(sum(G == 1), 50, 12)
Yh[G == 2] <- rnorm(sum(G == 2), 45, 8)
rm(G, u, pc, pat, pnt)
dath <- data.frame(Z = Z, W = W, Y = Yh)
Nh <- nrow(dath)
# Nh<- nrow(dataset) IV-ESTIMATOR
ITT.Y <- mean(Y[Z == 1]) - mean(Y[Z == 0])
ITT.W <- mean(W[Z == 1]) - mean(W[Z == 0])
CACE.IV.obs <- ITT.Y/ITT.W
CACE.IV.obs
## MLE
names(dataset)[which(names(dataset) == Y_name)] <- "Y"
CACE.MLE.obs <- EM.gauss2(dat = dataset)
# Posterior Mean
CACE.PM.obs <- mcmc.gauss2(n.iter = 1000, n.burn = 500, dat = dataset)
Tobs <- c(CACE.IV.obs, CACE.MLE.obs, CACE.PM.obs)
M <- as.numeric(as.character(M2))
nit <- 2 * M
nburn <- nit - M
## Impute missing compliance statuses
G <- mcmc.gauss.h02(n.iter = nit, n.burn = nburn, dat = dataset)$Gstatus
## Impute missing potential outcomes under the null
Y1 <- Y0 <- Y
Tsim <- matrix(0, M, 3)
colnames(Tsim) <- c("CACE.IV", "CACE.MLE", "CACE.PM")
for (i in 1:M) {
## Draw a random hypothetical assignment
Z.sim <- sample(dath$Z, Nh, replace = TRUE)
W.sim <- 0 * {
(1 - Z.sim) * (G[i, ] == 1 | G[i, ] == 3) + Z.sim * (G[i, ] == 1)
} + 1 * {
(1 - Z.sim) * (G[i, ] == 2) + Z.sim * (G[i, ] == 2 | G[i, ] == 3)
}
## Re-observe the data
dath.sim <- data.frame(Z = Z.sim, W = W.sim, Y = Y0 * (1 - Z.sim) + Y1 * Z.sim)
## Calculate the test statistic on these data
ITT.Y <- mean(dath.sim$Y[dath.sim$Z == 1]) - mean(dath.sim$Y[dath.sim$Z == 0])
ITT.W <- mean(dath.sim$W[dath.sim$Z == 1]) - mean(dath.sim$W[dath.sim$Z == 0])
CACE.IV.sim <- ITT.Y/ITT.W
CACE.MLE.sim <- EM.gauss2(dat = dath.sim)
CACE.PM.sim <- mcmc.gauss2(n.iter = 1000, n.burn = 500, dat = dath.sim)
Tsim[i, ] <- c(CACE.IV.sim, CACE.MLE.sim, CACE.PM.sim)
} ##End loop over M
## Calculate the posterior predictive p-value
pppv <- apply(abs(Tsim) >= abs(Tobs), 2, mean)
return(list(pppv, Tsim, Tobs))
}
#' Fuzzy - FEP bandwidth
#'
#' @param dataset The dataset with the variables
#' @param forcing_bin_var_name forcing_bin_var_name
#' @param forcing_var_name forcing_var_name
#' @param Y_name Y_name
#' @param niter niter
#' @param bandwidth bandwidth
#' @param cut_value cut_value
#' @param W selected
#' @return data frame with variable and value and bandwidth
#' @export
#'
fuzzy_fep_bw <- function(dataset = data, forcing_var_name = "S", Y_name = "dropout", niter = 1000, W_name = "W", bandwidth = c(500, 1000, 1500), cut_value = 15000,
M2 = 5, whichunder = 1, typemod = "binary", typesided = "onesided") {
dataset$W <- dataset[, W_name]
dataset$Y <- dataset[, Y_name]
# for each bandwidth
len_bw <- length(bandwidth)
# Cut value
s0 <- cut_value
if (whichunder == 1) {
dataset$assigVar <- ifelse(dataset[, forcing_var_name] <= cut_value, 1, 0)
} else if (whichunder == 0) {
dataset$assigVar <- ifelse(dataset[, forcing_var_name] <= cut_value, 0, 1)
}
dataset$Z <- dataset$assigVar
Sh <- dataset[, forcing_var_name]
pbalioak <- c()
nak <- c()
df2 <- data.frame(matrix(888, 1, 4))
names(df2) <- c("Bandwidth", "Statistic: IV estimate of CACE", "Statistic: MLE of CACE", "Statistic: Posterior median of CACE")
dfvec <- data.frame(matrix(888, 1, 3))
names(dfvec) <- c("CACE.IV", "CACE.MLE", "CACE.PM")
dfp <- 0
for (b in 1:len_bw) {
# bandwidth
h <- bandwidth[b]/2
# Filter dataste with bw
dat_bw <- dataset[Sh >= s0 - h & Sh <= s0 + h, ]
# number of records N
N <- dim(dat_bw)[1]
print(N)
zg <- dat_bw[, "assigVar"]
yg <- dat_bw[, Y_name]
wg <- dat_bw[, W_name]
# if numeric or dichotomic
if (typemod == "binary") {
if (typesided == "onesided") {
fu <- fuzzy_fep1sided(dat_bw, Y = yg, W = wg, Z = zg, Y_name = Y_name, M2 = M2)
ft <- c(bandwidth[b], fu[[1]])
} else if (typesided == "twosided") {
fu <- fuzzy_fep2sided(dat_bw, Y = yg, W = wg, Z = zg, Y_name = Y_name, M2 = M2)
ft <- c(bandwidth[b], fu[[1]])
}
} else if (typemod == "numeric") {
if (typesided == "onesided") {
fu <- fuzzy_fep_numeric1sided(dat_bw, Y = yg, W = wg, Z = zg, Y_name = Y_name, M2 = M2)
ft <- c(bandwidth[b], fu[[1]])
} else if (typesided == "twosided") {
fu <- fuzzy_fep_numeric2sided(dat_bw, Y = yg, W = wg, Z = zg, Y_name = Y_name, M2 = M2)
ft <- c(bandwidth[b], fu[[1]])
}
}
df2 <- rbind(df2, ft)
dfvec <- rbind(dfvec, fu[[2]])
dfp <- rbind(dfp, fu[[3]])
} # for b
df2 <- df2[-1, ]
dfvec <- dfvec[-1, ]
dfp <- dfp[-1, ]
return(list(df2, dfvec, dfp))
} # function end
#' Tave
#'
#' @param x The dataset with the variables
#' @param z forcing_bin_var_name
#' @return data frame with variable and value and bandwidth
#' @export
#'
Tave <- function(x, z) {
x0 <- mean(x[z == 0])
x1 <- mean(x[z == 1])
return(abs(x1 - x0))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.