#' Estimating design sensitivity.
#'
#' This function estimates design sensitivity parameter associated with a list of matching algorithms.
#' @param x A list of matched sampling algorithms.
#' @param sf A fraction used to split the entire matched sample into planning sample (to estimate design sensitivity) and analysis sample (for outcome analysis).
#' @param Card_Dat Dataframe passed to the function implementing cardinality matching. Passed to \code{ds_function} only if cardinality matching is one of the matching algorithms to be evaluated
#' @export
#' @examples
#' # Implementing different types of matching using matchit package
#' m.out_Nrst <- matchit(exposure~V2+V3+V5+V6+V8+V9,data=dat, method = "nearest", distance = "logit",replace = TRUE ,print = FALSE,ratio = 1)
#' m.out_NrstCAL <- matchit(exposure~V2+V3+V5+V6+V8+V9,data=dat, method = "nearest", distance = "logit",replace = TRUE,print = FALSE,ratio = 1,caliper=0.2)
#' m.out_optimal <- matchit(exposure~V2+V3+V5+V6+V8+V9,data=dat, method = "optimal", distance = "logit",replace =TRUE,print = FALSE,ratio = 1)
#' m.out_genetic <- matchit(exposure~V2+V3+V5+V6+V8+V9,data=dat, method = "genetic", distance = "logit",replace = TRUE,print = FALSE,ratio = 1,unif.seed=1945, int.seed=1906)
#'
#'
#' # Implementing cardinality matching
#' # Solver options
#' t_max = 60*5
#' solver = "glpk"
#' approximate = 1
#' solver = list(name = solver, t_max = t_max, approximate = approximate,round_cplex = 0, trace = 0)
#' Card_Dat = dat[order(dat$exposure==1, decreasing = TRUE), ]
#' Card_Dat$`.distance` <- (glm(exposure~V2+V3+V5+V6+V8+V9, data = Card_Dat ,family = binomial(link="logit")))$fitted.values
#'
#' attach(Card_Dat)
#' t_ind = exposure
#' # Distance matrix
#' dist_mat = NULL
# Subset matching weight
#' subset_weight = 1
#'
#' ## Setting balance criteria
#' mom_covs = cbind(.distance,V2,V3,V5,V6,V8,V9)
#' mom_tols = round(absstddif(mom_covs, t_ind, .1), 2)
#' mom = list(covs = mom_covs, tols = mom_tols)
#'
#' ## Creating cardinality matching object
#' m.out_dsnMatch = bmatch(t_ind = t_ind, dist_mat = dist_mat, subset_weight = subset_weight,mom = mom,solver = solver)
#'
#' # Assessing balance
#' t_id1 = m.out_dsnMatch$t_id
#' c_id1 = m.out_dsnMatch$c_id
#' dsn_bal = meantab(mom_covs, t_ind, t_id1, c_id1)[,6]
#' Card_Dat_A = rbind(Card_Dat[t_id1,],Card_Dat[c_id1,])
#' detach(Card_Dat)
#' arg.list1 = list("a"=m.out_Nrst , "b"=m.out_NrstCAL)
#' arg.list2= list("a"=m.out_Nrst , "b"=m.out_NrstCAL, "c"=m.out_dsnMatch, "d"=m.out_optimal)
#' k=ds_function(arg.list2,Card_Dat = Card_Dat)
#' k$designSensitivity
ds_function <-function (x,sf = 1/3,Card_Dat=NULL)
{
set.seed(1)
if(sum(unlist(lapply(x,class)) %in%c("matchit","list")) !=length(unlist(lapply(x,class)))){
stop("All objects must be of class matchit or list")
}
else{
x1=x[unlist(lapply(x,class))=="matchit"]
unmatchedData = x1$a$model$data
ds_df1 = list()
analysisDataframe1 = list()
#print("... Design Sensitivity Results ...")
for(i in 1:length(x1)){
dat = x1[[i]]$model$data
ctrlUnits = dat[as.numeric(as.vector(x1[[i]]$match.matrix[,1])),]
trtUnits = dat[as.numeric(row.names(x1[[i]]$match.matrix)),]
ctrlUnits$pair_id = c(1:nrow(ctrlUnits))
trtUnits$pair_id = c(1:nrow(trtUnits))
matchDat = rbind(trtUnits,ctrlUnits) # matched data with pair_ids
ctrl_outcomes = ctrlUnits[,c("pair_id","Y2")]
trt_outcomes = trtUnits[,c("pair_id","Y2")]
matchDat_outcomes= left_join(trt_outcomes,ctrl_outcomes,by = "pair_id")
names(matchDat_outcomes)[c(2,3)]<-c("trt_outcomes","ctrl_outcomes")
matchDat_outcomes = na.omit(matchDat_outcomes)
matchDat_outcomes$diff = matchDat_outcomes$trt_outcomes-matchDat_outcomes$ctrl_outcomes
matchDat_plan = sample_n(matchDat_outcomes,floor(sf*nrow(matchDat_outcomes)))
rownames(matchDat_plan) = c(1:nrow(matchDat_plan))
matchDat_analysis = subset(matchDat,!(pair_id %in% matchDat_plan$pair_id))
sumlogicP1 = function(df,diff){
x1 =sample(1:(nrow(df)-2),1)
x2 =sample((x1+1):(nrow(df)),1)
sum(df[x1,diff],df[x2,diff])>0
}
p1 = mean(replicate(1000,sumlogicP1(matchDat_plan,"diff")))
#ds_df[i] = p1/(1-p1)
ds_df1[[i]] = ifelse((p1/(1-p1))<1,1,(p1/(1-p1)))
analysisDataframe1[[i]] = matchDat_analysis
}
# CARDINALITY MATCHING
ds_df2 = list()
analysisDataframe2 =list()
x2=x[unlist(lapply(x,class))=="list"]
if(length(x2)==0)
{
ds_df =ds_df1
names(ds_df) <- names(x)
names(analysisDataframe1)=names(x)
ds_df2=data.frame(matrix(unlist(ds_df), nrow=length(x), byrow=T),stringsAsFactors=FALSE)
ds_df2$Algorithm = names(x)
names(ds_df2) = c("Design.Sensitivity","Algorithm")
ds_df2 = ds_df2[,c("Algorithm","Design.Sensitivity")]
ds_df2 = ds_df2[order(ds_df2$Design.Sensitivity),]
dataframes = analysisDataframe1
names(dataframes)=names(x)
}
else
{
for(i in 1:length(x2)){
ctrlUnits = Card_Dat[x2[[i]]$c_id,]
trtUnits = Card_Dat[x2[[i]]$t_id,]
ctrlUnits$pair_id = c(1:nrow(ctrlUnits))
trtUnits$pair_id = c(1:nrow(trtUnits))
matchDat = rbind(trtUnits,ctrlUnits) # matched data with pair_ids
ctrl_outcomes = ctrlUnits[,c("pair_id","Y2")]
trt_outcomes = trtUnits[,c("pair_id","Y2")]
matchDat_outcomes= left_join(trt_outcomes,ctrl_outcomes,by = "pair_id")
names(matchDat_outcomes)[c(2,3)]<-c("trt_outcomes","ctrl_outcomes")
matchDat_outcomes = na.omit(matchDat_outcomes)
matchDat_outcomes$diff = matchDat_outcomes$trt_outcomes-matchDat_outcomes$ctrl_outcomes
matchDat_plan = sample_n(matchDat_outcomes,floor(sf*nrow(matchDat_outcomes)))
rownames(matchDat_plan) = c(1:nrow(matchDat_plan))
matchDat_analysis = subset(matchDat,!(pair_id %in% matchDat_plan$pair_id))
sumlogicP1 = function(df,diff){
x1 =sample(1:(nrow(df)-2),1)
x2 =sample((x1+1):(nrow(df)),1)
sum(df[x1,diff],df[x2,diff])>0
}
p1 = mean(replicate(1000,sumlogicP1(matchDat_plan,"diff")))
#ds_df[i] = p1/(1-p1)
ds_df2[[i]] = ifelse((p1/(1-p1))<1,1,(p1/(1-p1)))
analysisDataframe2[[i]] = matchDat_analysis
}
ds_df = c(ds_df1,ds_df2)
names(ds_df) <- names(x)
ds_df2=data.frame(matrix(unlist(ds_df), nrow=length(x), byrow=T),stringsAsFactors=FALSE)
ds_df2$Algorithm = names(x)
names(ds_df2) = c("Design.Sensitivity","Algorithm")
ds_df2 = ds_df2[,c("Algorithm","Design.Sensitivity")]
ds_df2 = ds_df2[order(ds_df2$Design.Sensitivity),]
dataframes = c(analysisDataframe1,analysisDataframe2)
names(dataframes)=names(x)
}
}
Obj <- list(designSensitivity =ds_df2, AnalysisDataFrames = dataframes,UnmatchedData =unmatchedData )
class(Obj) <- "DSensitivity"
Obj
}
#' Assessing the three Rubin Rules.
#'
#' This function allows you to assess how sensitive your results are to unmeasured variable.
#' @param data data set to be used.
#' @param Treatment A variables defining exposure group.
#' @param matchscore Variable containing matching distance.Default is propensity score.
#' @param covlist list of variables to be balanced. Note: All variable should be of numeric type.
#' @export
#' @examples
#' data(toy)
#' psmodel <- glm(treated ~ covA + covB + covC + covD + covE + covF + Asqr + BC + BD, family=binomial(), data=toy)
#'
#' toy$ps <- psmodel$fitted
#' toy$linps <- psmodel$linear.predictors
#'
#' covlist1=c('covA', 'covB', 'covC', 'covD', 'covE', 'covF.Middle', 'covF.High', 'Asqr','BC', 'BD')
#'
#' rubinRules(data=toy,Treatment='treated',covlist=covlist1)
rubinRules2 = function(data, Treatment, matchscore = "ps", covlist) {
results = list()
# Rubin 1
data1a = subset(data, select = c(Treatment, matchscore))
names(data1a)[c(1:2)] = c("Treatment", "matchscore")
data1a$Treatment = as.factor(data1a$Treatment)
results$RUBIN1 <- with(data1a, abs(100 * (mean(matchscore[Treatment == "1"]) - mean(matchscore[Treatment == "0"]))/sd(matchscore)))
# Rubin 2
results$RUBIN2 <- with(data1a, var(matchscore[Treatment == "1"])/var(matchscore[Treatment == "0"]))
# Rubin 3
data1d = subset(data, select = c(Treatment, matchscore))
names(data1d) = c("Treatment", "matchscore")
data1f = as.data.frame(cbind(data, data1d))
data1f$Treatment = as.factor(data1f$Treatment)
covlist1 = data1f[covlist]
covlist2 <- as.matrix(covlist1)
res <- NA
for (i in 1:ncol(covlist2)) {
cov <- as.numeric(covlist2[, i])
num <- var(resid(lm(cov ~ data1f$matchscore))[data1f$Treatment == "1"])
den <- var(resid(lm(cov ~ data1f$matchscore))[data1f$Treatment == "0"])
res[i] <- round(num/den, 3)
}
names(res) <- names(covlist1)
# print(res)
res2 = data.frame(res)
res2$VarName = rownames(res2)
rownames(res2) = 1:dim(res2)[1]
res2 = res2[,c("VarName","res")]
names(res2)[2] = "Rubin3"
results$RUBIN3 = res2
res3 <- res2 %>%
dplyr::arrange(Rubin3) %>%
dplyr::mutate(VarName = factor(VarName, levels = .$VarName))
p0 = ggplot(res3, aes(Rubin3, VarName))+geom_point()+
geom_vline(xintercept = 1,colour = "black")+
geom_vline(xintercept = 0.8,colour = "blue",linetype = "dashed")+
geom_vline(xintercept = 1.25,colour = "blue",linetype = "dashed")+
geom_vline(xintercept = 0.5,colour = "red",linetype = "dashed")+
geom_vline(xintercept = 2,colour = "red",linetype = "dashed")+ labs(x = "Residual Variance Ratio",y= " Variables",title = "Rubin's Rules Plot",subtitle = paste0(" Rubin One:",round(results$RUBIN1, 2)," Rubin Two: ",round(results$RUBIN2, 2)))
results$plot = p0
return(results)
}
#' Amplifying and visualizing gamma parameter.
#'
#' @param gamma gamma parameter to be amplfied.
#' @param lambda vector of 3 possible n-fold increase in odds of treatment to be amplified.
#' @export
#' @return An amplification plot.
#' @examples
#' ampPlot(10, c(12,13,30))
ampPlot <- function(gamma, lambda) {
stopifnot(length(gamma) == 1)
stopifnot(gamma > 1)
stopifnot(min(lambda) > gamma)
delta <- (gamma * lambda - 1)/(lambda - gamma)
# plot(lambda,delta,type = 'l')
sensdata = data.frame(lambda, delta)
ggplot(sensdata, aes(x = lambda, y = delta)) + geom_line() + theme_bw() + annotate("text", x = sensdata[1, 1], y = sensdata[1,
2], hjust = -0.5, label = paste0("(", sensdata[1, 1], ",", sensdata[1, 2], ")")) + annotate("text", x = sensdata[2,
1], y = sensdata[2, 2], hjust = -0.5, label = paste0("(", sensdata[2, 1], ",", sensdata[2, 2], ")")) + annotate("text",
x = sensdata[3, 1], y = sensdata[3, 2], hjust = 1.2, label = paste0("(", sensdata[3, 1], ",", sensdata[3, 2], ")")) +
geom_point()
}
#' Sensitivity Test for Matched Time to Event Outcomes
#'
#' This function performs sensitivity analysis for time to event outcomes in observational studies
#'
#' @param x name of the matching object. Should be either a \code{Match} or \code{matchit} object.
#' @param data dataset.
#' @param exp name of treatment variable.
#' @param outcome name of outcome variable.
#' @param failtime name of event time variable.
#' @param Gamma upper bound for sensitivity parameter.
#' @param Gammainterval value by which to increament sensitivity parameter from zero to \code{Gamma}
#' @param plot_title main title of your plot.
#' @export
#' @examples
#' data(toy)
#' psmodel <- glm(treated ~ covA + covB + covC + covD + covE + covF + Asqr + BC + BD, family=binomial(), data=toy)
#' toy$ps <- psmodel$fitted
#' toy$linps <- psmodel$linear.predictors
#' X <- toy$linps ## matching on the linear propensity score
#' Tr <- as.logical(toy$treated)
#' Y <- toy$out3.time
#' match1 <- Match(Y=Y, Tr=Tr, X=X, M = 1, replace=FALSE, ties=FALSE)
#' match.it <- matchit(treated ~ covA + covB + covC + covD + covE + covF + Asqr + BC + BD, data = toy, method='nearest', ratio=1)
#' Survsens(x= match.it,data =toy,exp='treated',outcome = 'out2',failtime = 'out3.time',Gamma=2.4,Gammainterval = 0.01,alpha = 0.05,plot_title = 'Time To Event Outcome Sensitivity Plot')
#'
#' @references Rosenbum 2011
#'
Survsens = function(x, y=NULL,data =NULL, exp=NULL, outcome=NULL, failtime, Gamma, alpha=0.05, Gammainterval, plot_title = NULL) {
results = list()
if ((class(x) != "Match") & (class(x) != "matchit") & (class(x) !="list"))
{
wonpairs <- x # won pairs
expoutlive <- y
results$wonpairs = x
results$expoutlive = y
}
else{
if (class(x) == "Match") {
extractor = (c(x$index.treated, x$index.control))
group_id = c(c(1:length(x$index.treated)), c(1:length(x$index.control)))
data1s = data[extractor, c(exp, outcome, failtime)]
data1s$match33 = group_id
} else if (class(x) == "matchit") {
# data$rId = row.names(data) data2 = data[, c('rId', exp)] data2$rId = row.names(data2) names(data2)[2] = 'exp'
t_id = rownames(x$match.matrix)
c_id = x$match.matrix
extractor = as.numeric(c(t_id, c_id))
data1s = x$model$data[extractor, c(exp, outcome, failtime)]
k2 = length(extractor)/2
match = rep(1:k2, 2)
data1s$match = match
} else {
t_id = x$t_id
c_id = x$c_id
extractor(t_id, c_id)
extractor = as.numeric(c(t_id, c_id))
data1s = data[extractor, c(exp, outcome, failtime)]
data1s$match = x$group_id
}
# data1s = subset(data1, select = c('match', exp, outcome, failtime))
names(data1s)[c(1:3)] = c("exp", "outcome", "failtime")
data2s = subset(data1s, exp == 0)
data3s = subset(data1s, exp == 1)
data4s = subset(data2s, select = c("match", "exp", "outcome", "failtime"))
data5s = subset(data3s, select = c("match", "exp", "outcome", "failtime"))
data6s = dplyr::full_join(data4s, data5s, by = "match")
names(data6s)[c(3, 4, 6, 7)] = c("ExpOutcome", "failtimeNotexp", "NoExpOutcome", "failtimeExp")
data6s$timediff = data6s$failtimeNotexp - data6s$failtimeExp
wonpairs = sum(data6s$timediff != 0)
expoutlive = sum(data6s$timediff < 0)
results$wonpairs = sum(data6s$timediff != 0)
results$expoutlive = sum(data6s$timediff < 0)
}
## Universal code
gamVal = seq(1, Gamma, by = Gammainterval)
pminus = 1/(1 + gamVal)
pplus = gamVal/(1 + gamVal)
bounds = data.frame(cbind(gamVal, pplus, pminus))
bounds$expTplus = wonpairs * bounds$pplus
bounds$expTminus = wonpairs * bounds$pminus
bounds$sd_expT = sqrt(wonpairs * bounds$pplus * (1 - bounds$pplus))
for (i in 1:length(gamVal)) {
bounds$pupper[i] = round(min(1, 2 * pnorm((expoutlive - bounds[i, 4])/bounds[i, 6], lower.tail = FALSE)), 4)
}
for (i in 1:length(gamVal)) {
bounds$plower[i] = round(min(1, 2 * pnorm((expoutlive - bounds[i, 5])/bounds[i, 6], lower.tail = FALSE)), 4)
}
bounds2 = bounds[,c(1,8,7)]
names(bounds2)=c("Gamma","Lower bound"," Upper bound")
bounds1 = bounds
bounds1$min = abs(alpha - bounds1$pupper)
vrt = bounds1[bounds1$min == min(bounds1$min), ]$gamVal
hrz = bounds1[bounds1$min == min(bounds1$min), ]$pupper
vrt1 = round(bounds1[bounds1$min == min(bounds1$min), ]$gamVal, 2)
hrz1 = round(bounds1[bounds1$min == min(bounds1$min), ]$pupper, 2)
plot = ggplot(data = bounds1, aes(x = gamVal, y = pupper)) + geom_line() + geom_point(aes(x = vrt, y = hrz)) + ylab("p upper bound") +
xlab("gamma (Bias)") + ylim(0, 0.06) + theme_bw() + annotate("text", x = vrt + 0.1 * vrt, y = hrz, label = paste0("(",
vrt1, ",", hrz1, ")")) + labs(title = plot_title, caption = paste("matching done by", class(x), "function")) + theme(plot.title = element_text(hjust = 0.5))
results$plot = plot
results$upperbound_pval = hrz = bounds1[bounds1$min == min(bounds1$min), ]$pupper
results$Gamma = bounds1[bounds1$min == min(bounds1$min), ]$gamVal
results$bounds = bounds2
return(results)
}
#' Creating table One
#'
#' @param x Count of pairs where only control has outcome.
#' @param y Count of pairs where onlytreated has outcome.
#' @export
#' @return The sum of \code{x} and \code{y}.
#' @examples
#' edaTable (220, 380)
edaTable = function(baselinevars, outcomeVar, data) {
results = list()
formula = reformulate(termlabels = baselinevars, response = outcomeVar)
res1 <- compareGroups(formula, data = dataA, ref = 1)
table01 = createTable(res1, show.p.mul = TRUE, show.p.overall = TRUE)
try(export2pdf(table01, file = "table1.pdf"), silent = T)
}
#' Sensitivity analysis with Matching, MatchIt and designmatch objects for a continous outcome.
#'
#' @param x Treatment group outcomes or an objects from a Match,MatchIt or designmatch.
#' @param y Control group outcomes in same order as treatment group outcomes such that members of a pair occupy the same row in both x and y. Should not be specified x is a Matching, MatchIt and designmatch objects.
#' @param est Treatment effect. Must be specified if x is not a MatchIt object.
#' @param Gamma Upper bound of sensitivity parameter
#' @param GammaInc interval width for increasing gamma from 1 until the specified upper bound of sensitivity parameter is reached.
#' @param data Dataframe used to during matching. You do not have to specify this parameter if x is a MatchIt object
#' @param treat Treatmetn/Exposure variable name.
#' @export
#' @return a table of Rosenbaum bounds
#' @examples
#' ## Loading lalonde data
#' library(Matching);library(MatchIt)
#' data("lalonde",package = "Matching")
#'
#' ## Sensitivity analysis with a matchit object
#' m.out = matchit(treat ~ age + educ + black + hisp +married + nodegr + re74 + re75 +
#' u74 + u75, family=binomial, data = lalonde, method = "nearest")
#'
#' ## Estimating treatment effect. Ideally, balance assessement should be done prior to estimating treatment effect
#' mod = lm(re78~age + educ + black + hisp +married + nodegr + re74 + re75 +u74 + u75,data = match.data(m.out))
#' pens2(x = m.out, y="re78",Gamma = 2, GammaInc = 0.1,est = 629.7)
#'
#' ## using match object
#' ## Estimate Propensity Score
#' data("lalonde",package = "Matching")
#' DWglm <- glm(treat ~ age + I(age^2) + educ + I(educ^2) + black + hisp +married + nodegr + re74 + I(re74^2) + re75 + I(re75^2) +u74 + u75, family=binomial, data=lalonde)
#' ## Specifying outcome and treatment(Exposure)
#' Y <- lalonde$re78
#' Tr <- lalonde$treat
#' ## Matching using Match object.
#' mDW <- Match(Y=Y, Tr=Tr, X=DWglm$fitted, replace=FALSE)
#' ## Sensitivity analysis
#' pens2(mDW, Gamma = 2, GammaInc = 0.1)
#'
#' ## Sensitivity analysis with a matchit object
#' data("lalonde",package = "designmatch")
#' library(designmatch)
#' attach(lalonde)
#' ## Treatment indicator
#' t_ind = treatment
#' ## Distance matrix
#' dist_mat = NULL
#' ## Subset matching weight
#' subset_weight = 1
#' ## Moment balance: constrain differences in means to be at most .05 standard deviations apart
#' mom_covs = cbind(age, education, black, hispanic, married, nodegree, re74, re75)
#' mom_tols = round(absstddif(mom_covs, t_ind, .05), 2)
#' mom = list(covs = mom_covs, tols = mom_tols)
#' ## Fine balance
#' fine_covs = cbind(black, hispanic, married, nodegree)
#' fine = list(covs = fine_covs)
#' ## Exact matching
#' exact_covs = cbind(black)
#' exact = list(covs = exact_covs)
#' ## Solver options
#' t_max = 60*5
#' solver = "glpk"
#' approximate = 1
#' solver = list(name = solver, t_max = t_max, approximate = approximate,round_cplex = 0, trace = 0)
#' ## Match
#' out = bmatch(t_ind = t_ind, dist_mat = dist_mat, subset_weight = subset_weight,mom = mom, fine = fine, exact = exact, solver = solver)
#'
#' ## Sensitivity analysis with designmatch Object
#' pens2(x = out, y="re78",Gamma = 2, GammaInc = 0.1,est = 234,treat = "treatment",data = lalonde)
#' detach(lalonde)
pens2 =function (x, y = NULL, est = NULL, Gamma = 2, GammaInc = 0.1,
data = NULL, treat)
{
if (length(x) == 1) {
ctrl <- x
trt <- y
}
else {
if (class(x) == "Match") {
if (x$est > 0) {
ctrl <- x$mdata$Y[x$mdata$Tr == 0]
trt <- x$mdata$Y[x$mdata$Tr == 1]
}
else {
ctrl <- x$mdata$Y[x$mdata$Tr == 1]
trt <- x$mdata$Y[x$mdata$Tr == 0]
}
}
else if (class(x) == "matchit") {
if (missing(est)) {
stop("Est parameter missing")
}
else if (est > 0) {
trt = (x$model$data[as.numeric(row.names(x$match.matrix)),])[[y]]
ctrl = (x$model$data[as.numeric(x$match.matrix[, 1]), ])[[y]]
}
else {
ctrl = (x$model$data[row.names(x$match.matrix),
])[[y]]
trt = (x$model$data[x$match.matrix[, 1], ])[[y]]
}
}
else if (class(x) == "list") {
if (missing(est) | missing(data)) {
warning("Est or Data not specified")
}
else {
data = dplyr::arrange(data, desc(data[[treat]]))
rownames(data) = 1:dim(data)[1]
if (est > 0) {
trt = (data[as.character(x$t_id), ])[[y]]
ctrl = (data[as.character(x$c_id), ])[[y]]
}
else {
ctrl = (data[as.character(x$t_id), ])[[y]]
trt = (data[as.character(x$c_id), ])[[y]]
}
}
}
}
gamma <- seq(1, Gamma, by = GammaInc)
m <- length(gamma)
pvals <- matrix(NA, m, 2)
diff <- trt - ctrl
diff <- na.omit(diff)
S <- length(diff)
diff <- diff[diff != 0]
ranks <- rank(abs(diff), ties.method = "average")
psi <- as.numeric(diff > 0)
T <- sum(psi * ranks)
for (i in 1:m) {
p.plus <- gamma[i]/(1 + gamma[i])
p.minus <- 1/(1 + gamma[i])
E.T.plus <- sum(ranks * p.plus)
V.T <- sum(ranks^2 * p.plus * (1 - p.plus))
E.T.minus <- sum(ranks * p.minus)
z.plus <- (T - E.T.plus)/sqrt(V.T)
z.minus <- (T - E.T.minus)/sqrt(V.T)
p.val.up <- 1 - pnorm(z.plus)
p.val.low <- 1 - pnorm(z.minus)
pvals[i, 1] <- round(p.val.low, digits = 4)
pvals[i, 2] <- round(p.val.up, digits = 4)
}
pval <- pvals[1, 1]
bounds <- data.frame(gamma, pvals)
names(bounds) <- c("Gamma", "Lower bound", "Upper bound")
msg <- "Rosenbaum Sensitivity Test for Wilcoxon Signed Rank P-Value \n"
note <- "Note: Gamma is Odds of Differential Assignment To\n Treatment Due to Unobserved Factors \n"
Obj <- list(Gamma = Gamma, GammaInc = GammaInc, pval = pval,
msg = msg, bounds = bounds, note = note)
class(Obj) <- c("rbounds", class(Obj))
Obj
}
#' Sensitivity analysis with Matching, MatchIt and designmatch objects for a binary outcome.
#'
#' @param x Treatment group outcomes or an objects from a Match,MatchIt or designmatch.
#' @param y Control group outcomes in same order as treatment group outcomes such that members of a pair occupy the same row in both x and y. Should not be specified x is a Matching, MatchIt and designmatch objects.
#' @param Gamma Upper bound of sensitivity parameter
#' @param GammaInc interval width for increasing gamma from 1 until the specified upper bound of sensitivity parameter is reached.
#' @param data Dataframe used to during matching. You do not have to specify this parameter if x is a MatchIt object
#' @param treat Treatmetn/Exposure variable name.
#' @param alpha p-value to define maximum upper bound allowable
#' @export
#' @return a table of Rosenbaum bounds
#' @examples
#'
#' ## Sensitivity analysis with a matchit object
#' library(Matching);library(MatchIt);library(designmatch)
#' data("GerberGreenImai",package = "Matching")
#' ## Estimate Propensity Score
#' pscore.glm <- glm(PHN.C1 ~ PERSONS + VOTE96.1 + NEW +MAJORPTY + AGE + WARD + PERSONS:VOTE96.1 + PERSONS:NEW + AGE2, family = binomial(logit), data = GerberGreenImai)
#' ## save data objects
#' D <- GerberGreenImai$PHN.C1
#' Y <- GerberGreenImai$VOTED98
#' X <- fitted(pscore.glm)
#' ## Match - without replacement
#' m.obj <- Match(Y = Y, Tr = D, X = X, M = 1, replace=FALSE)
#' ## Sensitivity Test
#' binarysens2(m.obj, Gamma=2, GammaInc=.1)
#' ## Sensitivity analysis with a Match object
#' m.out = matchit(PHN.C1 ~ PERSONS + VOTE96.1 + NEW +MAJORPTY + AGE + WARD , family=binomial, data = GerberGreenImai, method = "nearest")
#' mod = lm(VOTED98 ~ PHN.C1+PERSONS + VOTE96.1 + NEW +MAJORPTY + AGE + WARD,data = match.data(m.out))
#' binarysens2(x=m.out,y ="VOTED98", Gamma=2, GammaInc=.1)
#' ## Sensitivity analysis with a designmatch object
#' ## data("GerberGreenImai",package = "Matching")
#' attach(GerberGreenImai)
#' ## Treatment indicator
#' t_ind = PHN.C1
#' ## Distance matrix
#' dist_mat = NULL
#' ## Subset matching weight
#' subset_weight = 1
# Moment balance: constrain differences in means to be at most .05 standard deviations apart
#' mom_covs = cbind(PERSONS,VOTE96.1 ,NEW , MAJORPTY , AGE , WARD)
#' mom_tols = round(absstddif(mom_covs, t_ind, .05), 2)
#' mom = list(covs = mom_covs, tols = mom_tols)
#' ## Solver options
#' t_max = 60*5
#' solver = "glpk"
#' approximate = 1
#' solver = list(name = solver, t_max = t_max, approximate = approximate,round_cplex = 0, trace = 0)
#' ## Match
#' out = bmatch(t_ind = t_ind, dist_mat = dist_mat, subset_weight = subset_weight,mom = mom,solver = solver)
#' binarysens2(x=out,y ="VOTED98", Gamma=2, GammaInc=.1,treat = "PHN.C1",data = GerberGreenImai)
#' detach(GerberGreenImai)
binarysens2 = function (x, y = NULL, Gamma = 6, GammaInc = 1,data =NULL,treat =NULL,alpha = 0.05)
{
if (length(x) == 1) {
ctrl <- x
trt <- y
}
else {
if (class(x)=="matchit"){
if(missing(y)){
warning("y not defined")
}
else{
y.t = (x$model$data[row.names(x$match.matrix),])[[y]]
y.c = (x$model$data[x$match.matrix[,1],])[[y]]
}
}
else if(class(x) == "Match"){
y.c <- x$mdata$Y[x$mdata$Tr == 0]
y.t <- x$mdata$Y[x$mdata$Tr == 1]
}
else if(class(x)== "list"){
data = dplyr::arrange(data,desc(data[[treat]]))
rownames(data) = 1:dim(data)[1]
y.t = (data[as.character(x$t_id),])[[y]]
y.c = (data[as.character(x$c_id),])[[y]]
}
else {
print("Accepts only matchit, match or bmatch objects.")
}
table(y.t, y.c)
y.tmp1 <- table(y.t, y.c)[2]
y.tmp2 <- table(y.t, y.c)[3]
if (y.tmp1 >= y.tmp2) {
trt <- y.tmp1
ctrl <- y.tmp2
}
else {
trt <- y.tmp2
ctrl <- y.tmp1
}
}
gamma <- seq(1, Gamma, by = GammaInc)
mx <- ctrl + trt
up <- c()
lo <- c()
series <- seq(trt, mx, by = 1)
n.it <- length(gamma)
for (i in 1:n.it) {
p.plus <- gamma[i]/(1 + gamma[i])
p.minus <- 1/(1 + gamma[i])
up.tmp <- sum(dbinom(series, mx, prob = p.plus))
lo.tmp <- sum(dbinom(series, mx, prob = p.minus))
up <- c(up, up.tmp)
lo <- c(lo, lo.tmp)
}
pval <- lo[1]
bounds <- data.frame(gamma, plower = round(lo, 5), pupper = round(up, 5))
bounds$min = abs(alpha - bounds$pupper)
vrt = bounds[bounds$min == min(bounds$min), ]$gamma
hrz = bounds[bounds$min == min(bounds$min), ]$pupper
vrt1 = round(bounds[bounds$min == min(bounds$min), ]$gamma, 2)
hrz1 = round(bounds[bounds$min == min(bounds$min), ]$pupper, 2)
plot = ggplot(data = bounds, aes(x = gamma, y = pupper)) + geom_line() + geom_point(aes(x = vrt, y = hrz)) + ylab("p upper bound") +
xlab("gamma (Bias)") + theme_bw() + annotate("text", x = vrt1 + 0.1, y = hrz1, label = paste0("(", vrt1, ",", hrz1,
")")) + labs(title = "Binary Outcome Sensitivity Plot")
colnames(bounds) <- c("Gamma", "Lower bound", "Upper bound")
msg <- "Rosenbaum Sensitivity Test \n"
note <- "Note: Gamma is Odds of Differential Assignment To\n Treatment Due to Unobserved Factors \n"
Obj <- list(Gamma = Gamma, GammaInc = GammaInc, pval = pval, msg = msg, bounds = bounds[, c(1:3)], note = note, plot = plot)
#class(Obj) <- c("rbounds", class(Obj))
return(Obj)
}
#' Love plots with `designmatch`,`matchIt`,`Matching` Object
#'
#' ## Love plot with `designmatch` package object
#'
#' @param x A `designmatch`,`matchIt`,`Matching` Object
#' @param data dataset before its preprocessed.
#' @param covList a vector of covariates to be balanced.
#' @param treat Exposure variable name
#' @return a plot of standardized difference.
#' @export
#'
#' @examples
#' data("lalonde",package = "cobalt")
#' attach(lalonde)
#'
#' ## Treatment indicator
#' t_ind =lalonde$treat
#'
#' ## Distance matrix
#' dist_mat = NULL
#'
#' ## Subset matching weight.
#' subset_weight = 1
#'
#' # Moment balance: constrain differences in means to be at most .05 standard deviations apart
#' mom_covs = cbind(age, educ, black, hispan, married, nodegree, re74, re75)
#' mom_tols = round(absstddif(mom_covs, t_ind, .05), 2)
#' mom = list(covs = mom_covs, tols = mom_tols)
#'
#' ## Fine balance
#' fine_covs = cbind(black, hispan, married, nodegree)
#' fine = list(covs = fine_covs)
#'
#' ## Exact matching
#' exact_covs = cbind(black)
#' exact = list(covs = exact_covs)
#'
#' ## Solver options
#' t_max = 60*5
#' solver = "glpk"
#' approximate = 1
#' solver = list(name = solver, t_max = t_max, approximate = approximate,round_cplex = 0, trace = 0)
#'
#' ## Cardinality matching
#' out = bmatch(t_ind = t_ind, dist_mat = dist_mat, subset_weight = subset_weight, mom = mom, fine = fine, exact = exact, solver = solver)
#'
#' # Indices of the treated units and matched controls
#' t_id = out$t_id
#' c_id = out$c_id
#' detach(lalonde)
#'
#' # Example of cardinality Matching
#' love_plot(X =out, data = lalonde , covList=c("age", "educ", "black", "hispan", "married", "nodegree", "re74", "re75"))
#'
#' # Example with MatchIt package
#' # data("lalonde") if not yet loaded
#' covs0 <- subset(lalonde, select = -c(treat, re78, nodegree, married))
#' # Nearest neighbor 1:1 matching with replacement
#' library("MatchIt") #if not yet loaded
#' m.out <- matchit(f.build("treat", covs0), data = lalonde, method = "nearest", replace = TRUE)
#' love_plot(X =m.out, data = lalonde , covList=c("age", "educ", "black", "hispan", "married", "nodegree", "re74", "re75"))
#'
#' # Example with Matching package
#'
#' library("Matching")
#' #data("lalonde") #If not yet loaded
#' covs0 <- subset(lalonde, select = -c(treat, re78, nodegree, married))
#' fit <- glm(f.build("treat", covs0), data = lalonde, family = "binomial")
#' p.score <- fit$fitted.values
#' match.out <- Match(Tr = lalonde$treat, X = p.score, estimand = "ATT")
#' std_dif_data =bal.tab(match.out, formula = f.build("treat", covs0), data = lalonde)
#' love_plot(X, data = lalonde , covList=c("age", "educ", "black", "hispan", "re74", "re75"),treat = "treat")
love_plot = function (X,data,covList, legend_position = "topright",treat=NULL)
{
if (class(X) =="list"){
if (missing(data)|missing(covList)){
stop("Data or list of covariate need to be specified")
}
else {
attach(data)
X_mat = as.matrix(data[,covList])
detach(data)
X_mat_t = X_mat[X$t_id, ] # Extract treated observations
X_mat_c_before = X_mat[-X$t_id, ] # Extract control observations before matching
X_mat_c_before_mean = apply(X_mat_c_before, 2, mean) # Extract control variable means before matching
X_mat_t_mean = apply(X_mat_t, 2, mean) # extract mean of treated observations
X_mat_t_var = apply(X_mat_t, 2, var) # extract variance of treated observations
X_mat_c_before_var = apply(X_mat_c_before, 2, var) # Extract variance before matching for control obserations
std_dif_before = (X_mat_t_mean - X_mat_c_before_mean)/sqrt((X_mat_t_var + X_mat_c_before_var)/2) # std_df before matching
X_mat_c_after = X_mat[X$c_id, ] # extract matched observations
X_mat_c_after_mean = apply(X_mat_c_after, 2, mean) # extract variable mean for treated observations after matching
std_dif_after = (X_mat_t_mean - X_mat_c_after_mean)/sqrt((X_mat_t_var + X_mat_c_before_var)/2) # Compute std_diff after matching
abs_std_dif_before = data.frame(std_dif_before)
abs_std_dif_before$VarName =rownames(abs_std_dif_before)
#abs_std_dif_before$Match = "Before Matching"
rownames(abs_std_dif_before) = NULL
names(abs_std_dif_before)[1] = "abs_std_before"
abs_std_dif_after = data.frame(std_dif_after)
abs_std_dif_after$VarName =rownames(abs_std_dif_after)
#abs_std_dif_after$Match = "After Matching"
rownames(abs_std_dif_after) = NULL
names(abs_std_dif_after)[1] = "abs_std_after"
std_dif_dat2 = dplyr::left_join(abs_std_dif_before,abs_std_dif_after,by = c("VarName"))
}
}
else if (class(X) =="matchit"){
std_dif_dat = bal.tab(X)$Balance[,c("Diff.Un","Diff.Adj")][-1,]
std_dif_dat$VarName =rownames(std_dif_dat)
rownames(std_dif_dat) = NULL
std_dif_dat2 = std_dif_dat
names(std_dif_dat2) = c("abs_std_before","abs_std_after","VarName")
}
else if(class(X) =="Match"){
if(missing(treat)|missing(data)){
stop(" Treatment Variable or Data not specified")
}
else{
#bal.tab(X, formula = f.build("treat", covList), data = data)
std_dif_dat =bal.tab(X, formula = f.build(treat, covList), data = data)$Balance[,c("Diff.Un","Diff.Adj")][-1,]
std_dif_dat$VarName =rownames(std_dif_dat)
rownames(std_dif_dat) = NULL
std_dif_dat2 = std_dif_dat
names(std_dif_dat2) = c("abs_std_before","abs_std_after","VarName")
}
}
else{
stop("Object has to be MatchIt, Matching or designMatch object")
}
ggplot(std_dif_dat2, aes(x = value, y = VarName, color = Variables)) +
geom_point(aes(x = abs_std_before, col = "Before Matching")) +
geom_point(aes(x = abs_std_after, col = "After Matching"))+ theme(legend.title = element_blank())
}
#' Sensitivity Analysis with multiple controls
#'
#' @param X A `MatchIt` object
#' @param outcomeName Name of the outcome.
#' @param Gamma Upper bound of sensitivity parameter
#' @param GammaInc Interval width of sequence between 1 and Gamma.
#' @param n_contrl Number of control matched to each treated observation
#' @export
#'
#' @examples
#' data("lalonde",package ="designmatch")
#' covs0 <- subset(lalonde, select = -c(treat, re78, nodegree, married))
#' m.out <- matchit(f.build("treatment", covs0), data = lalonde, method = "nearest", replace = TRUE,ratio = 2)
#' multiControlSens(X =m.out,outcomeName = "re78",Gamma = 2,GammaInc = 0.1,n_contrl = 2)
multiControlSens = function(X,outcomeName,Gamma,GammaInc,n_contrl){
results4 = list()
data1 = get_matches(m.out,model_frame = m.out$model$data)
treat = data1[which(row.names(data1) %in% rownames(m.out$match.matrix)),][,outcomeName]
k = list()
for(i in 1:dim(m.out$match.matrix)[2]){
k[[i]] = data1[as.vector(m.out$match.matrix[,i]),outcomeName]
#assign((paste0("c",i)),data1[as.vector(m.out$match.matrix[,i]),treat])
}
data2 = as.data.frame(cbind(treat,do.call(cbind, k,n_contrl)))
names(data2) = c("Treated","Control 1","Control 2")
results4$data = data2
gma = NULL
for(i in seq(1,Gamma,by=GammaInc )){
gma=append(gma,senmw(data2, gamma = i, method = "t")$pval)
}
sensData = data.frame(cbind(seq(1,Gamma,by=GammaInc ),round(gma,2)))
names(sensData) = c("Gamma","pval Upper Bound")
results4$pvalues = sensData
return(results4)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.