Description Details Author(s) References Examples
Implements a heuristic algorithm to build blocks of a given size aiming to maximize similarity within strata across multiple covariates. The blocking structure can be used for causal inference and for sensitivity analysis to unmeasured confounding. A stratified structure gives more flexibility for using multiple instrumental variables and direct treatment vs. control analysis as evidence factors. Karmakar, B., Small, D. S., and Rosenbaum, P. R. (2018). Rosenbaum, P. R. (2010)<ISBN:978-1-4419-1213-8>.
The DESCRIPTION file:
Index: This package was not yet installed at build time.
The primary function of the package is makeblocks
. The function takes
as input a pairwise distance structure between n points to create blocks
aiming to minimize the within block distance. The distance structure
can be customized if one wishes to put more emphasis on one or a set of variables,
see example below.
For the challenge part of the package please see Challenge
section of the the manual of the function makeblocks
.
Refer to Rosenbaum, P. R.
(2018) and Rosenbaum, P. R.
(2002)
for statistical methods of causal inference and sensitivity analysis to
unmeasured confounding using blocked design built this way.
IMPORTANT NOTE: In order to perform matching, makeblocks
requires the
user to load the optmatch (>= 0.9-1) package separately. The manual loading is
required due to software license issues. If the package is not loaded the
makeblocks
command will fail with an error saying the 'optmatch' package
is not present. Reference to 'optmatch' is given below.
Bikram Karmakar
Maintainer: Bikram Karmakar <bikramk@wharton.upenn.edu>
Hansen, B.B. and Klopfer, S.O. (2006) Optimal full matching and related designs via network flows, JCGS 15 609–627.
Karmakar, B., Small, D. S. and Rosenbaum, P. R. (2018). Reinforced designs in observational studies of treatment effects: Multiple instruments plus controls as evidence factors.
Rosenbaum, P. R. (2002). Observational Studies (2nd edition), New York: Springer.
Rosenbaum, P. R. (2010). Design of Observational Studies, New York: Springer.
Rosenbaum, P. R. (2018). Sensitivity analysis for stratified comparisons in an observational study of the effect of smoking on homocysteine levels. Annals of Applied Statistics, to appear.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 | ## Not run:
data(wls)
library(optmatch)
wls4match <- wls
## This code replicates the blocking algorithm used in the paper
## Karmakar, Small, and Rosenbaum (2018).
## Create the distance matrix
distmat1 <- smahal(wls4match[,"gwiiq_bm"]) ## IQ
## Father's and mother's edu and parent's income
distmat2 <- smahal(wls4match[,c("edfa57q.NoNA", "edmo57q.NoNA", "bmpin1.NoNA",
## Father's and mother's edu and parent's income
"incg400", "incg250")])
## Indicators for income in the top 5
## occupation score
distmat2.2 <- smahal(wls4match[,c("ocsf57.NoNA", "ocpf57.NoNA")])
## missing indicators
distmat3 <- smahal(wls4match[,c("edfa57q.miss", "edmo57q.miss",
"bmpin1.miss", "ocsf57.miss", "ocpf57.miss")])
## The IQ = gwiiq_bm is given more weight.
## parents' education and parent's income
distmat = distmat1*10+6*distmat2+3*distmat2.2+2*distmat3
## creating the blocks. This can take about 30min to run.
## May take more time depending of the computation power of the system.
set.seed(0841)
res.20.2 = makeblocks(distmat, bsize=25, Itr=250, maxItr=250, .data=wls4match)
## Here we provide the code for the
## analysis of the wls data for the effect of
## Catholic schooling on earning later in life.
## This code reproduces the computation presented
## in the paper Karmakar, Small, and Rosenbaum (2018).
## Some reorganization of the data set
d <- res.20.2$.data
religion<-d$relfml*1
religion<-factor(religion,levels=c(1,0),
labels=c("Catholic","Other"),ordered=T)
urban<-d$res57Dic*1
urban<-factor(urban,levels=c(1,0),labels=c("Urban","Rural"),ordered=T)
school<-factor(d$hsdm57*1,levels=c(1,0),
labels=c("Catholic","Public"),ordered=T)
d<-cbind(d,religion,urban,school)
rm(religion,urban,school)
d$yrwg74[d$yrwg74<0]<-NA
## Create table 1
d3<-d[d$strata>0,]
dim(d3)
t1<-rep(table(d3$urban),4)
t2<-rep(table(d3$religion:d3$urban),2)
t3<-table(d3$school:d3$religion:d3$urban)
t1a<-rep(round(100*tapply(1*(d3$school=="Catholic"),d3$urban,mean)),4)
t2a<-rep(round(100*tapply(1*(d3$school=="Catholic"),d3$religion:d3$urban,mean)),2)
t3a<-round(100*tapply(1*(d3$school=="Catholic"),
d3$school:d3$religion:d3$urban,mean))
tab1<-cbind(t3,t2,t1,t3a,t2a,t1a)
tab1<-tab1[c(1,5,3,7,2,6,4,8),c(3,2,1,6,5,4)]
round(tab1)
rm(d3)
## Create plots in Figure 1
## A supplementary function
densityit<-function(x,main="",yl=""){
who<-d$strata>0
s<-d$strata[who]
x<-x[who]
a<-aov(x~factor(s))
x<-x[order(s)]
xm<-matrix(x,25,length(unique(s)))
xbar<-apply(xm,2,mean,na.rm=TRUE)
xc<-xm-outer(rep(1,25),xbar,"*")
fval<-round(as.vector(anova(a)$F),1)[1]
adj<-mean(x,na.rm=TRUE)+as.vector(xc)
tmp<-c(x,adj)
mx<-max(tmp,na.rm=TRUE)
mn<-min(tmp,na.rm=TRUE)
sub<-paste(" F = ",fval)
sdall<-round(sd(x,na.rm=TRUE),1)
sdwithin<-round(sqrt(anova(a)$Mean[2]),1)
sub<-paste("st. dev: all=",sdall,", within=",sdwithin,sub,sep="")
plot(density(adj[!is.na(adj)],adjust=4),xlim=c(mn,mx),
main=main,xlab=yl,sub=sub,cex.sub=.9)
lines(density(x[!is.na(x)]),lty=2)
abline(v=mean(x,na.rm=TRUE),lty=1,col="grey")
}
## Plot for IQ
densityit(d$gwiiq_bm,main="IQ Before High School",yl="IQ")
## The following code shows the steps
## of calculating the upper bounds of one p-values in Table 2
## of the paper Karmakar, Small, and Rosenbaum (2018).
## Please see the paper for details of the analysis and the
## relevant references.
library(senstrat)
library(sensitivitymv)
## Some preliminary
d2<-d[d$strata>0,]
d2$strata<-factor(d2$strata)
wages<-d2$yrwg74/10 # convert from hundreds to thousands
wages[wages<0]<-NA
d2<-cbind(d2,wages)
d2<-d2[!is.na(d2$wages),]
rm(wages)
library(MASS)
## van Elteren's rank test for two sample
## tests with multiple strata
VEwilcoxon<-function (y, z, st, tau = 0) {
ymiss<-is.na(y)
y<-y[!ymiss]
z<-z[!ymiss]
st<-st[!ymiss]
if (is.factor(st)) st <- as.integer(st)
ust <- sort(unique(st))
nst <- length(ust)
if (tau != 0) y <- y - tau * z
sc <- rep(NA, length(y))
for (i in 1:nst) {
who <- (st == ust[i])
yi <- y[who]
vi <- rank(yi)/(length(yi)+1)
sc[who] <- vi
}
list(sc=sc,z=z,st=st)
}
## A function that calculates one row
## of Table 2
calculatePval <- function(y, gamma){
ws<-VEwilcoxon(y,1*(urban=="Urban"),strata)
rurban<-senstrat(ws$sc,ws$z,ws$st,gamma=gamma,detail=TRUE)
purban<-rurban$LinearBoundResult[1]
ws<-VEwilcoxon(y,1*(religion=="Catholic"),strata:urban)
rreligion<-senstrat(ws$sc,ws$z,ws$st,gamma=gamma,detail=TRUE)
preligion<-rreligion$LinearBoundResult[1]
ws<-VEwilcoxon(y,1*(school=="Catholic"),strata:religion:urban)
rdirect<-senstrat(ws$sc,ws$z,ws$st,gamma=gamma,detail=TRUE)
pdirect<-rdirect$LinearBoundResult[1]
pcomb<-truncatedP(c(preligion,purban,pdirect))
result<-c(purban,preligion,pdirect,pcomb)
names(result)<-c("urban","religion","direct","combination")
round(result, 4)
}
attach(d2)
## The second row on the top part of the table (i.e.,
## stratified analysis w/o covariate adjustment)
calculatePval(y=wages, gamma=1.1)
# urban religion direct combination
# 0.0000 0.0835 0.0422 0.0000
## Stratified analysis with covariate adjustment
l2bmpin1.NoNA<-log2(1+bmpin1.NoNA)
## Testing for H0: beta=\$0
md<-rlm(wages~gwiiq_bm+edfa57q.miss+edmo57q.miss+bmpin1.miss+
ocsf57.miss+edfa57q.NoNA+edmo57q.NoNA+l2bmpin1.NoNA+
ocsf57.NoNA+ocpf57.NoNA+strata)
y<-md$residual
calculatePval(y, gamma=1.1)
# urban religion direct combination
# 0.0001 0.1115 0.0667 0.0001
## Testing for H0: beta=\$500
wages<-d2$yrwg74
wages1 = (wages*100)-500*1*(school=="Catholic")
md<-rlm(wages1~gwiiq_bm+edfa57q.miss+edmo57q.miss+bmpin1.miss+
ocsf57.miss+edfa57q.NoNA+edmo57q.NoNA+l2bmpin1.NoNA+
ocsf57.NoNA+ocpf57.NoNA+strata)
y<-md$residual
calculatePval(y, gamma=1.1)
# urban religion direct combination
# 0.0005 0.3105 0.4345 0.0110
## End(Not run)
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.