#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# author: Reza Hosseini
## an implementation of matching using Matchit and fitting via zelig
# TODO: Reza Hosseini: implement the exact match faster and without MatchIt / Zelig
library("MatchIt")
library("Zelig")
library("dplyr")
library("data.table")
# it filters out continuous columns
# which become degenrate if we fit a glm model
# cols can be continuous or binary (0 / 1)
FilterContiCols_ifDegenrateGlm = function(
df, cols, valueCol, family="gaussian") {
formulaStr = paste0(
valueCol,
"~",
paste(cols, collapse="+"))
formula = as.formula(formulaStr)
predRegMod = glm(formula, data=df, family=family)
predRegMod_summ = summary(predRegMod)
x = predRegMod[["coefficients"]]
cols2 = names(x)[is.na(x)]
cols = setdiff(cols, cols2)
return(cols)
}
TestFilterContiCols_ifDegenrateGlm = function() {
n = 10
x1 = rnorm(n)
x2 = rnorm(n)
x3 = x1 + x2
y = 2*x1 + x2 + x3 + rnorm(n)
df = data.frame("x1"=x1, "x2"=x2, "x3"=x3, "y"=y)
FilterContiCols_ifDegenrateGlm(
df=df,
cols=c("x1", "x2", "x3"),
valueCol="y",
family="gaussian")
n = 10
x1 = rnorm(n)
x2 = rnorm(n)
x3 = x1 + x2
y = 2*x1 + x2 + x3 + rnorm(n) > 0
df = data.frame("x1"=x1, "x2"=x2, "x3"=x3, "y"=y)
FilterContiCols_ifDegenrateGlm(
df=df,
cols=c("x1", "x2", "x3"),
valueCol="y",
family="binomial")
}
# this function is for causal studies
FilterContCols_forCausal = function(
binDf,
predCols_bin,
valueCol) {
# first take out columns with only one value
predCols_bin2 = predCols_bin
omitCols = NULL
for (col in predCols_bin2) {
tab = table(binDf[ , col])
if (length(tab) < 2) {
warning(paste(
"The column:",
col,
"had only one value. Therefore was eliminated."))
omitCols = c(omitCols, col)
}
}
if (length(omitCols) > 0) {
predCols_bin2 = setdiff(predCols_bin2, omitCols)
}
predCols_bin = predCols_bin2
# some cont columns could be degenerate causing issues
# we take them out in first stage by fitting a model
# for treat as response
# this will eliminate cont columns which are already collinaer
# even with treat being a predictor as in the final model
predCols_bin2 = FilterContiCols_ifDegenrateGlm(
df=binDf,
cols=predCols_bin,
valueCol="treat",
family="binomial")
if (length(predCols_bin2) < length(predCols_bin)) {
warnings(paste(
"Some columns in matching were degenerate. ",
"We have omitted these columns: ",
paste(setdiff(predCols_bin, predCols_bin2), collapse="--")))
}
predCols_bin = predCols_bin2
# some control columns could be degenerate causing issues
# these will have coefficient of NA in models
# some models can handle that but Zelig cannot as of now
# we omit those
# we include treat as well and take it out afterwards
predCols_bin2 = FilterContiCols_ifDegenrateGlm(
df=binDf,
cols=c("treat", predCols_bin),
valueCol=valueCol,
family="gaussian")
if (!("treat" %in% predCols_bin2)) {
warning(paste(
"treatment col is too collinear with the pred cols you provided.",
"the alg tried to fix that by removing some columns from design matrix.",
"however it could not prevent degenerate models.",
"this might be caused by categorical vars with two many labels.",
collapse="\n"))
}
predCols_bin2 = setdiff(predCols_bin2, "treat")
if (length(predCols_bin2) < length(predCols_bin)) {
warnings(paste(
"Some columns in matching were degenerate. ",
"We have omitted these columns: ",
paste(setdiff(predCols_bin2, predCols_bin), collapse="--")))
}
return(predCols_bin2)
}
# estimate raw effect.
# the valueCol must be 0/1 and continuous
RawEffectCi = function(df, valueCol, treatCol) {
mod = glm(as.formula(paste0(valueCol, "~", treatCol)), data=df)
rawEffect_summ = summary(mod)
coef = rawEffect_summ[["coefficients"]]
rowName = rownames(coef)[grep(treatCol, rownames(coef))]
coefDf = data.frame(coef)
coefDf[ , "var"] = rownames(coef)
rawEffect = coefDf[(coefDf[ , "var"] %in% rowName), ]
x = rawEffect
lower = x[["Estimate"]] - 1.96 * x[["Std..Error"]]
upper = x[["Estimate"]] + 1.96 * x[["Std..Error"]]
raw_ci = c(lower, upper)
return(c(lower, upper))
}
TestRawEffectCi = function() {
n = 5000
treat = sample(c("drug", "placebo"), n, replace=TRUE)
u1 = 2*(treat == "drug") + 5 * rnorm(n)
u2 = sample(c("cat", "dog", "horse"), n, replace=TRUE)
y = 3*(treat == "drug") + u1 - 2 * (u2 == "dog") + rnorm(n)
df = data.frame("treat"=treat, "u1"=u1, "u2"=u2, "y"=y)
valueCol = "y"
predCols_categ = c("u2")
predCols_conti = c("u1")
treatCol = "treat"
RawEffectCi(df=df, valueCol=valueCol, treatCol=treatCol)
}
# idCol should be provided to join the matched data set later if needed
# if idCol is not provided we define an idCol: "id"
Process_andMatch = function(
df,
idCol=NULL,
predCols_categ=NULL,
predCols_conti=NULL,
treatCol,
treatPair=c(0, 1),
figsPath=NULL,
method="nearest",
dichomNum=4
) {
if (is.null(idCol)) {
idCol = "row_num_orig"
df[ , idCol] = 1:nrow(df)
}
# only keep necessary columns
df = df[ , c(idCol, predCols_conti, predCols_categ, treatCol)]
# only keep treatment pair
df = df[df[ , treatCol] %in% treatPair, ]
if (nrow(df) == 0) {
warnings(paste(
"The data size became 0 after picking the treatment pairs.",
"Make sure you have the correct treatment pairs."))
}
newCols = NULL
if (!is.null(predCols_conti)) {
## we turn the only two continuous/ordered control variables to categorical
# to do that we dichotomize each variable using quantiles
# the function below for each continuous variable will add
# a new column with the column name being the same
# with an extra suffix "_categ"
res = Add_dichomVarMulti(
dt=df,
cols=predCols_conti,
num=dichomNum)
newCols = res[["newCols"]]
dt = res[["dt"]]
## these are the new columns added
#print(newCols)
df = data.frame(dt)
for (col in newCols) {
#print(table(df[ , col]))
}
}
## defining treatment
predCols = c(predCols_categ, newCols)
df = df[ , c(idCol, treatCol, predCols)]
## define a new column called treat and assign 0, 1
df[ , "treat"] = plyr::mapvalues(
df[ , treatCol],
from=treatPair,
to=c("0", "1"))
#df[ , "treat"] = as.numeric(df[ , "treat"])
## check the number of units on control and treatment
print("data size per treatment arm for raw data")
ssTab = table(df[ , "treat"])
print(ssTab)
#### Explore control variables:
# (1) if their distribution is different
# (2) if they "effect" the response
# matching is usually necessary if both (1) and (2) hold
## Check if predictors distributions really differ across treatment groups
if (!is.null(figsPath)) {
for (col in predCols) {
#print(col)
df2 = df
df2[ , "treat"] = as.character(df2[ , "treat"])
res = PltCompare_categDist(df=df2, labelCol=col, compareCol="treat")
p = res[["p"]]
fn = paste0(figsPath, "compare_distbn_", col, ".png")
png(file=fn)
print(p)
dev.off()
#Pause()
}
}
# set reference for the treatment to be always the control arm: 0
# first removing factor ordering to avoid errors
df[ , "treat"] = factor(df[ , "treat"], ordered=FALSE)
df = within(df, treat <- relevel(treat, ref="0"))
## matching
## step 0: remove missing or re-assign
## below we calculate the percentage (out of 100%) of missing
# for each of our predictor/control variables
# then we assign the mode of observed data to them
# if the missing is more than 1%
# since missing could be informative we add a new value "not_avail"
for (col in predCols) {
df[ , col] = as.character(df[ , col])
}
for (col in predCols) {
print(col)
x = sum(is.na(df[ , col]))
print(100 * x / nrow(df))
if (x > 0 & x < 1) {
print("less than 1 percent missing. mode was assigned.")
df[is.na(df[ , col]) , col] = Zelig::Mode(df[ , col])
} else if (x >= 1) {
print("more than 1 percent missing. not_avail as assigned.")
df[is.na(df[ , col]) , col] = "not_avail"
}
}
# for treatment column we remove NAs as well
df = df[!is.na(df[ , "treat"]), ]
#dim(df)
#table(df[ , "treat"])
print("data size per treatment after removing NAs")
ssTab_afterRemoveNA = table(df[ , "treat"])
print(ssTab_afterRemoveNA)
# step 1
# building binary predictors
formulaStr = paste0("~", paste(predCols, collapse="+"))
formula = as.formula(formulaStr)
binDf = model.matrix(formula, data=df)
cols = colnames(binDf)
# making sure the column names are not
# causing issues for "formula" comprehension
cols = AbbrStringVec(
strings=cols,
wordLength=50,
replaceStrings=c("Inf", "\\(", "\\)", ",", "\\[", "\\]"),
sep="_")
cols = AbbrStringVec(
strings=cols,
wordLength=50,
replaceStrings=c("-"),
sep="minus")
cols = AbbrStringVec(
strings=cols,
wordLength=50,
replaceStrings=c("\\+"),
sep="plus")
cols = AbbrStringVec(
strings=cols,
wordLength=50,
replaceStrings=c("\\."),
sep="dot")
names(cols) = NULL
colnames(binDf) = cols
## removing intercept
predCols_bin = setdiff(colnames(binDf), "Intercept")
binDf = cbind(binDf, df[ , c("treat", idCol)])
# zelig expects binary variables even for treatCol
# it causes errors later down the line if we use factor
# df[ , "treat"] = as.numeric(df[ , "treat"])
binDf[ , "dummy_response"] = (1:nrow(binDf)) / nrow(binDf)
print("remove collinearity before matching")
predCols_bin = FilterContCols_forCausal(
binDf=binDf,
predCols_bin=predCols_bin,
valueCol="dummy_response")
predCols_bin_withTreat = c("treat", predCols_bin)
binDf = SubsetCols(binDf, dropCols="dummy_response")
### matching phase
matchStr = paste0("treat ~ ", paste(predCols_bin, collapse="+"))
matchFormula = as.formula(matchStr)
match = MatchIt::matchit(
matchFormula,
data=binDf,
method=method)
#subclass=6)
match_summ = summary(match)
matchDf = MatchIt::match.data(match)
dataPerc_afterMatching = round(100 * nrow(matchDf) / nrow(binDf), 1)
ssTab_afterMatching = table(matchDf[ , "treat"])
# lets remove the NA causing variables again after matching
matchDf0 = matchDf
matchDf0[ , "dummy_response"] = (1:nrow(matchDf0)) / nrow(matchDf0)
predCols_bin = FilterContCols_forCausal(
binDf=matchDf0,
predCols_bin=predCols_bin,
valueCol="dummy_response")
predCols_bin_withTreat = c("treat", predCols_bin)
print("survived data percent after matching")
print(dataPerc_afterMatching)
return(list(
"ssTab"=ssTab,
"ssTab_afterRemoveNA"=ssTab_afterRemoveNA,
"ssTab_afterMatching"=ssTab_afterMatching,
"binDf"=binDf,
"predCols_bin"=predCols_bin,
"predCols_bin_withTreat"=predCols_bin_withTreat,
"match"=match,
"match_summ"=match_summ,
"matchDf"=matchDf,
"dataPerc_afterMatching"=dataPerc_afterMatching
))
}
TestProcess_andMatch = function() {
# test 1
# x1 has effect on y event after matching for x2
# but the effect should be 2 after matching
# and should almost (1 + 2) before
#library("matchit")
n = 5000
treat = sample(c("drug", "placebo"), n, replace=TRUE)
u1 = 2*(treat == "drug") + 5 * rnorm(n)
u2 = sample(c("cat", "dog", "horse"), n, replace=TRUE)
y = 3*(treat == "drug") + u1 - 2 * (u2 == "dog") + rnorm(n)
df = data.frame("treat"=treat, "u1"=u1, "u2"=u2, "y"=y)
valueCol = "y"
predCols_categ = c("u2")
predCols_conti = c("u1")
treatCol = "treat"
treatPair = c("placebo", "drug")
figsPath = NULL
method = "exact"
df[ , "id"] = 1:nrow(df)
res = Process_andMatch(
df=df,
idCol="id",
predCols_categ=predCols_categ,
predCols_conti=predCols_conti,
treatCol=treatCol,
treatPair=treatPair,
figsPath=figsPath,
method=method)
summary(res[["match"]])
n = 5000
treat = sample(c("drug", "placebo"), n, replace=TRUE)
u1 = 2*(treat == "drug") + 5 * rnorm(n)
u2 = sample(c("cat", "dog", "horse"), n, replace=TRUE)
y = 3*(treat == "drug") + u1 - 2 * (u2 == "dog") + rnorm(n)
df = data.frame("treat"=treat, "u1"=u1, "u2"=u2, "y"=y)
valueCol = "y"
predCols_categ = c("u2")
predCols_conti = c("u1")
treatCol = "treat"
treatPair = c("placebo", "drug")
figsPath = NULL
method = "nearest"
res = Process_andMatch(
df=df,
idCol=NULL,
predCols_categ=predCols_categ,
predCols_conti=predCols_conti,
treatCol=treatCol,
treatPair=treatPair,
figsPath=figsPath,
method=method)
summary(res[["match"]])
res[["binDf"]]
}
# this fits models to matched data
# valueCol is extracted from original data
# for the exact method you use should set use_subclass to be TRUE
Fit_matchedData = function(
df,
valueCol,
binDf, # binary data before matching to calculate raw metrics
matchDf,
idCol="row_num_orig",
predCols_bin,
use_subclass=FALSE,
balance_wrtSubclass=FALSE) {
# if df does not have the idCol, the index must be simply the row num
if (!(idCol %in% colnames(df))) {
df[ , idCol] = 1:nrow(df)
}
# adding valueCol from original data to matched data
matchDf = merge(
matchDf,
df[ , c(idCol, valueCol)],
by=idCol,
all.x=TRUE,
all.y=FALSE)
binDf = merge(
binDf,
df[ , c(idCol, valueCol)],
by=idCol,
all.x=TRUE,
all.y=FALSE)
# Check if predictors are influential on response
formulaStr = paste0(valueCol, "~", paste(predCols_bin, collapse="+"))
formula = as.formula(formulaStr)
predRegMod = glm(formula, data=binDf)
predRegMod_summ = summary(predRegMod)
# fitting a simple model (only treat) with treatment before matching
raw_ci = RawEffectCi(df=binDf, valueCol=valueCol, treatCol="treat")
## fitting another model with treatment and other covariates
predCols_bin_withTreat = c("treat", predCols_bin)
# fitting a glm model with preds and treatment
formulaStr = paste0(
valueCol,
"~",
paste(predCols_bin_withTreat, collapse="+"))
formula = as.formula(formulaStr)
regMod = glm(formula, data=binDf)
regMod_summ = summary(regMod)
coef = regMod_summ[["coefficients"]]
rowName = rownames(coef)[grep("treat", rownames(coef))]
coefDf = data.frame(coef)
coefDf[ , "var"] = rownames(coef)
regEffect = coefDf[(coefDf[ , "var"] %in% rowName), ]
x = regEffect
lower = x[["Estimate"]] - 1.96 * x[["Std..Error"]]
upper = x[["Estimate"]] + 1.96 * x[["Std..Error"]]
reg_ci = c(lower, upper)
matchDf0 = matchDf
if (use_subclass) {
if (!("subclass" %in% colnames(matchDf))) {
warning(paste(
"subclass is not in matchDf.",
"subclass is only avail for some matching methods."))
}
matchDf0 = matchDf[ , "subclass", drop=FALSE]
matchDf0[ , "subclass"] = as.character(matchDf[ , "subclass"])
formulaStr = "~subclass"
formula = as.formula(formulaStr)
binDf_subclass = model.matrix(formula, data=matchDf0)
rm(matchDf0)
binDf_subclass = SubsetCols(binDf_subclass, drop="(Intercept)")
predCols_bin = colnames(binDf_subclass)
matchDf = cbind(matchDf[ , c("treat", valueCol)], binDf_subclass)
predCols_bin_withTreat = c("treat", predCols_bin)
}
## fit zelig with treatment
formulaStr = paste0(
valueCol,
"~",
paste(predCols_bin_withTreat, collapse="+"))
formula = as.formula(formulaStr)
matchDf_balanced = NULL
if (balance_wrtSubclass) {
res = BalanceSampleSize_wrtCols(
df=matchDf,
sliceCols="treat",
wrtCols="subclass",
itemCols=NULL,
sliceCombinValues_toBalance=NULL)
matchDf_balanced = res[["subDf"]]
matchDf = matchDf_balanced
}
raw_matched_ci = RawEffectCi(
df=matchDf, valueCol=valueCol, treatCol="treat")
## fitting a model
zeligMod = Zelig:::zelig(
formula,
data=matchDf,
model="ls",
cite=FALSE)
zeligMod_summ = summary(zeligMod)
res = GetEffect_fromZeligMod(zeligMod=zeligMod)
estimEffect = res[["estimEffect"]]
estimEffect_summ = res[["estimEffect_summ"]]
fd_ci = res[["fd_ci"]]
fd_mean = res[["fd_mean"]]
return(list(
"matchDf_balanced"=matchDf_balanced,
"zeligMod"=zeligMod,
"zeligMod_summ"=zeligMod_summ,
"estimEffect"=estimEffect,
"estimEffect_summ"=estimEffect_summ,
"raw_ci"=raw_ci,
"reg_ci"=reg_ci,
"raw_matched_ci"=raw_matched_ci,
"fd_ci"=fd_ci,
"fd_mean"=fd_mean
))
}
TestFit_matchedData = function() {
# test 1
# x1 has effect on y event after matching for x2
# but the effect should be 2 after matching
# and should almost (1 + 2) before
#library("matchit")
n = 5000
treat = sample(c("drug", "placebo"), n, replace=TRUE)
u1 = 2*(treat == "drug") + 5 * rnorm(n)
u2 = sample(c("cat", "dog", "horse"), n, replace=TRUE)
y = 3*(treat == "drug") + u1 - 2 * (u2 == "dog") + rnorm(n)
df = data.frame("treat"=treat, "u1"=u1, "u2"=u2, "y"=y)
valueCol = "y"
predCols_categ = c("u2")
predCols_conti = c("u1")
treatCol = "treat"
treatPair = c("placebo", "drug")
figsPath = NULL
method = "exact"
matchRes = Process_andMatch(
df=df,
idCol=NULL,
predCols_categ=predCols_categ,
predCols_conti=predCols_conti,
treatCol=treatCol,
treatPair=treatPair,
figsPath=figsPath,
method=method)
binDf = matchRes[["binDf"]]
matchDf = matchRes[["matchDf"]]
predCols_bin = matchRes[["predCols_bin"]]
fitRes = Fit_matchedData(
df=df,
valueCol="y",
binDf=binDf, # binary data before matching
matchDf=matchDf,
idCol="row_num_orig",
predCols_bin=predCols_bin,
use_subclass=FALSE)
## for the exact method we should set use_subclass to be TRUE
fitRes = Fit_matchedData(
df=df,
valueCol="y",
binDf=binDf, # binary data before matching
matchDf=matchDf,
idCol="row_num_orig",
predCols_bin=predCols_bin,
use_subclass=TRUE)
# test 2
# x1 has effect on y event after matching for x2
# but the effect should be 2 after matching
# and should almost (1 + 2) before
#library("matchit")
n = 100
treat = sample(c("drug", "placebo"), n, replace=TRUE)
u1 = 2*(treat == "drug") + 5 * rnorm(n)
u2 = sample(paste0("label", as.character(1:5)), n, replace=TRUE)
y = 3*(treat == "drug") + u1 - 2 * (u2 == "label1") + rnorm(n)
df = data.frame("treat"=treat, "u1"=u1, "u2"=u2, "y"=y)
valueCol = "y"
predCols_categ = c("u2")
predCols_conti = c("u1")
treatCol = "treat"
treatPair = c("placebo", "drug")
figsPath = NULL
method = "exact"
matchRes = Process_andMatch(
df=df,
idCol=NULL,
predCols_categ=predCols_categ,
predCols_conti=predCols_conti,
treatCol=treatCol,
treatPair=treatPair,
figsPath=figsPath,
method=method)
binDf = matchRes[["binDf"]]
matchDf = matchRes[["matchDf"]]
predCols_bin = matchRes[["predCols_bin"]]
matchDt = data.table(matchDf)
SortDf(matchDt[ , .N, c("treat", "subclass")], "subclass")
res = BalanceSampleSize(
df=matchDf,
sliceCols=c("treat", "subclass"))
matchDf_balanced = res[["subDf"]]
matchDt_balanced = data.table(matchDf_balanced)
SortDf(matchDt_balanced[ , .N, c("treat", "subclass")], "subclass")
fitRes = Fit_matchedData(
df=df,
valueCol="y",
binDf=binDf, # binary data before matching
matchDf=matchDf,
idCol="row_num_orig",
predCols_bin=predCols_bin,
use_subclass=FALSE)
fitRes[["raw_ci"]]
fitRes[["fd_ci"]]
fitRes = Fit_matchedData(
df=df,
valueCol="y",
binDf=binDf, # binary data before matching
matchDf=matchDf,
idCol="row_num_orig",
predCols_bin=predCols_bin,
use_subclass=FALSE)
fitRes[["raw_ci"]]
fitRes[["fd_ci"]]
## balance sample size
n = 2000
treat = sample(c("drug", "placebo"), n, replace=TRUE)
u1 = 2*(treat == "drug") + 5 * rnorm(n)
u2 = sample(paste0("label", as.character(1:5)), n, replace=TRUE)
y = 3*(treat == "drug") + u1 - 2 * (u2 == "label1") + rnorm(n)
df = data.frame("treat"=treat, "u1"=u1, "u2"=u2, "y"=y)
valueCol = "y"
predCols_categ = c("u2")
predCols_conti = c("u1")
treatCol = "treat"
treatPair = c("placebo", "drug")
figsPath = NULL
method = "exact"
matchRes = Process_andMatch(
df=df,
idCol=NULL,
predCols_categ=predCols_categ,
predCols_conti=predCols_conti,
treatCol=treatCol,
treatPair=treatPair,
figsPath=figsPath,
method=method)
binDf = matchRes[["binDf"]]
matchDf = matchRes[["matchDf"]]
predCols_bin = matchRes[["predCols_bin"]]
## for the exact method we should set use_subclass to be TRUE
fitRes = Fit_matchedData(
df=df,
valueCol="y",
binDf=binDf, # binary data before matching
matchDf=matchDf,
idCol="row_num_orig",
predCols_bin=predCols_bin,
use_subclass=FALSE,
balance_wrtSubclass=TRUE
)
fitRes[["raw_ci"]]
fitRes[["fd_ci"]]
fitRes[["raw_matched_ci"]]
matchDf_balanced = fitRes[["matchDf_balanced"]]
dt = data.table(matchDf_balanced)
SortDf(dt[ , .N, c("treat", "subclass")], "subclass")
}
# this does both steps of matching and fitting
# if the method is exact use_subclass should be FALSE or left to be NULL
Match_andFit = function(
df,
valueCol,
predCols_categ=NULL,
predCols_conti=NULL,
treatCol,
treatPair=c(0, 1),
figsPath=NULL,
method="nearest",
dichomNum=4,
use_subclass=FALSE,
balance_wrtSubclass=FALSE
) {
# use_subclass should be TRUE if the method is exact
#if (is.null(use_subclass) & method != "exact") {
# use_subclass = FALSE
#}
#if (is.null(use_subclass) & method == "exact") {
# use_subclass = TRUE
#}
matchRes = Process_andMatch(
df,
idCol=NULL,
predCols_categ=predCols_categ,
predCols_conti=predCols_conti,
treatCol=treatCol,
treatPair=treatPair,
figsPath=figsPath,
method=method,
dichomNum=dichomNum)
binDf = matchRes[["binDf"]]
matchDf = matchRes[["matchDf"]]
predCols_bin = matchRes[["predCols_bin"]]
fitRes = Fit_matchedData(
df=df,
valueCol=valueCol,
binDf=binDf, # binary data before matching
matchDf=matchDf,
idCol="row_num_orig",
predCols_bin=predCols_bin,
use_subclass=use_subclass,
balance_wrtSubclass=balance_wrtSubclass)
res = c(matchRes, fitRes)
return(res)
}
TestMatch_andFit = function() {
# test 1
# x1 has effect on y event after matching for x2
# but the effect should be 2 after matching
# and should almost (1 + 2) before
#library("matchit")
n = 5000
treat = sample(c("drug", "placebo"), n, replace=TRUE)
u1 = 2*(treat == "drug") + 5 * rnorm(n)
u2 = sample(c("cat", "dog", "horse"), n, replace=TRUE)
y = 3*(treat == "drug") + u1 - 2 * (u2 == "dog") + rnorm(n)
df = data.frame("treat"=treat, "u1"=u1, "u2"=u2, "y"=y)
valueCol = "y"
predCols_categ = c("u2")
predCols_conti = c("u1")
treatCol = "treat"
treatPair = c("placebo", "drug")
figsPath = NULL
method = "nearest"
res = Match_andFit(
df=df,
valueCol=valueCol,
predCols_categ=predCols_categ,
predCols_conti=predCols_conti,
treatCol=treatCol,
treatPair=treatPair,
figsPath=figsPath,
method=method,
use_subclass=FALSE)
summary(res[["match"]])
res[["estimEffect"]]
res[["raw_ci"]]
res[["reg_ci"]]
res[["fd_ci"]]
res[["fd_mean"]]
# x1 should not have any effect after controlling for x2
n = 200
treat = sample(c("1", "0"), n, replace=TRUE)
u1 = 1*(treat == "1") + 2*rnorm(n)
u2 = sample(c("cat", "dog", "horse"), n, replace=TRUE)
y = 2 * u1 - 2 * (u2 == "dog") + rnorm(n)
df = data.frame("treat"=treat, "u1"=u1, "u2"=u2, "y"=y)
valueCol = "y"
predCols_categ = c("u2")
predCols_conti = "u1"
treatCol = "treat"
treatPair = c("0", "1")
figsPath = NULL
res = Match_andFit(
df=df,
valueCol=valueCol,
predCols_categ=predCols_categ,
predCols_conti=predCols_conti,
treatCol=treatCol,
treatPair=treatPair,
figsPath=figsPath,
method="exact")
res[["raw_ci"]]
res[["reg_ci"]]
res[["fd_ci"]]
res = Match_andFit(
df=df,
valueCol=valueCol,
predCols_categ=predCols_categ,
predCols_conti=predCols_conti,
treatCol=treatCol,
treatPair=treatPair,
figsPath=figsPath,
method="exact",
use_subclass=TRUE)
res[["raw_ci"]]
res[["reg_ci"]]
res[["fd_ci"]]
s = summary(res[["match"]])
s
sum(s[["q.table"]]["Total"])
##
n = 2000
treat = sample(c("1", "0"), n, replace=TRUE)
u1 = 1*(treat == "1") + 2*rnorm(n)
u2 = sample(c("cat", "dog", "horse"), n, replace=TRUE)
u3 = u2
y = 2 * u1 - 2 * (u2 == "dog") + rnorm(n)
df = data.frame("treat"=treat, "u1"=u1, "u2"=u2, "u3"=u3, "y"=y)
valueCol = "y"
predCols_categ = c("u2", "u3")
predCols_conti = "u1"
treatCol = "treat"
treatPair = c("0", "1")
figsPath = NULL
res = Match_andFit(
df=df,
valueCol=valueCol,
predCols_categ=predCols_categ,
predCols_conti=predCols_conti,
treatCol=treatCol,
treatPair=treatPair,
figsPath=figsPath,
method="exact",
balance_wrtSubclass=TRUE)
res[["raw_ci"]]
res[["reg_ci"]]
res[["fd_ci"]]
}
# get the output from zelig
GetEffect_fromZeligMod = function(zeligMod) {
library(dplyr)
# we use try as the zelig set values can fail due to NA's
# in the coefs
trial1 = try({
xCont = Zelig:::setx(zeligMod, treat=0)
})
trial2 = try({
xTreat = Zelig:::setx(zeligMod, treat=1)
})
if (class(trial1) == "try-error" || class(trial2) == "try-error") {
return(list(
"estimEffect"=NULL,
"estimEffect_summ"=NULL,
"fd_ci"="",
"fd_mean"=NULL,
"att_ci"=att_ci))
}
estimEffect = Zelig:::sim(zeligMod, x=xCont, x1=xTreat)
estimEffect_summ = summary(estimEffect)
zqi = zeligMod %>%
Zelig:::setx(treat="0") %>%
Zelig:::setx1(treat="1") %>%
Zelig:::sim()
#pv0 = zqi$get_qi(qi ="pv", xvalue = "x")
#ev0 = zqi$get_qi(qi = "ev", xvalue = "x")
#pv1 = zqi$get_qi(qi = "pv", xvalue = "x")
#ev1 = zqi$get_qi(qi = "ev", xvalue = "x1")
fd = zqi$get_qi(qi="fd", xvalue="x1")
fd_ci = quantile(fd, c(0.025, 0.975))
fd_mean = mean(fd)
att_ci = NULL
#att = (zeligMod %>%
# ATT(treatment="treat", treat=1) %>%
# get_qi(qi="ATT", xvalue="TE"))
#att_ci = quantile(att, c(0.025, 0.0975))
return(list(
"estimEffect"=estimEffect,
"estimEffect_summ"=estimEffect_summ,
"fd_ci"=fd_ci,
"fd_mean"=fd_mean,
"att_ci"=att_ci))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.