# (stray non-code text from a documentation scrape removed: "Nothing")
######################################################################################
##
## Internal functions, not intended for users
##
######################################################################################
#library( Matrix ); # bdiag()
ps.model <- function(dat, form.ps) {
  # Fit the logistic propensity score model and return clamped fitted scores.
  #
  # Args:
  #   dat: data frame used both to fit the model and to compute fitted values
  #   form.ps: propensity score model formula (string or formula)
  #
  # Returns:
  #   list(ps.hat = estimated propensity scores, fm = fitted glm object)
  fitted.model <- glm(formula = as.formula(form.ps), data = dat,
                      family = binomial(link = "logit"));
  raw.scores <- as.numeric(predict(fitted.model, newdata = dat, type = "response"));
  # Keep scores strictly inside (0, 1) so downstream weights stay finite.
  clamped.scores <- pmax(pmin(raw.scores, 0.999999), 0.000001);
  list(ps.hat = clamped.scores, fm = fitted.model);
}
psw.balance.core <- function( Xmat, Xname, weight, W, Z ) {
  # Check and plot covariate balance before and after weighting.
  #
  # Args:
  #   Xmat: matrix of covariate values
  #   Xname: names of covariates whose balance is to be checked
  #   weight: propensity score weighting method (used as the plot legend label)
  #   W: propensity score weights
  #   Z: treatment group indicator
  #
  # Returns:
  #   A list with two balance matrices (before/after weighting); a
  #   standardized-difference plot is drawn as a side effect.

  # Build the balance table for a given weight vector: one row per covariate,
  # columns are treated/control mean and sd plus standardized difference (%).
  # (Replaces two copy-pasted loops that grew the table with rbind().)
  balance.table <- function( wt ) {
    out <- t( vapply( Xname,
                      function( nm ) {
                        calc.std.diff( var.value = as.numeric( Xmat[ , nm ] ),
                                       wt = wt, Z = Z );
                      },
                      numeric( 5 ) ) );
    rownames( out ) <- Xname;
    colnames( out ) <- c( "treated.mean", "treated.sd",
                          "control.mean", "control.sd", "std.diff.pct" );
    out;
  }

  std.diff.before <- balance.table( rep( 1, nrow( Xmat ) ) );  # unweighted
  std.diff.after  <- balance.table( W );                       # weighted
  # Plot standardized differences before and after weighting.
  diff.plot( diff.before = std.diff.before[ , "std.diff.pct" ],
             diff.after = std.diff.after[ , "std.diff.pct" ],
             name = Xname, weight = weight );
  return( list( std.diff.before = std.diff.before,
                std.diff.after = std.diff.after ) );
}
psw.wt.core <- function( dat, beta.hat, omega, Q, trt.var, out.var, family, Xname, weight, delta=0.002, K=4 ) {
  # Core routine for the propensity-score-weighted estimator: computes the
  # weighted outcome means per arm, the treatment effect estimate(s), and a
  # sandwich (M-estimation) standard error that accounts for estimation of
  # the propensity score parameters.
  #
  # Args:
  #   dat: data set, one row per subject
  #   beta.hat: fitted propensity score model coefficients
  #   omega: vector of omega(e_i) values, one per subject
  #   Q: vector of weight denominators, one per subject (weight W_i = omega_i / Q_i)
  #   trt.var: treatment indicator variable name, numerically 0 or 1
  #   out.var: outcome variable name
  #   family: outcome family, "gaussian" or "binomial"
  #   Xname: covariate names in the propensity score model (intercept added below)
  #   weight: weighting method, passed to the omega-derivative routine
  #   delta: closeness to non-differentiable point in the omega function
  #   K: slope of the trapezoidal weight's left edge
  #
  # Returns:
  #   For "gaussian": list(est, std). For "binomial": point estimate and
  #   standard error for risk difference, relative risk, odds ratio, and
  #   log odds ratio.
  n <- nrow(dat); # number of subjects
  ## point estimation: omega/Q-weighted outcome means in each arm
  mu1.hat <- sum( ( omega / Q ) * dat[ , trt.var ] * dat[ , out.var ] ) / sum( ( omega / Q ) * dat[ , trt.var ] );
  mu0.hat <- sum( ( omega / Q ) * ( 1 - dat[ , trt.var ]) * dat[ , out.var ] ) / sum( ( omega / Q ) * ( 1 - dat[ , trt.var ] ) );
  if ( family == "gaussian" ) {
    est <- mu1.hat - mu0.hat; # mean difference
  }
  if ( family == "binomial" ) {
    est.risk <- mu1.hat - mu0.hat; # risk difference
    est.rr <- mu1.hat / mu0.hat; # relative risk
    est.or <- ( mu1.hat / ( 1 - mu1.hat ) ) / ( mu0.hat / ( 1 - mu0.hat ) ); # odds ratio
    est.lor <- log( est.or ); # log odds ratio
  }
  ## standard error estimation: sandwich variance A^{-1} B A^{-T} / n over
  ## theta = (mu1, mu0, beta)
  Amat <- Bmat <- 0; # An and Bn matrix for variance calculation
  for (i in 1 : n) {
    Xi <- as.numeric( c( 1, dat[ i, Xname ] ) ); # design vector with intercept
    Zi <- dat[ i, trt.var ];
    Yi <- dat[ i, out.var ];
    ei <- calc.ps.Xbeta( Xmat = Xi, beta = beta.hat ); # propensity score
    ei.deriv1 <- calc.ps.deriv1( Xmat = Xi, beta = beta.hat ); # d e / d beta
    ei.deriv2 <- calc.ps.deriv2( Xi = Xi, beta = beta.hat ); # NOTE: computed but not used below
    omega.ei <- omega[ i ];
    omegaei.deriv <- omega.derive.ei( ps = ei, weight = weight, delta = delta, K=K ); # d omega / d e
    Qi <- Q[i];
    Qi.deriv <- 2*Zi - 1; # dQ/de; +1 for treated, -1 for control (presumably Q = e^Z (1-e)^(1-Z) — confirm against caller)
    # Stacked estimating equations: arm means (rows 1-2) and PS score (row 3).
    phi <- c( Zi * ( Yi - mu1.hat ) * omega.ei / Qi,
              ( 1 - Zi ) * ( Yi - mu0.hat ) * omega.ei / Qi,
              ( Zi -ei ) / ( ei * ( 1 - ei ) ) * ei.deriv1 );
    Bmat <- Bmat + outer( phi, phi ); # Bn matrix
    # first row of phi's first derivative w.r.t theta = (mu1, mu0, beta);
    # the beta part uses the quotient rule on omega/Q
    first.row <- c( - Zi * omega.ei / Qi, 0, Zi * ( Yi - mu1.hat ) * ei.deriv1 * ( Qi * omegaei.deriv - omega.ei * Qi.deriv ) / Qi^2 );
    # second row of phi's first derivative w.r.t theta
    second.row <- c( 0, - ( 1 - Zi ) * omega.ei / Qi, ( 1 - Zi ) * ( Yi - mu0.hat ) * ei.deriv1 * ( Qi * omegaei.deriv - omega.ei * Qi.deriv ) / Qi^2 );
    # third row of phi's first derivative w.r.t theta: the PS score does not
    # depend on (mu1, mu0); the beta block is the negative logistic information
    tmp0 <- matrix( 0, nrow = length( beta.hat ), ncol = 2 );
    tmp1 <- - ei * ( 1 - ei ) * colVec( Xi ) %*% Xi;
    third.row <- cbind( tmp0, tmp1 );
    phi.deriv <- rbind( first.row, second.row, third.row );
    Amat <- Amat + phi.deriv;
  }
  Amat <- Amat/n;
  Bmat <- Bmat/n;
  Amat.inv <- solve( Amat );
  var.mat <- ( Amat.inv %*% Bmat %*% t( Amat.inv ) ) / n; # sandwich variance of theta.hat
  #tmp <- c(1, -1, rep(0, length(beta.hat)));
  var.mat <- var.mat[ c( 1 : 2 ), c( 1 : 2 ) ]; # var-covar matrix for point estimators of individual groups
  # continuous outcome: delta method with gradient (1, -1)
  if ( family == "gaussian" ) {
    tmp1 <- c( 1, -1 );
    var.est <- rowVec( tmp1 ) %*% var.mat %*% colVec( tmp1 );
    std <- sqrt( as.numeric( var.est ) );
    ans <- list( est = est, std = std );
  }
  # binary outcome: delta method on each effect scale
  if ( family == "binomial" ) {
    # risk difference: gradient (1, -1)
    tmp.risk <- c( 1, -1 );
    var.risk <- rowVec( tmp.risk ) %*% var.mat %*% colVec( tmp.risk );
    std.risk <- sqrt( as.numeric( var.risk ) );
    # risk ratio: gradient of mu1/mu0 w.r.t. (mu1, mu0)
    tmp.rr <- c( 1 / mu1.hat, - mu1.hat / ( mu0.hat^2 ) );
    var.rr <- rowVec( tmp.rr ) %*% var.mat %*% colVec( tmp.rr );
    std.rr <- sqrt( as.numeric( var.rr ) );
    # odds ratio: gradient of the OR w.r.t. (mu1, mu0)
    tmp1 <- ( 1 - mu0.hat ) / ( mu0.hat * ( 1 - mu1.hat )^2 );
    tmp0 <- - mu1.hat / ( ( 1 - mu1.hat ) * mu0.hat^2 );
    tmp.or <- c( tmp1, tmp0 );
    var.or <- rowVec( tmp.or ) %*% var.mat %*% colVec( tmp.or );
    std.or <- sqrt( as.numeric( var.or ) );
    # log odds ratio: gradient of logit(mu1) - logit(mu0)
    tmp1 <- 1/( mu1.hat * ( 1 - mu1.hat ) );
    tmp0 <- - 1/( mu0.hat * ( 1 - mu0.hat ) );
    tmp.lor <- c( tmp1, tmp0 );
    var.lor <- rowVec( tmp.lor ) %*% var.mat %*% colVec( tmp.lor );
    std.lor <- sqrt( as.numeric( var.lor ) );
    ans <- list( est.risk = est.risk, std.risk = std.risk,
                 est.rr = est.rr, std.rr = std.rr,
                 est.or = est.or, std.or = std.or,
                 est.lor = est.lor, std.lor = std.lor );
  }
  return( ans );
}
psw.aug.core <- function(dat, beta.hat, omega, Q, out.ps, out.outcome, trt.var, out.var, weight, family, K, delta=0.002){
  # Core function for the augmented (doubly robust) estimator: combines
  # propensity score weighting with arm-specific outcome regression and
  # computes a sandwich standard error over all stacked estimating equations.
  #
  # Args:
  #   dat: data to be used
  #   beta.hat: propensity score model coefficients
  #   omega: vector of omega(e_i) values, one per subject
  #   Q: vector of weight denominators (weight W_i = omega_i / Q_i)
  #   out.ps: object returned by fitting the propensity score model with glm()
  #   out.outcome: object returned by fitting the outcome models (fm1, fm0,
  #     Y.hat.m1, Y.hat.m0)
  #   trt.var: dichotomous treatment indicator, 1 = treated, 0 = control
  #   out.var: outcome variable name
  #   weight: propensity score weighting method
  #   family: outcome family, "gaussian" or "binomial"
  #   K: slope of the trapezoidal weight's left edge
  #   delta: closeness to non-differentiable point in omega, 0.002 by default
  #
  # Returns:
  #   list(est, std) for "gaussian"; list(est.risk, std.risk) for "binomial"
  #   (rr/or/lor point estimates are computed below but not returned here).
  n <- nrow( dat );
  W <- omega/Q; # generic weight;
  # mu1/mu3: weighted mean residuals; mu2/mu4: omega-weighted predictions.
  # The augmented contrast is (mu1 + mu2) - (mu3 + mu4).
  tmp1 <- W*dat[ , trt.var ];
  mu1.hat <- sum( tmp1*(dat[ , out.var ] - out.outcome$Y.hat.m1) )/sum( tmp1 );
  mu2.hat <- sum( omega*out.outcome$Y.hat.m1 )/sum( omega );
  tmp0 <- W*( 1 - dat[ , trt.var ] );
  mu3.hat <- sum( tmp0*(dat[ , out.var ] - out.outcome$Y.hat.m0) )/sum( tmp0 );
  mu4.hat <- sum( omega*out.outcome$Y.hat.m0 )/sum( omega );
  if ( family == "gaussian" ) {
    est <- mu1.hat + mu2.hat - mu3.hat - mu4.hat;
  }
  if ( family == "binomial" ) {
    p1.hat <- mu1.hat + mu2.hat; # augmented event probability, treated
    p0.hat <- mu3.hat + mu4.hat; # augmented event probability, control
    est.risk <- p1.hat - p0.hat;
    est.rr <- p1.hat/p0.hat;
    est.or <- (p1.hat/(1-p1.hat)) / (p0.hat/(1-p0.hat));
    est.lor <- log( est.or );
  }
  # Fitted parameters of both outcome models and the PS model.
  alpha1.hat <- as.numeric( coef(out.outcome$fm1) );
  alpha0.hat <- as.numeric( coef(out.outcome$fm0) );
  beta.hat <- as.numeric( coef(out.ps$fm) );
  n.alpha1 <- length( alpha1.hat );
  n.alpha0 <- length( alpha0.hat );
  n.beta <- length( beta.hat );
  # sandwich variance over theta = (mu1..mu4, alpha1, alpha0, beta)
  n <- nrow(dat);
  Amat <- Bmat <- 0;
  for (i in 1:n) {
    Xi <- as.numeric( c(1, dat[i, names(coef(out.ps$fm))[-1] ] ) ); # variable in the propensity score model
    Vi <- as.numeric( c(1, dat[i, names(coef(out.outcome$fm1))[-1] ] ) ); # variable in outcome model
    Zi <- dat[ i, trt.var ] ;
    Yi <- dat[ i, out.var ] ;
    Wi <- W[i];
    Qi <- Q[i];
    omegai <- omega[i];
    ei <- calc.ps.Xbeta(Xmat=Xi, beta=beta.hat); # propensity score
    ei.deriv1 <- calc.ps.deriv1(Xmat=Xi, beta=beta.hat); # d e / d beta
    ei.deriv2 <- calc.ps.deriv2( Xi=Xi, beta=beta.hat); # NOTE: computed but not used below
    Wi.deriv.beta <- calc.W.derive.beta( Zi=Zi, Xi=Xi, omega.ei=omegai, beta.hat=beta.hat, ei=ei, Qi=Qi, weight=weight, delta=delta, K=K );
    omegai.deriv.beta <- calc.omega.derive.beta( Xi=Xi, beta.hat=beta.hat, ei=ei, weight=weight, delta=delta, K=K );
    if ( family == "gaussian" ) {
      # Linear outcome model: mean m = V'alpha and its derivatives.
      m1.hat <- sum(Vi*alpha1.hat);
      m0.hat <- sum(Vi*alpha0.hat);
      m1.deriv.alpha1 <- Vi;
      m0.deriv.alpha0 <- Vi;
      s1.deriv.alpha1 <- -outer(Vi, Vi);
      s0.deriv.alpha0 <- -outer(Vi, Vi);
    }
    if ( family == "binomial" ) {
      # Logistic outcome model: mean m = expit(V'alpha) and its derivatives.
      tmp1 <- sum(Vi*alpha1.hat);
      tmp0 <- sum(Vi*alpha0.hat);
      m1.hat <- exp(tmp1)/(1+exp(tmp1));
      m0.hat <- exp(tmp0)/(1+exp(tmp0));
      m1.deriv.alpha1 <- m1.hat*(1-m1.hat)*Vi;
      m0.deriv.alpha0 <- m0.hat*(1-m0.hat)*Vi;
      s1.deriv.alpha1 <- -m1.hat*(1-m1.hat)*outer(Vi, Vi);
      s0.deriv.alpha0 <- -m0.hat*(1-m0.hat)*outer(Vi, Vi);
    }
    # Stacked estimating equations: rows 1-4 for mu1..mu4, rows 5-6 for the
    # outcome model scores, row 7 for the PS score.
    this.phi.row1 <- Wi*Zi*( Yi - m1.hat - mu1.hat );
    this.phi.row2 <- omegai*( m1.hat - mu2.hat );
    this.phi.row3 <- Wi*(1-Zi)*( Yi - m0.hat - mu3.hat );
    this.phi.row4 <- omegai*( m0.hat - mu4.hat );
    this.phi.row5 <- Zi*( Yi - m1.hat )*Vi;
    this.phi.row6 <- (1-Zi)*( Yi - m0.hat )*Vi;
    this.phi.row7 <- (Zi-ei)/ei/(1-ei)*ei.deriv1;
    this.phi <- c( this.phi.row1, this.phi.row2, this.phi.row3,
                   this.phi.row4, this.phi.row5, this.phi.row6, this.phi.row7 );
    Bmat <- Bmat + outer( this.phi, this.phi );
    # Jacobian of the estimating equations, assembled in four quadrants.
    quad1 <- diag( c( -Wi*Zi, -omegai, -Wi*(1-Zi), -omegai ) ); # d(rows 1-4)/d(mu1..mu4)
    quad2 <- matrix(0, nrow=n.alpha1+n.alpha0+n.beta, ncol=4); # d(rows 5-7)/d(mu1..mu4) = 0
    tmp <- c( -Wi*Zi*m1.deriv.alpha1, rep(0, n.alpha0), Zi*( Yi - m1.hat - mu1.hat )*Wi.deriv.beta,
              omegai*m1.deriv.alpha1, rep(0, n.alpha0), ( m1.hat - mu2.hat )*omegai.deriv.beta,
              rep(0, n.alpha1), -Wi*(1-Zi)*m0.deriv.alpha0, (1-Zi)*( Yi - m0.hat - mu3.hat )*Wi.deriv.beta,
              rep(0, n.alpha1), omegai*m0.deriv.alpha0, ( m0.hat - mu4.hat )*omegai.deriv.beta );
    quad3 <- matrix( tmp, byrow = TRUE, nrow = 4,
                     ncol = n.alpha1 + n.alpha0 + n.beta ); # d(rows 1-4)/d(alpha1, alpha0, beta)
    quad4.blk1 <- cbind( Zi*s1.deriv.alpha1, matrix(0, nrow=n.alpha1, ncol=n.alpha0 + n.beta) );
    quad4.blk2 <- cbind( matrix(0, nrow=n.alpha0, ncol=n.alpha1 ), (1-Zi)*s0.deriv.alpha0, matrix(0, nrow=n.alpha0, ncol=n.beta) );
    quad4.blk3 <- cbind( matrix(0, nrow=n.beta, ncol=n.alpha1+n.alpha0), -ei*(1-ei)*outer(Xi, Xi) );
    quad4 <- rbind( quad4.blk1, quad4.blk2, quad4.blk3 ); # d(rows 5-7)/d(alpha1, alpha0, beta)
    this.phi.deriv <- rbind( cbind(quad1, quad3) ,
                             cbind(quad2, quad4) );
    Amat <- Amat + this.phi.deriv;
  }
  Amat <- Amat/n;
  Bmat <- Bmat/n;
  Amat.inv <- solve(Amat) ;
  var.mat <- ( Amat.inv %*% Bmat %*% t(Amat.inv) )/n; # sandwich variance of theta.hat
  var.mat <- var.mat[ c(1:4), c(1:4) ]; # keep the (mu1..mu4) block
  if ( family == "gaussian" ) {
    tmp <- c( 1, 1, -1, -1 );
    var.est <- rowVec(tmp) %*% var.mat %*% colVec(tmp); # variance of mu1+mu2-mu3-mu4
    std <- sqrt( as.numeric(var.est) );
    ans <- list( est = est, std = std );
  }
  if ( family == "binomial" ) {
    # risk difference (only scale reported by this core function)
    tmp.risk <- c( 1, 1, -1, -1 );
    var.risk <- rowVec( tmp.risk ) %*% var.mat %*% colVec( tmp.risk );
    std.risk <- sqrt( as.numeric( var.risk ) );
    ans <- list( est.risk = est.risk, std.risk = std.risk );
  }
  return( ans );
}
psw.spec.test.core <- function( X.mat, V.mat, V.name, trt, beta.hat, omega, Q, trans.type, weight, K, delta=0.002 ) {
  # Propensity score model specification test: compares weighted covariate
  # means (after a monotone transformation g) between arms and forms a
  # chi-squared statistic using a sandwich variance estimate.
  #
  # Args:
  #   X.mat: covariate matrix for the PS model (rows used as-is in the score
  #     equations, so presumably it already contains the intercept column —
  #     confirm against the caller)
  #   V.mat: covariate matrix used for the balance test
  #   V.name: names of the variables tested
  #   trt: treatment indicator (1 = treated, 0 = control)
  #   beta.hat: fitted PS model parameters
  #   omega: vector of omega(e_i) values
  #   Q: vector of weight denominators (weight W = omega/Q)
  #   trans.type: transformation applied to each covariate mean
  #     ("log", "logit", "Fisher", "sqrt", or identity)
  #   weight: weighting method
  #   K: slope of the trapezoidal weight's left edge
  #   delta: closeness to non-differentiable point, 0.002 by default
  #
  # Returns:
  #   list with the transformed weighted means per arm, their difference
  #   B.hat, its variance matrix, the chi-squared statistic, degrees of
  #   freedom, and p-value.
  n <- nrow( X.mat ); # number of subjects
  W <- omega/Q; # generic weight
  # Weighted covariate means by arm, then transformed through g().
  mu.B1.hat <- as.numeric( t(V.mat) %*% colVec( W * trt ) ) / sum( W * trt ) ;
  mu.B0.hat <- as.numeric( t(V.mat) %*% colVec( W * ( 1 - trt ) ) ) / sum( W * ( 1 - trt ) ) ;
  eta.B1.hat <- g.fun( x=mu.B1.hat, fun.type = trans.type ) ;
  eta.B0.hat <- g.fun( x=mu.B0.hat, fun.type = trans.type ) ;
  theta.hat <- c( eta.B1.hat, eta.B0.hat, beta.hat ) ;
  B.hat <- eta.B1.hat - eta.B0.hat ; # balance statistic vector
  n.mu.B1 <- length( mu.B1.hat ) ;
  n.mu.B0 <- length( mu.B0.hat ) ; # NOTE: n.mu.B1 = n.mu.B0 = length(B.hat)
  n.beta <- length( beta.hat ) ;
  n.theta <- length(theta.hat) ;
  # D maps theta to B: B = eta.B1 - eta.B0; the beta entries get zero weight.
  D.mat <- cbind( diag(n.mu.B1), -diag(n.mu.B0),
                  matrix(0, nrow=n.mu.B1, ncol=n.beta) ) ;
  Meat.mat <- Bread.mat <- 0 ; # the meat and bread of the sandwich variance
  for ( i in 1 : n ) {
    Vi <- as.numeric( V.mat[i, ] );
    Xi <- as.numeric( X.mat[i, ] );
    Zi <- trt[i];
    Wi <- W[i];
    omegai <- omega[i];
    Qi <- Q[i];
    ei <- calc.ps.Xbeta(Xmat=Xi, beta=beta.hat); # propensity score
    ei.deriv <- calc.ps.deriv1( Xmat = Xi, beta = beta.hat ); # d e / d beta
    omegai.deriv <- omega.derive.ei( ps = ei, weight = weight, delta = delta, K = K ); # d omega / d e
    Qi.deriv <- 2 * Zi - 1; # dQ/de; sign depends on arm
    Wi.deriv.beta <- ei.deriv * ( Qi*omegai.deriv - omegai*Qi.deriv ) / Qi^2; # quotient rule on omega/Q
    # Stacked estimating equations for (eta.B1, eta.B0, beta).
    tmp1 <- Wi * Zi * ( Vi - g.inv( x = eta.B1.hat, fun.type = trans.type ) );
    tmp2 <- Wi * ( 1 - Zi ) * ( Vi - g.inv( x = eta.B0.hat, fun.type = trans.type ) );
    tmp3 <- ( Zi - ei ) * Xi; # PS score contribution
    this.phi <- c( tmp1, tmp2, tmp3 );
    Meat.mat <- Meat.mat + outer( this.phi, this.phi );
    # Jacobian blocks of the estimating equations w.r.t. theta.
    Block.11 <- - Wi * Zi *( diag( g.inv.deriv( x = eta.B1.hat, fun.type = trans.type ) ) );
    Block.12 <- matrix( 0, nrow = n.mu.B1, ncol = n.mu.B0 );
    Block.13 <- Zi * ( colVec( Vi - g.inv( x = eta.B1.hat, fun.type = trans.type ) ) %*% rowVec( Wi.deriv.beta ) );
    Block.21 <- matrix( 0, nrow = n.mu.B0, ncol = n.mu.B1 ) ;
    Block.22 <- - Wi * ( 1 - Zi ) * ( diag( g.inv.deriv( x = eta.B0.hat, fun.type = trans.type ) ) );
    Block.23 <- (1-Zi)*( colVec( Vi - g.inv( x = eta.B0.hat, fun.type = trans.type ) ) %*% rowVec( Wi.deriv.beta ) );
    Block.31 <- matrix( 0, nrow = n.beta, ncol = n.mu.B1 );
    Block.32 <- matrix( 0, nrow = n.beta, ncol = n.mu.B0 );
    Block.33 <- -ei * ( 1 - ei ) * outer( Xi, Xi ) ; # negative logistic information
    Bread.mat <- Bread.mat + rbind( cbind( Block.11, Block.12, Block.13 ) ,
                                    cbind( Block.21, Block.22, Block.23 ) ,
                                    cbind( Block.31, Block.32, Block.33 ) );
  }
  B.mat <- Meat.mat/n;
  A.mat <- Bread.mat/n;
  A.mat.inv <- solve( A.mat );
  Sigma.theta.hat <- ( A.mat.inv %*% B.mat %*% t( A.mat.inv ) )/n; # sandwich variance of theta.hat
  Sigma.B.hat <- D.mat %*% Sigma.theta.hat %*% t( D.mat ); # variance of B.hat
  df <- qr(Sigma.B.hat)$rank; # degrees of freedom = rank of the variance matrix
  test.stat <- as.numeric( t(B.hat) %*% solve(Sigma.B.hat) %*% B.hat );
  pvalue <- pchisq( test.stat, df = df, lower.tail = FALSE );
  # chi-squared test for overall comparison
  names(eta.B1.hat) <- paste("eta.B1.", V.name, sep="");
  names(eta.B0.hat) <- paste("eta.B0.", V.name, sep="");
  names(B.hat) <- c( paste("B.hat.", V.name, sep="") )
  rownames(Sigma.B.hat) <- colnames(Sigma.B.hat) <- names(B.hat);
  # Add names to point and variance estimators
  return( list( weight = weight,
                V.name = V.name,
                g.B1.hat = eta.B1.hat,
                g.B0.hat = eta.B0.hat,
                B.hat = B.hat,
                var.B.hat = Sigma.B.hat,
                test.stat = test.stat,
                df = df,
                pvalue = pvalue
  ) );
}
mirror.hist.core <- function( ps.above, ps.below, wt.above, wt.below, add.weight,
                              label.above, label.below, nclass = 50 ) {
  # Mirror histogram showing the propensity score overlap between groups:
  # one group's histogram is drawn above the horizontal axis, the other's
  # below, with optional green overlays of the summed weights per bin.
  #
  # Args:
  #   ps.above: propensity scores drawn above the axis (control)
  #   ps.below: propensity scores drawn below the axis (treated)
  #   wt.above: weights for the "above" group
  #   wt.below: weights for the "below" group
  #   add.weight: overlay weighted frequencies on each bar?
  #   label.above, label.below: group labels (accepted but currently unused)
  #   nclass: suggested number of breaks; NULL falls back to hist()'s default
  #
  # Returns:
  #   None; a plot is drawn on the active graphics device.
  x0 <- ps.above ; wt0 <- wt.above ;
  x1 <- ps.below ; wt1 <- wt.below ;
  # Shared breaks so both halves of the plot are binned identically.
  # (Previously both branches of this if/else passed nclass through,
  # making the NULL case dead code.)
  if ( is.null(nclass) ) {
    breaks <- hist( c(x0, x1), plot = FALSE )$breaks ;
  } else {
    breaks <- hist( c(x0, x1), nclass = nclass, plot = FALSE )$breaks ;
  }
  fm.x0 <- hist( x0, breaks = breaks, plot = FALSE ) ;
  fm.x1 <- hist( x1, breaks = breaks, plot = FALSE ) ;
  x.range <- range( c(x0, x1) ) ;
  y.range <- c( -max( fm.x1$counts ), max( fm.x0$counts ) ) ;
  par( las=1, lwd = 2, mar=c(5, 5, 4, 2), bty="n" );
  plot( x=x.range, y=y.range, xaxt="n", yaxt="n", type="n" ,
        xlim=x.range, ylim=y.range, ylab="", xlab="", cex.lab=1 ) ;
  axis( side=1, at=pretty( x.range ), cex.axis=1 ) ;
  # Label the y axis with absolute counts, even on the mirrored half.
  axis( side=2, at=pretty( y.range ) , labels=abs( pretty( y.range ) ), cex.axis=1 ) ;
  title( xlab="Propensity score", cex.lab=1 )
  title( ylab="Frequency", cex.lab=1 );
  abline( h=0, lty=1 );
  # Draw one half of the mirror histogram; sgn = +1 above the axis, -1 below.
  draw.half <- function( fm, xs, wts, sgn, wt.col ) {
    for (i in 1:(length(breaks)-1)) {
      x <- c( fm$breaks[i], fm$breaks[i], fm$breaks[i+1], fm$breaks[i+1] ) ;
      y <- sgn * c( 0, fm$counts[i], fm$counts[i], 0 ) ;
      polygon( x, y ) ;
      if ( add.weight ) {
        # Overlay the summed weights of the observations in this bin.
        in.bin <- ( breaks[i] <= xs & xs <= breaks[i+1] ) ;
        y2 <- sgn * c( 0, sum(wts[in.bin]), sum(wts[in.bin]), 0 ) ;
        polygon( x, y2, col = wt.col, border = "black" ) ;
      }
    }
  }
  draw.half( fm.x0, x0, wt0, 1, "light green" ) ;   # above the axis
  draw.half( fm.x1, x1, wt1, -1, "dark green" ) ;   # below the axis
}
########################################################################################
##
## Supporting functions
##
########################################################################################
rowVec <- function(x) {
  # Coerce x into row-vector form (a 1 x n matrix for a plain vector).
  t(x);
}
colVec <- function(x) {
  # Coerce x into column-vector form (an n x 1 matrix for a plain vector).
  t(t(x));
}
outcome.model <- function(dat, form, trt.var, family) {
  # Fit the outcome model separately within each treatment arm and predict
  # the outcome for every subject under both arms.
  #
  # Args:
  #   dat: data on which the outcome models are fitted
  #   form: outcome model formula
  #   trt.var: treatment indicator variable name (numeric 0/1)
  #   family: outcome family, "gaussian" or "binomial"
  #
  # Returns:
  #   list(Y.hat.m1, Y.hat.m0, fm1, fm0): predicted outcomes for all subjects
  #   under treatment / control, plus both fitted glm objects.
  #
  # FIX: `family` was accepted but never passed to glm(), so binomial
  # outcomes were silently fitted as gaussian; predictions are now on the
  # response scale, matching the expit applied to these coefficients in
  # psw.aug.core.
  fm1 <- glm( as.formula( form ), data = dat[ dat[ , trt.var ] == 1, ], family = family );
  fm0 <- glm( as.formula( form ), data = dat[ dat[ , trt.var ] == 0, ], family = family );
  # type = "response" gives probabilities for binomial, means for gaussian.
  Y.hat.m1 <- as.numeric( predict( fm1, newdata = dat, type = "response" ) );
  Y.hat.m0 <- as.numeric( predict( fm0, newdata = dat, type = "response" ) );
  return( list( Y.hat.m1 = Y.hat.m1 ,
                Y.hat.m0 = Y.hat.m0 ,
                fm1 = fm1 , fm0 = fm0 ) );
}
calc.omega <- function(ps, weight, delta=0.002, K=4) {
  # Compute the tilting function omega(e) for the chosen weighting scheme.
  #
  # Args:
  #   ps: estimated propensity score
  #   weight: weighting method ("ATE", "MW", "ATT", "ATC", "OVERLAP",
  #     or "TRAPEZOIDAL")
  #   delta: closeness to the non-differentiable region (MW / trapezoidal)
  #   K: weighting coefficient for trapezoidal weighting
  #
  # Returns:
  #   Omega value(s); stops with an error for an unknown weighting method.
  switch( weight,
          "ATE" = 1,
          "MW" = calc.omega.MW( ps = ps, delta = delta ),
          "ATT" = ps,
          "ATC" = 1 - ps,
          "OVERLAP" = 4*ps*(1 - ps),
          "TRAPEZOIDAL" = calc.omega.trapzd( ps = ps, delta = delta, K = K ),
          stop("Error in calc.omega: weight method does not exist!") );
}
calc.ps.Xbeta <- function( Xmat, beta ) {
  # Propensity score e(X, beta) = expit(beta' X) for each column of Xmat.
  #
  # Args:
  #   Xmat: a vector, or a matrix whose columns are covariate vectors X_i
  #   beta: coefficient vector, length nrow(Xmat)
  #
  # Returns:
  #   Unnamed numeric vector of propensity scores.
  Xmat <- as.matrix(Xmat);
  lin.pred <- as.numeric(rowVec(beta) %*% Xmat);
  odds <- exp(lin.pred);
  scores <- odds/(1 + odds);  # expit of the linear predictor
  names(scores) <- NULL;
  scores;
}
calc.ps.deriv1 <- function(Xmat, beta) {
  # First derivative of the propensity score w.r.t. beta: e(1 - e) * X.
  #
  # Args:
  #   Xmat: a matrix with each column being X_i (a vector is accepted)
  #   beta: coefficients, length nrow(Xmat)
  #
  # Returns:
  #   Derivative in column form, same length as beta, with names stripped.
  Xmat <- as.matrix(Xmat);
  e.hat <- calc.ps.Xbeta( Xmat = Xmat, beta = beta );
  scaled <- e.hat*(1 - e.hat)*t(Xmat); # scale each row vector by e(1-e)
  names(scaled) <- rownames(scaled) <- NULL;
  t(scaled);
}
calc.ps.deriv2 <- function( Xi, beta ) {
  # Second derivative of the propensity score w.r.t. beta for one subject:
  # (1 - 2e) * X %*% t(e(1 - e) X).
  #
  # Args:
  #   Xi: covariate vector of a single subject (not a matrix)
  #   beta: coefficient vector, same length as Xi
  #
  # Returns:
  #   Square matrix of second derivatives.
  Xi <- colVec(Xi);
  e.hat <- calc.ps.Xbeta( Xmat=Xi, beta=beta );
  grad <- calc.ps.deriv1( Xmat=Xi, beta=beta );
  hess <- Xi %*% rowVec(grad);
  names(hess) <- rownames(hess) <- NULL;
  (1 - 2*e.hat)*hess;
}
calc.omega.MW <- function (ps, delta) {
  # Omega for the matching weight (MW): 2*min(e, 1-e), with a smooth cubic
  # bridge inside (0.5 - delta, 0.5 + delta) around the kink at e = 0.5.
  #
  # Args:
  #   ps: propensity score, scalar
  #   delta: half-width of the smoothed region around 0.5
  #
  # Returns:
  #   Omega value, scalar.
  if (ps <= 0.5 - delta) {
    return( 2*ps );
  }
  if (ps >= 0.5 + delta) {
    return( 2*(1 - ps) );
  }
  approx.omega.MW(ps, delta);  # smoothing cubic near the kink
}
approx.omega.MW <- function (ps, delta) {
  # Evaluate the cubic polynomial that smooths the MW omega function in the
  # non-differentiable region around ps = 0.5.
  #
  # Args:
  #   ps: propensity score, scalar, inside (0.5 - delta, 0.5 + delta)
  #   delta: closeness to the non-differentiable point
  #
  # Returns:
  #   Approximated omega value (1 x 1 result of the matrix product).
  coefs <- solve.A.MW(delta);
  rowVec(coefs) %*% c(1, ps, ps^2, ps^3);
}
solve.A.MW <- function(delta) {
  # Solve for coefficients A of the cubic p(e) = A1 + A2 e + A3 e^2 + A4 e^3
  # that matches the MW omega function in value and slope at both edges of
  # the smoothed region:
  #   p(0.5 - delta) = 2(0.5 - delta),  p'(0.5 - delta) =  2,
  #   p(0.5 + delta) = 2(0.5 - delta),  p'(0.5 + delta) = -2.
  #
  # Args:
  #   delta: pre-defined closeness to the kink at ps = 0.5
  #
  # Returns:
  #   4 x 1 matrix of cubic coefficients.
  if ( delta < 0.00001 ) {
    stop("*** ERROR in solve.a: delta too small ***");
  }
  lo <- 0.5 - delta;
  hi <- 0.5 + delta;
  # Rows: value at lo, slope at lo, value at hi, slope at hi.
  D <- matrix(c(1, lo, lo^2, lo^3,
                0, 1, 2*lo, 3*lo^2,
                1, hi, hi^2, hi^3,
                0, 1, 2*hi, 3*hi^2),
              nrow = 4, ncol = 4, byrow = TRUE);
  C <- 2*c(lo, 1, lo, -1);
  solve(D) %*% C; # coefficients of the cubic polynomial
}
calc.omega.trapzd <- function (ps, delta, K) {
  # Omega for trapezoidal weighting: rises with slope K, is flat at 1, falls
  # with slope -K; a cubic bridge is used within delta of the two kinks.
  #
  # Args:
  #   ps: propensity score, scalar
  #   delta: half-width of the smoothed regions
  #   K: trapezoidal weighting coefficient (edge slope)
  #
  # Returns:
  #   Omega value, scalar.
  if ( (0 < ps) & (ps <= 1/K - delta) ) {
    return( K*ps );                    # rising edge
  }
  if ( (1/K + delta <= ps) & (ps <= 1 - 1/K - delta) ) {
    return( 1 );                       # flat top
  }
  if ( (1 - 1/K + delta <= ps) & (ps < 1) ) {
    return( K*(1 - ps) );              # falling edge
  }
  approx.omega.trapzd(ps, delta, K);   # smoothing cubic near a kink
}
approx.omega.trapzd <- function (ps, delta, K) {
  # Evaluate the cubic polynomial that smooths the trapezoidal omega function
  # at whichever non-differentiable pivot ps falls near.
  #
  # Args:
  #   ps: propensity score, scalar, inside one of the smoothed regions
  #   delta: closeness to the non-differentiable points
  #   K: trapezoidal weight coefficient
  #
  # Returns:
  #   Approximated omega value (1 x 1 result of the matrix product).
  near.first <- (1/K - delta < ps) & (ps < 1/K + delta);
  coefs <- if ( near.first ) {
    solve.A.trapzd1st(delta=delta, K=K);  # pivot at ps = 1/K
  } else {
    solve.A.trapzd2nd(delta=delta, K=K);  # pivot at ps = 1 - 1/K
  }
  rowVec(coefs) %*% c(1, ps, ps^2, ps^3);
}
solve.A.trapzd1st <- function (delta, K) {
  # Solve for the coefficients of the cubic that smooths the trapezoidal
  # omega function at the first kink (ps = 1/K), matching value and slope of
  # the neighboring pieces:
  #   p(1/K - delta) = K*(1/K - delta),  p'(1/K - delta) = K,
  #   p(1/K + delta) = 1,                p'(1/K + delta) = 0.
  #
  # Args:
  #   delta: pre-defined closeness to the kink
  #   K: coefficient (edge slope) of the trapezoidal weight
  #
  # Returns:
  #   4 x 1 matrix of cubic coefficients.
  if ( delta < 0.00001 ) {
    stop("*** ERROR in solve.a: delta too small ***");
  }
  tmp1 <- 1/K - delta;
  tmp2 <- 1/K + delta;
  # Rows: value at tmp1, slope at tmp1, value at tmp2, slope at tmp2.
  D <- matrix(c(1, tmp1, tmp1^2, tmp1^3,
                0, 1, 2*tmp1, 3*tmp1^2,
                1, tmp2, tmp2^2, tmp2^3,
                0, 1, 2*tmp2, 3*tmp2^2),
              ncol = 4, nrow = 4, byrow = TRUE);
  # FIX: boundary conditions must equal the trapezoid pieces themselves.
  # A leftover "2*" factor (copied from solve.A.MW, where the 2 belongs to
  # the MW weight) doubled the polynomial and made omega discontinuous.
  C <- c(K*tmp1, K, 1, 0);
  A <- solve(D) %*% C; # coefficients of cubic polynomial
  A;
}
solve.A.trapzd2nd <- function (delta, K) {
  # Solve for the coefficients of the cubic that smooths the trapezoidal
  # omega function at the second kink (ps = 1 - 1/K), matching value and
  # slope of the neighboring pieces:
  #   p(1 - 1/K - delta) = 1,                p'(1 - 1/K - delta) = 0,
  #   p(1 - 1/K + delta) = K*(1/K - delta),  p'(1 - 1/K + delta) = -K.
  #
  # Args:
  #   delta: pre-defined closeness to the kink
  #   K: coefficient (edge slope) of the trapezoidal weight
  #
  # Returns:
  #   4 x 1 matrix of cubic coefficients.
  if ( delta < 0.00001 ) {
    stop("*** ERROR in solve.a: delta too small ***");
  }
  tmp1 <- 1 - 1/K - delta;
  tmp2 <- 1 - 1/K + delta;
  # Rows: value at tmp1, slope at tmp1, value at tmp2, slope at tmp2.
  D <- matrix(c(1, tmp1, tmp1^2, tmp1^3,
                0, 1, 2*tmp1, 3*tmp1^2,
                1, tmp2, tmp2^2, tmp2^3,
                0, 1, 2*tmp2, 3*tmp2^2),
              ncol = 4, nrow = 4, byrow = TRUE);
  # FIX: dropped the erroneous "2*" factor (copied from solve.A.MW) that
  # doubled the polynomial and made omega discontinuous at this kink.
  C <- c(1, 0, K*(1/K - delta), -K);
  A <- solve(D) %*% C; # coefficients of cubic polynomial
  A;
}
omega.derive.ei <- function(ps, weight, delta=0.002, K=4) {
  # First derivative of the omega function w.r.t. the propensity score,
  # dispatched on the selected weighting method.
  #
  # Args:
  #   ps: estimated propensity score
  #   weight: selected weighting method
  #   delta: pre-defined closeness to the non-differentiable regions
  #   K: weighting coefficient for trapezoidal weighting
  #
  # Returns:
  #   d omega / d e; stops for an unsupported weighting method.
  switch( weight,
          "ATE" = 0,
          "ATT" = 1,
          "ATC" = -1,
          "OVERLAP" = 4*(1 - 2*ps),
          "MW" = omega.derive.ei.MW(ps, delta),
          "TRAPEZOIDAL" = omega.derive.ei.trapzd(ps, delta, K),
          stop( "User defined first-order derivative of omega function is not provided!" ) );
}
omega.derive.ei.MW <- function (ps, delta) {
  # d omega / d e for the matching weight: +2 below the kink at 0.5, -2
  # above it, and the derivative of the smoothing cubic within delta of 0.5.
  #
  # Args:
  #   ps: propensity score
  #   delta: pre-defined closeness to the non-differentiable region
  #
  # Returns:
  #   First derivative of the MW omega function w.r.t. the propensity score.
  if ( (0 < ps) & (ps <= 0.5 - delta) ) {
    return( 2 );
  }
  if ( (0.5 + delta <= ps) & (ps <1) ) {
    return( -2 );
  }
  # Smoothed region: differentiate the cubic A1 + A2 e + A3 e^2 + A4 e^3.
  A <- solve.A.MW(delta);
  A[2] + 2*A[3]*ps + 3*A[4]*ps^2;
}
omega.derive.ei.trapzd <- function (ps, delta, K) {
  # d omega / d e for trapezoidal weighting: K on the rising edge, 0 on the
  # flat top, -K on the falling edge, with the smoothing cubic's derivative
  # inside the two delta-neighborhoods of the kinks.
  #
  # Args:
  #   ps: propensity score
  #   delta: pre-defined closeness to the non-differentiable regions
  #   K: coefficient of trapezoidal weighting
  #
  # Returns:
  #   First derivative of the trapezoidal omega function w.r.t. ps.
  if ( (0 < ps) & (ps <= 1/K - delta) ) {
    return( K );                                   # rising edge
  }
  if ( (1/K - delta < ps) & (ps < 1/K + delta) ) {
    A <- solve.A.trapzd1st(delta = delta, K = K);  # near first kink
    return( A[2] + 2*A[3]*ps + 3*A[4]*ps^2 );
  }
  if ( (1/K + delta <= ps) & (ps <= 1 - 1/K - delta) ) {
    return( 0 );                                   # flat top
  }
  if ( (1 - 1/K + delta <= ps) & (ps < 1) ) {
    return( -K );                                  # falling edge
  }
  A <- solve.A.trapzd2nd(delta = delta, K = K);    # near second kink
  A[2] + 2*A[3]*ps + 3*A[4]*ps^2;
}
calc.W.derive.beta <- function(Zi, Xi, omega.ei, beta.hat, ei, Qi, weight, delta, K ) {
  # First-order derivative of the subject weight W = omega/Q with respect to
  # beta, by the quotient rule:
  #   dW/dbeta = (de/dbeta) * (Q * domega/de - omega * dQ/de) / Q^2.
  #
  # Args:
  #   Zi: treatment indicator of the subject
  #   Xi: covariates of the i-th subject
  #   omega.ei: omega value for the subject
  #   beta.hat: parameter estimates of the propensity score model
  #   ei: propensity score of the subject
  #   Qi: denominator of the weight formula
  #   weight: weighting method
  #   delta: closeness to the non-differentiable points
  #   K: slope of the trapezoidal left edge
  #
  # Returns:
  #   Vector of the same length as beta.hat.
  d.ei <- calc.ps.deriv1( Xmat=Xi, beta=beta.hat );
  d.omega <- omega.derive.ei( ps = ei, weight = weight, delta = delta, K = K );
  d.Qi <- 2*Zi - 1; # dQ/de; +1 for treated, -1 for control
  d.ei*(Qi*d.omega - omega.ei*d.Qi)/Qi^2;
}
calc.omega.derive.beta <- function(Xi, beta.hat, ei, weight, delta=0.002, K=4 ) {
  # First-order derivative of omega with respect to beta, via the chain rule:
  #   domega/dbeta = (domega/de) * (de/dbeta).
  #
  # Args:
  #   Xi: covariates of the i-th subject
  #   beta.hat: parameter estimates of the propensity score model
  #   ei: propensity score of the i-th subject
  #   weight: weighting method
  #   delta: closeness to the non-differentiable points, 0.002 by default
  #   K: slope of the trapezoidal left edge, 4 by default
  #
  # Returns:
  #   Vector of the same length as beta.hat.
  d.ei <- calc.ps.deriv1( Xmat=Xi, beta=beta.hat );
  d.omega <- omega.derive.ei( ps=ei, weight=weight, delta=delta, K=K );
  d.omega * d.ei;
}
#' @import Hmisc
calc.std.diff <- function(var.value, wt, Z) {
  # Weighted standardized difference (in percent) of one covariate between
  # treatment arms, together with each arm's weighted mean and SD.
  # Note the returned difference is signed, not absolute.
  #
  # Args:
  #   var.value: covariate values for all subjects
  #   wt: per-subject weights
  #   Z: treatment indicator (1 = treated, 0 = control)
  #
  # Returns:
  #   c(treated mean, treated sd, control mean, control sd,
  #     standardized difference in % of the pooled sd).
  mean1 <- wtd.mean( x = var.value[Z==1], weights = wt[Z==1] );
  var1 <- wtd.var( x = var.value[Z==1], weights = wt[Z==1] );
  mean0 <- wtd.mean( x = var.value[Z==0], weights = wt[Z==0] );
  var0 <- wtd.var( x = var.value[Z==0], weights = wt[Z==0] );
  std.pct <- 100 * ( mean1 - mean0 ) / sqrt( ( var1 + var0 ) / 2 );
  c( mean1, sqrt(var1), mean0, sqrt(var0), std.pct );
}
diff.plot <- function( diff.before, diff.after, name, weight ) {
  # Dot plot of standardized mean differences before and after weighting,
  # one horizontal row per covariate, sorted by the unadjusted difference.
  #
  # Args:
  #   diff.before: standardized mean differences before weighting
  #   diff.after: standardized mean differences after weighting
  #   name: covariate names (same order as the difference vectors)
  #   weight: propensity score weighting method (used as the legend label)
  #
  # Returns:
  #   None; a plot is drawn on the active graphics device.
  # Widen the left margin to fit the longest covariate name.
  par( las=1, lwd = 2, mar=c(5, max( nchar(name) ) + 4, 4, 2), bty="n" );
  x.range <- range( c(diff.before, diff.after) );
  y.range <- c(1, length(name));
  ord <- order( diff.before, decreasing = T ); # largest unadjusted difference plotted on top
  plot( x=x.range, y=y.range, xaxt="n", yaxt="n", type="n",
        xlim=x.range, ylim=y.range, ylab="", xlab="Standardized mean difference" );
  axis( side=1, at=pretty( x.range ) );
  axis( side=2, at=length(name):1, labels=name[ord], tick=F );
  abline( v=0, lty=1, col="gray" ); # zero-difference reference line
  points( y = length(name):1, x = diff.before[ord], pch=4 );  # crosses: unadjusted
  points( y = length(name):1, x = diff.after[ord], pch=21 );  # circles: weighted
  legend("topleft", legend=c("Unadjusted", weight), pch=c(4, 21) );
}
#' @import gtools
g.fun <- function( x, fun.type ) {
  # Apply an elementwise monotone transformation to a covariate vector.
  # Supported types: "log", "logit", "Fisher" (Fisher's Z), "sqrt"; any other
  # value is treated as the identity transformation. log and logit inputs are
  # clamped away from 0 (and 1) before transforming.
  #
  # Args:
  #   x: vector of covariate values
  #   fun.type: vector of transformation types, same length as x
  #
  # Returns:
  #   Vector of transformed values; on a length mismatch, prints an error
  #   and returns a vector of NA of length(x).
  if ( length(x) != length(fun.type) ) {
    print("*** ERROR in g.fun() ***");
    return( rep(NA, length(x)) );
  }
  vapply( seq_along(x), function(j) {
    switch( fun.type[j],
            "log" = log( max( 1e-6, x[j] ) ),
            "logit" = logit( min( max( 1e-6, x[j]), 1-1e-6 ) ),
            "Fisher" = 0.5*( log( 1 + x[j] ) - log( 1 - x[j] ) ),
            "sqrt" = sqrt( x[j] ),
            x[j] ); # identity transformation, by default
  }, numeric(1) );
}
#' @import gtools
g.inv <- function( x, fun.type ) {
  # Elementwise inverse of g.fun(): map previously transformed values back
  # to the original scale for the log, logit, Fisher's Z and square-root
  # transformations (identity otherwise).
  #
  # Args:
  #   x: vector of previously transformed covariates
  #   fun.type: vector of transformation types, same length as x
  #
  # Returns:
  #   Vector of inverse-transformed values; on a length mismatch, prints an
  #   error and returns a vector of NA of length(x).
  if ( length(x) != length(fun.type) ) {
    print("*** ERROR in g.inv() ***") ;
    return( rep(NA, length(x)) ) ;
  } else {
    m <- length(x) ;
    ans <- rep(0, m) ;
    for (j in 1:m) {
      if ( fun.type[j] == "log" ) {
        ans[j] <- exp( x[j] ) ;
      } else if ( fun.type[j] == "logit" ) {
        ans[j] <- inv.logit( x[j] ) ;
      } else if ( fun.type[j] == "Fisher" ) {
        # Inverse Fisher's Z is tanh(x).
        # FIX: the denominator previously used the whole vector x instead of
        # x[j], which is wrong math and errors whenever length(x) > 1.
        ans[j] <- ( (exp(2*x[j])-1)/(exp(2*x[j])+1) );
      } else if ( fun.type[j] == "sqrt") {
        ans[j] <- x[j]^2;
      } else { # identity transformation, by default
        ans[j] <- x[j] ;
      }
    }
    return( ans ) ;
  }
}
#' @import gtools
g.inv.deriv <- function( x, fun.type ) {
  # Elementwise first derivative of the inverse transformations used by
  # g.inv(): d/dx exp(x), d/dx expit(x), d/dx tanh(x), d/dx x^2, or 1.
  #
  # Args:
  #   x: vector of previously transformed covariates
  #   fun.type: vector of transformation types, same length as x
  #
  # Returns:
  #   Vector of first derivatives; on a length mismatch, prints an error and
  #   returns a vector of NA of length(x).
  if ( length(x) != length(fun.type) ) {
    print("*** ERROR in g.inv.deriv() ***") ;
    return( rep(NA, length(x)) ) ;
  } else {
    m <- length(x) ;
    ans <- rep(0, m) ;
    for (j in 1:m) {
      if ( fun.type[j] == "log" ) {
        ans[j] <- exp( x[j] ) ;
      } else if ( fun.type[j] == "logit" ) {
        tmp <- inv.logit( x[j] ) ;
        ans[j] <- tmp*(1-tmp) ;
      } else if ( fun.type[j] == "Fisher" ) {
        # d/dx tanh(x) = 4 e^{2x} / (e^{2x} + 1)^2 = 1 - tanh(x)^2.
        # FIX: the previous code squared 2*exp(2x), giving 4 e^{4x}.
        tmp <- exp( 2*x[j] );
        ans[j] <- 4*tmp/( (tmp + 1)^2 );
      } else if ( fun.type[j] == "sqrt" ) {
        ans[j] <- 2*x[j];
      } else { # identity transformation, by default
        ans[j] <- 1 ;
      }
    }
    return( ans ) ;
  }
}
# (non-code boilerplate from a documentation-hosting page, commented out so
# this file parses as R:)
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.