cdgd0_manual: Perform unconditional decomposition with nuisance functions...

View source: R/cdgd0_manual.R

cdgd0_manual    R Documentation

Perform unconditional decomposition with nuisance functions estimated beforehand

Description

This function gives the user full control over the estimation of the nuisance functions. For the unconditional decomposition, three nuisance functions (YgivenGX.Pred_D0, YgivenGX.Pred_D1, and DgivenGX.Pred) need to be estimated beforehand and supplied as vectors of predictions. The nuisance functions should be estimated using cross-fitting unless a Donsker class condition is assumed.

Usage

cdgd0_manual(
  Y,
  D,
  G,
  YgivenGX.Pred_D1,
  YgivenGX.Pred_D0,
  DgivenGX.Pred,
  data,
  alpha = 0.05,
  weight = NULL
)

Arguments

Y

Outcome. The name of a numeric variable.

D

Treatment status. The name of a binary numeric variable taking values of 0 and 1.

G

Advantaged group membership. The name of a binary numeric variable taking values of 0 and 1.

YgivenGX.Pred_D1

A numeric vector of predicted Y values given X, G, and D=1. Vector length=nrow(data).

YgivenGX.Pred_D0

A numeric vector of predicted Y values given X, G, and D=0. Vector length=nrow(data).

DgivenGX.Pred

A numeric vector of predicted D values (i.e., predicted probabilities of D=1) given X and G. Vector length=nrow(data).

data

A data frame.

alpha

Significance level used for the confidence intervals, i.e., 1-alpha confidence intervals. Defaults to 0.05.

weight

Sampling weights. The name of a numeric variable. If unspecified, equal weights are used. Technically, the weight should be a deterministic function of X and G.

Value

A list of estimates.

Examples

# This example will take a minute to run.

# Load the example data set.
data(exp_data)

# Column names (as strings) for the outcome, treatment, group indicator,
# and the covariates used in the nuisance models.
Y <- "outcome"
D <- "treatment"
G <- "group_a"
X <- c("Q", "confounder")

# Work on a copy so the recoding done below does not touch exp_data.
data <- exp_data

# Fix the RNG state so the split and the model fits below are reproducible.
set.seed(1)

### estimate the nuisance functions with cross-fitting
# Randomly assign half of the row indices to sample1; sample2 is the rest.
# seq_len() is used instead of 1:nrow(data) so that an empty data frame
# yields an empty index set rather than c(1, 0).
sample1 <- sample(nrow(data), floor(nrow(data) / 2), replace = FALSE)
sample2 <- setdiff(seq_len(nrow(data)), sample1)

### outcome regression model
# Regress Y on D, G, and X with a cross-validated random forest, fitted
# separately on each half of the data. capture.output() keeps the console
# output produced during training from being printed.

message <- utils::capture.output(
  YgivenDGX.Model.sample1 <- caret::train(
    stats::reformulate(c(D, G, X), response = Y),
    data = data[sample1, ],
    method = "ranger",
    trControl = caret::trainControl(method = "cv"),
    tuneGrid = expand.grid(
      mtry = c(2, 4),
      splitrule = "variance",
      min.node.size = c(50, 100)
    )
  )
)
message <- utils::capture.output(
  YgivenDGX.Model.sample2 <- caret::train(
    stats::reformulate(c(D, G, X), response = Y),
    data = data[sample2, ],
    method = "ranger",
    trControl = caret::trainControl(method = "cv"),
    tuneGrid = expand.grid(
      mtry = c(2, 4),
      splitrule = "variance",
      min.node.size = c(50, 100)
    )
  )
)

### propensity score model
# Temporarily recode D from 0/1 to a factor with levels D0/D1
# (necessary for caret implementation of ranger).
data[[D]] <- as.factor(data[[D]])
levels(data[[D]]) <- c("D0", "D1")

# Classify D from G and X on each half of the data; classProbs = TRUE makes
# the fitted models able to return class probabilities.
message <- utils::capture.output(
  DgivenGX.Model.sample1 <- caret::train(
    stats::reformulate(c(G, X), response = D),
    data = data[sample1, ],
    method = "ranger",
    trControl = caret::trainControl(method = "cv", classProbs = TRUE),
    tuneGrid = expand.grid(
      mtry = c(1, 2),
      splitrule = "gini",
      min.node.size = c(50, 100)
    )
  )
)
message <- utils::capture.output(
  DgivenGX.Model.sample2 <- caret::train(
    stats::reformulate(c(G, X), response = D),
    data = data[sample2, ],
    method = "ranger",
    trControl = caret::trainControl(method = "cv", classProbs = TRUE),
    tuneGrid = expand.grid(
      mtry = c(1, 2),
      splitrule = "gini",
      min.node.size = c(50, 100)
    )
  )
)

# Convert D back to numeric: the factor codes are 1/2, so subtracting 1
# gives 0/1 again.
data[[D]] <- as.numeric(data[[D]]) - 1

### cross-fitted predictions
# Each half of the data is scored by the models trained on the other half.
# Preallocate as numeric (NA_real_) so the placeholders already have the
# type of the predictions that will fill them.
YgivenGX.Pred_D0 <- YgivenGX.Pred_D1 <- DgivenGX.Pred <- rep(NA_real_, nrow(data))

# Predicted outcome with the treatment set to 0 for every row.
pred_data <- data
pred_data[, D] <- 0
YgivenGX.Pred_D0[sample2] <- stats::predict(YgivenDGX.Model.sample1, newdata = pred_data[sample2, ])
YgivenGX.Pred_D0[sample1] <- stats::predict(YgivenDGX.Model.sample2, newdata = pred_data[sample1, ])

# Predicted outcome with the treatment set to 1 for every row.
pred_data <- data
pred_data[, D] <- 1
YgivenGX.Pred_D1[sample2] <- stats::predict(YgivenDGX.Model.sample1, newdata = pred_data[sample2, ])
YgivenGX.Pred_D1[sample1] <- stats::predict(YgivenDGX.Model.sample2, newdata = pred_data[sample1, ])

# Predicted probability of treatment. The probability column is selected by
# its class label "D1" (the level name given to D=1 when the propensity
# models were trained) rather than by position, so the result does not
# depend on the order of the factor levels.
pred_data <- data
DgivenGX.Pred[sample2] <- stats::predict(
  DgivenGX.Model.sample1,
  newdata = pred_data[sample2, ],
  type = "prob"
)[, "D1"]
DgivenGX.Pred[sample1] <- stats::predict(
  DgivenGX.Model.sample2,
  newdata = pred_data[sample1, ],
  type = "prob"
)[, "D1"]

# Run the decomposition using the cross-fitted nuisance predictions;
# alpha and weight are left at their defaults.
results <- cdgd0_manual(
  Y = Y,
  D = D,
  G = G,
  YgivenGX.Pred_D1 = YgivenGX.Pred_D1,
  YgivenGX.Pred_D0 = YgivenGX.Pred_D0,
  DgivenGX.Pred = DgivenGX.Pred,
  data = data
)

results

cdgd documentation built on June 16, 2025, 9:06 a.m.