hdm: Interfaces for hdm package for data science pipelines.

Description Usage Arguments Details Value Author(s) Examples

Description

Interfaces to hdm functions that can be used in a pipeline implemented by magrittr.

Usage

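ntbt_rlasso(data, ...)
ntbt_rlassoATE(data, ...)
ntbt_rlassoATET(data, ...)
ntbt_rlassoLATE(data, ...)
ntbt_rlassoLATET(data, ...)
ntbt_rlassoEffects(data, ...)
ntbt_rlassoIV(data, ...)
ntbt_rlassologit(data, ...)
ntbt_tsls(data, ...)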

Arguments

data

data frame, tibble, list, ...

...

Other arguments passed to the corresponding interfaced function.

Details

Interfaces call their corresponding interfaced function.

Value

Object returned by interfaced function.

Author(s)

Roberto Bertolusso

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
## Not run: 
library(intubate)
library(magrittr)
library(hdm)


## ntbt_rlasso: interface to rlasso, the function for Lasso estimation
##              under homoscedastic and heteroscedastic non-Gaussian
##              disturbances.

## Simulated sparse linear model: only the first s of the p regressors
## carry a non-zero coefficient.
set.seed(1)
n <- 100  # sample size
p <- 100  # number of variables
s <- 3    # number of variables with non-zero coefficients
X <- matrix(rnorm(n * p), nrow = n, ncol = p)
beta <- rep(c(5, 0), times = c(s, p - s))
Y <- X %*% beta + rnorm(n)

## Xnames is a copy of X whose columns are labelled V1, ..., Vp.
Xnames <- X
colnames(Xnames) <- paste0("V", seq_len(p))

## Collect response and named regressors in one list, then remove the
## loose copies from the workspace so they exist only inside dta.
dta <- list(Y = Y, Xnames = Xnames)
rm(Y, Xnames)

## Call to the original (interfaced) function.
rlasso(Y ~ Xnames, data = dta)

## The interface takes the data as its first parameter ...
ntbt_rlasso(dta, Y ~ Xnames)

## ... which makes it straightforward to use in a pipeline.
dta %>%
  ntbt_rlasso(Y ~ Xnames)


## Functions for estimation of treatment effects:
## ntbt_rlassoATE, ntbt_rlassoATET, ntbt_rlassoLATE, ntbt_rlassoLATET.
## The help pages of the interfaced functions do not include examples of
## use, so none are shown here. As with the other interfaces, data is the
## first parameter, so they can be used easily in a pipeline.



## ntbt_rlassoEffects: interface to rlassoEffects
##                     (rigorous Lasso for Linear Models: Inference)

## Simulated sparse linear model with intercept 1; the regressors are
## labelled X1, ..., Xp when the matrix is created.
set.seed(1)
n <- 100  # sample size
p <- 100  # number of variables
s <- 3    # number of variables with non-zero coefficients
X <- matrix(rnorm(n * p), nrow = n, ncol = p,
            dimnames = list(NULL, paste0("X", seq_len(p))))
beta <- rep(c(3, 0), times = c(s, p - s))
y <- 1 + X %*% beta + rnorm(n)

## Data frame with the response in a first column named "y", followed by
## the regressors.
data <- data.frame(cbind(y = drop(y), X))

## Formula regressing y on every column of X.
fm <- as.formula(paste("y ~", paste(colnames(X), collapse = "+")))

## Remove the loose copies so y and the X columns exist only in data.
rm(y, X)

## Call to the original (interfaced) function.
rlassoEffects(fm, I = ~ X1 + X2 + X3 + X50, data = data)

## The interface takes the data as its first parameter ...
ntbt_rlassoEffects(data, fm, I = ~ X1 + X2 + X3 + X50)

## ... which makes it straightforward to use in a pipeline.
data %>%
  ntbt_rlassoEffects(fm, I = ~ X1 + X2 + X3 + X50)



## ntbt_rlassoIV: Post-Selection and Post-Regularization Inference
##                in Linear Models with Many Controls and Instruments
## The example uses the non-formula variant. Please see the note below
## about a possible problem caused by the argument name "d".
data(EminentDomain)
dta <- list(z = EminentDomain$logGDP$z, # instruments
            x = EminentDomain$logGDP$x, # exogenous variables
            y = EminentDomain$logGDP$y, # outcome variable
            d = EminentDomain$logGDP$d) # treatment / endogenous variable
str(dta)

## Original function to interface.
## with() evaluates the call using the elements of dta without modifying
## the search path, so nothing stays attached if rlassoIV() fails
## (unlike an attach()/detach() pair).
with(dta,
     rlassoIV(x = x, d = d, y = y, z = z, select.X = FALSE, select.Z = TRUE))

## The interface puts data as first parameter.
## NOTE: BE CAREFUL (in general, in situations like the following).
## The parameter name "d" of rlassoIV can cause a hard-to-diagnose problem.
## If the arguments are passed by name while data is left unnamed, as in
## the following version (commented out)
# ntbt_rlassoIV(dta, x=x, d=d, y=y, z=z, select.X=FALSE, select.Z=TRUE)
## there will be an error: R partially matches "d" to the "data" parameter
## of the interface, and so uses d as the data instead of dta.
## It is not yet clear how to prevent that unwanted expansion inside the
## interface itself.
## One way to avoid the problem is to name "data" explicitly, as below
ntbt_rlassoIV(data = dta, x = x, d = d, y = y, z = z,
              select.X = FALSE, select.Z = TRUE)
## but this defeats the purpose (we do not want to have to name "data"),
## and it *cannot* be done in the pipeline version (where data is not
## written in the call).
## SOLUTION: for unfortunate parameter names such as "d", "da" or "dat",
## make sure that parameter is passed by position AND unnamed
ntbt_rlassoIV(dta, x = x, d, y = y, z = z,
              select.X = FALSE, select.Z = TRUE)
## In general, required arguments are passed unnamed and by position, like
ntbt_rlassoIV(dta, x, d, y, z, select.X = FALSE, select.Z = TRUE)
## which is how this example would normally have been written. It follows
## the form in which the original example was provided, which gave the
## opportunity to uncover this potentially unpleasant situation.

## so it can be used easily in a pipeline.
dta %>%
  ntbt_rlassoIV(x, d, y, z, select.X = FALSE, select.Z = TRUE)


## ntbt_rlassologit: interface to rlassologit
##                   (function for logistic Lasso estimation)
library(hdm)

## Data generating process: binary response whose success probability is
## the logistic transform of intercept + X %*% beta; only the first px
## coefficients are non-zero.
set.seed(2)
n <- 250
p <- 100
px <- 10
X <- matrix(rnorm(n * p), nrow = n, ncol = p)
beta <- rep(c(2, 0), times = c(px, p - px))
intercept <- 1
P <- local({
  # exp() of the linear predictor, computed once and reused below
  expEta <- exp(intercept + X %*% beta)
  expEta / (1 + expEta)
})
y <- rbinom(n, size = 1, prob = P)

## Collect response and regressors in one list, then remove the loose
## copies from the workspace so they exist only inside dta.
dta <- list(y = y, X = X)
rm(y, X)

## Call to the original (interfaced) function.
rlassologit(y ~ X, dta)

## The interface takes the data as its first parameter ...
ntbt_rlassologit(dta, y ~ X)

## ... which makes it straightforward to use in a pipeline.
dta %>%
  ntbt_rlassologit(y ~ X)



## ntbt_tsls: Two-Stage Least Squares Estimation (TSLS)
## No example provided

## End(Not run)

intubate documentation built on May 2, 2019, 2:46 p.m.