knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "man/figures/README-",
  out.width = "100%"
)
The goal of SIHR is to provide inference procedures in the high-dimensional setting for (1) linear functionals in generalized linear regression (LF), (2) conditional average treatment effects in generalized linear regression (CATE), (3) quadratic functionals in generalized linear regression (QF), (4) inner products in generalized linear regression (InnProd), and (5) distances in generalized linear regression (Dist).
Currently, we support several generalized linear regression models, specified by setting the argument model to "linear", "logistic", or "logistic_alter".
You can install the development version from GitHub with:
# install.packages("devtools")
devtools::install_github("prabrishar1/SIHR")
These are basic examples showing how to solve common high-dimensional inference problems:
library(SIHR)
Generate Data and find the true linear functionals:
set.seed(0)
X = matrix(rnorm(100*120), nrow=100, ncol=120)
y = -0.5 + X[,1] * 0.5 + X[,2] * 1 + rnorm(100)
loading1 = c(1, 1, rep(0, 118))
loading2 = c(-0.5, -1, rep(0, 118))
loading.mat = cbind(loading1, loading2)
## consider the intercept.loading=FALSE
truth1 = 0.5 * 1 + 1 * 1
truth2 = 0.5 * (-0.5) + 1 * (-1)
truth = c(truth1, truth2)
truth
In this example, the linear functionals do not involve the intercept term, so we set intercept.loading=FALSE (the default). If users want to include the intercept term, please set intercept.loading=TRUE, in which case truth1 = -0.5 + 1.5 = 1 and truth2 = -0.5 - 1.25 = -1.75.
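For illustration, a minimal hedged sketch of that alternative call, reusing X, y, and loading.mat from the chunk above (the worked example that follows keeps the default intercept.loading=FALSE; the variable names below are only illustrative):

## hedged sketch: include the intercept term in the functional
truth1.with.int = -0.5 + 0.5 * 1 + 1 * 1           # = 1
truth2.with.int = -0.5 + 0.5 * (-0.5) + 1 * (-1)    # = -1.75
Est.with.int = LF(X, y, loading.mat, model = "linear",
                  intercept = TRUE, intercept.loading = TRUE, verbose = FALSE)
ci(Est.with.int)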
Call LF with model="linear":
Est = LF(X, y, loading.mat, model="linear", intercept=TRUE, intercept.loading=FALSE, verbose=TRUE)
ci method for LF:
ci(Est)
Note that both true values are included in their corresponding confidence intervals.
summary method for LF:
summary(Est)
The summary() function returns the summary statistics, including the plugin estimator, the bias-corrected estimator, and the standard errors.
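If the individual quantities are needed downstream, they can presumably be extracted from the returned object as well; a minimal sketch, assuming the fitted object stores these quantities in components named est.plugin.vec, est.debias.vec, and se.vec (these component names are an assumption, not documented here):

## hedged sketch: the component names below are assumptions
Est$est.plugin.vec   # plugin estimators
Est$est.debias.vec   # bias-corrected estimators
Est$se.vec           # standard errors
## 95% confidence intervals reconstructed from the bias-corrected estimates and SEs
cbind(lower = Est$est.debias.vec - qnorm(0.975) * Est$se.vec,
      upper = Est$est.debias.vec + qnorm(0.975) * Est$se.vec)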
Sometimes, we may be interested in multiple linear functionals, each with a separate loading. To be computationally efficient, we can specify the argument beta.init, so that the program does not need to recompute the initial estimator for each loading.
set.seed(1)
X = matrix(rnorm(100*120), nrow=100, ncol=120)
y = -0.5 + X[,1:10] %*% rep(0.5, 10) + rnorm(100)
loading.mat = matrix(0, nrow=120, ncol=10)
for(i in 1:ncol(loading.mat)){
  loading.mat[i,i] = 1
}
library(glmnet)
cvfit = cv.glmnet(X, y, family = "gaussian", alpha = 1, intercept = TRUE, standardize = TRUE)
beta.init = as.vector(coef(cvfit, s = cvfit$lambda.min))
Call LF with model="linear":
Est = LF(X, y, loading.mat, model="linear", intercept=TRUE, beta.init=beta.init, verbose=FALSE)
ci method for LF:
ci(Est)
summary method for LF:
summary(Est)
Generate Data and find the true linear functionals:
set.seed(0)
X = matrix(rnorm(100*120), nrow=100, ncol=120)
exp_val = -0.5 + X[,1] * 0.5 + X[,2] * 1
prob = exp(exp_val) / (1 + exp(exp_val))
y = rbinom(100, 1, prob)
## loadings
loading1 = c(1, 1, rep(0, 118))
loading2 = c(-0.5, -1, rep(0, 118))
loading.mat = cbind(loading1, loading2)
## consider the intercept.loading=FALSE
truth1 = 0.5 * 1 + 1 * 1
truth2 = 0.5 * (-0.5) + 1 * (-1)
truth = c(truth1, truth2)
truth.prob = exp(truth) / (1 + exp(truth))
truth; truth.prob
Call LF with model="logistic" or model="logistic_alter":
## model = "logisitc" Est = LF(X, y, loading.mat, model="logistic", verbose=TRUE)
ci method for LF:
## confidence interval for linear combination
ci(Est)
## confidence interval after probability transformation
ci(Est, probability = TRUE)
summary method for LF:
summary(Est)
Call LF with model="logistic_alter":
## model = "logistic_alter" Est = LF(X, y, loading.mat, model="logistic_alter", verbose=TRUE)
ci method for LF:
## confidence interval for linear combination
ci(Est)
## confidence interval after probability transformation
ci(Est, probability = TRUE)
summary method for LF:
summary(Est)
Generate Data and find the true linear functionals:
set.seed(0)
## 1st data
X1 = matrix(rnorm(100*120), nrow=100, ncol=120)
y1 = -0.5 + X1[,1] * 0.5 + X1[,2] * 1 + rnorm(100)
## 2nd data
X2 = matrix(0.8*rnorm(100*120), nrow=100, ncol=120)
y2 = 0.1 + X2[,1] * 1.8 + X2[,2] * 1.8 + rnorm(100)
## loadings
loading1 = c(1, 1, rep(0, 118))
loading2 = c(-0.5, -1, rep(0, 118))
loading.mat = cbind(loading1, loading2)
truth1 = (1.8*1 + 1.8*1) - (0.5*1 + 1*1)
truth2 = (1.8*(-0.5) + 1.8*(-1)) - (0.5*(-0.5) + 1*(-1))
truth = c(truth1, truth2)
truth
Call CATE with model="linear":
Est = CATE(X1, y1, X2, y2, loading.mat, model="linear")
ci method for CATE:
ci(Est)
summary method for CATE:
summary(Est)
Generate Data and find the true linear functionals:
set.seed(0)
## 1st data
X1 = matrix(rnorm(100*120), nrow=100, ncol=120)
exp_val1 = -0.5 + X1[,1] * 0.5 + X1[,2] * 1
prob1 = exp(exp_val1) / (1 + exp(exp_val1))
y1 = rbinom(100, 1, prob1)
## 2nd data
X2 = matrix(0.8*rnorm(100*120), nrow=100, ncol=120)
exp_val2 = -0.5 + X2[,1] * 1.8 + X2[,2] * 1.8
prob2 = exp(exp_val2) / (1 + exp(exp_val2))
y2 = rbinom(100, 1, prob2)
## loadings
loading1 = c(1, 1, rep(0, 118))
loading2 = c(-0.5, -1, rep(0, 118))
loading.mat = cbind(loading1, loading2)
truth1 = (1.8*1 + 1.8*1) - (0.5*1 + 1*1)
truth2 = (1.8*(-0.5) + 1.8*(-1)) - (0.5*(-0.5) + 1*(-1))
truth = c(truth1, truth2)
prob.fun = function(x) exp(x) / (1 + exp(x))
truth.prob1 = prob.fun(1.8*1 + 1.8*1) - prob.fun(0.5*1 + 1*1)
truth.prob2 = prob.fun(1.8*(-0.5) + 1.8*(-1)) - prob.fun(0.5*(-0.5) + 1*(-1))
truth.prob = c(truth.prob1, truth.prob2)
truth; truth.prob
Call CATE with model="logistic" or model="logistic_alter":
Est = CATE(X1, y1, X2, y2, loading.mat, model="logistic", verbose = FALSE)
ci method for CATE:
## confidence interval for linear combination
ci(Est)
## confidence interval after probability transformation
ci(Est, probability = TRUE)
summary method for CATE:
summary(Est)
Generate Data and find the true quadratic functional:
set.seed(0)
A1gen <- function(rho, p){
  M = matrix(NA, nrow=p, ncol=p)
  for(i in 1:p) for(j in 1:p) M[i,j] = rho^(abs(i-j))
  M
}
Cov = A1gen(0.5, 150) / 2
X = MASS::mvrnorm(n=400, mu=rep(0, 150), Sigma=Cov)
beta = rep(0, 150); beta[25:50] = 0.2
y = X %*% beta + rnorm(400)
test.set = c(40:60)
truth = as.numeric(t(beta[test.set]) %*% Cov[test.set, test.set] %*% beta[test.set])
truth
Call QF with model="linear", with the initial estimator given:
library(glmnet)
outLas <- cv.glmnet(X, y, family = "gaussian", alpha = 1, intercept = TRUE, standardize = TRUE)
beta.init = as.vector(coef(outLas, s = outLas$lambda.min))
Est = QF(X, y, G=test.set, A=NULL, model="linear", beta.init=beta.init, verbose=FALSE)
ci method for QF:
ci(Est)
summary method for QF:
summary(Est)
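The argument beta.init is optional here as well (the LF calls earlier in this README omit it); a minimal hedged sketch in which QF is left to compute the initial estimator internally, at the cost of refitting the Lasso inside the call:

## hedged sketch: omit beta.init and let QF fit its own initial estimator
Est.no.init = QF(X, y, G=test.set, A=NULL, model="linear", verbose=FALSE)
ci(Est.no.init)
summary(Est.no.init)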
Generate Data and find the true inner product:
set.seed(0)
p = 120
mu = rep(0, p)
Cov = diag(p)
## 1st data
n1 = 200
X1 = MASS::mvrnorm(n1, mu, Cov)
beta1 = rep(0, p); beta1[c(1,2)] = c(0.5, 1)
y1 = X1 %*% beta1 + rnorm(n1)
## 2nd data
n2 = 200
X2 = MASS::mvrnorm(n2, mu, Cov)
beta2 = rep(0, p); beta2[c(1,2)] = c(1.8, 0.8)
y2 = X2 %*% beta2 + rnorm(n2)
## test set
G = c(1:10)
truth <- as.numeric(t(beta1[G]) %*% Cov[G, G] %*% beta2[G])
truth
Call InnProd with model="linear":
Est = InnProd(X1, y1, X2, y2, G, model="linear")
ci method for InnProd:
ci(Est)
summary method for InnProd:
summary(Est)
Generate Data and find the true distance:
set.seed(0)
p = 120
mu = rep(0, p)
Cov = diag(p)
## 1st data
n1 = 200
X1 = MASS::mvrnorm(n1, mu, Cov)
beta1 = rep(0, p); beta1[c(1,2)] = c(0.5, 1)
y1 = X1 %*% beta1 + rnorm(n1)
## 2nd data
n2 = 200
X2 = MASS::mvrnorm(n2, mu, Cov)
beta2 = rep(0, p); beta2[c(1,2)] = c(1.8, 1.8)
y2 = X2 %*% beta2 + rnorm(n2)
## test set
G = c(1:10)
truth <- as.numeric(t(beta1[G] - beta2[G]) %*% (beta1[G] - beta2[G]))
truth
Call Dist with model="linear":
Est = Dist(X1, y1, X2, y2, G, model="linear", A = diag(length(G)))
ci method for Dist:
ci(Est)
summary method for Dist:
summary(Est)