## Goodness-of-fit and group tests for (penalized) GLMs.
## Required packages: glmnet, ranger (RPtests is used via :: below).
library(glmnet)
library(ranger)
nonlin_test <- function(X, y, fam, nsplits, RP_function, penalize){
  ## Computes p-values for the goodness-of-fit test from "nsplits" random
  ## splits of the data and aggregates them via twice the median.
  pvals <- rep(0, nsplits)
  for(i in 1:nsplits){
    # Compute the p-value from a single split
    pvals[i] <- single_split_pval(X, y, fam, RP_function, penalize)
  }
  if(nsplits > 1){
    pval <- min(2*median(pvals), 1)  # aggregation of the split p-values
  }else{
    pval <- pvals[1]
  }
  return(list(pval = pval, pvals = pvals))
}
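## Example usage (a minimal sketch on simulated data; the data-generating
## model and all parameter choices below are illustrative assumptions, not
## part of the original code):
# set.seed(1)
# n <- 200; p <- 10
# X <- matrix(rnorm(n*p), n, p)
# y <- X[, 1] + X[, 2]^2 + rnorm(n)   # quadratic term violates the linear model
# out <- nonlin_test(X, y, fam = "gaussian", nsplits = 5,
#                    RP_function = RP_randomForest, penalize = TRUE)
# out$pval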
single_split_pval <- function(X, y, fam, RP_function, penalize){
  ## Computes a p-value for the goodness-of-fit test from a single split of the data.
  # Split the sample into halves A and B
  p <- ncol(X)
  n <- nrow(X)
  ind <- sample.int(n = n, size = floor(0.5*n), replace = FALSE)
  XA <- as.matrix(X[ind, ])
  XB <- as.matrix(X[-ind, ])
  yA <- y[ind]
  yB <- y[-ind]
  # Fit a (penalized) GLM via glmnet on each half
  partA <- glmfit(XA, yA, fam, penalize)
  partB <- glmfit(XB, yB, fam, penalize)
  # Scaled residuals on part A and the estimated active set of fit A
  resA <- partA$res/partA$D
  beta.hat <- partA$beta
  hatS <- which(beta.hat[-1] != 0)
  # Standard deviations D implied by fit A, evaluated on XB
  mix <- XB%*%beta.hat[-1] + beta.hat[1]  # linear predictor of fit A on XB
  pars <- GLM_parameters(mix, fam)
  D <- as.vector(sqrt(pars$var_y))
  # Scaled residuals on part B
  resB <- partB$res/D
  # Apply the RP-function (residual prediction) to the part-A residuals
  if (fam == "binomial" && any(is.nan(resA))){
    stop("Residuals from glmnet contain NaN. One binomial class might have too few observations.")
  }
  pred.rf <- RP_function(XA, resA, XB)
  # Orthogonalize the predicted direction with respect to XB and normalize
  w <- orthogonalize(D, XB, pred.rf, hatS)
  w <- w / as.numeric(sqrt(crossprod(w)))
  # One-sided test statistic and p-value
  TS <- as.numeric(crossprod(w, resB))
  pval <- 1 - pnorm(TS)
  pval
}
orthogonalize <- function(D, XB, pred.rf, hatS){
  ## Approximately orthogonalizes the direction pred.rf with respect to the
  ## variables in XB, emulating the square-root LASSO along the glmnet path.
  ## Exact orthogonalization is enforced on the variables contained in hatS.
  Dw <- D
  p <- ncol(XB)
  n <- nrow(XB)
  # Penalty factors: zero (unpenalized, hence exact orthogonalization) on hatS
  indi <- rep(1, p)
  indi[hatS] <- 0
  if(sum(indi) == 0) indi[1] <- 1 # if all penalty factors are zero, penalize the first entry to avoid an error in glmnet
  # Fit the lasso path in the weighted design
  fit <- glmnet(Dw*XB, Dw*pred.rf, penalty.factor = indi, nlambda = 40)
  W <- Dw*(pred.rf - XB%*%fit$beta)  # residuals for every lambda on the path
  # Square-root-LASSO tuning: pick the lambda on the path closest to
  # sqrt(2*log(p)/n) times the residual scale at that lambda
  lambda.sqrt.lasso <- sqrt(2*log(p)/n)
  sigma.hat <- apply(W, 2, function(x) sqrt(sum(x^2))/sqrt(n))
  index <- which.min(abs(fit$lambda - lambda.sqrt.lasso*sigma.hat))
  beta.sqrt <- fit$beta[, index]
  w <- Dw*(pred.rf - XB%*%beta.sqrt)
  w
}
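## Note (sanity check, not part of the original code): the columns in hatS
## carry zero penalty, so by the KKT conditions the returned w is numerically
## orthogonal to the corresponding weighted columns of the design, e.g.
# crossprod(D*XB[, hatS], w)   # approximately zero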
glmfit <- function(XA, yA, fam, penalize){
  ## Fits glmnet to the data (XA, yA) and returns several values from the fit
  if(fam == "binomial"){
    y.trans <- as.factor(yA)
  }else{
    y.trans <- yA
  }
  if(penalize == FALSE){
    # Unpenalized fit: glmnet with lambda = 0
    fit1 <- glmnet(XA, y.trans, family = fam, lambda = 0)
    beta.hat <- coef(fit1)
    mix <- XA %*% beta.hat[-1] + beta.hat[1]
  }else{
    # Cross-validated lasso fit
    fit1 <- cv.glmnet(x = XA, y = y.trans, family = fam)
    beta.hat <- coef(fit1, s = "lambda.min")
    mix <- XA %*% beta.hat[-1] + beta.hat[1]
  }
  pars <- GLM_parameters(mix, fam)
  mean_y <- pars$mean_y
  var_y <- pars$var_y
  D <- sqrt(var_y)      # standard deviation of y given the fitted mean
  resA <- yA - mean_y   # raw residuals
  list(D = as.vector(D), res = as.vector(resA), beta = as.vector(beta.hat), mu = as.vector(mean_y))
}
RP_randomForest <- function(XA, resA, XB){
  ## Fits a random forest of resA on XA and predicts it on XB
  vars <- 1:ncol(XA)
  num.vars <- length(vars)
  # Stack both halves so that train and test data share identical column names;
  # the XB rows get a dummy response of 0 that is never used for training
  data.res <- data.frame(rbind(XA[, vars], XB[, vars]), c(resA, rep(0, nrow(XB))))
  rf <- ranger(y = data.res[1:nrow(XA), num.vars + 1],
               x = data.res[1:nrow(XA), 1:num.vars])
  pred.rf <- predict(rf, data = data.res[(nrow(XA)+1):(nrow(XA)+nrow(XB)), ])$predictions
  pred.rf
}
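## The RP_function argument only needs the signature function(XA, resA, XB),
## returning a prediction of the residuals on XB. A minimal alternative
## sketch using a cross-validated lasso instead of a random forest (this
## helper is an illustration and not part of the original code):
# RP_lasso <- function(XA, resA, XB){
#   fit <- cv.glmnet(x = XA, y = resA)
#   as.vector(predict(fit, newx = XB, s = "lambda.min"))
# }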
GLM_parameters <- function(mix, fam){
  ## Computes the predicted mean and variance of a GLM from the linear predictor mix
  if(fam == "gaussian"){
    mean_y <- mix
    var_y <- rep(1, length(mix))
  }else if(fam == "binomial"){
    mean_y <- exp(mix) / (1 + exp(mix))
    var_y <- mean_y * (1 - mean_y)
  }else if(fam == "poisson"){
    mean_y <- exp(mix)
    var_y <- exp(mix)
  }else{
    stop("Unsupported family. Use \"gaussian\", \"binomial\" or \"poisson\".")
  }
  list(mean_y = as.vector(mean_y), var_y = as.vector(var_y))
}
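## Quick sanity check of the binomial case (illustrative only): at a linear
## predictor of 0 the mean is 0.5 and the variance 0.5*(1 - 0.5) = 0.25.
# GLM_parameters(matrix(0), "binomial")   # mean_y = 0.5, var_y = 0.25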
### Function for group testing
group_test <- function(X, y, G, fam, B, penalize){
  ## Computes a p-value for testing the significance of the predictors in G,
  ## calibrated by a multiplier bootstrap with B replicates.
  n <- nrow(X)
  p <- ncol(X)
  W <- matrix(0, n, length(G))
  # Fit y on X[,-G]
  fit <- glmfit(X[,-G], y, fam, penalize)
  beta.hat <- fit$beta
  D <- fit$D
  mu <- fit$mu
  res <- (y - mu) / D   # scaled residuals of the reduced fit
  # Test statistic: decorrelate each variable in G from X[,-G] with the
  # square-root LASSO and project the residuals onto the resulting direction
  test <- rep(0, length(G))
  for(j in 1:length(G)){
    k <- G[j]
    beta.sqrt <- RPtests::sqrt_lasso(as.matrix(D*X[,-G]), as.vector(D*X[,k]))
    w <- D*(X[,k] - X[,-G]%*%beta.sqrt)
    w <- w/as.numeric(sqrt(crossprod(w)))
    W[,j] <- w
    test[j] <- crossprod(w, res)
  }
  maxtest <- max(test)
  ## Multiplier bootstrap for the distribution of the maximum
  TB <- rep(0, B)
  for(i in 1:B){
    e <- rnorm(n)
    TB[i] <- max(crossprod(W*e, res))
  }
  pval <- (1 + sum(TB > maxtest))/(B + 1)
  return(pval)
}
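## Example usage (a minimal sketch on simulated data; the data-generating
## model and all parameter choices below are illustrative assumptions):
# set.seed(2)
# n <- 200; p <- 20
# X <- matrix(rnorm(n*p), n, p)
# y <- as.vector(X %*% c(2, rep(0, p - 1)) + rnorm(n))   # only variable 1 is active
# group_test(X, y, G = 1:3, fam = "gaussian", B = 1000, penalize = TRUE)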