knitr::opts_chunk$set(echo = TRUE, tidy = FALSE) options(digits=4, scipen = 999) rm(list=ls())
The source code used to create this report, including the main function can be found at https://github.com/sahirbhatnagar/penfam.
# block relaxation for fitting the penfam model # source("https://raw.githubusercontent.com/sahirbhatnagar/penfam/master/R/fitting.R") source("~/git_repositories/penfam/R/fitting.R") # utility functions # source("https://raw.githubusercontent.com/sahirbhatnagar/penfam/master/R/functions.R") source("~/git_repositories/penfam/R/functions.R") # print, plot and predict methods # source("https://raw.githubusercontent.com/sahirbhatnagar/penfam/master/R/methods.R") source("~/git_repositories/penfam/R/methods.R") # plotting functions # source("https://raw.githubusercontent.com/sahirbhatnagar/penfam/master/R/plot.R") source("~/git_repositories/penfam/R/plot.R")
pacman::p_load(MASS) pacman::p_load(Matrix) pacman::p_load(glmnet) pacman::p_load(progress) pacman::p_load(magrittr) pacman::p_load(regress) pacman::p_load(coxme)
set.seed(12345678) # Kinship matrix from Karim load("~/Dropbox/PhD/Year4/penfam/data/kin1.Rdata") diag(kin1) Phi <- 2 * kin1 dim(Phi) Phi[1:10, 1:10] # eigen decomposition Phi_eigen <- eigen(Phi) # eigenvalues Lambda <- Phi_eigen$values any(Lambda < 1e-3) all(Lambda > 0) rcond(Phi) kappa(Phi)
eta <- 0.5 sigma2 <- 4 # intercept b0 <- 3 # true betas b <- c(runif(10, 0.8,1.2), rep(0,80), runif(10, -1.2, -0.8))
# number of predictors p <- length(b) n <- nrow(Phi) # polygenic random effect P <- mvrnorm(1, mu = rep(0, n), Sigma = eta * sigma2 * Phi) # environment random effect E <- mvrnorm(1, mu = rep(0, n), Sigma = (1 - eta) * sigma2 * diag(n)) # nsim = 5 # P <- mvrnorm(nsim, mu = rep(0, n), Sigma = eta * sigma2 * Phi) %>% t # dim(P) # # environment random effect # E <- mvrnorm(nsim, mu = rep(0, n), Sigma = (1 - eta) * sigma2 * diag(n)) %>% t # dim(E) # independent predictors # X <- mvrnorm(n, rep(1,p), diag(p)) # Correlated predictors rho1 <- 0.7 rho2 <- 0.9 k1 <- rho1 ^ toeplitz(seq_len(p/2)) diag(k1) <- 1 k2 <- rho2 ^ toeplitz(seq_len(p/2)) diag(k2) <- 1 k <- Matrix::bdiag(k1,k2) dim(k) # eig <- svd(Phi, nu = 100, nv = 100) X <- mvrnorm(n, rep(1,p), k) dimnames(X)[[2]] <- paste0("alpha",1:p) # colnames(cbind(beta0=1,X)) # pheatmap::pheatmap(cor(X)) # response Y <- b0 + as.numeric(X %*% b) + P + E pheno_dat <- data.frame(Y = Y, id = 1:n)
glmnet
As per Appendix 2 of the glmnet
vignette, the recommended way to obtain a lasso solution for a single value of $\lambda$ is to
fit the entire lasso or elastic-net path without specifying lambda, letting it chose its own sequence. Then make a call to coef or predict (using the exact = TRUE option) and provide the requested $\lambda$ to extract the corresponding coefficients.
We provide these two main options for fitting the penfam
model by specifying the exact
argument:
- exact = TRUE
: this will use the exact value of lambda provided and re-fit the model using glmnet
- exact = FALSE
: this will use a linear interpolation
In both cases, the entire lasso path is being fit.
This is no longer being implemented.
Instead we are providing two lambdas to glmnet, the .Machine$double.xmax
and the actual value of lambda.
fit_regress <- regress(Y ~ 1, ~ Phi, pos = TRUE) summary(fit_regress) fit_lme <- lmekin(Y ~ 1 + (1|id), data = pheno_dat, varlist = Phi) newy <- residuals(fit_lme) plot(Y, newy) abline(a=0,b=1, col="red") cv_fit <- glmnet::cv.glmnet(x = X, y = newy) plot(cv_fit) coef(cv_fit) # this is XB only plot(fit_regress$fitted, matrix(1, nrow = n) %*% t(t(coef(fit_lme)$fixed))) plot(fit_regress$predicted, matrix(1, nrow = n) %*% t(t(coef(fit_lme)$fixed)) + coef(fit_lme)$random$id) abline(a=0, b=1) plot(fit_regress$predicted) fit_regress$Z$Phi plot(residuals(fit_lme), Y - matrix(1, nrow = n) %*% t(t(coef(fit_lme)$fixed)) - coef(fit_lme)$random$id) W <- chol(kin1) dim(W) W[1:10,1:10]
system.time( lasso <- penfam(x = X, y = Y, phi = Phi, thresh_glmnet = 1e-12, epsilon = 1e-6, fdev = 1e-5, alpha = 1, tol.kkt = 1e-3, nlambda = 100, an = log(log(n)) * log(n), lambda_min_ratio = ifelse(n < p, 0.01, 0.001), eta_init = 0.5, maxit = 100) )
Print method:
lasso
lasso$result[,c(-2,-3,-4,-10)]
coef(lasso, s = lasso$lambda_min) coef(lasso, s = lasso$lambda_min)
predict(lasso, type = "nonzero", s = lasso$lambda_min)
plot(lasso, type = "coef", xvar = "norm") plot(lasso, type = "coef", xvar = "lambda") plot(lasso, type = "coef", xvar = "dev")
plot(lasso, type = "BIC")
plot(coef(lasso, s = lasso$lambda_min), pch = 19, col = "red", ylim = range(c(b0, b, eta, sigma2, drop(coef(lasso, s = lasso$lambda_min))))) points(seq_along(c(b0, b, eta, sigma2)), c(b0, b, eta, sigma2), pch = 19, col = "blue") legend("bottomleft", legend = c("Estimated", "Truth"), col = c("red","blue"), pch = c(19, 19), bg = "gray90")
plot(lasso, type = c("QQranef"), s = lasso$lambda_min) plot(lasso$randomeff[,lasso$lambda_min])
plot(lasso, type = c("Tukey"), s = lasso$lambda_min)
plot(lasso, type = c("QQresid"), s = lasso$lambda_min)
plot(lasso, type = c("predicted"), s = lasso$lambda_min)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.