In sahirbhatnagar/penfam: Variable Selection in Linear Mixed Models for SNP Data

knitr::opts_chunk$set(echo = TRUE, tidy = FALSE)
options(digits=4, scipen = 999)
rm(list=ls())

Source code

The source code used to create this report, including the main function can be found at https://github.com/sahirbhatnagar/penfam.

# block relaxation for fitting the penfam model
# source("https://raw.githubusercontent.com/sahirbhatnagar/penfam/master/R/fitting.R")
source("~/git_repositories/penfam/R/fitting.R")
# utility functions 
# source("https://raw.githubusercontent.com/sahirbhatnagar/penfam/master/R/functions.R")
source("~/git_repositories/penfam/R/functions.R")
# print, plot and predict methods 
# source("https://raw.githubusercontent.com/sahirbhatnagar/penfam/master/R/methods.R")
source("~/git_repositories/penfam/R/methods.R")
# plotting functions
# source("https://raw.githubusercontent.com/sahirbhatnagar/penfam/master/R/plot.R")
source("~/git_repositories/penfam/R/plot.R")

Required Packages

pacman::p_load(MASS) 
pacman::p_load(Matrix)
pacman::p_load(glmnet)
pacman::p_load(progress)
pacman::p_load(magrittr)
pacman::p_load(regress)
pacman::p_load(coxme)

Simulation Details

Kinship Matrix and Eigen Decomposition

set.seed(12345678)
# Kinship matrix from Karim
load("~/Dropbox/PhD/Year4/penfam/data/kin1.Rdata")
diag(kin1)
Phi <- 2 * kin1
dim(Phi)
Phi[1:10, 1:10]

# eigen decomposition
Phi_eigen <- eigen(Phi)

# eigenvalues
Lambda <- Phi_eigen$values
any(Lambda < 1e-3)
all(Lambda > 0)
rcond(Phi)
kappa(Phi)

Simulation parameters

eta <- 0.5
sigma2 <- 4

# intercept
b0 <- 3 

# true betas
b <- c(runif(10, 0.8,1.2), rep(0,80), runif(10, -1.2, -0.8))

Simulate data

# number of predictors
p <- length(b)

n <- nrow(Phi)

# polygenic random effect
P <- mvrnorm(1, mu = rep(0, n), Sigma = eta * sigma2 * Phi)

# environment random effect
E <- mvrnorm(1, mu = rep(0, n), Sigma = (1 - eta) * sigma2 * diag(n))

# nsim = 5
# P <- mvrnorm(nsim, mu = rep(0, n), Sigma = eta * sigma2 * Phi) %>% t
# dim(P)
# # environment random effect
# E <- mvrnorm(nsim, mu = rep(0, n), Sigma = (1 - eta) * sigma2 * diag(n)) %>% t
# dim(E)

# independent predictors
# X <- mvrnorm(n, rep(1,p), diag(p))

# Correlated predictors
rho1 <- 0.7
rho2 <- 0.9

k1 <- rho1 ^ toeplitz(seq_len(p/2))
diag(k1) <- 1
k2 <- rho2 ^ toeplitz(seq_len(p/2))
diag(k2) <- 1
k <- Matrix::bdiag(k1,k2) 
dim(k)

# eig <- svd(Phi, nu = 100, nv = 100)

X <- mvrnorm(n, rep(1,p), k)

dimnames(X)[[2]] <- paste0("alpha",1:p)
# colnames(cbind(beta0=1,X))
# pheatmap::pheatmap(cor(X))
# response
Y <- b0 + as.numeric(X %*% b) + P + E

pheno_dat <- data.frame(Y = Y, id = 1:n)

A note on using `glmnet`

As per Appendix 2 of the glmnet vignette, the recommended way to obtain a lasso solution for a single value of $\lambda$ is to

fit the entire lasso or elastic-net path without specifying lambda, letting it chose its own sequence. Then make a call to coef or predict (using the exact = TRUE option) and provide the requested $\lambda$ to extract the corresponding coefficients.

We provide these two main options for fitting the penfam model by specifying the exact argument: - exact = TRUE: this will use the exact value of lambda provided and re-fit the model using glmnet - exact = FALSE: this will use a linear interpolation In both cases, the entire lasso path is being fit.

This is no longer being implemented.

Instead we are providing two lambdas to glmnet, the .Machine$double.xmax and the actual value of lambda.

Two-Step Method

fit_regress <- regress(Y ~ 1, ~ Phi, pos = TRUE)
summary(fit_regress)


fit_lme <- lmekin(Y ~ 1 + (1|id), data = pheno_dat, varlist = Phi)
newy <- residuals(fit_lme)

plot(Y, newy)
abline(a=0,b=1, col="red")
cv_fit <- glmnet::cv.glmnet(x = X, y = newy)
plot(cv_fit)

coef(cv_fit)

# this is XB only
plot(fit_regress$fitted,
matrix(1, nrow = n) %*% t(t(coef(fit_lme)$fixed)))

plot(fit_regress$predicted, matrix(1, nrow = n) %*% t(t(coef(fit_lme)$fixed)) + coef(fit_lme)$random$id)
abline(a=0, b=1)
plot(fit_regress$predicted)

fit_regress$Z$Phi


plot(residuals(fit_lme),
     Y - matrix(1, nrow = n) %*% t(t(coef(fit_lme)$fixed)) - coef(fit_lme)$random$id)

W <- chol(kin1)
dim(W)
W[1:10,1:10]

Fit the penfam model

system.time(
  lasso <- penfam(x = X, 
                  y = Y, 
                  phi = Phi, 
                  thresh_glmnet = 1e-12,
                  epsilon = 1e-6,
                  fdev = 1e-5,
                  alpha = 1,
                  tol.kkt = 1e-3,
                  nlambda = 100,
                  an = log(log(n)) * log(n),
                  lambda_min_ratio  = ifelse(n < p, 0.01, 0.001),
                  eta_init = 0.5,
                  maxit = 100) 
)

Print method:

lasso

KKT Checks

lasso$result[,c(-2,-3,-4,-10)]

Coefficients at Minimum BIC

coef(lasso, s = lasso$lambda_min)

coef(lasso, s = lasso$lambda_min)

Non-zero Coefficients at Minimum BIC

predict(lasso, type = "nonzero", s = lasso$lambda_min)

Plot of Coefficient Paths

plot(lasso, type = "coef", xvar = "norm")
plot(lasso, type = "coef", xvar = "lambda")
plot(lasso, type = "coef", xvar = "dev")

Plot of BIC as a function of Tuning parameters

plot(lasso, type = "BIC")

Compare Estimated vs. Truth

plot(coef(lasso, s = lasso$lambda_min), 
     pch = 19, col = "red", 
     ylim = range(c(b0, b, eta, sigma2, drop(coef(lasso, s = lasso$lambda_min)))))

points(seq_along(c(b0, b, eta, sigma2)), c(b0, b, eta, sigma2), pch = 19, col = "blue")

legend("bottomleft",
       legend = c("Estimated", "Truth"),
       col = c("red","blue"),
       pch = c(19, 19),
       bg = "gray90")

Prediction of Random Effects

plot(lasso, type = c("QQranef"), s = lasso$lambda_min)
plot(lasso$randomeff[,lasso$lambda_min])

Tukey-Anscombe Plot

plot(lasso, type = c("Tukey"), s = lasso$lambda_min)

Residuals

plot(lasso, type = c("QQresid"), s = lasso$lambda_min)

Predicted vs. Observed response

plot(lasso, type = c("predicted"), s = lasso$lambda_min)

sahirbhatnagar/penfam documentation built on April 14, 2021, 9:38 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

sahirbhatnagar/penfam
Variable Selection in Linear Mixed Models for SNP Data

In sahirbhatnagar/penfam: Variable Selection in Linear Mixed Models for SNP Data

Source code

Required Packages

Simulation Details

Kinship Matrix and Eigen Decomposition

Simulation parameters

Simulate data

A note on using `glmnet`

Two-Step Method

Fit the penfam model

KKT Checks

Coefficients at Minimum BIC

Non-zero Coefficients at Minimum BIC

Plot of Coefficient Paths

Plot of BIC as a function of Tuning parameters

Compare Estimated vs. Truth

Prediction of Random Effects

Tukey-Anscombe Plot

Residuals

Predicted vs. Observed response

R Package Documentation

Browse R Packages

We want your feedback!

sahirbhatnagar/penfam Variable Selection in Linear Mixed Models for SNP Data

In sahirbhatnagar/penfam: Variable Selection in Linear Mixed Models for SNP Data

Source code

Required Packages

Simulation Details

Kinship Matrix and Eigen Decomposition

Simulation parameters

Simulate data

A note on using glmnet

Two-Step Method

Fit the penfam model

KKT Checks

Coefficients at Minimum BIC

Non-zero Coefficients at Minimum BIC

Plot of Coefficient Paths

Plot of BIC as a function of Tuning parameters

Compare Estimated vs. Truth

Prediction of Random Effects

Tukey-Anscombe Plot

Residuals

Predicted vs. Observed response

R Package Documentation

Browse R Packages

We want your feedback!

sahirbhatnagar/penfam
Variable Selection in Linear Mixed Models for SNP Data

A note on using `glmnet`