In jchiquet/spring: Fit the SPRING model

This vignette intends to illustrate the basical use of the spring function and the its accompaning methods. To this end, let us consider a classical genetic data set concerning Brassica napus.

suppressMessages(library(ggplot2))
suppressMessages(library(grid))
suppressMessages(library(gridExtra))

plot.map <- function(par.hat, gen.map, title="Selected features along the outcomes", plot=TRUE, show.names=TRUE,
                     chr=1:max(gen.map$chrom)) {

  ## restricting to some specified chromosoms
  par.hat <- par.hat[gen.map$chrom %in% chr, ]
  gen.map <- gen.map[gen.map$chrom %in% chr, ]

  pheno  <- colnames(par.hat)
  npheno <- length(pheno)

  ## retrieve information related to the selected gen.map
  dx <- do.call(rbind, apply(par.hat, 2, function(sel) gen.map[sel!=0, ]))
  uchrom <- sort(unique(dx$chrom))
  nchrom <- length(uchrom)
  ochrom <- rep(0, max(uchrom))
  ochrom[uchrom] <- seq(nchrom)
  dx$outcomes  <- factor(rep(pheno, sapply(apply(par.hat, 2, function(sel) gen.map[sel!=0, ]), nrow)), levels=pheno)
  dx$position  <-  ochrom[dx$chrom] + (as.numeric(dx$outcomes) * 2 - npheno - 1) *.35 /npheno
  dx$intensity <-  par.hat[par.hat !=0]
  dx$sign      <-  rep("+", length(dx$intensity))
  dx$sign[sign(dx$intensity) < 0] <- "-"
  dx$sign <- as.factor(dx$sign)
  dx$intensity <-  abs(dx$intensity)

  ## Constructing ther map for chromosome representation with annotation
  map.x <-rep(seq(nchrom), tapply(gen.map$loci,
                                  factor(gen.map$chrom, ordered=TRUE, levels=1:max(uchrom)), length)[uchrom])
  map.y <- gen.map$loci[gen.map$chrom %in% uchrom]
  map.n <- gen.map$names[gen.map$chrom %in% uchrom]
  map <- data.frame(xs.map=map.x-0.475, xe.map=map.x+0.475, y=map.y,
                    xs.names=map.x-0.5, label=map.n)

  ## Build the genetic Map and put annotation of the gen.map
  g <- ggplot(map) + geom_segment(aes(x=xs.map,xend=xe.map,y=y,yend=y), alpha=0.6, colour="#00cc33")
  if (show.names)
    g <- g + geom_text(data=map, aes(x=xs.names,y=y,label=label,hjust=0,vjust=0), size=4, alpha=0.6, colour="#00cc33")
  ## Set the points where features have been selected by
##  g <- g + geom_point(data=dx, aes(x=position,y=loci,group=outcomes,colour=outcomes, size=intensity,shape=sign))
  g <- g + geom_point(data=dx, aes(x=position,y=loci,group=outcomes,colour=outcomes, size=intensity))
  ## Various adjustements (axis, legends, labels...)
  g <- g + scale_x_discrete(breaks=1:nchrom, labels=uchrom, limits=1:nchrom)
  g <- g + guides(alpha=FALSE) + labs(x="chromosomes",y="loci",title=title)

  if (plot)
    print(g)

  return(g)
}

grid_arrange_shared_legend <- function(..., nrow = 1, ncol = length(list(...)), position = c("bottom", "right")) {

  plots <- list(...)
  position <- match.arg(position)
  g <- ggplotGrob(plots[[1]] + theme(legend.position = position))$grobs
  legend <- g[[which(sapply(g, function(x) x$name) == "guide-box")]]
  lheight <- sum(legend$height)
  lwidth <- sum(legend$width)
  gl <- lapply(plots, function(x) x + theme(legend.position = "none"))
  gl <- c(gl, nrow = nrow, ncol = ncol)

  combined <- switch(position,
                     "bottom" = arrangeGrob(do.call(arrangeGrob, gl),
                                            legend,
                                            ncol = 1,
                                            heights = unit.c(unit(1, "npc") - lheight, lheight)),
                     "right" = arrangeGrob(do.call(arrangeGrob, gl),
                                           legend,
                                           ncol = 2,
                                           widths = unit.c(unit(1, "npc") - lwidth, lwidth)))
  grid.draw(combined)
}

Load the Brassica napus data set

We start by loading the dataset with pretreatment already done. It contains markers, traits and the genetic map:

library(spring)
load(system.file("extdata", "brassica.RData", package="spring"))
markers[1:3,1:5]
traits[1:3,]
head(map)

## Problem size
n <- nrow(traits)   # 103
p <- ncol(markers)  # 295
q <- ncol(traits)   # 8

Prior construction for structure inference

dist <- "neighborhood"
if (dist == "genetic") {
  ## Build Linkage Desequilibrium using the genetic map
  d <- as.numeric(unlist(with(map, tapply(loci, chrom, function(x) c(diff(x), Inf)) ))) ; d <- d[-p]
  U <- bandSparse(p,p,k=0:1,diagonals=list(rep(1,p), -exp(-2*d)))
  V <- Diagonal(x=c(1/(1-exp(-2*2*d)),1))
  L <- t(U) %*% V %*% U
} else {
  ## Neighborhood base... (nice one)
  D1 <- diffOp(p,4)
  L <- t(D1) %*% D1
}

L <- Diagonal(x=1/sqrt(diag(L))) %*% L %*% Diagonal(x=1/sqrt(diag(L)))
normx <- colSums(markers^2)/(n-1)
L <- Diagonal(x=normx) %*% L %*% Diagonal(x=normx)

Call to the 'spring' function

## Fix lambda2 (prior) on a grid of lambda1 (sparsity)
out  <- spring(markers, traits, nlambda1=20, min.ratio=3e-1, struct=L, lambda2=c(10^seq(3,-3,len=7),0))

Model selection

## Compute BIC/AIC information criteria
crit <- pen.crit(out)

spring.reg <- crit$BIC$model$coef.regres
spring.dir <- crit$BIC$model$coef.direct
cov.res    <- crit$BIC$model$covariance

plot(crit$BIC$criterion)
crit$BIC$model$plot(type="covariance", corr=TRUE, legend=FALSE)

Coefficients output (Genetic Map)

map.reg <- plot.map( spring.reg, map, title="", plot=FALSE, show.names=TRUE, chr=c(2,8,10))
map.dir <- plot.map(-spring.dir, map, title="", plot=FALSE, show.names=TRUE, chr=c(2,8,10))
grid_arrange_shared_legend(map.reg, map.dir, nrow=1, ncol=2, position = "right")