inst/doc/Week11_vignette.R

## ----install global_options, include=TRUE, results="hide", message=FALSE, warning=FALSE----
if(!requireNamespace("qvalue", quietly = TRUE)) {  
  if (!requireNamespace("BiocManager", quietly = TRUE))
    install.packages("BiocManager")
  BiocManager::install("qvalue")
}
if(!requireNamespace("lfmm", quietly = TRUE)) {  
  remotes::install_github("bcm-uga/lfmm")
}

## ----packages global_options, include=TRUE, results="hide", message=FALSE, warning=FALSE----
library(LandGenCourse)
library(vegan)    # Used to run PCA & RDA
library(lfmm)     # Used to run LFMM
library(qvalue)   # Used to post-process LFMM output

## ----load---------------------------------------------------------------------
gen <- read.csv(system.file("extdata", "wolf_geno_samp_10000.csv", package = "LandGenCourse"), row.names=1)
dim(gen)

## ----NA-----------------------------------------------------------------------
sum(is.na(gen)) # 27,987 NAs in the matrix (~3% missing data)
gen.imp <- apply(gen, 2, function(x) replace(x, is.na(x), as.numeric(names(which.max(table(x))))))
sum(is.na(gen.imp)) # No NAs

## ---- load.env----------------------------------------------------------------
env <- read.csv(system.file("extdata", "EnvironmentalData_8pred.csv", package = "LandGenCourse"))
str(env) # Look at the structure of the data frame
env$individual <- as.character(env$individual) # Make individual names characters (not factors)

# Confirm that genotypes and environmental data are in the same order
identical(rownames(gen.imp), env[,1]) 

## ---- env.prep----------------------------------------------------------------
pred <- env[,5:12]
colnames(pred) <- c("AMT","MDR","sdT","AP","cvP","NDVI","Elev","Tree")

## ---- fig.height=4, fig.width=6-----------------------------------------------
pred.pca <- rda(pred, scale=T)
summary(pred.pca)$cont
screeplot(pred.pca, main = "Screeplot: Eigenvalues of Wolf Predictor Variables")

# correlations between the PC axis and predictors:
round(scores(pred.pca, choices=1:8, display="species", scaling=0), digits=3)

## ---- PC1---------------------------------------------------------------------
pred.PC1 <- scores(pred.pca, choices=1, display="sites", scaling=0)

## ---- fig.height=4, fig.width=6-----------------------------------------------
screeplot(pred.pca, main = "Screeplot of Wolf Predictor Variables with Broken Stick", bstick=TRUE, type="barplot")

## ---- fig.height=4, fig.width=6-----------------------------------------------
gen.pca <- rda(gen.imp, scale=T)
screeplot(gen.pca, main = "Screeplot of Genetic Data with Broken Stick", bstick=TRUE, type="barplot")

## ---- setK--------------------------------------------------------------------
K <- 3

## -----------------------------------------------------------------------------
wolf.lfmm <- lfmm_ridge(Y=gen.imp, X=pred.PC1, K=K) # change K as you see fit

## -----------------------------------------------------------------------------
wolf.pv <- lfmm_test(Y=gen.imp, X=pred.PC1, lfmm=wolf.lfmm, calibrate="gif")

names(wolf.pv) # this object includes raw z-scores and p-values, as well as GIF-calibrated scores and p-values

## -----------------------------------------------------------------------------
wolf.pv$gif

## ---- fig.width=5, fig.height=3, fig.show='hold'------------------------------
hist(wolf.pv$pvalue[,1], main="Unadjusted p-values")        
hist(wolf.pv$calibrated.pvalue[,1], main="GIF-adjusted p-values")

## -----------------------------------------------------------------------------
# Let's change the GIF and readjust the p-values:
zscore <- wolf.pv$score[,1]   # zscores for first predictor, we only have one in our case...
(gif <- wolf.pv$gif[1])       # default GIF for this predictor
new.gif1 <- 2.0               # choose your new GIF

# Manual adjustment of the p-values:
adj.pv1 <- pchisq(zscore^2/new.gif1, df=1, lower = FALSE)

## ---- fig.width=5, fig.height=3, fig.show='hold'------------------------------
hist(wolf.pv$pvalue[,1], main="Unadjusted p-values")        
hist(wolf.pv$calibrated.pvalue[,1], main="GIF-adjusted p-values (GIF=2.8)")
hist(adj.pv1, main="REadjusted p-values (GIF=2.0)")

## -----------------------------------------------------------------------------
wolf.qv <- qvalue(wolf.pv$calibrated.pvalue)$qvalues

length(which(wolf.qv < 0.1)) # how many SNPs have an FDR < 10%?

(wolf.FDR.1 <- colnames(gen.imp)[which(wolf.qv < 0.1)]) # identify which SNPs these are

## ---- rda---------------------------------------------------------------------
wolf.rda <- rda(gen.imp ~ ., data=pred, scale=T)
wolf.rda

## ---- R2----------------------------------------------------------------------
RsquareAdj(wolf.rda)

## -----------------------------------------------------------------------------
summary(wolf.rda)$concont

## ---- screeplot, , fig.width=6.2, fig.height=4--------------------------------
screeplot(wolf.rda)

## ---- VIF---------------------------------------------------------------------
vif.cca(wolf.rda)

## ---- simple_plot, fig.width=5, fig.height=5----------------------------------
plot(wolf.rda, scaling=3)  # default is axes 1 and 2

## ---- loadings----------------------------------------------------------------
load.rda <- summary(wolf.rda)$species[,1:3]

## ---- loadings_plot, fig.width=2.5, fig.height=3, fig.show='hold'-------------
hist(load.rda[,1], main="Loadings on RDA1")
hist(load.rda[,2], main="Loadings on RDA2")
hist(load.rda[,3], main="Loadings on RDA3") 

## ---- outliers----------------------------------------------------------------
outliers <- function(x,z){
  lims <- mean(x) + c(-1, 1) * z * sd(x) # find loadings +/- z SD from mean loading     
  x[x < lims[1] | x > lims[2]]           # locus names in these tails
}

## ---- candidates--------------------------------------------------------------
cand1 <- outliers(load.rda[,1], 3) # 38
cand2 <- outliers(load.rda[,2], 3) # 69
cand3 <- outliers(load.rda[,3], 3) # 34

wolf.rda.cand <- c(names(cand1), names(cand2), names(cand3)) # just the names of the candidates

length(wolf.rda.cand[duplicated(wolf.rda.cand)]) # 7 duplicate detections (detected on multiple RDA axes)
wolf.rda.cand <- wolf.rda.cand[!duplicated(wolf.rda.cand)] # 134 unique candidates 

## ---- fig.width=5, fig.height=5, fig.show='hold'------------------------------
# Set up the color scheme for plotting:
bgcol  <- ifelse(colnames(gen.imp) %in% wolf.rda.cand, 'gray32', '#00000000')
snpcol <- ifelse(colnames(gen.imp) %in% wolf.rda.cand, 'red', '#00000000')

# axes 1 & 2 - zooming in to just the SNPs here...
plot(wolf.rda, type="n", scaling=3, xlim=c(-1,1), ylim=c(-1,1), main="Wolf RDA, axes 1 and 2")
points(wolf.rda, display="species", pch=21, cex=1, col="gray32", bg='#f1eef6', scaling=3)
points(wolf.rda, display="species", pch=21, cex=1, col=bgcol, bg=snpcol, scaling=3)
text(wolf.rda, scaling=3, display="bp", col="#0868ac", cex=1)

# axes 2 & 3
plot(wolf.rda, type="n", scaling=3, xlim=c(-1,1), ylim=c(-1,1), choices=c(2,3), main="Wolf RDA, axes 2 and 3")
points(wolf.rda, display="species", pch=21, cex=1, col="gray32", bg='#f1eef6', scaling=3, choices=c(2,3))
points(wolf.rda, display="species", pch=21, cex=1, col=bgcol, bg=snpcol, scaling=3, choices=c(2,3))
text(wolf.rda, scaling=3, display="bp", col="#0868ac", cex=1, choices=c(2,3))

## ---- env.axes----------------------------------------------------------------
intersetcor(wolf.rda)[,1:3]

## ---- overlap-----------------------------------------------------------------
intersect(wolf.FDR.1, wolf.rda.cand) # found by both LFMM and RDA
setdiff(wolf.FDR.1, wolf.rda.cand)   # unique to LFMM

## ---- eval=F------------------------------------------------------------------
#  ?rda  # help for running RDA
#  
#  # pseudo code for simple RDA:
#  foo <- rda(genomic.data ~ predictor1 + predictor2, data=dat
#  
#  # pseudo code for partial RDA (correcting for population structure with PCs):
#  bar <- rda(genomic.data ~ predictor1 + predictor2 + Condition(PC1), data=dat, scale=T)

## ----message=FALSE, warning=TRUE, include=FALSE-------------------------------
#LandGenCourse::detachAllPackages()
hhwagner1/LandGenCourse documentation built on March 25, 2020, 5:34 p.m.