inst/doc/Week14_vignette.R

## ----install global_options, include=TRUE, results="hide", message=FALSE, warning=FALSE--------------
if(!requireNamespace("popgraph", quietly = TRUE))
{
  install.packages(c("tmap", "geosphere", "proto", "sampling", 
                      "seqinr", "spacetime", "spdep"), dependencies=TRUE)
  remotes::install_github("dyerlab/popgraph")
}
if(!requireNamespace("gstudio", quietly = TRUE)) remotes::install_github("dyerlab/gstudio")


## ----packages global_options, include=TRUE, results="hide", message=FALSE, warning=FALSE-------------
library(LandGenCourse)
library(dplyr)
library(ggplot2)
library(tmap)
#library(gstudio)
#library(pegas)
#library(vegan)
#library(purrr)
#library(MuMIn)
#library(lme4)
#library(cowplot)


## ----import------------------------------------------------------------------------------------------
dat <- gstudio::read_population(system.file("extdata",              
        "pulsatilla_genotypes.csv", package="LandGenCourse"), 
        type = "column", locus.columns = 6:19)
dat$ID <- as.factor(dat$ID)


## ----------------------------------------------------------------------------------------------------
head(dat)


## ----fig.height=5, fig.width=7-----------------------------------------------------------------------
Coords <- dat %>% group_by(Population) %>% 
          summarize(X = mean(X), Y = mean(Y))
Coords <- data.frame(Coords, sf::sf_project(from = "+init=epsg:31468", 
          to = "+init=epsg:4326", pts = Coords[c("X", "Y")]))
names(Coords)[4:5] <- c("Longitude", "Latitude")
Coords.sf <- sf::st_as_sf(Coords, coords=c("Longitude", "Latitude"), crs=sf::st_crs(4326))


## ----------------------------------------------------------------------------------------------------
tmap_mode("view")
MyMap <-
  tm_basemap(c("Esri.WorldTopoMap", "Esri.WorldStreetMap", "Esri.WorldShadedRelief")) +
  tm_shape(Coords.sf) + tm_sf(size = 1, col = "black")
MyMap + tm_shape(Coords.sf) + tm_text(text = "Population", size = 1, col = "black",just="bottom")


## ----dat---------------------------------------------------------------------------------------------
dat[dat$ID == "3083",]


## ----warning=FALSE, minus_mom------------------------------------------------------------------------
pollen <- gstudio::minus_mom(dat, MomCol = "ID", OffCol = "OffID")
pollen[pollen$ID == "3083",]


## ----pegas-------------------------------------------------------------------------------------------
D <- gstudio::genetic_distance(pollen,mode="amova")
D <- as.dist(D)
Moms <- pollen$ID
Populations <- as.factor(pollen$Population) 
amova.result <- pegas::amova(D ~ Populations/Moms, nperm=500)
amova.result


## ----------------------------------------------------------------------------------------------------
phi <- amova.result$varcomp[1]/sum(amova.result$varcomp[1])
names(phi) <- "phi"
phi


## ----dps---------------------------------------------------------------------------------------------
dps <- 1 - gstudio::genetic_distance(pollen, stratum = "ID", mode = "Dps")


## ----dist--------------------------------------------------------------------------------------------
xy <- unique(data.frame(pollen$X, pollen$Y))
xy.dist <- as.matrix(dist(xy))


## ----plot--------------------------------------------------------------------------------------------
par(mar=c(4,4,0,0))
plot(xy.dist[lower.tri(xy.dist)], dps[lower.tri(dps)], 
    xlab = "Geographic distance (m)", 
     ylab = "1 - Proportion of shared alleles (dps)")
abline(lm(dps[lower.tri(dps)]~xy.dist[lower.tri(xy.dist)]))


## ----mantel------------------------------------------------------------------------------------------
vegan::mantel(xy.dist, dps)


## ----exclusion_probability---------------------------------------------------------------------------
# exclusion probabilities
pollen.freqs <- gstudio::frequencies(dat)
p.excl <- gstudio::exclusion_probability( pollen.freqs )
p.excl


## ----prod--------------------------------------------------------------------------------------------
1- prod((1-unlist(p.excl$Pexcl)))


## ----paternity---------------------------------------------------------------------------------------
# Select all offspring of mom 3083:
offspring.3083 <- subset(dat, OffID!=0 & ID == "3083")
# Select mom 3083:
mother.3083 <- subset(dat, ID == "3083" & dat$OffID == 0 )
# Select all potential fathers in the same population as mom 3083:
fathers.3083 <- subset(dat, OffID == 0 & Population %in% offspring.3083$Population)
# Paternity analysis: 
pat.3083 <- gstudio::paternity(offspring.3083, mother.3083, fathers.3083 )
pat.3083


## ----full_paternity----------------------------------------------------------------------------------
# make a dataframe just for the offspring:
offspring <- subset(dat, OffID!=0)

# here is the function that we will apply to all mothers:
get.parentage <- function(x){
  tmp.offspring <- subset(offspring, ID == x)
  tmp.mother <- subset(dat, ID == x & OffID == 0)
  tmp.fathers <- subset(dat, OffID == 0 & Population %in% tmp.offspring$Population)
  return(gstudio::paternity(tmp.offspring, tmp.mother, tmp.fathers ))
}


## ----purrrify----------------------------------------------------------------------------------------
# purrr-ify the function so that NA is returned when an error pops up:
possible_pat <- purrr::possibly(get.parentage, otherwise = NA_real_)

# run the function and store the output:
# list of results for each mother:
pat.all <- purrr::map(unique(offspring$ID), possible_pat)
# convert the list to a dataframe:
pat.all <- do.call(rbind, pat.all[!is.na(pat.all)]) 


## ----threshold---------------------------------------------------------------------------------------
# create a temporary ID that combines the MomID and the OffID
pat.all$tmpID <- paste(pat.all$MomID, pat.all$OffID, sep="_")
# get rid of all rows with duplicated tmpIDs, leaving just the first entry for each
pat.sub <- pat.all[!duplicated(pat.all$tmpID),]
# get rid of the tmpID column
pat.sub <- pat.sub[,1:4] # get rid of the tmp_ID column


## ----spiderplot_data---------------------------------------------------------------------------------
pat.sub <- gstudio::spiderplot_data(pat.sub, dat, longitude = "X", latitude = "Y")
# Join data to add population IDs
pat.sub <- merge(pat.sub, dat[, c("Population", "ID" ,"OffID")],
                 by.x=c("MomID", "OffID"), by.y=c("ID", "OffID"), all.x=T)

head(pat.sub)


## ----plot_pollenFlow, fig.height=6, fig.width=7------------------------------------------------------
pop <- "A25"
ggplot() +
  geom_point(data=dat[dat$Population==pop,], 
             aes(X,Y),size=3, color="red") +
  geom_segment(data=pat.sub[pat.sub$Population==pop,], 
       aes(x=X, y=Y, xend=Xend, yend=Yend), linewidth=0.5, alpha=0.2, 
       arrow = arrow(ends = "first", length = unit(0.3, "cm"))) +
  theme(legend.position = "none")


## ----dispersal_distance------------------------------------------------------------------------------
pat.sub$pollen.dist <- unlist(lapply(1:nrow(pat.sub),
    function(x) dist(rbind(c(pat.sub$X[x], pat.sub$Y[x]), 
                c(pat.sub$Xend[x], pat.sub$Yend[x]))) ))


## ----dispersal_kernel--------------------------------------------------------------------------------
ggplot(pat.sub[pat.sub$pollen.dist >0,]) +
  geom_histogram( aes(x=pollen.dist),  bins=20) +
  xlab("Distance from pollen source (m)")  +
  ylab("Number of pollen flow events")


## ----import_momVariables-----------------------------------------------------------------------------
# read in the data
mom.vars <- read.csv(system.file("extdata",                 
 "pulsatilla_momVariables.csv", package="LandGenCourse"))

# exclude selfing
pat.outcrossed <- subset(pat.sub, MomID != DadID)
# add mom variables to pat.outcrossed
pat.outcrossed <- merge(pat.outcrossed, mom.vars, by.x = "MomID", 
                        by.y = "ID", all.x=T)
# look at the data
head(pat.outcrossed)


## ----lme---------------------------------------------------------------------------------------------
# specify the model
mod <- lme4::lmer(log(pollen.dist) ~ scale(log(flower.density)) + 
            scale(log(mom.isolation)) + (1|Population/MomID),
            data=pat.outcrossed, na.action = "na.fail", REML=F)


## ----dredge------------------------------------------------------------------------------------------
MuMIn::dredge(mod)


## ----fig.height=3, fig.width=7-----------------------------------------------------------------------
Mom.isolation.plot <- ggplot(pat.outcrossed, 
       aes(x=log(mom.isolation), y=log(pollen.dist))) + 
       geom_point() + stat_smooth(method="lm", se=F) + 
       xlab("Log(mom isolation)") +
       ylab("Log(pollen flow distance)")

Flower.density.plot <- ggplot(pat.outcrossed, 
       aes(x=log(flower.density), 
       y=log(pollen.dist))) + geom_point() +
       stat_smooth(method="lm", se=F) + 
       xlab("Log(flower density)") +
       ylab("Log(pollen flow distance)")

cowplot::plot_grid(Mom.isolation.plot, Flower.density.plot, 
                   labels = c("A", "B"))


## ----merge_pat.sub-----------------------------------------------------------------------------------
offspring <- merge(offspring, pat.sub[,1:4], by.x=c("ID", "OffID"), 
                   by.y=c("MomID", "OffID"), all.x=T)
head(offspring)


## ----outside-----------------------------------------------------------------------------------------
num.out <- sapply(split(offspring$Fij, offspring$Population),
                  function(x) sum(is.na(x)))


## ----total_pollination-------------------------------------------------------------------------------
num.tot <- table(offspring$Population)


## ----data.frame--------------------------------------------------------------------------------------
# turn it into a dataframe:
pop.df <- data.frame(Population=names(num.out), 
    num.out=as.vector(num.out), num.tot=as.vector(num.tot))

# read in the population variable data:
pop.vars <- read.csv(system.file("extdata", 
  "pulsatilla_population.csv", package="LandGenCourse"))

# add the population variables to our outside pollination data:
pop.df <- merge(pop.df, pop.vars, by="Population")
pop.df


## ----glm---------------------------------------------------------------------------------------------
# specify the model
mod2 <- glm(cbind(num.out, num.tot-num.out) ~ forest.50 + forest.100 +
              forest.250 + forest.500 + forest.1000 + population.size,
              family = binomial(link = "logit"), data = pop.df, 
              na.action = "na.fail")


## ----scale.x-----------------------------------------------------------------------------------------
MuMIn::dredge(mod2,m.lim=c(0,1))


## ----plot_forest.250---------------------------------------------------------------------------------
forest.250.mod <- glm(cbind(num.out, num.tot-num.out) ~ forest.250,
                family=binomial(link="logit"), data=pop.df)

ggplot(pop.df, aes(x=forest.250, y=num.out/num.tot)) + geom_point() +
  geom_line(aes(x=forest.250, y=predict(forest.250.mod, type="response"))) +
  xlab("Proportion of forest at 250 m") +
  ylab("Proportion of immigrant pollen")


## ----message=FALSE, warning=TRUE, include=FALSE------------------------------------------------------
LandGenCourse::detachAllPackages()
hhwagner1/LandGenCourse documentation built on Feb. 17, 2024, 4:42 p.m.