In mahagadorn/MISTAKE_FIX: Not A Trait DataBase

knitr::opts_chunk$set(echo = TRUE)

Data Download

Data were download from the github repository of mahagadorn. These data include year, sample size per year, and allele counts per year.

mothdata <- read.csv("https://raw.githubusercontent.com/mahagadorn/EvolutionaryGenomics/master/MothEvolution/data_medionigra.csv", sep=",", as.is=TRUE)

Data Simulations

time <- seq(1940, 1999, 1)  #Amy we had to add because the 51 years didn't account for the fact that year gaps were missing
model <- function(data, numb.sim, time){
  df <- matrix(NA, nrow = time, ncol = numb.sim)
  for(j in 1:numb.sim){
    allele.freq.seed <- data[1,3]/data[1,2] #initial allele frequency (to be replaced each iteration of loop)
    pop.sd <- sd(data[,2])
    pop.seed <- data[1, 2]
    pop.vec <- numeric(time)
    pop.vec[1] <- pop.seed
    for(i in 2:(time)){
      pop.vec[i] <- round(abs(rnorm(1, mean = pop.vec[i - 1], sd = pop.sd/2)), 0) #new pop size ROUNDED to a WHOLE NUMBER
    }
    for(i in 1:length(pop.vec)){
        df[i,j] <- allele.freq.seed #fills data frame
        allele.sim <- rbinom(n=1, pop.vec[i], allele.freq.seed) #Generate next gen's allele count
        if(pop.vec[i] == 0){
           allele.freq.seed <- 0
        }
        allele.freq.seed <- allele.sim/pop.vec[i] #replace allele frequency seed with next gen's number
    }
  }
  return(df)
}

sims <- model(mothdata, 1000, 60)

Erase after sim works

fill.matrix <- function(expr, nrow, ncol) {
  matrix(eval(expr, envir=list(x=nrow*ncol)), nrow=nrow, ncol=ncol)
}

sims <- fill.matrix(runif(20, 0, 1), nrow=10, ncol=20)

Erase if not needed

STDER <- function(x){
  std.error <- sd(x)/sqrt(length(x))
  return(std.error)
}

mean <- apply(sims, 1, mean)  #1 is mean
SE <- apply(sims, 1, STDER) #2 StdError===SD/sqrt(samplesize)
upper <- mean+SE
lower <- mean-SE
results <- cbind(time, mean, upper, lower)

Calculation of the total variance and change in allele frequency over time individually for all simulations.

sims <- fill.matrix(runif(5000, 0, 1), nrow=60, ncol=1000)

cumchangecol <- function(x){
  vec <- numeric(length(x)-1) 
    for(i in seq_along(vec)){
      vec[i] <- x[i+1] - x[i]
    }
    sumdiff <- sum(vec)
  return(sumdiff)
}

persim_alfreqchange <- apply(sims, 2, cumchangecol)   #this gives you allele frequency change over time PER simulation!!
var <- apply(sims, 2, var)
final <- cbind(persim_alfreqchange, var)

observed data

allelefreq_Ne <- function(x, y){
  Ne <- round((x[,2])/y, digits=0)   #taken as a proportion of y
  ObsAfreqchange_Ne <- x[,3]/Ne
  return(cbind(mothdata, Ne,ObsAfreqchange_Ne))
}

modified.moth <- allelefreq_Ne(mothdata,5)
obs.changeAl <- cumchangecol(modified.moth[,5])
obs.var <- var(modified.moth[,5])

ObservedStats <- c(obs.changeAl, obs.var)

Density Map

plot<-plot(final, type='p', col='blue', pch=20, ylim = c(0,.2), xlab="Total Allele Frequency Change (per simulation)", ylab="Variance of Allele Frequencies (per simulation)")
  points(ObservedStats[1],ObservedStats[2], col='red', pch=4)

Looking at relationships between mean simulation data, quantiles, and where the observed data map out.

sim.mean <- apply(sims, 1, mean)
quantiles <- apply(sims, 1, quantile)
pcent25 <- quantiles[2,]
pcent75 <- quantiles[4,]

transparent_col <- function(color, transparency = 50) {  #color=name of color; name=name of color
#Get standard RGB values for named color
  rgb.val <- col2rgb(color)
#Make new color using input color as base and alpha set by transparency
  transparent_col <- rgb(rgb.val[1], rgb.val[2], rgb.val[3],
               max = 255,
               alpha = (100-transparency)*255/100)
}

mycol <- transparent_col("gray91", 10)

Extra.plot <- plot(sim.mean~time, ylim=c(0,1), type='l', col='blue', xlab='Years', ylab='Frequency of Medionegra Allele')
                lines(pcent25~time, col='lightgray')
                lines(pcent75~time, col='lightgray')
                polygon(c(time,rev(time)), c(pcent25,rev(pcent75)), col=mycol, border=NA)  #used to fill the area you want to be transparent
                lines(modified.moth[,5]~modified.moth[,1], type='b', col='red')
                lines(sim.mean~time, type='l', col='blue')