knitr::opts_chunk$set(echo = TRUE)

Simulating a 16-way MAGIC population with magic sim

The following uses magicsim to simulate 400 double haploid maize lines that were derived from a 16-way Multi-parent Advanced Generation InterCross (MAGIC) funnel crossing scheme.

| A x B | C x D | E x F | G x H | I x J | K x L | M x N | O x P |

| | AB x CD | EF x GH | IJ x KL | MN x OP | | |

| | | ABCD x EFGH | IJKL x MNOP | | | | |

| | | ABCDEFGH x IJKLMNOP | | | | | |

A population of 2000 MAGIC lines are created, followed by 3 generations of outcrossing (synthetic). Then 400 of these MAGIC S3 lines are made into double haploids (entirely homozygous).

Installation

knitr::opts_chunk$set(echo = TRUE, warning=FALSE)
### Script to simulate 400 Double Haploid MAGIC lines

devtools::install_github('sarahodell/magicsim',force = T)

library('magicsim')
library("data.table")
library("tidyverse")
library("ggplot2")

options(scipen=999)
set.seed(100)

Initializing Founder lines

magicsim simulates recombination evens using a genetic map of the organism. In this case, we are using a genetic map of maize generated by Ogut et al. (2015).

The cross order specifies which founder lines where crossed in the first stage of the funnel scheme. (i.e. here EP1 and A632 were crossed first, as were OH43 and B73).

We will do this for maize chromosome 1 only, although this can be done for any number of chromosomes.

gmap=fread('ogutmap_v4_ordered.csv',data.table=F)

founders=c("B73_inra","A632_usa","CO255_inra","FV252_inra",
           "OH43_inra","A654_inra","FV2_inra","C103_inra",
           "EP1_inra","D105_inra","W117_inra","B96","DK63",
           "F492","ND245","VA85")
cross_order=c(4,2,14,15,3,8,10,16,1,9,5,6,12,7,11,13)
cross_info=founders[cross_order]

### Simulate chromsome 10
c=10


founder_pop=magicsim::pop_init(n=16,c,donors=cross_info,gmap)

Create the first round of F1s

Using the crossing order above and the simulated founder chromosomes, we can create F1s from the first round of crossing of the MAGIC crossing scheme.

### Create F1s from the first round of crossing
#Create an empty Pop object
f1s=new("Pop",nIndv=8,indvlist=vector("list",length=8))

#Make F1s for each of the first crosses and add to the Pop object
count=1
for(i in seq(1,16,2)){
  f1s@indvlist[[count]]=make_f1(founder_pop[i],founder_pop[i+1],gmap,chroms=c)
  count=count+1
}

Building a MAGIC Population.

Create synthetic population of 1000 lines from MAGIC founder funnel

magic_pop=make_magic_pop(start_pop=f1s,c=10,g_map=gmap,popsize=2000)

[1] 2 [1] 237073571 [1] "4" [1] "No breakpoints in donor2 greater than p" [1] 237073571 [1] "donor1" An object of class "Chrom" Slot "chr": [1] 2

Slot "breakpoints": [1] 24342415 45010316 244427044

Slot "donors": [1] "ND245" "F492" "ND245"

Slot "xo_no": numeric(0)

[1] "donor2" An object of class "Chrom" Slot "chr": [1] 2

Slot "breakpoints": [1] 24342415 244427044

Slot "donors": [1] "ND245" "F492"

Slot "xo_no": numeric(0)

donor1=new("Chrom",chr=2,breakpoints=c(24342415,45010316,244427044),donors=c("ND245","F492","ND245"),xo_no=numeric(0))

donor2=new("Chrom",chr=2,breakpoints=c(24342415,244427044),donors=c("ND245","F492"),xo_no=numeric(0))

p=237073571



print(p)
# If p is equal to a pre-existing breakpoint, shift p up 100 base pairs
if((p < donor1@breakpoints[1]) & (p < donor2@breakpoints[1])){
  print("1")
  recomb_pos=c(p,donor2@breakpoints)
  recomb_donor=c(donor1@donors[1],donor2@donors)
}else if((p < donor1@breakpoints[1]) & (p > donor2@breakpoints[1])){
  print("2")
  #If the new break is less than previous breakpoints in donor1
  #but not in donor2
  d2_upper=donor2@breakpoints[donor2@breakpoints-p>0]
  d2_index=which(min(d2_upper)==donor2@breakpoints)
  # New breakpoints are p and all breakpoints from donor2 greater than p
  recomb_pos=c(p,donor2@breakpoints[d2_index:length(donor2@breakpoints)])
  recomb_donor=c(donor1@donors[1],donor2@donors[d2_index:length(donor2@donors)])
}else if((p > donor1@breakpoints[1]) & (p < donor2@breakpoints[1])){
  print("3")
  #If the new break is less than previous breakpoints in donor2
  #but not in donor1
  d1_lower=donor1@breakpoints[p-donor1@breakpoints>0]
  d2_upper=donor2@breakpoints[donor2@breakpoints-p>0]
  d2_index=which(min(d2_upper)==donor2@breakpoints)
  d1_index=which(max(d1_lower)==donor1@breakpoints)
  recomb_pos=c(donor1@breakpoints[1:d1_index],p,donor2@breakpoints[d2_index:length(donor2@breakpoints)])
  d1_index=d1_index+1
  recomb_donor=c(donor1@donors[1:d1_index],donor2@donors[d2_index:length(donor2@donors)])
}else{
  print("4")
  # Or both have breakpoints before p
  d1_lower=donor1@breakpoints[p-donor1@breakpoints>0]
  d2_upper=donor2@breakpoints[donor2@breakpoints-p>0]
  if(length(d2_upper)==0){
    #print("No breakpoints in donor2 greater than p")
    #print(p)
    #print("donor1")
    #print(donor1)
    #print("donor2")
    #print(donor2)
    browser()
  }else if(length(d1_lower)==0){
    #print("No breakpoints in donor1 less than p")
    #print(p)
    #print("donor1")
    #print(donor1)
    #print("donor2")
    #print(donor2)
    browser()
  }else{
    d2_index=which(min(d2_upper)==donor2@breakpoints)
    d1_index=which(max(d1_lower)==donor1@breakpoints)
    recomb_pos=c(donor1@breakpoints[1:d1_index],p,donor2@breakpoints[d2_index:length(donor2@breakpoints)])
    d1_index=d1_index+1
    recomb_donor=c(donor1@donors[1:d1_index],donor2@donors[d2_index:length(donor2@donors)])
  }
}

donor1=new("Chrom",chr=c,breakpoints=recomb_pos,donors=recomb_donor,xo_no=length())

Look at number of recombinations in magic f4 lines

#what is the distribution of xo_no in the population?
total_xo=c()
for(i in seq(1,1000)){total_xo=c(total_xo,magic_pop[i][1]@h1@xo_no)}

crossovers=data.frame(ind=seq(1,1000),xo_no=total_xo)
ggplot(crossovers,aes(x=xo_no)) + geom_histogram(bins=9,fill="darkgreen",color="black") + theme_classic() +
  ggtitle("Distribution of Crossovers in Simulation MAGIC Population") + xlab("Crossover Number") + 
  ylab("Frequency") + geom_vline(xintercept=mean(crossovers$xo_no))

What do they look like? Convert the Pop object to a breaktable to visualize crossovers. Here is one for just 10 of the lines. Can specify heterozygous or homozygous

breaktable=make_pop_breaktable(magic_pop,n=10,c=1,het=T)
head(breaktable)

Visualization

Plotting some of the lines can be useful in seeing how representative the simulated lines are.

plot_Pop(breaktable,n=10,c=1,het=T)
# Plotting can be done as heterozygous or homozygous

Simulate Outcrossing

Random outcrossing within the synthetic population of 1000 individuals for 3 generations

synth_pop=outcross(magic_pop,3,10,gmap)

Look at number of recombinations in synthetic 3 lines

#what is the distribution of xo_no in the population?
total_xo=c()
for(i in seq(1,1000)){total_xo=c(total_xo,synth_pop[i][1]@h1@xo_no)}

crossovers=data.frame(ind=seq(1,1000),xo_no=total_xo)
ggplot(crossovers,aes(x=xo_no)) + geom_histogram(bins=9,fill="darkgreen",color="black") + theme_classic() +
  ggtitle("Distribution of Crossovers in Simulation Synth3 Population") + xlab("Crossover Number") + 
  ylab("Frequency") + geom_vline(xintercept=mean(crossovers$xo_no))

Get Doubled Haploids

Randomly select 400 lines from the synthetic population and then choose one of the chromosomes at random to duplicate

#randomly choose 400 out of the 1000
dh_pop=make_dh_pop(synth_pop,n=400,c=1,gmap)

Assessing the simulated lines

Looking at number of crossovers

no=sapply(seq(1,400),function(x) length(dh_pop[x][1]@h1@donors))
#what is the distribution of xo_no in the population?
total_xo=c()
for(i in seq(1,400)){total_xo=c(total_xo,dh_pop[i][1]@h1@xo_no)}

crossovers=data.frame(ind=seq(1,400),xo_no=total_xo)
ggplot(crossovers,aes(x=xo_no)) + geom_histogram(bins=9,fill="darkgreen",color="black") + theme_classic() +
  ggtitle("Distribution of Crossovers in Simulation DH Population") + xlab("Crossover Number") + 
  ylab("Frequency") + geom_vline(xintercept=mean(crossovers$xo_no))

Convert the dh_pop object into breakpoints dataframe

#This should be its own function
breaktable=make_pop_breaktable(dh_pop,n=10,c=1,het=F)
head(breaktable)

Visualization

Plotting some of the lines can be useful in seeing how representative the simulated lines are.

plot_Pop(breaktable = breaktable,n=10,c=1,het=F)


sarahodell/magicsim documentation built on Sept. 25, 2023, 8:12 a.m.