script to run Rap1 simulation store output every ten iterations
library(GenomicLayers)
RAP1 (RAP1P?) to bind to regexp
# patternLength set to minimum length the pattern can match (CA) bf.RAP1.1 <- createBindingFactor.DNA_regexp("bf.RAP1.1", forRegExp="(C{1,3})A",revRegExp= "T(G{1,3})", mod.layers = "Sir3p_potential", mod.marks=1, stateWidth=20) # a second version to see if the reg-exp is too lax. bf.RAP1.2 <- createBindingFactor.DNA_regexp("bf.RAP1.2", forRegExp="(C{2,3})A",revRegExp= "T(G{2,3})", mod.layers = "Sir3p_potential", mod.marks=1, stateWidth=20) bf.RAP1.3 <- createBindingFactor.DNA_consensus("bf.RAP1.3", patternString="GGTGT", mod.layers = "Sir3p_potential", mod.marks=1, stateWidth=5) # this version to extend the state to 11 so that two hits with 6bp would be contiguous bf.RAP1.3C <- createBindingFactor.DNA_consensus("bf.RAP1.3C", patternString="GGTGT", mod.layers = "Sir3p_potential", mod.marks=1, stateWidth=5) bf.RAP1.4 <- createBindingFactor.DNA_regexp("bf.RAP1.4", forRegExp="GGTGT(.{0,3})GGTGT", revRegExp="ACACC(.{0,3})ACACC", patternLength = 10, mod.layers = "Sir3p_potential", mod.marks=1, stateWidth=20, ) bf.RAP1.4B <- createBindingFactor.DNA_regexp("bf.RAP1.4B", forRegExp="GGTGT(.{0,3})GGTGT", revRegExp="ACACC(.{0,3})ACACC", mod.layers = "Sir3p_potential", mod.marks=1, stateWidth=20, ) bf.RAP1.5 <- createBindingFactor.DNA_consensus("bf.RAP1.5", patternString="GGTGTGTGGGTGT", mod.layers = "Sir3p_potential", mod.marks=1, stateWidth=20, ) nucleosomeWidth <- 147
Sir3p binds to telomeres and is also spread cannot bind acetylated regions or H3K4me
# bf.Sir3p_M1 <- createBindingFactor.layer_region("bf.Sir3p_M1", type="layer_region", # patternLength = 5, patternString = "N", # profile.layers = c("Sir3p_potential"), # stateWidth=10, profile.marks = c(1), # mod.layers = "bound.Sir3p", mod.marks = 1) # bf.Sir3p_S <- createBindingFactor.layer_region("bf.Sir3p_S", type="layer_region", patternLength = 5, patternString = "N", profile.layers = c("Sir3p_potential"), stateWidth=5, profile.marks = c(1), mod.layers = "bound.Sir3p", mod.marks = 1)
need also something to spread Sir3p along the chromosome. want a binding factor that creates mods either up or downstream of binding site. Parameter 'offset.method' to accept function as argument
upDownFuncRnorm <- function(n, offset.mean, offset.sd) { y <- round(rnorm(n, mean=offset.mean, sd=offset.sd)) z <- sample(c(1, -1), length(y), replace=T) # random vector of 1,-1 to negate half the values return(round(y*z)) } # PROBLEM cannot spread into region that has H3K4me or H4K16ac bf.Sir3p.spread <- createBindingFactor.layer_region("bf.Sir3p.spread", type="layer_region", patternLength = 5, profile.layers = c("bound.Sir3p"), stateWidth=5, profile.marks = c(1), mod.layers = "bound.Sir3p", mod.marks = 1, offset = 100, offset.method=upDownFuncRnorm, offset.params=list(offset.mean=147, offset.sd=30))
Create the layerSet object to include the genome and all the layers we may need
Layers needed: -
RAP1 binding/recruitment - can act as recruiter for other repressive modifiers. bound.Sir3p
library(BSgenome.Scerevisiae.UCSC.sacCer3) genome <- BSgenome.Scerevisiae.UCSC.sacCer3 sequences_to_keep <- "chrIII" # telomere has many C{1,3}A repeats at both ends of the chromosome. # chrIII also contains the mating type loci HMR and HML genomeSub <- keepBSgenomeSequences(genome, sequences_to_keep) genomeSub # this should now still be a useable BSgenome object but with only one chromosome. # set up a LayerSet on the genome : a list with link to genome and GRanges objects to store position information # two additional layers "H3K4me","H4K16ac" are specified here but not used in these sims. layerSubGenome <- createLayerSet.BSgenome(genome=genomeSub, layer.names=c("Sir3p_potential", "bound.Sir3p", "H3K4me","H4K16ac"), n.layers=4)
You may want to test the binding of your factors against the genome.
(hits <- matchBindingFactor.BSgenome(layerSet = layerSubGenome,bindingFactor = bf.RAP1.1)) (hits <- matchBindingFactor.BSgenome(layerSet = layerSubGenome,bindingFactor = bf.RAP1.2)) (hits <- matchBindingFactor.BSgenome(layerSet = layerSubGenome,bindingFactor = bf.RAP1.3)) (hits <- matchBindingFactor.BSgenome(layerSet = layerSubGenome,bindingFactor = bf.RAP1.3C)) (hits <- matchBindingFactor.BSgenome(layerSet = layerSubGenome,bindingFactor = bf.RAP1.4)) (hits <- matchBindingFactor.BSgenome(layerSet = layerSubGenome,bindingFactor = bf.RAP1.4B)) (hits <- matchBindingFactor.BSgenome(layerSet = layerSubGenome,bindingFactor = bf.RAP1.5)) # and a check of the sequence at these positions getSeq(genomeSub, hits) gregexpr("GGTGT(.{0,3})GGTGT", getSeq(genomeSub)) gregexpr("ACACC(.{0,3})ACACC", getSeq(genomeSub)) # rev-comp of above gregexpr("ACACC(.{0,4})ACACC", getSeq(genomeSub)) ## max distance to be telomere specific gregexpr("GGTGT(.{1,3})GGTGT", getSeq(genomeSub)) vmatchPattern(pattern = "GGTGT", genomeSub)
# combine all bfs into a list to pass to runLayerBinding. Order DOES matter. bfSet.1 <- list(bf.RAP1.3=bf.RAP1.3, bf.Sir3p_S=bf.Sir3p_S, bf.Sir3p.spread=bf.Sir3p.spread ) # binding factors must be in a list and named. Easiest to use each BFs name. # specify abundances of bfs. abs.1 <- structure(c(50,20,1000), names=c(names(bfSet.1)))
iters <- 200 saveFreq <- 10 outFile <- "path_to_outFile" # take a copy of the subGenome object layerSubGenome.1 <- layerSubGenome # cache the sequence specific parts of profiles layerSubGenome.1 <- generateHitsCache(layerList=layerSubGenome.1, factorSet=bfSet.1[c('bf.RAP1.3' )], cache.layers="LAYER.0") for(i in 1:iters) { print(i) layerSubGenome.1 <- runLayerBinding.BSgenome(layerList=layerSubGenome.1, factorSet=bfSet.1, bf.abundances = abs.1, verbose=TRUE, collect.stats=TRUE) if((i %% saveFreq) == 0) { # export current model state. tempFileOut <- paste0(outFile, ".iter.", i, ".RData") print(paste("Saving to ", tempFileOut)) #save(layerSubGenome.1, file=tempFileOut) # use this to save model state periodically. plotLayers(layerSubGenome.1, layerNames=c("Sir3p_potential","bound.Sir3p"), chrom="chrIII", xlim=c(1,seqlengths(genomeSub)["chrIII"])) } }
The output of model 1 should be widespread binding across the whole chromosome.
# combine all bfs into a list to pass to runLayerBinding. Order DOES matter. bfSet.2 <- list(bf.RAP1.4=bf.RAP1.4, bf.Sir3p_S=bf.Sir3p_S, bf.Sir3p.spread=bf.Sir3p.spread ) # binding factors must be in a list and named. Easiest to use each BFs name. # specify abundances of bfs. abs.2 <- structure(c(50,20,1000), names=c(names(bfSet.2)))
Now repeat using model 2
iters <- 200 saveFreq <- 10 outFile <- "path_to_outFile" # take a copy of the subGenome object layerSubGenome.2 <- layerSubGenome # cache the sequence specific parts of profiles layerSubGenome.2 <- generateHitsCache(layerList=layerSubGenome.2, factorSet=bfSet.2[c('bf.RAP1.4' )], cache.layers="LAYER.0") for(i in 1:iters) { print(i) layerSubGenome.2 <- runLayerBinding.BSgenome(layerList=layerSubGenome.2, factorSet=bfSet.2, bf.abundances = abs.2, verbose=TRUE, collect.stats=TRUE) if((i %% saveFreq) == 0) { # export current model state. tempFileOut <- paste0(outFile, ".iter.", i, ".RData") print(paste("Saving to ", tempFileOut)) #save(layerSubGenome.2, file=tempFileOut) # use this to save model state periodically. plotLayers(layerSubGenome.2, layerNames=c("Sir3p_potential","bound.Sir3p"), chrom="chrIII", xlim=c(1,seqlengths(genomeSub)["chrIII"])) } }
Can examine the final state of the layers
layerSubGenome.2$layerSet$bound.Sir3p layerSubGenome.2$history
Now set up and run a whole genome model. We will use the nuclear chromosomes only and remove the mitochondrial chromosome/genome first.
sequences_to_keep <- setdiff(seqnames(genome), "chrM") genomeNuc <- keepBSgenomeSequences(genome, sequences_to_keep) genomeNuc # this should now still be a useable BSgenome object but with no mitochondrial chromosome. # set up a LayerSet on the genome : a list with link to genome and GRanges objects to store position information # two additional layers "H3K4me","H4K16ac" are specified here but not used in these sims. layerGenomeNuc <- createLayerSet.BSgenome(genome=genomeNuc, layer.names=c("Sir3p_potential", "bound.Sir3p", "H3K4me","H4K16ac"), n.layers=4)
Need to increase the abundances of the binding factors roughly in proportion. Helps to look at the seqlengths of chromosomes.
seqlengths(genomeNuc) seqlengths(genomeNuc)["chrIII"] / sum(seqlengths(genomeNuc)) # approx 1/40th of the genome abs.2 abs.wg <- abs.2 * 40 # increase abundances 40-fold
Now run the whole (nuclear) genome simulation.
iters <- 200 saveFreq <- 10 outFile <- "path_to_outFile" # take a copy of the subGenome object layerGenomeNuc.2 <- layerGenomeNuc # cache the sequence specific parts of profiles layerGenomeNuc.2 <- generateHitsCache(layerList=layerGenomeNuc.2, factorSet=bfSet.2[c('bf.RAP1.4' )], cache.layers="LAYER.0") for(i in 1:iters) { print(i) layerGenomeNuc.2 <- runLayerBinding.BSgenome(layerList=layerGenomeNuc.2, factorSet=bfSet.2, bf.abundances = abs.wg, verbose=TRUE, collect.stats=TRUE) # using abs.wg if((i %% saveFreq) == 0) { # export current model state. tempFileOut <- paste0(outFile, ".iter.", i, ".RData") print(paste("Saving to ", tempFileOut)) #save(layerGenomeNuc.2, file=tempFileOut) # use this to save model state periodically. plotLayers(layerGenomeNuc.2, layerNames=c("Sir3p_potential","bound.Sir3p"), chrom="chrIII", xlim=c(1,seqlengths(genomeNuc)["chrIII"])) } }
Might be worth comparing how the chrIII abundances scale up to the whole genome
layerSubGenome.2$layerSet$bound.Sir3p sum(width(layerSubGenome.2$layerSet$bound.Sir3p)) #layerSubGenome.2$history # now compare with whole genome sim layerGenomeNuc.2$layerSet$bound.Sir3p sum(width(layerGenomeNuc.2$layerSet$bound.Sir3p)) #layerGenomeNuc.2$history # calculate the coverage by chromosome using by() by(layerGenomeNuc.2$layerSet$bound.Sir3p, INDICES=seqnames(layerGenomeNuc.2$layerSet$bound.Sir3p) , FUN=function(x) sum(width(x)))
COMMANDS TO RUN FROM THE COMMAND LINE IN A SCRIPT - NOT RUN HERE
# Load the getopt library library(getopt) # Define command-line options spec = matrix(c( 'verbose', 'v', 2, "integer", 'help' , 'h', 0, "logical", 'out.file' , 'o', 2, "character", 'iterations', 'i', 2, "integer", 'saveFreq', 's', 2, "integer" ), byrow=TRUE, ncol=4) # Parse command-line options opt <- getopt(spec) # Set default values for options if they are not provided if ( is.null(opt$verbose)) { opt$verbose <- FALSE } # Output file outFile <- opt$out.file if (is.null(opt$out.file)) { outFile <- "outputfile" } iters <- opt$iterations if (is.null(opt$iterations)) { iters <- 100 } saveFreq <- opt$saveFreq if (is.null(opt$saveFreq)) { saveFreq <- iters } # Print the values for verification print(paste("Out file stem:", opt$out.file))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.