R.v.maj <- as.numeric(R.version$major) R.v.min.1 <- as.numeric(strsplit(R.version$minor, "\\.")[[1]][1]) if(R.v.maj < 2 || (R.v.maj == 2 && R.v.min.1 < 15)) stop("requires R >= 2.15", call.=FALSE) suppressPackageStartupMessages(library(knitr)) opts_chunk$set(echo=TRUE, warning=TRUE, message=TRUE, cache=FALSE, fig.align="center")
This document aims at presenting how to build genetic maps for any cross between two outbred parents using a pseudo-tescross strategy. The seminal article is from Grattapaglia and Sederoff (1994). First, a map is built for each parent from genotypes coded as in a backcross. Second, both maps are used to deduce parental phases. Third, a consensus map is built for both parents from genotypes coded as in a F2 intercross.
This document requires external packages to be available:
suppressPackageStartupMessages(library(scrm)) # on the CRAN suppressPackageStartupMessages(library(seriation)) # on the CRAN suppressPackageStartupMessages(library(qtl)) # on the CRAN stopifnot(file.exists(Sys.which("carthagene"))) # http://www7.inra.fr/mia/T/CarthaGene/ suppressPackageStartupMessages(library(ASMap)) # on the CRAN suppressPackageStartupMessages(library(rutilstimflutre)) # on GitHub stopifnot(compareVersion("0.144.0", as.character(packageVersion("rutilstimflutre"))) != 1)
This R chunk is used to assess how much time it takes to execute the R code in this document until the end:
t0 <- proc.time()
Set the seed:
set.seed(1859)
In two populations:
nb.genos <- 5 * 10^2 nb.chrs <- 2 chr.len.phy <- 10^5 # chromosome physical length in base pairs mu <- 10^(-8) # neutral mutation rate in events / base / generation u <- mu * chr.len.phy # in events / chrom / gen chr.len.gen <- 10^(-1) # chromosome genetic length in Morgans c.r <- chr.len.gen / chr.len.phy # recomb rate in events / base / gen r <- c.r * chr.len.phy # in events / chrom / gen m <- 10^(-4) # fraction of a pop replaced from other pops each gen Ne <- 10^4 # effective population size theta <- 4 * Ne * u # scaled neutral mutation rate in events / chrom rho <- 4 * Ne * r # scaled recomb rate in events / chrom M <- 4 * Ne * m # scaled migration rate in events genomes <- simulCoalescent(nb.inds=nb.genos, nb.reps=nb.chrs, pop.mut.rate=theta, pop.recomb.rate=rho, chrom.len=chr.len.phy, nb.pops=2, mig.rate=M, get.alleles=TRUE) table(inds.per.pop <- kmeans(genomes$genos, 2)$cluster) afs.pop <- estimSnpAf(X=genomes$genos) mafs.pop <- estimSnpMaf(afs=afs.pop) A.vr.pop <- estimGenRel(X=genomes$genos, afs=afs.pop, method="vanraden1") mrk2chr <- setNames(genomes$snp.coords$chr, rownames(genomes$snp.coords))
plotHistAllelFreq(afs=afs.pop, main="Allele frequencies") plotHistMinAllelFreq(mafs=mafs.pop, main="Minor allele frequencies")
imageWithScale(A.vr.pop, main="Additive genetic relationships") summary(diag(A.vr.pop)) hist(diag(A.vr.pop), col="grey", border="white") summary(A.vr.pop[upper.tri(A.vr.pop)]) hist(A.vr.pop[upper.tri(A.vr.pop)], col="grey", border="white")
if(all(c("GenomicRanges", "S4Vectors", "IRanges", "LDcorSV") %in% installed.packages()[,"Package"])){ chr <- "chr1" min.maf <- 0.15 snps.tokeep <- rownames(genomes$snp.coords[genomes$snp.coords$chr == chr & mafs.pop >= min.maf,]) print(length(snps.tokeep)) ld <- estimLd(X=genomes$genos[,snps.tokeep], snp.coords=genomes$snp.coords[snps.tokeep,], use.ldcorsv=FALSE) ## st <- system.time( ## ld <- estimLd(X=genomes$genos[,snps.tokeep], ## snp.coords=genomes$snp.coords[snps.tokeep,], ## K=A.vr.pop, ## use.ldcorsv=TRUE)) ## print(st) print(nrow(ld)) print(summary(ld$cor2)) snp.dist <- distSnpPairs(snp.pairs=ld[, c("loc1","loc2")], snp.coords=genomes$snp.coords[snps.tokeep,]) plotLd(snp.dist, sqrt(ld$cor2), estim="r", ## xlim=c(0, 4*10^4), main=paste0(length(snps.tokeep), " SNPs with MAF >= ", min.maf, " on ", chr), use.density=TRUE, span=1/20, sample.size=2*nb.genos, Ne=Ne, c=c.r, add.ohta.kimura=TRUE) }
Choose two individuals as parents (one per population):
(idx.parents <- c(sample(x=names(inds.per.pop)[inds.per.pop == 1], size=1), sample(x=names(inds.per.pop)[inds.per.pop == 2], size=1))) A.vr.pop[idx.parents, idx.parents] genos.parents <- genomes$genos[idx.parents,] names.parents <- rownames(genos.parents) haplos.parents <- getHaplosInds(haplos=genomes$haplos, ind.names=names.parents)
Cross them several times to make offsprings:
nb.offs <- 200 names.offs <- paste0("off-", sprintf(fmt=paste0("%0", floor(log10(nb.offs))+1, "i"), 1:nb.offs)) head(crosses.off <- data.frame(parent1=rep(names.parents[1], nb.offs), parent2=rep(names.parents[2], nb.offs), child=names.offs, stringsAsFactors=FALSE)) loc.crovers.off <- drawLocCrossovers(crosses=crosses.off, nb.snps=sapply(haplos.parents, ncol), simplistic=FALSE, verbose=1) haplos.offs <- makeCrosses(haplos=haplos.parents, crosses=crosses.off, loc.crossovers=loc.crovers.off, howto.start.haplo=0) genos.offs <- segSites2allDoses(seg.sites=haplos.offs, ind.ids=getIndNamesFromHaplos(haplos.offs), snp.ids=rownames(genomes$snp.coords)) afs.offs <- estimSnpAf(X=genos.offs) mafs.offs <- estimSnpMaf(afs=afs.offs) dim(genos.doses <- rbind(genos.parents, genos.offs)) genos.classes <- genoDoses2genoClasses(X=genos.doses, alleles=genomes$alleles) genos.jm <- genoClasses2JoinMap(x=genos.classes) genos.jm[1:3, 1:14] tests.seg <- filterSegreg(genos.jm[,-c(1:8)], return.counts=TRUE)
The JoinMap format will be used to encode genotypes from both parents and offsprings as it allows to specify segregation types and phases, and can be natively read by the R qtl package.
Everything is explained in detail in the JoinMap manual, available online. Below is a brief summary.
At each locus, parental alleles are labelled as A and B for the first parent, and C and D for the second parent. Interesting locus must segregate in the progeny, meaning that A, B, C and D shouldn't all correspond to the same allele. Several segregation types exist, depending on the parental genotypes and their alleles:
$A \ne B \ne C \ne D$: segregation type named <abxcd>
by JoinMap (version $\ge$ 3, if necessary the updateJoinMap
function can be used);
$A = C \ne B \ne D$: segregation type <efxeg>
;
$A = C \ne B = D$: segregation type <hkxhk>
;
$A = C = D \ne B$: segregation type <lmxll>
;
$A = B = C \ne D$: segregation type <nnxnp>
.
Moreover, when considering several locus belonging to the same linkage group, their alleles can belong to the same physical chromosome, hence allowing to define linkage phases (haplotypes). In a given parent, two alleles at different locus are said to be "in coupling" when they came from the same grand-parent. Otherwise, they are said to be "in repulsion".
For instance, if a locus $l_1$ is of segregation type <hkxhk>
and phase type {00}
and another locus $l_2$ is of segregation type <abxcd>
and phase type {01}
, this means that, in the first parent, the h
-allele of $l_1$ and the a
-allele of $l_2$ are in coupling (and thus also their k
- and b
-alleles), and that, in the second parent, the h
-allele of $l_1$ is in repulsion with the c
-allele of $l_2$ (and thus in coupling with the d
-allele of $l_2$).
ped <- data.frame(ind=c(names.parents, crosses.off$child), mother=c(rep(NA, length(names.parents)), crosses.off$parent1), father=c(rep(NA, length(names.parents)), crosses.off$parent2), gen=c(rep(0, length(names.parents)), rep(1, nrow(crosses.off))), stringsAsFactors=FALSE) ped.tmp <- rbind(ped[1:5,], c(ind="off-...", ped[5, -1]), c(ind="off-....", ped[5, -1]), ped[nrow(ped),]) plotPedigree(inds=ped.tmp$ind, mothers=ped.tmp$mother, fathers=ped.tmp$father, generations=ped.tmp$gen, main="Pedigree of the controlled cross")
plotHistAllelFreq(afs=afs.offs, main="Allele frequencies among offsprings") plotHistMinAllelFreq(mafs=mafs.offs, main="Minor allele frequencies among offsprings")
A.vr.cross <- estimGenRel(X=genos.doses, afs=afs.pop, method="vanraden1") A.t.cross <- estimGenRel(X=genos.doses, afs=afs.pop, method="toro2011_eq10") cor(c(A.vr.cross), c(A.t.cross)) imageWithScale(A.vr.cross, main="Additive genetic relationships of crosses") imageWithScale(A.vr.cross[1:10, 1:10], main="Additive genetic relationships of crosses (subset)", idx.rownames=1:10, idx.colnames=1:10) summary(diag(A.vr.cross)) summary(A.vr.cross[upper.tri(A.vr.cross)]) summary(A.vr.cross[names.parents[1], grep("off", colnames(A.vr.cross))]) summary(A.vr.cross[names.parents[2], grep("off", colnames(A.vr.cross))]) summary(A.t.cross[names.parents[1], grep("off", colnames(A.t.cross))]) summary(A.t.cross[names.parents[2], grep("off", colnames(A.t.cross))])
Under HWE in a single population, the additive genetic relationships between all parent-child pairs should be centered around 0.5, corresponding to a coancestry coefficient of 1/4.
if(all(c("GenomicRanges", "S4Vectors", "IRanges", "LDcorSV") %in% installed.packages()[,"Package"])){ chr <- "chr1" min.maf <- 0.1 max.maf <- 0.49 snps.tokeep <- rownames(genomes$snp.coords[genomes$snp.coords$chr == chr & mafs.offs >= min.maf & mafs.offs <= max.maf,]) print(length(snps.tokeep)) ld <- estimLd(X=genos.offs[,snps.tokeep], snp.coords=genomes$snp.coords[snps.tokeep,], use.ldcorsv=FALSE) print(nrow(ld)) print(summary(ld$cor2)) snp.dist <- distSnpPairs(snp.pairs=ld[, c("loc1","loc2")], snp.coords=genomes$snp.coords[snps.tokeep,]) plotLd(snp.dist, sqrt(ld$cor2), estim="r", main=paste0(length(snps.tokeep), " SNPs with MAF >= ", min.maf, " on ", chr), use.density=TRUE) }
This section is partly inspired by the document "Genetic map construction with R/qtl" by Karl Broman (2012).
genos.qtl <- joinMap2backcross(x=genos.jm[, -c(1:8)], alias.hom=1, alias.het=2, alias.dup=0, alias.miss=NA, parent.names=colnames(genos.jm)[1:2]) sapply(genos.qtl, dim) table(genomes$snp.coords[rownames(genos.qtl[[1]]), "chr"]) table(genomes$snp.coords[rownames(genos.qtl[[2]]), "chr"])
Set up the input object:
cross.par1 <- setupQtlCrossObject(gendat=t(genos.qtl[[1]]), cross.type="bc") summary(cross.par1)
Look at missing data:
plotMissing(cross.par1) par(mfrow=c(1,2), las=1) plot(ntyped(cross.par1), ylab="nb of typed markers", xlab="genotypes") plot(ntyped(cross.par1, "mar"), ylab="nb of typed genotypes", xlab="markers")
Identify duplicate individuals:
cg <- comparegeno(cross.par1) hist(cg[lower.tri(cg)], breaks=seq(0, 1, len=101), xlab="nb of matching marker between genotypes", main="") rug(cg[lower.tri(cg)]) wh <- which(cg > 0.9, arr=TRUE) dim(wh <- wh[wh[,1] < wh[,2],]) head(wh) g <- pull.geno(cross.par1) table(g[11,], g[37,])
Identify duplicate markers:
length(dup <- findDupMarkers(cross.par1, exact.only=FALSE)) head(dup)
Look for markers with distorted segregation patterns:
gt <- geno.table(cross.par1) gt[gt$P.value < 0.05/totmar(cross.par1),]
Define the linkage groups (two markers will be placed in the same linkage groups if they have estimated recombination fraction <= max.rf
and LOD score >= min.lod
):
cross.par1 <- est.rf(cross.par1) # see also markerlrt() checkAlleles(cross.par1, threshold=5) # to check markers with switched alleles rf <- pull.rf(cross.par1) lod <- pull.rf(cross.par1, what="lod") plot(as.numeric(rf), as.numeric(lod), xlab="recombination fraction", ylab="LOD score") ## different curves due to different amount of missing data btw marker pairs ## see the CartaGene section below where this plot is reproduced with colors lg <- formLinkageGroups(cross.par1, max.rf=0.2, min.lod=1.5) table(lg[,"LG"]) tapply(rownames(lg), factor(lg$LG), function(locus){ table(genomes$snp.coords[gsub("_m", "", locus), "chr"]) }, simplify=FALSE) cross.par1 <- formLinkageGroups(cross.par1, max.rf=0.2, min.lod=1.5, reorgMarkers=TRUE) summary(cross.par1) plotRF(cross.par1) # upper-right=RF lower-right=LOD rf <- pull.rf(cross.par1) lod <- pull.rf(cross.par1, what="lod") mrk <- markernames(cross.par1, chr=1)[3] # look at 3rd marker from lg 1 par(mfrow=c(2,1)) plot(rf, mrk, bandcol="gray70", ylim=c(0,1)) abline(h=0.5, lty=2) plot(lod, mrk, bandcol="gray70")
Determine the marker order and inter-marker distances for the linkage groups corresponding to the two distinct chromosomes:
system.time( map.par1 <- est.map(cross.par1, chr=c(1,3), map.function="kosambi", verbose=TRUE)) summaryMap(map.par1) plotMap(map.par1, show.marker.names=TRUE) cross.par1 <- replace.map(cross.par1, map.par1) plotRF(cross.par1, chr=c(1,3)) plot(countXO(cross.par1), ylab="Number of crossovers") xoloc.par1.lg1 <- locateXO(cross.par1, chr=1, full.info=TRUE) summary(sapply(xoloc.par1.lg1, function(x){ if(length(x) == 0){ 0 } else nrow(x) }))
Save the whole map:
map.par1.qtl <- pull.map(cross.par1, chr=c(1,3), as.table=TRUE) map.par1.qtl <- data.frame(linkage.group=map.par1.qtl$chr, locus=rownames(map.par1.qtl), genetic.distance=map.par1.qtl$pos) map.par1.qtl$linkage.group <- as.character(map.par1.qtl$linkage.group) map.par1.qtl$linkage.group[map.par1.qtl$linkage.group == "1"] <- "chr2" map.par1.qtl$linkage.group[map.par1.qtl$linkage.group == "3"] <- "chr1" tapply(map.par1.qtl$genetic.distance, factor(map.par1.qtl$linkage.group), max) file.map.par1.qtl <- paste0("bestmap_qtl_parent1-", colnames(genos.jm)[1], ".txt") write.table(x=map.par1.qtl, file=file.map.par1.qtl, quote=FALSE, sep="\t", row.names=FALSE)
Clean:
file.remove(file.map.par1.qtl)
cross.par2 <- setupQtlCrossObject(gendat=t(genos.qtl[[2]]), cross.type="bc") summary(cross.par2) cross.par2 <- est.rf(cross.par2) lg <- formLinkageGroups(cross.par2, max.rf=0.15, min.lod=1.5) table(lg[,"LG"]) tapply(rownames(lg), factor(lg$LG), function(locus){ table(genomes$snp.coords[gsub("_m", "", locus), "chr"]) }, simplify=FALSE) cross.par2 <- formLinkageGroups(cross.par2, max.rf=0.15, min.lod=1.5, reorgMarkers=TRUE) summary(cross.par2) system.time( map.par2 <- est.map(cross.par2, chr=c(1,3), map.function="kosambi", verbose=TRUE)) summaryMap(map.par2) cross.par2 <- replace.map(cross.par2, map.par2) xoloc.par2.lg1 <- locateXO(cross.par2, chr=1, full.info=TRUE) summary(sapply(xoloc.par2.lg1, function(x){ if(length(x) == 0){ 0 } else nrow(x) }))
map.par2.qtl <- pull.map(cross.par2, chr=c(1,3), as.table=TRUE) map.par2.qtl <- data.frame(linkage.group=map.par2.qtl$chr, locus=rownames(map.par2.qtl), genetic.distance=map.par2.qtl$pos) map.par2.qtl$linkage.group <- as.character(map.par2.qtl$linkage.group) map.par2.qtl$linkage.group[map.par2.qtl$linkage.group == "1"] <- "chr2" map.par2.qtl$linkage.group[map.par2.qtl$linkage.group == "3"] <- "chr1" tapply(map.par2.qtl$genetic.distance, factor(map.par2.qtl$linkage.group), max) file.map.par2.qtl <- paste0("bestmap_qtl_parent2-", colnames(genos.jm)[2], ".txt") write.table(x=map.par2.qtl, file=file.map.par2.qtl, quote=FALSE, sep="\t", row.names=FALSE)
Clean:
file.remove(file.map.par2.qtl)
genos.cg <- joinMap2backcross(x=genos.jm[, -c(1:8)], alias.hom="A", alias.het="H", alias.dup="Z", alias.miss="-", parent.names=colnames(genos.jm)[1:2]) sapply(genos.cg, dim) table(genomes$snp.coords[rownames(genos.cg[[1]]), "chr"]) table(genomes$snp.coords[rownames(genos.cg[[2]]), "chr"])
Load the data set:
file.par1 <- paste0("genos_carthagene_parent1-", names(genos.cg)[1], ".txt") writeCartagene(genos.cg[[1]], file.par1) cg <- openCarthagene(file.par1, "par1") (out <- runCarthagene(cg, "cgversion")) (out <- runCarthagene(cg, "dsinfo")) out.mrkinfo <- runCarthagene(cg, "mrkinfo") dim(mrk.info <- parseCgMrkinfo(out.mrkinfo)) head(mrk.info) (out <- runCarthagene(cg, "heapsizeget"))
Look at the pairwise matrices of recombination fraction, LOD and genetic distances:
out.2rf <- runCarthagene(cg, "mrkfr2p") out.2lod <- runCarthagene(cg, "mrklod2p") out.2dist <- runCarthagene(cg, "mrkdist2p k") pwrf <- parseCgPwMatrix(out.2rf, "mrkfr2p", mrk.info) pwlod <- parseCgPwMatrix(out.2lod, "mrklod2p", mrk.info) pwdist <- parseCgPwMatrix(out.2dist, "mrkdist2p", mrk.info) plot(pwrf[upper.tri(pwrf)], pwlod[upper.tri(pwlod)], xlab="recombination fraction", ylab="LOD score") plot(pwrf[upper.tri(pwrf)], pwdist[upper.tri(pwdist)], xlab="recombination fraction", ylab="genetic distance (Kosambi)") pimage(pwrf, main="pairwise recombination fractions") ord.rf <- seriate(pwrf) pimage(pwrf, ord.rf, main="pairwise recombination fractions")
Show the cause between the different LOD curves by coloring the plot of recombination fractions versus LOD scores:
pws <- rbind(rf=pwrf[upper.tri(pwrf)], lod=pwlod[upper.tri(pwlod)]) names.per.col <- matrix(rownames(pwrf), nrow=nrow(pwrf), ncol=ncol(pwrf), byrow=FALSE) names.per.row <- matrix(rownames(pwrf), nrow=nrow(pwrf), ncol=ncol(pwrf), byrow=TRUE) npc.up <- names.per.col[upper.tri(names.per.col)] npr.up <- names.per.row[upper.tri(names.per.row)] colnames(pws) <- paste0(npc.up, "-", npr.up) ## recompute rec frac and LOD for a subset of SNP pairs idx <- sample.int(n=ncol(pws), size=1000) pairs <- as.data.frame(cbind(colnames(pws)[idx], do.call(rbind, strsplit(colnames(pws)[idx], "-"))), stringsAsFactors=FALSE) colnames(pairs) <- c("pair", "mrk1", "mrk2") pairs$nb.offs <- NA pairs$nb.rec <- NA pairs[,c("nb.offs", "nb.rec")] <- do.call(rbind, lapply(1:nrow(pairs), function(i){ tmp <- genos.cg[[1]][unlist(pairs[i,c("mrk1","mrk2")]),] tmp.noNA <- tmp[, apply(tmp, 2, function(x){! any(x == "-")})] if(all(dim(table(tmp.noNA[1,], tmp.noNA[2,])) == c(2,2))){ c(ncol(tmp.noNA), table(tmp.noNA[1,], tmp.noNA[2,])[1,2] + table(tmp.noNA[1,], tmp.noNA[2,])[2,1]) } else c(ncol(tmp.noNA), NA) })) pairs$rec.frac <- pairs[,"nb.rec"] / pairs[,"nb.offs"] pairs[,"lod"] <- lodLinkage(pairs$rec.frac, pairs$nb.offs - pairs$nb.rec, pairs$nb.rec) plot(pairs$rec.frac, pairs$lod, col=ifelse(pairs$nb.offs == 200, "black", "red")) legend("top", legend=c("n = 200", "n < 200"), col=c("black", "red"), pch=1, bty="n") table(pairs$nb.offs) ## different LOD curves depending on the amount of NA's in marker pairs ## compare these recomputed LODs with CarthaGene's LODs plot(pairs$lod, pws["lod", pairs$pair], xlab="recomputed LOD scores", ylab="CarthaGene's LOD scores", las=1) pairs.above <- pairs$pair[pairs$rec.frac > 0.5] summary(pws["lod", pairs.above[! is.na(pairs.above)]]) ## differences seem to come from marker pairs with recomb frac above 0.5 ## pairs of markers with no recomb btw them but a very small CarthaGene's LOD idx <- which(apply(pws, 2, function(x){ all(x[1] < 0.05, x[2] < 5) })) pws[, idx] tmp <- as.data.frame(cbind(colnames(pws)[idx], do.call(rbind, strsplit(colnames(pws)[idx], "-"))), stringsAsFactors=FALSE) colnames(tmp) <- c("pair", "mrk1", "mrk2") tmp$nb.offs <- NA tmp$nb.rec <- NA tmp[, c("nb.offs", "nb.rec")] <- do.call(rbind, lapply(1:nrow(tmp), function(i){ tmp.NA <- genos.cg[[1]][unlist(tmp[i,c("mrk1","mrk2")]),] tmp.noNA <- tmp.NA[, apply(tmp.NA, 2, function(x){! any(x == "-")}), drop=FALSE] if(all(dim(table(tmp.noNA[1,], tmp.noNA[2,])) == c(2,2))){ c(ncol(tmp.noNA), table(tmp.noNA[1,], tmp.noNA[2,])[1,2] + table(tmp.noNA[1,], tmp.noNA[2,])[2,1]) } else c(ncol(tmp.noNA), NA) })) tmp ## seems to be marker pairs with too many NA's
Determine the linkage groups, by specifying a distance threshold and a LOD threshold (two markers are put in the same group if their Haldane distance and two-points LOD are below the thresholds):
cmd <- "group 0.2 1.5" # params chosen by trial and error out.group <- runCarthagene(cg, cmd) linkgroups.par1 <- parseCgGroup(out.group, mrk.info) table(linkgroups.par1$linkage.group)
Check that all markers put in the same linkage group belong to the same chromosome:
lg2chr <- tapply(linkgroups.par1$locus, factor(linkgroups.par1$linkage.group), function(locus){ setNames(genomes$snp.coords[gsub("_m", "", locus), "chr"], locus) }) lapply(lg2chr, table)
Note that knowing in which linkage group each marker is, is enough to determine parental phases. However, if the marker orders and inter-marker genetic distances are also required, it is necessary to build a genetic map.
Look at double markers:
out.mrkdbl <- runCarthagene(cg, "mrkdouble") length(out.mrkdbl) head(out.mrkdbl, n=10)
Determine the marker order and inter-marker distances for the first linkage group, by building a "framework" map:
runCarthagene(cg, "mrkselset [groupget 1]") system.time( out.buildfw <- runCarthagene(cg, "buildfw 3 3 {} 1")) length(out.buildfw)
Check its reliability:
out.heaprint <- runCarthagene(cg, "heaprint") (maps.info <- parseCgHeaprint(out.heaprint)) system.time( out.flips <- runCarthagene(cg, "flips 4 3 1")) length(out.flips) system.time( out.polish <- runCarthagene(cg, "polish")) length(out.polish) system.time( out.squeeze <- runCarthagene(cg, "squeeze 20")) out.squeeze out.heaprint <- runCarthagene(cg, "heaprint") (maps.info <- parseCgHeaprint(out.heaprint))
Get the maps (best is last):
cmd <- paste0("maprintd ", maps.info$id[which.max(maps.info$log10.lik)]) out.maprintd <- runCarthagene(cg, cmd) map.group1 <- parseCgMaprintd(out.maprintd) str(map.group1) head(map.group1) out.bestprintd <- runCarthagene(cg, "bestprintd") bestmap.group1 <- parseCgMaprintd(out.bestprintd) head(bestmap.group1)
Do the same for a linkage group corresponding to the other chromosome:
runCarthagene(cg, "mrkselset [groupget 3]") system.time( out.buildfw <- runCarthagene(cg, "buildfw 3 3 {} 1")) out.heaprint <- runCarthagene(cg, "heaprint") (maps.info <- parseCgHeaprint(out.heaprint)) system.time( out.flips <- runCarthagene(cg, "flips 4 3 1")) system.time( out.polish <- runCarthagene(cg, "polish")) system.time( out.squeeze <- runCarthagene(cg, "squeeze 20")) out.heaprint <- runCarthagene(cg, "heaprint") (maps.info <- parseCgHeaprint(out.heaprint)) out.bestprintd <- runCarthagene(cg, "bestprintd") bestmap.group3 <- parseCgMaprintd(out.bestprintd)
Save the whole map:
map.par1.cg <- rbind(data.frame(linkage.group=rep(unique(lg2chr[[1]][bestmap.group1$locus]), nrow(bestmap.group1)), locus=bestmap.group1$locus, genetic.distance=bestmap.group1$cum.dist.kosambi, stringsAsFactors=FALSE), data.frame(linkage.group=rep(unique(lg2chr[[3]][bestmap.group3$locus]), nrow(bestmap.group3)), locus=bestmap.group3$locus, genetic.distance=bestmap.group3$cum.dist.kosambi, stringsAsFactors=FALSE)) tapply(map.par1.cg$genetic.distance, factor(map.par1.cg$linkage.group), max) file.map.par1.cg <- paste0("bestmap_carthagene_parent1-", colnames(genos.jm)[1], ".txt") write.table(x=map.par1.cg, file=file.map.par1.cg, quote=FALSE, sep="\t", row.names=FALSE) ## map.par1.cg <- read.table(file.map.par1.cg, header=TRUE, sep="\t")
Save CarthaGene's state:
file.cgsave <- "cgsave.tcl" (out <- runCarthagene(cg, paste("cgsave", file.cgsave))) file.export <- "cg_map.txt" (out <- runCarthagene(cg, paste("cgexport", file.export, "tuto-genmap-outbreds")))
Close and clean:
closeCarthagene(cg, file=file.par1) file.remove(file.par1) file.remove(file.map.par1.cg)
Load the data set:
file.par2 <- paste0("genos_carthagene_parent2-", names(genos.cg)[2], ".txt") writeCartagene(genos.cg[[2]], file.par2) cg <- openCarthagene(file.par2, "par2")
Define linkage groups:
linkgroups.par2 <- defLinkgroupsWithCarthagene(cg=cg, dist.thresh=0.3, lod.thresh=5, mrk2chr=mrk2chr)
Build the map for the first group:
mrk2phy <- setNames(genomes$snp.coords$pos, rownames(genomes$snp.coords)) bestmap.lg1 <- estMrkOrderGenDistsWithCarthagene(cg, linkgroups.par2, lg.id=1, keep.thresh=3, add.thresh=3, mrk2phy=mrk2phy)
Do the same for the other chromosome:
bestmap.lg3 <- estMrkOrderGenDistsWithCarthagene(cg, linkgroups.par2, lg.id=3, keep.thresh=3, add.thresh=3, mrk2phy=mrk2phy)
For data sets of a bigger size, the batchtools package can be used to launch the function estMrkOrderGenDistsWithCarthagene()
on each linkage group in parallel.
Save the whole map:
map.par2.cg <- rbind(data.frame(linkage.group=rep(1, nrow(bestmap.lg1)), chr="chr2", locus=bestmap.lg1$locus, genetic.distance=bestmap.lg1$cum.dist.kosambi, physical.distance=bestmap.lg1$physical.distance, stringsAsFactors=FALSE), data.frame(linkage.group=rep(3, nrow(bestmap.lg3)), chr="chr1", locus=bestmap.lg3$locus, genetic.distance=bestmap.lg3$cum.dist.kosambi, physical.distance=bestmap.lg3$physical.distance, stringsAsFactors=FALSE)) tapply(map.par2.cg$genetic.distance, factor(map.par2.cg$chr), max) file.map.par2.cg <- paste0("bestmap_carthagene_parent2-", colnames(genos.jm)[2], ".txt") write.table(x=map.par2.cg, file=file.map.par2.cg, quote=FALSE, sep="\t", row.names=FALSE) ## map.par2.cg <- read.table(file.map.par2.cg, header=TRUE, sep="\t")
Close and clean:
closeCarthagene(cg, file=file.par2) file.remove(file.par2) file.remove(file.map.par2.cg)
Make a qtl::cross
object for the first parent, and look at it:
cross.cg.par1 <- setupQtlCrossObject(gendat=t(genos.qtl[[1]]), cross.type="bc", genmap=map.par1.cg) summary(cross.cg.par1) sum(duplicated(map.par1.cg$genetic.distance[map.par1.cg$linkage.group == "chr2"])) summaryMap(cross.cg.par1) plotMap(cross.cg.par1)
Make a qtl::cross
object for the second parent, and look at it:
cross.cg.par2 <- setupQtlCrossObject(gendat=t(genos.qtl[[2]]), cross.type="bc", genmap=map.par2.cg) summary(cross.cg.par2) sum(duplicated(map.par2.cg$genetic.distance[map.par2.cg$linkage.group == 1])) summaryMap(cross.cg.par2) plotMap(cross.cg.par2)
Plot physical vs genetic distances:
plotPhyVsGenDistances(map.par2.cg, xlim=c(0, chr.len.phy))
If both parental genetic maps are similar in terms of recombination rates, it is possible to make a consensus map.
Once linkage groups are determined for both parents, linkage phases can be deduced:
genos.jm.phased <- setJoinMapPhasesFromParentalLinkGroups( x=genos.jm[,-c(1:8)], lg.par1=linkgroups.par1[linkgroups.par1$linkage.group %in% c(1,3),], lg.par2=linkgroups.par2[linkgroups.par2$linkage.group %in% c(1,3),]) table(genos.jm.phased$phase, useNA="always") pairs.seg.phase <- paste0(genos.jm.phased$seg, "_", genos.jm.phased$phase) table(pairs.seg.phase, useNA="always")
TODO
Prepare the input data:
dim(X <- genos.doses[, estimSnpMaf(genos.doses[-(1:2),]) > 0.15]) genos.asmap <- genoDoses2ASMap(X=X) sapply(genos.asmap, dim)
Build a map for parent 1:
system.time( cross.mst.par1 <- mstmap(genos.asmap$parent1, pop.type="BC", dist.fun="kosambi", objective.fun="COUNT", p.value=10^(-6), # proba below which markers are in same group miss.thresh=1, trace=TRUE, as.cross=TRUE)) summary(cross.mst.par1)
Identify which linkage group corresponds to which chromosome(s):
plot.map(cross.mst.par1) heatMap(cross.mst.par1) nmar(cross.mst.par1) (lg2chr <- lapply(cross.mst.par1$geno, function(lg){ table(genomes$snp.coords[gsub("_m", "", names(lg$map)), "chr"]) })) head(cross.mst.par1$geno$L2$map) head(cross.mst.par1$geno$L3$map) head(cross.mst.par1$geno$L7$map) head(cross.mst.par1$geno$L8$map)
profileMark(cross.mst.par1, crit.val="bonf")
clones <- genClones(cross.mst.par1) clones$cgd (pair1 <- c(as.character(clones$cgd[1,1]), as.character(clones$cgd[1,2]))) if("A.vr.cross" %in% ls()){ print(A.vr.cross[pair1, pair1]) print(A.t.cross[pair1, pair1]) } loc.crovers.off[[pair1[1]]] loc.crovers.off[[pair1[2]]] idx <- grep(paste(pair1, collapse="|"), rownames(haplos.offs$chr1)) plotHaplosMatrix(haplos.offs$chr1[idx[c(1,3,2,4)],], main="Haplotypes (chr1)") plotHaplosMatrix(haplos.offs$chr2[idx[c(1,3,2,4)],], main="Haplotypes (chr2)")
Build a map for parent 2:
TODO
map.c <- combineMap(cross.mst.par1, cross.mst.par2)
Clean:
if(file.exists("MSToutput.txt")) file.remove("MSToutput.txt")
t1 <- proc.time(); t1 - t0 print(sessionInfo(), locale=FALSE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.