#' parse_crossovers: Parse crossover information from CriMAP chrompic output
#' @param chrompicfile File with output from chrompic
#' @param familyPedigree data.frame containing columns ANIMAL, FATHER, MOTHER
#' and FAMILY. FAMILY defines family groups for crimap. IDs in ANIMAL can be
#' repeated within and between families if necessary. Should be that used in
#' create_crimap_input.
#' @param remove.zero.inf.loci Logical, default = TRUE. Remove IDs with no
#' informative loci.
#' @import plyr
#' @export
parse_crossovers <- function(chrompicfile, familyPedigree, remove.zero.inf.loci = TRUE){
#~~ read lines from the chrompic file
x <- readLines(chrompicfile)
nmarkers <- nrow(parse_map_chrompic(chrompicfile))
x <- x[grep("Family", x)[1]:(grep("Overall recombinations", x)-1)]
x <- x[-grep("^Recombinations ", x)]
x <- x[-grep("^Non-recombinations ", x)]
x <- x[-grep("^Chromosomes ", x)]
x <- x[-which(x == "")]
if (length(grep(" ", x) > 0)) x <- x[-grep(" ", x)]
#~~ create a table with family information to add to the full table later
famtab <- data.frame(FamilyOrder = 1:length(grep("Family", x)),
FamilyID = grep("Family", x, value = T),
LineNo = grep("Family", x),
stringsAsFactors = F)
famtab$startline <- famtab$LineNo - (famtab$FamilyOrder - 1)
famtab$FamilyShortID <- NA
famtab$FamilyShortID <- unlist(lapply(famtab$FamilyID, function (x) strsplit(x, split = " ")[[1]][2]))
x <- x[-grep("Family", x)]
famtab$Count <- diff(c(famtab$startline, (length(x) + 1)))
table(famtab$Count)
famvec <- rep(famtab$FamilyShortID, famtab$Count)
#~~ create a data frame to put all information in
recombframe <- data.frame(data = x,
ANIMAL = NA,
RecombCount = NA,
parent = NA,
Family = famvec,
stringsAsFactors=F)
#~~ Get the IDs
recombframe$ANIMAL <- unlist(lapply(recombframe$data, function(foo){
z <- strsplit(foo, split = "\\s+")[[1]]
if(length(z) == ceiling((nmarkers/10)+2)){
return(z[1])
} else {
return(z[2])
}
}))
recombframe$parent[which(recombframe$ANIMAL != "")] <- "MOTHER"
recombframe$ANIMAL[which(recombframe$ANIMAL == "")] <- recombframe$ANIMAL[which(recombframe$ANIMAL != "")]
recombframe$parent[which(is.na(recombframe$parent))] <- "FATHER"
#~~ get the number of recombinations for each individual
recExtract <- function(CMPstring){
x <- strsplit(CMPstring, split = " ")[[1]][length(strsplit(CMPstring, split = " ")[[1]])]
}
recombframe$RecombCount <- sapply(recombframe$data, recExtract, simplify = TRUE)
#~~ format the data column so that we can get out the number of informative loci
removeID <- function(CMPstring){
z <- strsplit(CMPstring, split = "\\s+")[[1]]
if(length(z) == ceiling((nmarkers/10)+2)){
return(paste0(z[2:(length(z)-1)], collapse = ""))
} else {
return(paste0(z[3:(length(z)-1)], collapse = ""))
}
}
system.time(recombframe$data <- unlist(lapply(recombframe$data, removeID)))
recombframe$data2 <- gsub("-" , "", recombframe$data)
recombframe$data2 <- gsub("c" , "", recombframe$data2)
recombframe$data2 <- gsub(":" , "", recombframe$data2)
recombframe$No.Inf.Loci <- nchar(recombframe$data2)
recombframe <- subset(recombframe, select = -data2)
recombframe$RecombCount <- as.numeric(recombframe$RecombCount)
if(remove.zero.inf.loci == TRUE) recombframe <- subset(recombframe, No.Inf.Loci != 0)
#~~ Get first and list informative positions
recombframe$First.Inf.Order <- unlist(lapply(recombframe$data, InfLengthFunc))
recombframe$Last.Inf.Order <- unlist(lapply(recombframe$data, function(x) InfLengthFunc(x, position = "Last")))
#~~ add the RRID information - the individual in which the recombination took place
suppressMessages(recombframe <- join(recombframe, familyPedigree))
recombframe$RRID <- NA
recombframe$RRID[which(recombframe$parent == "MOTHER")] <- recombframe$MOTHER[which(recombframe$parent == "MOTHER")]
recombframe$RRID[which(recombframe$parent == "FATHER")] <- recombframe$FATHER[which(recombframe$parent == "FATHER")]
#~~ add analysisID
analysisID.val <- gsub("\\\\", "/", chrompicfile)
analysisID.val <- strsplit(analysisID.val, split = "/")[[1]]
analysisID.val <- analysisID.val[length(analysisID.val)]
analysisID.val <- gsub("chr", "", analysisID.val)
analysisID.val <- gsub(".cmp", "", analysisID.val, fixed = T)
recombframe$analysisID <- analysisID.val
recombframe$UniqueID <- paste(recombframe$analysisID, recombframe$Family, recombframe$RRID, recombframe$ANIMAL, recombframe$parent, sep = "_")
recombframe
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.