#' Create diaQTL input files from PolyOrigin output
#'
#' Create diaQTL input files from PolyOrigin output
#'
#' Creates the pedigree (diaQTL_pedfile.csv) and genotype (diaQTL_genofile.csv) input files needed for \code{\link{read_data}} from the polyancestry output file generated by the PolyOrigin software. PolyOrigin outputs a genetic map in cM. To add a physical map in bp, use the option \code{mapfile}. The input file needed for \code{\link{phased_parents}} (diaQTL_parents.csv) is also created.
#'
#' @param filename Name of polyancestry file
#' @param mapfile Optional name of CSV file containing the physical map (marker, chrom, bp)
#' @param remove.outliers Should offspring flagged as outliers be removed (default is TRUE)
#' @param outstem prefix for output filenames
#'
#' @return NULL
#'
#' @export
#' @importFrom utils read.csv write.csv
convert_polyorigin <- function(filename,mapfile=NULL,remove.outliers=TRUE,outstem="") {
if (!is.null(mapfile)) {
map <- read.csv(mapfile,as.is=T)[,1:3]
colnames(map) <- c("marker","chrom","bp")
} else {
print("diaQTL_parents not generated because no physical mapfile.")
}
con <- file(filename,"r")
temp <- readLines(con)
ix <- grep("PolyOrigin",temp)
k <- grep("offspringinfo",temp[ix])
x <- strsplit(temp[c((ix[k]+2):(ix[k+1]-1))],split=",")
ped <- data.frame(id=sapply(x,function(x){x[1]}),pop=sapply(x,function(x){x[2]}),stringsAsFactors = F)
outliers <- as.logical(toupper(sapply(x,function(x){x[4]})))
if (remove.outliers) {
outlier.id <- ped$id[which(outliers)]
ped <- ped[!outliers,]
}
k <- grep("ancestralgenotype",temp[ix])
x <- strsplit(temp[c((ix[k]+2):(ix[k+1]-1))],split=",")
parents <- apply(cbind(sapply(x,function(x){x[1]}),sapply(x,function(x){x[3]})),1,paste,collapse="|")
parents <- unique(parents)
y <- strsplit(parents,split="|",fixed=T)
parents <- data.frame(pop=sapply(y,function(y){y[1]}),parent1=sapply(y,function(y){y[2]}),parent2=sapply(y,function(y){y[length(y)]}),stringsAsFactors = F)
ped <- merge(ped,parents)
write.csv(ped[,2:4],file=paste(outstem,"diaQTL_pedfile.csv",sep=""),row.names=F)
k <- grep("parentgeno",temp[ix])
header <- setdiff(strsplit(temp[ix[k]+1],split=",",fixed=T)[[1]],"")
parents <- header[-(1:3)]
header[1:3] <- c("marker","chrom","cM")
x <- strsplit(temp[c((ix[k]+2):(ix[k+1]-1))],split=",")
m <- length(x)
pp <- matrix("",nrow=m,ncol=length(header))
for (i in 1:3) {
pp[,i] <- sapply(x,function(z){z[i]})
}
for (i in 4:length(header)) {
v <- sapply(x,function(z){z[i]})
v <- gsub("1","0",v)
pp[,i] <- gsub("2","1",v)
}
colnames(pp) <- header
if (!is.null(mapfile)) {
pp2 <- merge(data.frame(pp,check.names = FALSE),map)
pp <- pp2[match(pp[,"marker"],pp2$marker),]
pp <- pp[,c(header[1:3],"bp",parents)]
write.csv(pp,paste(outstem,"diaQTL_parents.csv",sep=""),row.names=F)
}
k <- grep("genoprob",temp[ix])
id <- temp[ix[k]+1]
id <- strsplit(id,split=",",fixed=T)[[1]][-(1:3)]
keep <- 1:length(id)
if (remove.outliers & any(outliers)) {
keep <- setdiff(keep,match(outlier.id,id))
}
n <- length(keep)
genoprob <- matrix("",nrow=m,ncol=n+1)
colnames(genoprob) <- c("marker",id[keep])
for (j in 1:m) {
i <- ix[k]+1+j
x <- strsplit(temp[i],split=",",fixed=T)[[1]]
genoprob[j,] <- x[c(1,keep+3)]
}
if (!is.null(mapfile)) {
write.csv(cbind(pp[,1:4],genoprob[match(pp[,1],genoprob[,1]),-1]),paste(outstem,"diaQTL_genofile.csv",sep=""),row.names=F)
} else {
write.csv(cbind(pp[,1:3],genoprob[match(pp[,1],genoprob[,1]),-1]),paste(outstem,"diaQTL_genofile.csv",sep=""),row.names=F)
}
close(con)
return(NULL)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.