Nothing
######################################################################
#
# read.cross.mm.R
#
# copyright (c) 2000-2020, Karl W Broman
# last modified Feb, 2020
# first written Aug, 2000
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License,
# version 3, as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but without any warranty; without even the implied warranty of
# merchantability or fitness for a particular purpose. See the GNU
# General Public License, version 3, for more details.
#
# A copy of the GNU General Public License, version 3, is available
# at http://www.r-project.org/Licenses/GPL-3
#
# Part of the R/qtl package
# Contains: read.cross.mm, read.maps.mm
# [See read.cross.R for the main read.cross function.]
#
######################################################################
######################################################################
#
# read.cross.mm: read data from an experimental cross in mapmaker
# format.
#
# We need two files: a "raw" file containing the genotype and
# phenotype data and a "map" file containing the chromosomes
# assignments and (optionally) map positions.
#
# The map file contains two or three columns, separated by white
# space, with the chromosome number, marker name (with markers in
# order along the chromosomes) and (optionally) the map position.
#
######################################################################
read.cross.mm <-
function(dir,rawfile,mapfile,estimate.map=TRUE)
{
# create file names
if(missing(mapfile)) stop("Missing mapfile.")
if(missing(rawfile)) stop("Missing rawfile.")
if(!missing(dir) && dir != "") {
mapfile <- file.path(dir, mapfile)
rawfile <- file.path(dir, rawfile)
}
# count lines in rawfile
n.lines <- length(scan(rawfile, what=character(), skip=0, nlines=0,
blank.lines.skip=FALSE,quiet=TRUE,sep="\n"))
# read map file
map <- read.table(mapfile,header=FALSE,colClasses="character",blank.lines.skip=FALSE,stringsAsFactors=TRUE)
fixmap <- TRUE
if(ncol(map) == 1)
stop("Map file should contain the markers' chromosome IDs.")
if(ncol(map) > 3) { # special maps format
maps <- read.maps.mm(mapfile)
chr <- rep(names(maps),sapply(maps,length))
markers <- unlist(lapply(maps,names))
includes.pos <- TRUE
fixmap <- FALSE
}
if(fixmap) { # my map format: 2 or 3 column table
# remove any rows lacking a chromosome ID
o <- (1:nrow(map))[map[,1]==""]
if(length(o) > 0) map <- map[-o,]
# remove any leading *'s from the marker names
g <- grep("^\\*",map[,2])
if(length(g) > 0)
map[g,2] <- substr(map[g,2],2,nchar(map[g,2]))
}
# begin reading/parsing the genotype data
cur.mar <- 0
cur.phe <- 0
NEW.symb <- c("1","2","3","4","5","0")
OLD.symb <- c("A","H","B","D","C","-")
flag <- 0
# rawdata <- scan(rawfile,what=character(),sep="\n",
# blank.lines.skip=TRUE,quiet=TRUE)
#
for(i in 1:n.lines) {
a <- scan(rawfile,what=character(),skip=i-1,nlines=1,
blank.lines.skip=TRUE,quiet=TRUE)
if(length(a) == 0) next
if(length(grep("#", a[1])) != 0) next
if(flag == 0) {
flag <- 1
if(!is.na(match("intercross", a))) type <- "f2"
else if(!is.na(match("backcross", a)) || !is.na(match("bc", a))) type <- "bc"
else
stop("File indicates invalid cross type: ", a[length(a)], ".")
}
else if(flag == 1) {
flag <- 2
n.ind <- as.numeric(a[1])
n.mar <- as.numeric(a[2])
n.phe <- as.numeric(a[3])
cat(" --Read the following data:\n")
cat("\tType of cross: ", type, "\n")
cat("\tNumber of individuals: ", n.ind, "\n")
cat("\tNumber of markers: ", n.mar, "\n")
cat("\tNumber of phenotypes: ", n.phe, "\n")
# if there's a set of "symbols" for non-standard symbols in
# the file, use them.
if(length(a) > 3 && ("symbols" %in% a)) {
o <- match("symbols",a)
b <- a[-(1:o)]
infile.symb <- substring(b,1,1)
std.symb <- substring(b,3,3)
wh <- rep(0,length(std.symb))
fixed <- rep(0,length(OLD.symb))
for(j in 1:length(std.symb))
if(std.symb[j] %in% OLD.symb)
wh[j] <- match(std.symb[j],OLD.symb)
for(j in 1:length(std.symb))
if(wh[j] != 0) {
OLD.symb[wh[j]] <- infile.symb[j]
fixed[wh[j]] <- 1
}
temp <- table(OLD.symb)
if(any(temp>1)) {
for(j in names(temp)[temp>1]) {
o <- OLD.symb==j & fixed==0
if(any(o)) OLD.symb[o] <- paste(OLD.symb[o]," ")
}
}
}
marnames <- rep("", n.mar)
geno <- matrix(0,ncol=n.mar,nrow=n.ind)
if(n.phe == 0) {
pheno <- matrix(1:n.ind,ncol=1)
phenames <- c("number")
}
else {
pheno <- matrix(0,ncol=n.phe,nrow=n.ind)
phenames <- rep("", n.phe)
}
}
else {
if(substring(a[1],1,1) == "*") {
cur.mar <- cur.mar+1
cur.row <- 1
if(cur.mar > n.mar) { # now reading phenotypes
cur.phe <- cur.phe+1
if(cur.phe > n.phe) next
phenames[cur.phe] <- substring(a[1],2)
if(length(a) > 1) {
p <- a[-1]
p[p=="-"] <- NA
n <- length(p)
oldna <- is.na(p)
numerp <- suppressWarnings(as.numeric(p))
newna <- is.na(numerp)
wh <- !oldna & newna
if(any(wh)) {
droppedasmissing <- unique(p[wh])
if(length(droppedasmissing) > 1) {
themessage <- paste("The values", paste("\"", droppedasmissing, "\"", sep="", collapse=" "))
themessage <- paste(themessage, " for phenotype \"", phenames[cur.phe], "\" were", sep="")
}
else {
themessage <- paste("The value \"", droppedasmissing, "\" ", sep="")
themessage <- paste(themessage, " for phenotype \"", phenames[cur.phe], "\" was", sep="")
}
themessage <- paste(themessage, "interpreted as missing.")
warning(themessage)
}
pheno[cur.row+(0:(n-1)),cur.phe] <- numerp
}
else n <- 0 ## ?
cur.row <- cur.row + n
}
else { # reading genotypes
marnames[cur.mar] <- substring(a[1],2)
if(length(a) > 1) {
g <- paste(a[-1],collapse="")
h <- g <- unlist(strsplit(g,""))
for(j in seq(along=NEW.symb)) {
if(any(h==OLD.symb[j]))
g[h==OLD.symb[j]] <- NEW.symb[j]
}
n <- length(g)
geno[cur.row+(0:(n-1)),cur.mar] <- as.numeric(g)
}
else n <- 0
cur.row <- cur.row + n
}
}
else { # continuation lines
if(cur.mar > n.mar) { # now reading phenotypes
a[a=="-"] <- NA
n <- length(a)
pheno[cur.row+(0:(n-1)),cur.phe] <- as.numeric(a)
cur.row <- cur.row + n
}
else {
g <- paste(a,collapse="")
h <- g <- unlist(strsplit(g,""))
for(j in seq(along=NEW.symb)) {
if(any(h==OLD.symb[j]))
g[h==OLD.symb[j]] <- NEW.symb[j]
}
n <- length(g)
geno[cur.row+(0:(n-1)),cur.mar] <- as.numeric(g)
cur.row <- cur.row + n
}
} # end continuation line
} # end non-intro line
}
dimnames(pheno) <- list(NULL, phenames)
# done reading the raw file
if(fixmap) { # my map format: 2 or 3 column table
# parse map file
if(ncol(map) == 3) {
includes.pos <- TRUE
# make positions numeric
pos <- as.numeric(map[,3])
}
else includes.pos <- FALSE
chr <- as.character(map[,1])
markers <- map[,2]
# reorder markers?
if(all(chr %in% c(1:999,"X","x"))) { # 1...19 + X
tempchr <- chr
tempchr[chr=="X" | chr=="x"] <- 1000
tempchr <- as.numeric(tempchr)
if(includes.pos) neworder <- order(tempchr, pos)
else neworder <- order(tempchr)
} else {
# prevent reordering of chromosomes
tempchr <- factor(chr, levels=unique(chr))
if(includes.pos) neworder <- order(tempchr, pos)
else neworder <- order(tempchr)
}
chr <- chr[neworder]
if(includes.pos) pos <- pos[neworder]
markers <- markers[neworder]
}
Geno <- vector("list",length(unique(chr)))
names(Geno) <- unique(chr)
for(i in unique(chr)) {
mar <- markers[chr == i]
if(fixmap) { # my map format: 2 or 3 column table
# create map
if(includes.pos) {
map <- pos[chr == i]
# reorder markers?
if(any(diff(map)<0)) {
o <- order(map)
map <- map[o]
mar <- mar[o]
}
}
else map <- seq(0,by=5,length=length(mar))
names(map) <- mar
}
else map <- maps[[i]]
# pull out genotype data
o <- match(mar,marnames)
if(any(is.na(o)))
stop("Cannot find markers in genotype data: ",
paste(mar[is.na(o)],collapse=" "), ".",sep="")
if(length(o)==1) data <- matrix(geno[,o],ncol=1)
else data <- geno[,o]
# add marker names to data
colnames(data) <- mar
# changes 0's to NA's
data[!is.na(data) & data==0] <- NA
Geno[[i]] <- list(data=data,map=map)
if(i=="X" || i=="x") class(Geno[[i]]) <- "X"
else class(Geno[[i]]) <- "A"
}
cross <- list(geno=Geno,pheno=pheno)
class(cross) <- c(type,"cross")
if(estimate.map && !includes.pos) estmap <- TRUE
else estmap <- FALSE
cross$pheno <- as.data.frame(cross$pheno, stringsAsFactors=TRUE)
# return cross + indicator of whether to run est.map
list(cross,estmap)
}
######################################################################
#
# read.maps.mm: Read genetic map for a special Mapmaker format
# Written by Brian S Yandell; modified by Karl W Broman
#
######################################################################
read.maps.mm <-
function( mapsfile )
{
if (missing(mapsfile)) stop("Missing mapsfile.")
## find where everything is
f <- scan(mapsfile, what = "", blank.lines.skip = FALSE, sep = "\n",
quiet = TRUE)
start <- pmatch( paste( "*", c("OrderInfo","Classes","Chromosomes",
"Assignments and Placements" ), ":", sep = "" ), f )
## marker names
f <- scan( mapsfile, what = c("",rep(0,9)), skip = start[1],
nlines = start[2] - start[1] - 1,
blank.lines.skip = FALSE, quiet = TRUE)
markers <- substring( f[ seq( 1, length( f ), by = 10 ) ], 2 )
## distances
f <- scan( mapsfile, what = "", skip = start[3],
nlines = start[4] - start[3] - 1,
blank.lines.skip = FALSE, quiet = TRUE)
chr <- grep( "^\\*", f)
chrom <- substring( f[chr], 2 )
nmark <- as.integer( f[ 1 + chr ] )
chr <- c( chr[-1], 1 + length( f ))
lo <- chr - 2 * nmark + 2
hi <- chr - nmark
map <- list()
imark <- c( 0, cumsum( nmark ))
for( i in seq( along = chrom )) {
tmp <- cumsum( c(0,imf.h(as.numeric( f[ lo[i]:hi[i] ] ))))
names( tmp ) <- markers[ imark[i] + seq( nmark[i] ) ]
map[[ chrom[i] ]] <- tmp
}
map
}
# end of read.cross.mm.R
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.