R/read.cross.R

######################################################################
#
# read.cross.R
#
# copyright (c) 2000-9, Karl W Broman
# last modified Dec, 2009
# first written Aug, 2000
#
#     This program is free software; you can redistribute it and/or
#     modify it under the terms of the GNU General Public License,
#     version 3, as published by the Free Software Foundation.
# 
#     This program is distributed in the hope that it will be useful,
#     but without any warranty; without even the implied warranty of
#     merchantability or fitness for a particular purpose.  See the GNU
#     General Public License, version 3, for more details.
# 
#     A copy of the GNU General Public License, version 3, is available
#     at http://www.r-project.org/Licenses/GPL-3
#
# Part of the R/qtl package
# Contains: read.cross, fixXgeno.bc, fixXgeno.f2, asnumericwithdec
#           [See read.cross.csv.R, read.cross.mm.R, read.cross.qtx.R,
#            qtlcart_io.R, read.cross.gary.R, and read.cross.karl.R
#            for the format-specific functions.]
#
######################################################################

######################################################################
#
# read.cross: read data from an experimental cross
#
######################################################################

read.cross <-
function(format=c("csv", "csvr", "csvs", "csvsr", "mm", "qtx",
                  "qtlcart", "gary", "karl"),
         dir="", file, genfile, mapfile, phefile, chridfile, mnamesfile, pnamesfile,
         na.strings=c("-","NA"), genotypes=c("A","H","B","D","C"),
         alleles=c("A","B"), estimate.map=TRUE, convertXdata=TRUE,
         error.prob=0.0001, map.function=c("haldane", "kosambi", "c-f", "morgan"),
         BC.gen = 0, F.gen = 0,
         ...)
{
  if(format == "csvrs") {
    format <- "csvsr"
    warning("Assuming you mean 'csvsr' rather than 'csvrs'.\n")
  }
  format <- match.arg(format)

  if(format=="csv" || format=="csvr") { # comma-delimited format
    cross <- read.cross.csv(dir, file, na.strings, genotypes,
                            estimate.map, rotate=(format=="csvr"),
                            ...)
  }
  else if(format=="csvs" || format=="csvsr") { # comma-delimited format

    # allow easier input of filenames into function arguments
    if(missing(phefile) && !missing(file) && !missing(genfile)) {
      # read.cross("format", "dir", "genfile", "phefile")
      phefile <- genfile
      genfile <- file
    }
    else if(missing(genfile) && !missing(file) && !missing(phefile)) {
      # read.cross("format", "dir", "genfile", phefile="phefile")
      genfile <- file
    }

    cross <- read.cross.csvs(dir, genfile, phefile, na.strings, genotypes,
                             estimate.map, rotate=(format=="csvsr"),
                             ...)
  }
  else if(format=="qtx") { # Mapmanager QTX format
    cross <- read.cross.qtx(dir,file,estimate.map)
  }
  else if(format=="qtlcart") { # QTL Cartographer format
    # if missing mapfile but genfile is specified,
    #     use genfile as the map file.
    if(missing(mapfile) && !missing(genfile))
      mapfile <- genfile

    cross <- read.cross.qtlcart(dir, file, mapfile)
  }
  else if(format=="karl") { # karl's format
    # if missing file names, use standard ones
    if(missing(genfile)) genfile <- "gen.txt"
    if(missing(mapfile)) mapfile <- "map.txt"
    if(missing(phefile)) phefile <- "phe.txt"

    cross <- read.cross.karl(dir,genfile,mapfile,phefile)
  }
  else if(format=="mm") { # mapmaker format
    # if missing mapfile but genfile is specified,
    #     use genfile as the map file.
    if(missing(mapfile) && !missing(genfile))
      mapfile <- genfile

    cross <- read.cross.mm(dir,file,mapfile,estimate.map)
  }
  else if(format=="gary") { # gary's format
    # if missing file names, use the standard ones
    if(missing(genfile)) genfile <- "geno.dat"
    if(missing(mnamesfile)) mnamesfile <- "mnames.txt"
    if(missing(chridfile)) chridfile <- "chrid.dat"
    if(missing(phefile)) phefile <- "pheno.dat"
    if(missing(pnamesfile)) pnamesfile <- "pnames.txt"
    if(missing(mapfile)) mapfile <- "markerpos.txt"

    cross <- read.cross.gary(dir,genfile,mnamesfile,chridfile,
                             phefile,pnamesfile,mapfile,estimate.map,na.strings)
  }

  estimate.map <- cross[[2]]
  cross <- cross[[1]]

  # if chr names all start with "chr" or "Chr", remove that part
  chrnam <- names(cross$geno)
  if(all(regexpr("^[Cc][Hh][Rr]",chrnam)>0)){ 
    chrnam <- substr(chrnam,4,nchar(chrnam))
    if(all(regexpr("^[Oo][Mm][Oo][Ss][Oo][Mm][Ee]",chrnam)>0))
      chrnam <- substr(chrnam,8,nchar(chrnam))
  }
  # if chr named "x" make it "X"
  if(sum(chrnam=="x")>0) chrnam[chrnam=="x"] <- "X"
  names(cross$geno) <- chrnam
  # make sure the class of chromosomes named "X" is "X"
    for(i in 1:length(cross$geno)) 
      if(names(cross$geno)[i] == "X")
        class(cross$geno[[i]]) <- "X"

  # Fix up the X chromosome data for a backcross or intercross
  chrtype <- sapply(cross$geno,class)
  if(any(chrtype=="X") && convertXdata) {
    if(class(cross)[1]=="bc")
      cross <- fixXgeno.bc(cross)
    if(class(cross)[1]=="f2") {
      if(missing(alleles)) alleles <- c("A","B")
      cross <- fixXgeno.f2(cross, alleles)
    }
  }

  ## Pass through read.cross.bcsft for BCsFt (convert if appropriate).
  cross <- read.cross.bcsft(cross = cross, BC.gen = BC.gen, F.gen = F.gen, ...)

  # re-estimate map?
  if(estimate.map) {
    cat(" --Estimating genetic map\n")
    map.function <- match.arg(map.function)
    newmap <- est.map(cross, error.prob=error.prob, map.function=map.function)
    cross <- replace.map(cross, newmap)
  }

  # store genotype data as integers
  for(i in 1:nchr(cross))
    storage.mode(cross$geno[[i]]$data) <- "integer"

  # check alleles
  if(class(cross)[1] != "4way") {
    if(length(alleles) > 2) {
      warning("length of arg alleles should be 2")
      alleles <- alleles[1:2]
    }
    if(length(alleles) < 2) 
      stop("length of arg alleles should be 2")
  }
  else { # 4-way cross
    if(missing(alleles))
      alleles <- c("A","B","C","D")
    
    if(length(alleles) > 4) {
      warning("length of arg alleles should be 4 for a 4-way cross")
      alleles <- alleles[1:4]
    }
    if(length(alleles) < 4) 
      stop("length of arg alleles should be 4 for a 4-way cross")
  }
  if(any(nchar(alleles)) != 1) {
    warning("Each item in arg alleles should be a single character")
    alleles <- substr(alleles, 1, 1)
  }

  attr(cross, "alleles") <- alleles

  # run checks
  summary(cross)

  cat(" --Cross type:", class(cross)[1], "\n")

  cross
}




##############################
# fixXgeno.bc: fix up the X chromosome genotype data for backcross
##############################
fixXgeno.bc <-
function(cross)
{
  omitX <- FALSE

  # pull out X chr genotype data
  chrtype <- sapply(cross$geno,class)
  xchr <- which(chrtype=="X")
  Xgeno <- cross$geno[[xchr]]$data

  # find "sex" and "pgm" in the phenotype data
  sexpgm <- getsex(cross)

  if(!is.null(sexpgm$sex)) {     # "sex" is provided
    malegeno <- Xgeno[sexpgm$sex==1,]
    if(any(!is.na(malegeno) & malegeno==2)) {
      n.omit <- sum(!is.na(malegeno) & malegeno==2)
      warning(" --Omitting ", n.omit, " male heterozygote genotypes on the X chromosome.")
      malegeno[!is.na(malegeno) & malegeno==2] <- NA
    }
    malegeno[!is.na(malegeno) & malegeno==3] <- 2
    
    femalegeno <- Xgeno[sexpgm$sex==0,]
    if(any(!is.na(femalegeno) & femalegeno==3)) {
      n.omit <- sum(!is.na(femalegeno) & femalegeno==3)
      warning(" --Omitting ", n.omit, " BB genotypes from females on the X chromosome.")
      femalegeno[!is.na(femalegeno) & femalegeno==3] <- NA
    }

    Xgeno[sexpgm$sex==1,] <- malegeno
    Xgeno[sexpgm$sex==0,] <- femalegeno
    
  }
  else {
    # "sex" not provided

    if(all(is.na(Xgeno) | Xgeno==1 | Xgeno==3)) { # look like all males
      warning(" --Assuming that all individuals are male.\n")
      Xgeno[!is.na(Xgeno) & Xgeno==3] <- 2
      cross$pheno$sex <- factor(rep("m",nind(cross)),levels=c("f","m"))
    }
    else if(all(is.na(Xgeno) | Xgeno==1 | Xgeno==2)) { # look like females A:H
      warning(" --Assuming that all individuals are female.\n")
      cross$pheno$sex <- factor(rep("f",nind(cross)),levels=c("f","m"))
    }
    else { # have some of each of the three genotypes
      warning(" --Can't figure out the X chromosome genotypes.\n   You need to provide phenotypes \"sex\"\n   See the help file for read.cross() for details.\n   Omitting the X chr for now.\n  ")
      omitX <- TRUE
    }
  }

  if(!omitX) {
    wh <- !is.na(Xgeno) & Xgeno!=1 & Xgeno!=2
    if(any(wh)) {
      Xgeno[wh] <- NA
      n.omit <- sum(wh)
      warning(" --Omitted ", n.omit, " additional X chr genotype(s).")
    }

    cross$geno[[xchr]]$data <- Xgeno
  }
  else cross <- subset(cross,chr= -xchr) # <- omit the X chr completely

  cross
}


##############################
# fixXgeno.f2: fix up the X chromosome genotype data for intercross
##############################
fixXgeno.f2 <-
function(cross, alleles)
{
  omitX <- FALSE

  # pull out X chr genotype data
  chrtype <- sapply(cross$geno,class)
  xchr <- which(chrtype=="X")
  Xgeno <- cross$geno[[xchr]]$data

  # find "sex" and "pgm" in the phenotype data
  sexpgm <- getsex(cross)

  AA <- paste(rep(alleles[1], 2), collapse="")
  AB <- paste(alleles, collapse="")
  BB <- paste(rep(alleles[2], 2), collapse="")
  cross0 <- paste("(", alleles[1], "x", alleles[2], ")x(",
                  alleles[1], "x", alleles[2], ")", sep="")
  cross1 <- paste("(", alleles[2], "x", alleles[1], ")x(",
                  alleles[2], "x", alleles[1], ")", sep="")

  if(!is.null(sexpgm$sex) && !is.null(sexpgm$pgm)) {
    # both "sex" and "pgm" are provided

    if(any(sexpgm$sex == 1)) { # there are males
      malegeno <- Xgeno[sexpgm$sex==1,]
      if(any(!is.na(malegeno) & malegeno==2)) {
        n.omit <- sum(!is.na(malegeno) & malegeno==2)
        warning(" --Omitting ", n.omit, " male heterozygote genotypes on the X chromosome.")
        malegeno[!is.na(malegeno) & malegeno==2] <- NA
      }
      malegeno[!is.na(malegeno) & malegeno==3] <- 2
      Xgeno[sexpgm$sex==1,] <- malegeno
    }
    
    if(any(sexpgm$sex==0)) { # there are females
      femalegeno0 <- Xgeno[sexpgm$sex==0 & sexpgm$pgm==0,]
      femalegeno1 <- Xgeno[sexpgm$sex==0 & sexpgm$pgm==1,]

      if((any(!is.na(femalegeno0) & femalegeno0==3) ||
          any(!is.na(femalegeno1) & femalegeno1==1)) &&
         !any(!is.na(femalegeno0) & femalegeno0==1) &&
         !any(!is.na(femalegeno1) & femalegeno1==3)) {
                                          # appear to switched the "pgm" values
        warning(" --The 0/1 values for \"pgm\" appear to be switched; switching back.")
        sexpgm$pgm[sexpgm$pgm==1] <- 2
        sexpgm$pgm[sexpgm$pgm==0] <- 1
        sexpgm$pgm[sexpgm$pgm==2] <- 0
        cross$pheno$pgm[cross$pheno$pgm==1] <- 2
        cross$pheno$pgm[cross$pheno$pgm==0] <- 1
        cross$pheno$pgm[cross$pheno$pgm==2] <- 0

        temp <- femalegeno0
        femalegeno0 <- femalegeno1
        femalegeno1 <- temp
      }
      if(any(!is.na(femalegeno0) & femalegeno0==3)) {
        n.omit <- sum(!is.na(femalegeno0) & femalegeno0==3)
        warning(" --Omitting ", n.omit, " ", BB, " genotypes from females from cross ",
                cross0, " on the X chr.\n")
        femalegeno0[!is.na(femalegeno0) & femalegeno0==3] <- NA
      }
      if(any(!is.na(femalegeno1) & femalegeno1==1)) {
        n.omit <- sum(!is.na(femalegeno1) & femalegeno1==1)
        warning(" --Omitting ", n.omit, " ", AA, " genotypes from females from cross ",
                cross1, " on the X chr.\n")
        femalegeno1[!is.na(femalegeno1) & femalegeno1==1] <- NA
      }
      femalegeno1[!is.na(femalegeno1) & femalegeno1==3] <- 1
      Xgeno[sexpgm$sex==0 & sexpgm$pgm==0,] <- femalegeno0
      Xgeno[sexpgm$sex==0 & sexpgm$pgm==1,] <- femalegeno1
    }
    
  }

  else if(!is.null(sexpgm$sex) && is.null(sexpgm$pgm)) {
    # "sex" is provided but not "pgm"

    if(any(sexpgm$sex == 1)) { # there are males
      malegeno <- Xgeno[sexpgm$sex==1,]
      if(any(!is.na(malegeno) & malegeno==2)) {
        n.omit <- sum(!is.na(malegeno) & malegeno==2)
        warning(" --Omitting ", n.omit, " male heterozygote genotypes on the X chromosome.")
        malegeno[!is.na(malegeno) & malegeno==2] <- NA
      }
      malegeno[!is.na(malegeno) & malegeno==3] <- 2
      Xgeno[sexpgm$sex==1,] <- malegeno
    }
    
    if(any(sexpgm$sex==0)) { # there are females
      femalegeno <- Xgeno[sexpgm$sex==0,]

      if(any(!is.na(femalegeno) & femalegeno==3) &
         !any(!is.na(femalegeno) & femalegeno==1)) { # looks like (BxA)x(BxA)
        cross$pheno$pgm <- rep(1,nind(cross))
        femalegeno[!is.na(femalegeno) & femalegeno==3] <- 1
      }
      else if(any(!is.na(femalegeno) & femalegeno==1) &
              !any(!is.na(femalegeno) & femalegeno==3)) { # looks like (AxB)x(AxB)
        cross$pheno$pgm <- rep(0,nind(cross))
      }
      else { # we have some 1's and some 3's
        warning(" --There appear to be some individuals of each cross direction, but \"pgm\" is not provided.\n   Check the X chr genotype data and include a \"pgm\" column in the phenotype data.\n   \"pgm\" was inferred (probably poorly).\n   ")

        cross$pheno$pgm <- rep(0,nind(cross))

        # females with no 3's -> assumed to be from (AxB)x(AxB)
        # females with both 3's and 1's -> assumed to be from (AxB)x(AxB); 3's tossed
        wh.have3 <- apply(femalegeno, 1, function(a) any(!is.na(a) & a==3))
        cross$pheno$pgm[sexpgm$sex==0][wh.have3] <- 1

        temp <- femalegeno[wh.have3,]
        temp[!is.na(temp) & temp==1] <- NA
        temp[!is.na(temp) & temp==3] <- 1
        femalegeno[wh.have3,] <- temp

      }

      Xgeno[sexpgm$sex==0,] <- femalegeno
    }
  }
  else if(is.null(sexpgm$sex) && !is.null(sexpgm$pgm)) {
    # "pgm" is provided but not "sex"

    if(all(is.na(Xgeno) | Xgeno==1 | Xgeno==3)) { # look like all males
      cross$pheno$sex <- factor(rep("m",nind(cross)),levels=c("f","m"))
      Xgeno[!is.na(Xgeno) & Xgeno==3] <- 2
    }
    else { # assume all females
      cross$pheno$sex <- factor(rep("f",nind(cross)),levels=c("f","m"))
      Xgeno.pgm0 <- Xgeno[sexpgm$pgm==0,]
      Xgeno.pgm1 <- Xgeno[sexpgm$pgm==1,]
      if(all(is.na(Xgeno.pgm0) | Xgeno.pgm0==2 | Xgeno.pgm0==3) &&
         all(is.na(Xgeno.pgm1) | Xgeno.pgm1==1 | Xgeno.pgm1==2)) {
        cross$pheno$pgm <- 1 - sexpgm$pgm
        temp <- Xgeno.pgm0
        Xgeno.pgm0 <- Xgeno.pgm1
        Xgeno.pgm1 <- temp
      }
      Xgeno.pgm1[!is.na(Xgeno.pgm1) & Xgeno.pgm1==1] <- NA
      Xgeno.pgm1[!is.na(Xgeno.pgm1) & Xgeno.pgm1==3] <- 1
      Xgeno.pgm0[!is.na(Xgeno.pgm0) & Xgeno.pgm0==3] <- NA
      Xgeno[sexpgm$pgm==0,] <- Xgeno.pgm0
      Xgeno[sexpgm$pgm==1,] <- Xgeno.pgm1
    }
  }
  

  else {
    # Neither "sex" and "pgm" provided

    if(all(is.na(Xgeno) | Xgeno==1 | Xgeno==3)) { # look like all males
      warning(" --Assuming that all individuals are male.\n")
      Xgeno[!is.na(Xgeno) & Xgeno==3] <- 2
      cross$pheno$sex <- factor(rep("m",nind(cross)),levels=c("f","m"))
      cross$pheno$pgm <- rep(0,nind(cross))
    }
    else if(all(is.na(Xgeno) | Xgeno==2 | Xgeno==3)) { # look like females H:B
      warning(" --Assuming that all individuals are female.\n")
      Xgeno[!is.na(Xgeno) & Xgeno==3] <- 1
      cross$pheno$sex <- factor(rep("f",nind(cross)),levels=c("f","m"))
      cross$pheno$pgm <- rep(1,nind(cross))
    }
    else if(all(is.na(Xgeno) | Xgeno==2 | Xgeno==1)) { # looks like females A:H
      warning(" --Assuming that all individuals are female.\n")
      cross$pheno$sex <- factor(rep("f",nind(cross)),levels=c("f","m"))
      cross$pheno$pgm <- rep(0,nind(cross))
    }
    else { # have some of each of the three genotypes
      warning(" --Can't figure out the X chromosome genotypes.\n   You need to provide phenotypes \"sex\" and/or \"pgm\"\n   See the help file for read.cross() for details.\n   Omitting the X chr for now.\n  ")

      omitX <- TRUE

    }
  }


  if(!omitX) {
    wh <- !is.na(Xgeno) & Xgeno!=1 & Xgeno!=2
    if(any(wh)) {
      Xgeno[wh] <- NA
      n.omit <- sum(wh)
      warning(" --Omitted ", n.omit, " additional X chr genotype(s).")
    }

    cross$geno[[xchr]]$data <- Xgeno
  }
  else cross <- subset(cross,chr= -xchr) # <- omit the X chr completely

  cross
}


######################################################################
# convert character to numeric, using dec as the decimal point
######################################################################
asnumericwithdec <-
function(x, dec=".")
{
  if(dec==".") x <- as.numeric(x)
  else x <- sapply(strsplit(x, ","), function(a) as.numeric(paste(a, collapse=".")))
  x
}

# end of read.cross.R
byandell/qtl documentation built on May 13, 2019, 9:28 a.m.