R/dataexport.R
In polysat: Tools for Polyploid Microsatellite Analysis

Documented in gendata.to.genind write.ATetra write.GeneMapper write.GenoDive write.POPDIST write.SPAGeDi write.Structure write.Tetrasat

write.Structure <- function(object, ploidy, file = "",
                            samples = Samples(object), loci = Loci(object),
                            writepopinfo = TRUE, extracols = NULL,
                            missingout = -9){
  if(!all(!is.na(Ploidies(object, samples = samples, loci = loci))))
    stop("Ploidies needed.")
  
  if(missing(ploidy))
    stop("Ploidy of file must be specified seperately from ploidies of dataset.")
  
  structdata <- data.frame(rowlabel=c("missing",rep(samples, each = ploidy)))
  thiscol <- 1 # which column we are working with
  
  # Fill in PopInfo if that option is selected
  if(writepopinfo){
    if(!all(!is.na(PopInfo(object)[samples])))
      stop("PopInfo needed, or set writepopinfo to FALSE")
    thiscol <- thiscol+1
    structdata[[thiscol]] <- c(0,rep(PopInfo(object)[samples], each = ploidy))
    names(structdata)[thiscol] <- "PopInfo"
  }
  
  # Fill in extra columns.  Ignored if extracols=NULL.
  for(excol in dimnames(extracols)[[2]]){
    thiscol <- thiscol+1
    structdata[[thiscol]] <- c(0,rep(extracols[samples, excol], each = ploidy))
    names(structdata)[thiscol] <- excol
  }
  
  # Fill in the genotypes
  for(L in loci){
    thiscol <- thiscol + 1
    alleles <- missingout #set up the vector of alleles
    for(s in samples){
      # Is genotype missing?
      if(isMissing(object,s,L)){
        thesealleles <- rep(missingout, ploidy)
      } else {
        #If missing data must be inserted to reflect a lower ploidy level:
        if(Ploidies(object, s, L) < ploidy){
          if(length(Genotype(object,s,L)) == Ploidies(object, s, L)){
            #fully heterozygous genotype
            thesealleles <- c(Genotype(object, s, L),
                              rep(missingout,
                                  times = ploidy - Ploidies(object, s, L)))
          } else {
            if(length(Genotype(object, s, L)) < Ploidies(object, s, L)){
              #duplicate the first allele to get to the right ploidy
              thesealleles <- c(Genotype(object, s, L),
                                rep(Genotype(object, s, L)[1],
                                    times = Ploidies(object, s, L) -
                                      length(Genotype(object, s, L))),
                                rep(missingout,
                                    times = ploidy - Ploidies(object, s, L)))
            } else {
              #randomly choose alleles to use if there are too many
              thesealleles <- c(sample(Genotype(object, s, L),
                                       Ploidies(object, s, L),replace = FALSE),
                                rep(missingout,
                                    times = ploidy - Ploidies(object, s, L)))
              cat(paste("Randomly removed alleles:", s, L), sep = "\n")
            }
          }
          #If the individual has equal or greater ploidy to that
          #being used in the file:
        } else {
          if(length(Genotype(object, s, L)) == ploidy){
            #genotype fills available spaces
            thesealleles <- Genotype(object, s, L)
          } else {
            if(length(Genotype(object, s, L)) < ploidy){
              #duplicate the first allele to get to the right ploidy
              thesealleles <- c(Genotype(object, s, L),
                                rep(Genotype(object, s, L)[1],
                                    times = ploidy -
                                      length(Genotype(object, s, L))))
            } else {
              #randomly choose alleles to use if there are too many
              thesealleles <- sample(Genotype(object, s, L), ploidy,
                                     replace = FALSE)
              cat(paste("Randomly removed alleles:", s, L), sep = "\n")
            }
          }
        }
      }
      alleles <- c(alleles, thesealleles)
    }
    structdata[[thiscol]] <- alleles
    names(structdata)[thiscol] <- L
  }
  
  # write the two header lines
  if(writepopinfo){
    colstoskip <- 2
  } else {
    colstoskip <- 1
  }
  if(!is.null(extracols)){
    colstoskip <- colstoskip + dim(extracols)[2]
  }
  cat(c(paste(c(rep("", colstoskip), names(structdata)[-(1:colstoskip)]),
              collapse = "\t"),
        paste(c(rep("", colstoskip),
                rep(as.character(missingout), ncol(structdata) - colstoskip)),
              collapse = "\t")),
      sep = "\n", file = file)
  # write the bulk of the data
  write.table(structdata[-1,], file = file, sep = "\t", row.names = FALSE,
              quote = FALSE, col.names = FALSE, append = TRUE)
}

write.GenoDive<-function(object,
                         digits = 2, file = "", samples = Samples(object),
                         loci = Loci(object)){
  if(!all(!is.na(Ploidies(object, samples = samples, loci = loci))))
    stop("Ploidies needed.
              Use estimatePloidy to get max number of alleles.")
  if(!all(!is.na(PopInfo(object)[samples])))
    stop("PopInfo needed.")
  if(!all(!is.na(Usatnts(object)[loci])))
    stop("Usatnts needed.")
  # get some info for the second line of the file
  numsam <- length(samples)
  numloc <- length(loci)
  numpop <- length(unique(PopInfo(object)[samples]))
  maxploidy <- max(Ploidies(object, samples, loci))
  
  # start a character vector to contain all the lines of the file
  lines <- c(Description(object),
             paste(numsam, numpop, numloc, maxploidy, digits, sep = "\t"))
  lines[3:(numpop+2)] <-
    PopNames(object)[sort(unique(PopInfo(object)[samples]))]
  lines[numpop+3]<-paste("Population\tIndividual",
                         paste(loci, sep = "", collapse = "\t"), sep = "\t")
  
  # enter population and sample names
  for(s in 1:numsam){
    lines[numpop+3+s] <- paste(PopInfo(object)[samples[s]],
                               samples[s], sep = "\t")
  }
  
  # process alleles and write them to lines
  for(L in loci){
    # replace missing data with zeros
    repmiss<-function(value){
      if(value[1] == Missing(object)){ 0 } else {value}
    }
    convertedalleles <- mapply(repmiss, Genotypes(object, samples, L),
                               SIMPLIFY = FALSE)
    # convert alleles to repeat number
    divallele <- function(value) floor(value/Usatnts(object)[L])
    convertedalleles <- mapply(divallele, convertedalleles, SIMPLIFY = FALSE)
    # subtract if necessary to get them to the right number of digits
    suballele <- function(value){
      if(value[1] != 0){
        value-(10^(digits-1))
      } else{
        0
      }
    }
    while(max(mapply(max, convertedalleles)) >= 10 ^ digits){
      convertedalleles <- mapply(suballele, convertedalleles,
                                 SIMPLIFY = FALSE)
    }
    # For each sample, concatenate into strings and add to lines
    for(s in 1:numsam){
      # Convert alleles to character and set up string for concatenation
      charalleles <- as.character(convertedalleles[[s]])
      allelestring <- ""
      # For each allele:
      for(ca in charalleles){
        allele <- ca
        # Add zeros if necessary
        while(nchar(allele) < digits){
          allele <- paste("0", allele, sep = "")
        }
        # add this allele to the string
        allelestring <- paste(allelestring, allele, sep="")
      }
      # Add the allele string to the line for that sample.
      lines[numpop+3+s] <- paste(lines[numpop+3+s], allelestring, sep = "\t")
    }
  }
  
  # write the file
  cat(lines, file = file, sep = "\n")
}

write.SPAGeDi<-function(object, samples = Samples(object),
                        loci = Loci(object),
                        allelesep = "/", digits = 2, file = "",
                        spatcoord = data.frame(X = rep(1, length(samples)),
                                               Y = rep(1, length(samples)),
                                               row.names = samples)){
  if(!all(!is.na(Ploidies(object, samples, loci))))
    stop("Ploidies needed.")
  if(!all(!is.na(PopInfo(object)[samples])))
    stop("PopInfo needed.")
  if(!all(!is.na(Usatnts(object)[loci])))
    stop("Usatnts needed.")
  # name the rows of the spatial coordinates frame if not already done
  if(identical(row.names(spatcoord), as.character(1:dim(spatcoord)[1])))
    row.names(spatcoord) < -samples
  
  # set up data frame to contain genotypes
  gentable <- data.frame(Ind = samples,
                         Cat = PopNames(object)[PopInfo(object)[samples]],
                         spatcoord[samples,])
  
  # get genotype data by locus (column)
  for(L in loci){
    genotypesL <- Genotypes(object, samples, L)
    # replace missing data with zeros
    genotypesL[isMissing(object, samples, L)] <- 0
    # divide and subtract to convert to repeats
    divallele <- function(value) floor(value/Usatnts(object)[L])
    genotypesL <- mapply(divallele, genotypesL, SIMPLIFY = FALSE)
    suballele <- function(value){
      if(value[1] != 0){
        value - (10 ^ (digits - 1))
      } else {
        0
      }
    }
    while(max(mapply(max, genotypesL)) >= 10 ^ digits){
      genotypesL <- mapply(suballele, genotypesL, SIMPLIFY = FALSE)
    }
    names(genotypesL) <- samples
    # add zeros up to correct ploidy, delete alleles if necessary
    zerostoadd <- Ploidies(object, samples, L) - mapply(length, genotypesL)
    names(zerostoadd) <- samples
    for(s in samples){
      if(length(genotypesL[[s]]) == 1){
        # replicate allele if totally homozygous
        genotypesL[[s]] <- rep(genotypesL[[s]], Ploidies(object, s, L))
      } else {
        if(zerostoadd[s] < 0){
          # randomly remove alleles if there are too many
          genotypesL[[s]] <- sample(genotypesL[[s]],
                                    Ploidies(object, s, L),
                                    replace=FALSE)
          cat("Alleles randomly removed to get to ploidy:", L, s, "\n")
        } else {
          # add zeros for partial heterozygotes
          genotypesL[[s]] <- c(genotypesL[[s]], rep(0, zerostoadd[s]))
        }
      }
      # also make each allele the right number of digits if necessary
      if(allelesep == ""){
        genotypesL[[s]] <- as.character(genotypesL[[s]])
        for(a in 1:length(genotypesL[[s]])){
          while(nchar(genotypesL[[s]][a]) < digits){
            genotypesL[[s]][a] <- paste(0, genotypesL[[s]][a],
                                        sep = "")
          }
        }
      }
    }
    # concatenate into strings
    genvect <- mapply(paste, genotypesL, collapse = allelesep)
    # add the vector to the data frame
    gentable <- data.frame(gentable, genvect)
    names(gentable)[dim(gentable)[2]] <- L
  }
  # write file
  SpagTemp <- tempfile()
  write.table(gentable, file = SpagTemp, sep = "\t", row.names = FALSE,
              col.names = TRUE, quote = FALSE)
  cat(paste(length(samples), length(unique(PopInfo(object)[samples])),
            dim(spatcoord)[2],
            length(loci), digits, max(Ploidies(object,samples,loci)),
            sep="\t"),
      "0",
      readLines(SpagTemp), "END", sep = "\n", file = file)
}

write.GeneMapper <- function(object, file = "", samples = Samples(object),
                             loci = Loci(object)){
  if(!all(!is.na(Ploidies(object, samples, loci))))
    stop("Ploidies needed to determine number of columns.")
  # figure out how many allele columns are needed
  numallelecol <- max(Ploidies(object, samples, loci))
  # figure out how many rows are needed
  numrows <- length(samples) * length(loci)
  # set up data frame
  gentable <- data.frame(Sample.Name = rep(samples, times = length(loci)),
                         Marker = rep(loci, each = length(samples)))
  # put empty allele columns into data frame
  alcollabels <- paste("Allele.", 1:numallelecol, sep = "")
  for(ac in 1:numallelecol){
    gentable[[ac + 2]] <- rep("", times = numrows)
    names(gentable)[ac + 2] <- alcollabels[ac]
  }
  
  # put alleles into their respective cells
  currentrow <- 1
  for(L in loci){
    for(s in samples){
      thesealleles <- as.character(Genotype(object, s, L))
      while((length(thesealleles) + 2) > dim(gentable)[2])
        gentable <- data.frame(gentable, rep("", numrows),
                               stringsAsFactors = FALSE)
      gentable[currentrow, 3:(length(thesealleles) + 2)] <- thesealleles
      currentrow <- currentrow + 1
    }
  }
  
  # write the table to file
  write.table(gentable, file = file, sep = "\t", quote = FALSE,
              row.names = FALSE, col.names = TRUE)
}

write.Tetrasat <- function(object, samples = Samples(object), loci = Loci(object),
                           file = ""){
  if(!all(!is.na(PopInfo(object)[samples])))
    stop("PopInfo needed.")
  if(!all(!is.na(Usatnts(object)[loci])))
    stop("Usatnts needed.")
  if(!all(Ploidies(object, samples, loci) == 4))
    stop("Ploidy must be 4.")
  
  # a vector of populations to cycle through
  allpops <- unique(PopInfo(object)[samples])
  # set up vector of lines to write to file
  lines <- c(Description(object)[1], loci)
  # where does population/genotype data begin?
  datastart <- length(lines) + 1
  
  # make lines with "Pop" and sample names, and an index of where samples are
  sampleindex <- integer(0)
  currentline <- datastart
  for(pop in allpops){
    lines[currentline] <- "Pop"
    currentline <- currentline + 1
    # make a vector of all samples that belong to this population
    thesesamples <- samples[PopInfo(object)[samples] == pop]
    # make an index of the lines that they will go on
    indextoadd <- currentline:(currentline + length(thesesamples) - 1)
    names(indextoadd) <- thesesamples
    # add this to the sample index
    sampleindex <- c(sampleindex, indextoadd)
    # write each sample name (plus spaces up to 20 characters) to the
    # appropriate line
    for(s in thesesamples){
      samname <- s
      while(nchar(samname) < 20){
        samname <- paste(samname, " ", sep = "")
      }
      lines[currentline] <- samname
      currentline <- currentline + 1
    }
  }
  
  # now go through the loci, convert the genotypes, and fill them in
  for(L in loci){
    # convert alleles to repeat number
    divallele <- function(value){
      if(value[1] == Missing(object)){
        value
      } else {
        floor(value/Usatnts(object)[L])
      }
    }
    convertedalleles <- mapply(divallele, Genotypes(object, samples, L),
                               SIMPLIFY = FALSE)
    names(convertedalleles) <- samples
    # make sure alleles have two digits or fewer
    suballele <- function(value){
      if(value[1] == Missing(object)){
        value
      } else {
        value - 10
      }
    }
    while(max(mapply(max,convertedalleles)) >= 100){
      convertedalleles <- mapply(suballele, convertedalleles,
                                 SIMPLIFY = FALSE)
    }
    # go through the genotypes by sample
    for(s in samples){
      # convert missing data to blank spaces,
      # convert to alleles to characters
      if(convertedalleles[[s]][1] == Missing(object)){
        charalleles <- "         "
      } else {
        charalleles <- as.character(convertedalleles[[s]])
        # duplicate allele if fully homozygous
        if(length(charalleles) == 1){
          charalleles <- rep(charalleles, 4)
        }
        # randomly remove alleles if there are more than 4
        if(length(charalleles) > 4){
          charalleles <- sample(charalleles, 4, replace = FALSE)
          cat(c("Alleles randomly removed:", s, L, "\n"), sep = " ")
        }
      }
      # concatenate all alleles into one string
      allelestring <- ""
      for(ca in charalleles){
        allele <- ca
        # Add zeros if necessary
        while(nchar(allele) < 2){
          allele <- paste("0", allele, sep="")
        }
        # add this allele to the string
        allelestring <- paste(allelestring, allele, sep = "")
      }
      # add spaces up to nine characters
      while(nchar(allelestring) < 9){
        allelestring <- paste(allelestring, " ", sep = "")
      }
      # add the concatenated genotype to the appropriate line
      lines[sampleindex[s]] <- paste(lines[sampleindex[s]], allelestring,
                                     sep = "")
    }
  }
  
  cat(lines, sep = "\n", file = file)
}

write.ATetra<-function(object, samples = Samples(object), loci = Loci(object),
                       file = ""){
  if(!all(!is.na(PopInfo(object)[samples])))
    stop("PopInfo needed.")
  if(!all(Ploidies(object, samples, loci) == 4))
    stop("Ploidy must be 4.")
  
  # set up a character vector to hold the lines for the file
  lines <- paste("TIT", Description(object)[1], sep = ",")
  currentline <- 2 # a variable to say which line to write to next
  # make numbers to go with loci, pops, and samples
  locnums <- 1:length(loci)
  names(locnums) <- loci
  samnums <- 1:length(samples)
  names(samnums) <- samples
  popnames <- PopNames(object)[sort(unique(PopInfo(object)[samples]))]
  popnums <- sort(unique(PopInfo(object)[samples]))
  names(popnums) <- popnames
  
  # fill in data using a loop
  for(L in loci){
    # write a line describing the locus
    lines[currentline] <- paste("LOC", locnums[L], L, sep = ",")
    currentline <- currentline + 1
    for(p in popnames){
      # write a line describing the population
      lines[currentline] <- paste("POP", locnums[L], popnums[p], p, sep = ",")
      currentline <- currentline + 1
      # get a vector of individuals in this population
      thesesamples <- Samples(object, populations = p)
      thesesamples <- thesesamples[thesesamples %in% samples]
      # write lines for individuals
      for(s in thesesamples){
        # first put sample info into the line
        lines[currentline] <- paste("IND", locnums[L], popnums[p],
                                    samnums[s], s, sep = ",")
        # get the alleles and print a warning if there is missing data
        thesealleles<-Genotype(object, s, L)
        if(isMissing(object, s, L)){
          thesealleles <- ""
          cat("Missing data:", s, L, "\n", sep = " ")
        }
        # take a random sample if there are more than 4
        if(length(thesealleles) > 4){
          thesealleles <- sample(thesealleles, 4, replace = FALSE)
          cat("More than 4 alleles:", s, L, "\n", sep = " ")
        }
        # add alleles to the line
        for(a in 1:4){
          if(length(thesealleles) >= a){
            lines[currentline] <- paste(lines[currentline],
                                        thesealleles[a], sep = ",")
          } else {
            lines[currentline] <- paste(lines[currentline], "", sep = ",")
          }
        }
        currentline <- currentline + 1
      }
    }
  }
  
  # write the "END-record"
  lines[currentline] <- "END"
  
  # write the file
  cat(lines, sep = "\n", file = file)
}

write.POPDIST <- function(object, samples = Samples(object),
                          loci = Loci(object), file = ""){
  # error messages
  if(!all(!is.na(PopInfo(object)[samples])))
    stop("PopInfo needed")
  if(!all(!is.na(Ploidies(object,samples,loci))))
    stop("Ploidies needed")
  # start a vector of lines to write to the file
  Lines <- c(Description(object)[1], loci)
  # make a vector to index all the lines containing samples
  samindex <- integer(0)
  
  # add one population at a time
  for(p in unique(PopInfo(object)[samples])){
    Lines <- c(Lines, "Pop")
    # add each individual
    for(s in samples[samples %in% Samples(object, populations=p)]){
      Lines <- c(Lines, paste(PopNames(object)[p], "\t,", sep = ""))
      samindex <- c(samindex, length(Lines))
      names(samindex)[length(samindex)] <- s
    }
    # warning message for mixed ploidy
    if(length(unique(
      Ploidies(object,samples[samples %in% Samples(object,
                                                   populations = p)],
               loci))) > 1)
      warning(paste("Mixed ploidy in population", p,
                    "; POPDIST may reject file."))
  }
  
  # Convert alleles to two digits then add to lines
  for(L in loci){
    thesegen <- Genotypes(object, samples, L)
    if(max(mapply(max, thesegen)) > 99){
      if(!is.na(Usatnts(object)[L]) && Usatnts(object)[L] > 1){
        thesegen <- array(mapply(function(value){
          if(value[1] == Missing(object)){
            value
          } else {
            floor(value/Usatnts(object)[L])
          }
        },
        thesegen, SIMPLIFY = FALSE),
        dim = c(length(samples), 1),
        dimnames=list(samples, L))
      }
      
      while(max(mapply(max, thesegen)) > 99){
        thesegen <- array(mapply(function(value){
          if(value[1] == Missing(object)){
            value
          } else {
            value - 10
          }
        },
        thesegen, SIMPLIFY = FALSE),
        dim = c(length(samples), 1),
        dimnames = list(samples, L))
      }
    }
    
    for(s in samples){
      if(thesegen[[s,L]][1] == Missing(object)){
        alleles <- "00"
      } else {
        alleles <- as.character(thesegen[[s, L]])
      }
      if(length(alleles) > Ploidies(object, s, L)){
        alleles <- sample(alleles, Ploidies(object, s, L))
        warning(paste("Alleles randomly removed:", s, L))
      }
      Lines[samindex[s]] <- paste(Lines[samindex[s]], " ", sep="")
      for(a in 1:Ploidies(object, s, L)){
        if(a > length(alleles)){
          Lines[samindex[s]] <- paste(Lines[samindex[s]], "00", sep = "")
        } else {
          if(nchar(alleles[a]) == 1)
            alleles[a] <- paste("0", alleles[a], sep="")
          Lines[samindex[s]] <- paste(Lines[samindex[s]],
                                      alleles[a], sep = "")
        }
      }
    }
  }
  
  cat(Lines, file = file, sep = "\n")
}

gendata.to.genind <- function(object, samples = Samples(object),
                              loci = Loci(object)){
  # Errors and setup
  if(!is(object, "gendata")) stop("genambig or genbinary object needed.")
  object <- object[samples, loci]
  if(!"ploidysample" %in% class(object@Ploidies)){
    object <- reformatPloidies(object, output = "sample")
  }
  ploidy <- Ploidies(object)
  if(any(is.na(ploidy))) stop("Please specify ploidy.")
  
  # Get genbinary if necessary
  if(is(object, "genambig"))
    object <- genambig.to.genbinary(object)
  
  # export genotypes table
  tab <- Genotypes(object)
  # convert missing data to NA
  tab[tab == Missing(object)] <- NA
  # make column headings usable
  dimnames(tab)[[2]] <- gsub(".", "-", dimnames(tab)[[2]], fixed = TRUE)
  
  # create genind object
  x <- adegenet::genind(tab, pop = PopNames(object)[PopInfo(object)[samples]],
                        ploidy = ploidy, type = "PA")
  return(x)
}
Any scripts or data that you put into this service are public.
polysat documentation built on Aug. 23, 2022, 5:07 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
polysat
Tools for Polyploid Microsatellite Analysis

R/dataexport.R
In polysat: Tools for Polyploid Microsatellite Analysis

Defines functions gendata.to.genind write.POPDIST write.ATetra write.Tetrasat write.GeneMapper write.SPAGeDi write.GenoDive write.Structure

Documented in gendata.to.genind write.ATetra write.GeneMapper write.GenoDive write.POPDIST write.SPAGeDi write.Structure write.Tetrasat

Try the polysat package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

polysat Tools for Polyploid Microsatellite Analysis

R/dataexport.R In polysat: Tools for Polyploid Microsatellite Analysis

Defines functions gendata.to.genind write.POPDIST write.ATetra write.Tetrasat write.GeneMapper write.SPAGeDi write.GenoDive write.Structure

Documented in gendata.to.genind write.ATetra write.GeneMapper write.GenoDive write.POPDIST write.SPAGeDi write.Structure write.Tetrasat

Try the polysat package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

polysat
Tools for Polyploid Microsatellite Analysis

R/dataexport.R
In polysat: Tools for Polyploid Microsatellite Analysis