Nothing
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#
# This software was authored by Zhian N. Kamvar and Javier F. Tabima, graduate
# students at Oregon State University; Jonah C. Brooks, undergraduate student at
# Oregon State University; and Dr. Nik Grünwald, an employee of USDA-ARS.
#
# Permission to use, copy, modify, and distribute this software and its
# documentation for educational, research and non-profit purposes, without fee,
# and without a written agreement is hereby granted, provided that the statement
# above is incorporated into the material, giving appropriate attribution to the
# authors.
#
# Permission to incorporate this software into commercial products may be
# obtained by contacting USDA ARS and OREGON STATE UNIVERSITY Office for
# Commercialization and Corporate Development.
#
# The software program and documentation are supplied "as is", without any
# accompanying services from the USDA or the University. USDA ARS or the
# University do not warrant that the operation of the program will be
# uninterrupted or error-free. The end-user understands that the program was
# developed for research purposes and is advised not to rely exclusively on the
# program for any reason.
#
# IN NO EVENT SHALL USDA ARS OR OREGON STATE UNIVERSITY BE LIABLE TO ANY PARTY
# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
# LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION,
# EVEN IF THE OREGON STATE UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE. USDA ARS OR OREGON STATE UNIVERSITY SPECIFICALLY DISCLAIMS ANY
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY STATUTORY
# WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
# BASIS, AND USDA ARS AND OREGON STATE UNIVERSITY HAVE NO OBLIGATIONS TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#==============================================================================#
# This is simply a convenience function that serves as a wrapper for useful, but
# slightly obscure base R functions.
#' Get a file name and path and store them in a list.
#'
#' getfile is a convenience function that serves as a wrapper for the functions
#' [file.choose()], [file.path()], and [list.files()].
#' If the user is working in a GUI environment, a window will pop up, allowing
#' the user to choose a specified file regardless of path.
#'
#' @param multi this is an indicator to allow the user to store the names of
#' multiple files found in the directory. This is useful in conjunction with
#' [poppr.all()].
#'
#' @param pattern a [regex()] pattern for use while
#' `multi == TRUE`. This will grab all files matching this pattern.
#'
#' @param combine `logical`. When this is set to `TRUE` (default), the
#' `$files` vector will have the path appended to them. When it is set to
#' `FALSE`, it will have the basename.
#'
#' @return \item{path}{a character string of the absolute path to the
#' chosen file or files}
#' \item{files}{a character vector containing the chosen file
#' name or names.}
#' @author Zhian N. Kamvar
#' @md
#'
#' @examples
#' \dontrun{
#'
#' x <- getfile()
#' poppr(x$files)
#'
#' y <- getfile(multi=TRUE, pattern="^.+?dat$")
#' #useful for reading in multiple FSTAT formatted files.
#'
#' yfiles <- poppr.all(y$files)
#'
#' }
#' @export
#==============================================================================#
getfile <- function(multi=FALSE, pattern=NULL, combine=TRUE){
# the default option is to grab a single file in the directory. If multFile is
# set to TRUE, it will grab all the files in the directory corresponding to any
# pattern that is set. If there is no pattern, all files will be grabbed.
if (multi==TRUE){
# this sets the path variable that the user can use to set the path
# to the files with setwd(x$path), where x is the datastructure
# this function dumped into.
pathandfile <- file.path(file.choose())
path <- dirname(pathandfile)
if (!is.null(pattern)){
pat <- pattern
x <- list.files(path, pattern=pat)
}
else {
x <- list.files(path)
}
}
else {
# if the user chooses to analyze only one file, a pattern is not needed
pathandfile <- file.path(file.choose())
path <- dirname(pathandfile)
x <- basename(pathandfile)
}
if(combine == TRUE){
x <- paste(path, x, sep="/")
}
filepath <- list(files=x, path=path)
return(filepath)
}
#==============================================================================#
#' Importing data from genalex formatted \*.csv files.
#'
#' read.genalex will read in a genalex-formatted file that has been exported in
#' a comma separated format and will parse most types of genalex data. The
#' output is a [genclone-class][genclone] or [genind-class][genind] object.
#'
#' @param genalex a \*.csv file exported from genalex
#'
#' @param ploidy an integer to indicate the ploidy of the dataset
#'
#' @param geo indicates the presence of geographic data in the file. This data
#' will be included in a data frame labeled `xy` in the
#' [other()] slot.
#'
#' @param region indicates the presence of regional data in the file.
#'
#' @param genclone when `TRUE` (default), the output will be a [genclone-class]
#' object. When `FALSE`, the output will be a [genind-class] object
#'
#' @param sep A character specifying the column separator of the data. Defaults
#' to ",".
#'
#' @param recode **For polyploid data**: Do you want to recode your data to
#' have varying ploidy? Default is `FALSE`, and the data will be returned
#' with even ploidy where missing alleles are coded as "0". When `TRUE`,
#' the data is run through the function [recode_polyploids()] before
#' being returned. Note that this will prevent conversion to genpop objects in
#' the future. See details.
#'
#' @return A [genclone-class] or [genind-class] bject.
#'
#' @note This function cannot handle raw allele frequency data.
#'
#' In the case that there are duplicated names within the file, this function
#' will assume separate individuals and rename each one to a sequence of
#' integers from 1 to the number of individuals. A vector of the original
#' names will be saved in the `other` slot under `original_names`.
#'
#'
#' @details The resulting [genclone-class][genclone] or [genind-class][genind]
#' object will have a single strata defined in the strata slot. This will be
#' called "Pop" and will reflect the population factor defined in the genalex
#' input. If `region = TRUE`, a second column will be inserted and labeled
#' "Region". If you have more than two strata within your data set, you should
#' run the command [adegenet::splitStrata()] on your data set to define the
#' unique stratifications.
#'
#' \subsection{FOR POLYPLOID (> 2n) DATA SETS}{ The genind object has
#' an all-or-none approach to missing data. If a sample has missing data at a
#' particular locus, then the entire locus is considered missing. This works
#' for diploids and haploids where allelic dosage is unambiguous. For
#' polyploids this poses a problem as much of the data set would be
#' transformed into missing data. With this function, I have created a
#' workaround.
#'
#' When importing polyploid data sets, missing data is scored as "0" and kept
#' within the genind object as an extra allele. This will break most analyses
#' relying on allele frequencies*. All of the functions in poppr will work
#' properly with these data sets as multilocus genotype analysis is agnostic
#' of ploidy and we have written both Bruvo's distance and the index of
#' association in such a way as to be able to handle polyploids presented in
#' this manner.
#'
#' \* To restore functionality of analyses relying on allele frequencies, use
#' the [recode_polyploids()] function.}
#'
#'
#' @seealso [genind2genalex()], [clonecorrect()], [genclone-class],
#' [genind-class], [recode_polyploids()]
#'
#' @export
#' @author Zhian N. Kamvar
#' @md
#' @examples
#'
#' \dontrun{
#' Aeut <- read.genalex(system.file("files/rootrot.csv", package="poppr"))
#'
#' genalex2 <- read.genalex("genalex2.csv", geo=TRUE)
#' # A genalex file with geographic coordinate data.
#'
#' genalex3 <- read.genalex("genalex3.csv", region=TRUE)
#' # A genalex file with regional information.
#'
#' genalex4 <- read.genalex("genalex4.csv", region=TRUE, geo=TRUE)
#' # A genalex file with both regional and geographic information.
#' }
#==============================================================================#
read.genalex <- function(genalex, ploidy = 2, geo = FALSE, region = FALSE,
genclone = TRUE, sep = ",", recode = FALSE){
# The first two lines from a genalex file contain all of the information about
# the structure of the file (except for ploidy and geographic info)
gencall <- match.call()
all.info <- strsplit(readLines(genalex, n = 2), sep)
cskip <- ifelse(inherits(genalex, "connection"), 0, 2)
gena <- utils::read.table(
genalex,
sep = sep,
header = TRUE,
skip = cskip,
colClasses = "character",
stringsAsFactors = FALSE,
check.names = FALSE,
quote = "\""
)
for (i in seq_along(gena)) {
the_col <- trimws(gena[[i]])
the_col[the_col == ""] <- NA_character_
gena[[i]] <- the_col
}
num.info <- as.numeric(all.info[[1]])
pop.info <- all.info[[2]][-c(1:3)]
num.info <- num.info[!is.na(num.info)]
pop.info <- pop.info[!pop.info %in% c("", NA)]
nloci <- num.info[1]
ninds <- num.info[2]
npops <- num.info[3]
# Ensuring that all rows and columns have data
data_rows <- apply(gena, 1, function(i) !all(is.na(i)))
data_cols <- apply(gena, 2, function(i) !all(is.na(i)))
gcols <- colnames(gena)
dups <- duplicated(gcols)
if (any(dups) && any(gcols[dups] != "")){
locations <- which(dups)
locations <- locations[gcols[dups] != ""]
colnames(gena) <- make.unique(gcols, sep = "_")
the_loci <- gcols[dups & gcols != ""]
renamed <- colnames(gena)[dups & gcols != ""]
msg <- "\n I found duplicate column names in your data.\n They are being renamed:\n"
renamed_data <- paste0("col ", locations, ": ",
the_loci, " -> ", renamed, collapse = "\n ")
msg <- paste(msg, renamed_data)
warning(msg, immediate. = TRUE)
}
gena <- gena[data_rows, data_cols, drop = FALSE]
if (nrow(gena) != ninds) {
theData <- if (inherits(genalex, "character")) genalex else deparse(substitute(genalex))
msg <- paste0("\n The number of rows in your data do not match the number ",
"of individuals specified.",
"\n\t", ninds, " individuals specified",
"\n\t", nrow(gena), " rows in data",
"\n Please inspect ", theData, " to ensure it's a properly ",
"formatted GenAlEx file.\n")
stop(msg)
}
#----------------------------------------------------------------------------#
# Checking for extra information such as Regions or XY coordinates
#----------------------------------------------------------------------------#
# Creating vectors that correspond to the different information fields. If the
# regions are true, then the length of the pop.info should be equal to the
# number of populations "npop"(npops) plus the number of regions which is the
# npop+4th entry in the vector. Note that this strategy will only work if the
# name of the first region does not match any of the populations.
clm <- ncol(gena)
# The case where
region_columns <- num.info[npops + 4]
pop_and_region <- length(pop.info) == npops + region_columns
just_pop <- length(pop.info) == npops
# Info for the number of columns the loci can take on.
loci.adj <- c(nloci, nloci * ploidy)
# First question, do you have two or four extra columns? Two extra would
# indicate no geographic data. Four extra would indicate geographic data. Both
# of these indicate that, while a regional specification exists, a column
# indicating the regions was not specified, so it needs to be created
geo_columns <- clm %in% (loci.adj + 4) && geo == TRUE
no_geo_columns <- clm %in% (loci.adj + 2)
geoinds <- c((clm - 1), clm)
if (region == TRUE && !is.na(region_columns) && pop_and_region) {
if (geo_columns || no_geo_columns) {
# The regions were not specified in the columns
pop.vec <- gena[[2]]
ind.vec <- gena[[1]]
xy <- gena[, geoinds]
region.inds <- ((npops + 5):length(num.info)) # Indices for the regions
reg.inds <- num.info[region.inds] # Number of individuals per region
reg.names <- all.info[[2]][region.inds] # Names of the regions
reg.vec <- rep(reg.names, reg.inds) # Paste into a single vector
names(reg.vec) <- ind.vec
if (geo == TRUE) {
xy <- gena[, geoinds]
gena <- gena[, -geoinds, drop = FALSE]
} else {
xy <- NULL
}
gena <- gena[, c(-1, -2), drop = FALSE]
} else {
# The regions are specified as a single column and the individual column
# has been shifted over.
pop.vec <- if (any(gena[[1]] == pop.info[1])) 1 else 2
reg.vec <- if (pop.vec == 2) 1 else 2
orig.ind.vec <- NULL
reg.vec <- gena[[reg.vec]] # Regional Vector
pop.vec <- gena[[pop.vec]] # Population Vector
if (geo == TRUE) {
xy <- gena[, geoinds]
gena <- gena[, -geoinds, drop = FALSE]
clm <- clm - 2
} else {
xy <- NULL
}
ind.vec <- gena[[clm]] # Individual Vector
gena <- gena[, -c(1, 2, clm), drop = FALSE] # removing the non-genotypic columns
}
} else if (geo == TRUE && just_pop) {
# There are no Regions specified, but there are geographic coordinates
reg.vec <- NULL
pop.vec <- gena[[2]]
ind.vec <- gena[[1]]
xy <- gena[, geoinds]
gena <- gena[, -c(1, 2, geoinds), drop = FALSE]
} else {
# There are no Regions or geographic coordinates
reg.vec <- NULL
pop.vec <- gena[[2]]
ind.vec <- gena[[1]]
xy <- NULL
gena <- gena[, -c(1, 2), drop = FALSE]
}
`%null%` <- function(a, b) if (is.null(a)) NULL else b
if (!is.null(xy)) {
mode(xy[[1]]) <- "numeric"
mode(xy[[2]]) <- "numeric"
}
xy_trail_na <- xy %null% (rev(cumsum(rev(rowSums(!is.na(xy))))) > 0)
if (any(xy_trail_na)) {
xy <- xy[xy_trail_na, , drop = FALSE]
xy_nomatch <- TRUE
} else {
xy_nomatch <- is.null(xy)
}
# We need to set the names of the population vector to the original names.
names(pop.vec) <- pop.vec %null% ind.vec
names(reg.vec) <- reg.vec %null% ind.vec
rownames(xy) <- if (xy_nomatch) rownames(xy) else ind.vec
#----------------------------------------------------------------------------#
# The genotype matrix has been isolated at this point. Now this will
# reconstruct the matrix in a way that adegenet likes it.
#----------------------------------------------------------------------------#
clm <- ncol(gena)
gena.mat <- as.matrix(gena)
# Checking for greater than haploid data.
if (nloci == clm/ploidy & ploidy > 1){
# Missing data in genalex is coded as "0" for non-presence/absence data.
# We will convert these data later, but this will take care of the
# data that's considered missing here
gena[is.na(gena.mat)] <- "0"
type <- 'codom'
# The remainder of the position when divided by ploidy is 1, which means
# that this is the first column starting the locus
loci <- which(seq(clm) %% ploidy == 1)
gena2 <- gena[, loci, drop = FALSE]
# Collapsing all alleles over all loci into their respective loci.
for (pos in loci){
# Get the locus position in gena2. Example, in a triploid, the second
# locus would be at gena[, 4:6], so the second element in pos would be 4
# because position %% ploidy is 4 %% 3 == 1
# We get our position out by:
# 1. subtracting 1 to account for the modulo
# 2. divide by ploidy
# 3. add 1 to bring it up to R's 1-index
pos_out <- ((pos - 1)/ploidy) + 1
pos_in <- pos:(pos + ploidy - 1) # allele positions in gena
# paste columns of locus together
donor <- do.call("paste", c(gena[pos_in], list(sep = "/")))
gena2[, pos_out] <- donor
}
# Treating missing data.
gena2[gena2 == paste(rep("0", ploidy), collapse = "/")] <- NA_character_
# Importing
res.gid <- df2genind(gena2, sep="/", ind.names = ind.vec, pop = pop.vec,
ploidy = ploidy, type = type)
} else if (nloci == clm & all(gena.mat %in% as.integer(-1:1))) {
# Checking for AFLP data.
# Missing data in genalex is coded as "-1" for presence/absence data.
# this converts it to "NA" for adegenet.
if(any(gena.mat == -1L)){
gena[gena.mat == -1L] <- NA
}
type <- 'PA'
res.gid <- df2genind(gena, ind.names = ind.vec, pop = pop.vec,
ploidy = ploidy, type = type, ncode = 1)
} else if (nloci == clm & !all(gena.mat %in% as.integer(-1:1))) {
# Checking for haploid microsatellite data or SNP data
if (any(gena.mat == "0", na.rm = TRUE)){
gena[gena.mat == "0"] <- NA
}
type <- 'codom'
res.gid <- df2genind(gena, ind.names = ind.vec, pop = pop.vec,
ploidy = 1, type = type)
} else {
weirdomsg <- paste(
"Something went wrong. Please ensure that your data is",
"formatted correctly:\n\n",
"Is the ploidy flag set to the maximum ploidy of your data?\n",
ifelse(geo,
"You set geo = TRUE. Do you have geographic coordinates in the right place?\n",
"If you have geographic coordinates: did you set the flag?\n"),
ifelse(region,
"You set region = TRUE. Do you have regional data in the right place?\n\n",
"If you have regional data: did you set the flag?\n\n"),
"Otherwise, the problem may lie within the data structure itself.\n",
" - Inspect your data; if it looks fine then\n",
" - search the poppr forum for the error message; if you still can't find a solution:\n",
" 1. Make a minimum working example of your error (see http://stackoverflow.com/a/5963610)\n",
" 2. Use the 'reprex' package to reproduce the error (see https://github.com/jennybc/reprex#readme)\n",
" 3. Create a new issue on https://github.com/grunwaldlab/poppr/issues")
stop(weirdomsg)
}
res.gid@call <- gencall
# Checking for individual name duplications or removals -------------------
same_names <- intersect(indNames(res.gid), ind.vec)
if (setequal(same_names, indNames(res.gid))) { # no duplications, only removals
names(ind.vec) <- ind.vec
ind.vec <- ind.vec[indNames(res.gid)]
} else { # removals and/or duplciations
ind.vec <- ind.vec[as.integer(indNames(res.gid))]
other(res.gid)$original_names <- ind.vec
ind.vec <- as.integer(indNames(res.gid))
}
pop.vec <- pop.vec %null% pop.vec[ind.vec]
reg.vec <- reg.vec %null% reg.vec[ind.vec]
xy <- if (xy_nomatch) xy else xy[ind.vec, , drop = FALSE]
ind.vec <- indNames(res.gid)
names(pop.vec) <- pop.vec %null% ind.vec
names(reg.vec) <- reg.vec %null% ind.vec
rownames(xy) <- if (xy_nomatch) rownames(xy) else ind.vec
# Keep the name if it's a URL ---------------------------------------------
if (length(grep("://", genalex)) < 1 & !"connection" %in% class(genalex)){
res.gid@call[2] <- basename(genalex)
}
if (region){
strata(res.gid) <- data.frame(Pop = pop.vec, Region = reg.vec)
} else {
strata(res.gid) <- data.frame(Pop = pop.vec)
}
if (geo){
if (!all(c("x", "y") %in% colnames(xy))){
colnames(xy) <- c("x", "y")
}
other(res.gid)$xy <- xy
}
res.gid <- if (genclone) as.genclone(res.gid) else res.gid
res.gid <- if (recode) recode_polyploids(res.gid, newploidy = TRUE) else res.gid
return(res.gid)
}
#==============================================================================#
#' Export data from genind objects to genalex formatted \*.csv files.
#'
#' genind2genalex will export a genclone or genind object to a \*.csv file
#' formatted for use in genalex.
#'
#' @param gid a [genclone-class] or [genind-class]
#' object.
#'
#' @param filename a string indicating the name and/or path of the file you wish
#' to create. If this is left unchanged, the results will be saved in a
#' temporary file and a warning will be displayed for six seconds before the
#' file is written. This process should give you time to cancel the process
#' and choose a file path. Otherwise, the name of the file is returned, so you
#' can copy that to a file of your choice with [file.copy()]
#'
#' @param overwrite `logical` if `FALSE` (default) and `filename` exists, then
#' the file will not be overwritten. Set this option to `TRUE` to overwrite
#' the file.
#'
#' @param quiet `logical` If `FALSE` a message will be printed to the
#' screen.
#'
#' @param pop a character vector OR formula specifying the population factor.
#' This can be used to specify a specific subset of strata or custom
#' population factor for the output. Note that the `allstrata` command
#' has precedence over this unless the value of this is a new population
#' factor.
#'
#' @param allstrata if this is `TRUE`, the strata will be combined into a
#' single population factor in the genalex file.
#'
#' @param geo `logical` Default is `FALSE`. If it is set to
#' `TRUE`, the resulting file will have two columns for geographic data.
#'
#' @param geodf `character` Since the `other` slot in the adegenet
#' object can contain many different items, you must specify the name of the
#' data frame in the `other` slot containing your geographic coordinates.
#' It defaults to "xy".
#'
#' @param sep a character specifying what character to use to separate columns.
#' Defaults to ",".
#'
#' @param sequence when `TRUE`, sequence data will be converted to integers
#' as per the GenAlEx specifications.
#'
#' @note If your data set lacks a population structure, it will be coded in the
#' new file as a single population labeled "Pop". Likewise, if you don't have
#' any labels for your individuals, they will be labeled as "ind1" through
#' "ind*N*", with *N* being the size of your population.
#'
#' @return The the file path or connection where the data were written.
#'
#' @seealso [read.genalex()], [clonecorrect()], [genclone-class], [genind-class]
#'
#' @export
#' @author Zhian N. Kamvar
#' @md
#' @examples
#' \dontrun{
#' data(nancycats)
#' genind2genalex(nancycats, "~/Documents/nancycats.csv", geo=TRUE)
#' }
genind2genalex <- function(gid, filename = "", overwrite = FALSE, quiet = FALSE,
pop = NULL, allstrata = TRUE, geo = FALSE,
geodf = "xy", sep = ",", sequence = FALSE){
if (!is.genind(gid)) stop("A genind object is needed.")
if (nchar(sep) != 1) stop("sep must be one byte/character (eg. \",\")")
if (!inherits(filename, "connection") && filename == "") {
filename <- tempfile(fileext = ".csv")
msg <- paste("\nNo file name was supplied.",
"\nWriting to temporary file: ", filename, "\n")
warning(msg, immediate. = TRUE)
for (i in 1:3) {
message(paste("Starting in ...", 3 - i + 1, " "), appendLF = FALSE)
Sys.sleep(i)
message("\r", appendLF = FALSE)
}
} else if (!inherits(filename, "connection") && file.exists(filename) && !overwrite) {
msg <- paste("The file ", substitute(filename), "exists and will not be overwritten.",
"\nUse `overwrite = TRUE` if you want to replace this file.")
stop(msg)
}
if (allstrata && !is.null(strata(gid))) {
if (is.null(pop) && length(pop) != nInd(gid)) {
allform <- paste0("~", paste(nameStrata(gid), collapse = "/"))
allform <- stats::as.formula(allform)
setPop(gid) <- allform
} else {
pop(gid) <- pop
}
} else if (!is.null(pop)) {
if ("formula" %in% class(pop)){
setPop(gid) <- pop
} else if (is.character(pop) && length(pop) == nInd(gid)){
pop(gid) <- pop
} else{
warning("supplied population factor does not match the number of samples or strata.")
}
}
if (is.null(pop(gid))){
pop(gid) <- rep("Pop", nInd(gid))
}
popcall <- match.call()
#topline is for the number of loci, individuals, and populations.
topline <- c(nLoc(gid), nInd(gid), nPop(gid))
popsizes <- table(pop(gid))
# The sizes of the populations correspond to the second line, which is the_gid
# names.
topline <- c(topline, popsizes)
secondline <- c("", "", "", popNames(gid))
ploid <- ploidy(gid)
# Constructing the locus names. GenAlEx separates the alleles of the loci, so
# There is one locus name for every p ploidy columns you have.
if(all(ploid > 1) & gid@type == "codom"){
# To intersperse spaces between the locus names, make a ploidy x loci
# matrix, fill the first row with the loci names, fill the rest with
# emptiness, and then convert it into a vector.
locnames <- matrix(character(nLoc(gid)*max(ploid)), nrow = max(ploid))
locnames[1, ] <- locNames(gid)
locnames[-1, ] <- " "
dim(locnames) <- NULL
} else {
locnames <- locNames(gid)
}
thirdline <- c("Ind","Pop", locnames)
# This makes sure that you don't get into a stacking error when stacking the
# first three rows.
if(length(thirdline) > length(topline)){
lenfac <- length(thirdline) - length(topline)
topline <- c(topline, rep("", lenfac))
secondline <- c(secondline, rep("", lenfac))
}
else if(length(thirdline) < length(topline)){
lenfac <- length(topline) - length(thirdline)
thirdline <- c(thirdline, rep("", lenfac))
}
infolines <- rbind(topline, secondline, thirdline)
if (sequence){
alleles(gid) <- lapply(alleles(gid), function(i){
i <- tolower(i)
vapply(i, switch, integer(1),
a = 1L, c = 2L, g = 3L, t = 4L,
"-" = 5L, ":" = 5L, 0L)
})
}
if(!quiet) cat("Extracting the table ... ")
the_gid <- as.character(pop(gid))
df <- genind2df(gid, sep = "/", usepop = FALSE)
if (any(ploid > 1)){
df <- generate_bruvo_mat(df, maxploid = max(ploid), sep = "/", mat_type = "character")
}
df[is.na(df)] <- 0
# making sure that the individual names are included.
if(all(indNames(gid) == "") | is.null(indNames(gid))){
indNames(gid) <- paste("ind", seq(nInd(gid)), sep="")
}
df <- cbind(indNames(gid), the_gid, df)
# setting the NA replacement. This doesn't work too well.
replacement <- if(gid@type == "PA") "-1" else "0"
if(!quiet) cat("Writing the table to", filename, "... ")
if(geo == TRUE & !is.null(gid$other[[geodf]])){
replacemat <- matrix("", 3, 3)
replacemat[3, 2:3] <- c("X", "Y")
infolines <- cbind(infolines, replacemat)
df2 <- data.frame(list("Space" = rep("", nInd(gid))))
gdf <- as.matrix(gid@other[[geodf]])
if (nrow(gdf) < nInd(gid)){
gdf <- rbind(gdf, matrix("", nInd(gid) - nrow(gdf), 2))
}
df <- cbind(df, df2, gdf)
} else if (geo == TRUE) {
popcall <- popcall[2]
warning(paste0("There is no data frame or matrix in ",
paste0(substitute(popcall)), "@other called ", geodf,
".\nThe xy coordinates will not be represented in the",
" resulting file."))
}
df[df == "NA" | is.na(df)] <- replacement
if (ncol(infolines) > ncol(df)){
lendiff <- ncol(infolines) - ncol(df)
padding <- matrix("", nrow = nInd(gid), ncol = lendiff)
df <- cbind(df, padding)
}
write.table(infolines, file = filename, quote = FALSE, row.names = FALSE,
col.names = FALSE, sep = sep)
write.table(df, file = filename, quote = TRUE, na = replacement, append = TRUE,
row.names = FALSE, col.names = FALSE, sep = sep)
if(!quiet) cat("Done.\n")
invisible(return(filename))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.