Nothing
#' Convert a file in STRUCTURE format into a genind object
#'
#' @description The function converts a text file in STRUCTURE format into
#' a genind object to use in R
#'
#' @param path A character string indicating the path to the STRUCTURE file in
#' format .txt, or alternatively the name of the file in the working directory.
#' The STRUCTURE file must only have :
#' \itemize{
#' \item A first column with the IDs of the individuals
#' (can be a simple number)
#' \item A second column with the IDs of the populations
#' (can be a simple number)
#' \item Some loci columns : as many columns as loci in the data
#' }
#' The row for loci names is optional but recommended.
#' Each individual is displayed on 2 rows.
#'
#' @param loci_names A character vector with the names of the loci if not
#' specified in the file first row. This argument is mandatory if the
#' STRUCTURE file does not include the names of the loci in the first row.
#' In other cases, the names of the loci is extracted from the file first row
#' @param pop_names (optional) A character vector indicating the population
#' names in the same order as in the STRUCTURE file. It is of the same length
#' as the number of populations. Without this argument, populations are
#' numbered from 1 to the total number of individuals.
#' @param ind_names (optional) A character vector indicating the individual
#' names in the same order as in the STRUCTURE file. It is of the same length
#' as the number of individuals. Without this argument, individuals are
#' numbered from 1 to the total number of individuals.
#' @return An object of type \code{genind}.
#' @details The column order of the resulting object can be different from
#' that of objects returned by \code{\link{gstud_to_genind}}
#' and \code{\link{genepop_to_genind}}, depending on allele and loci coding
#' This function uses functions from \pkg{pegas} package.
#' For details about STRUCTURE file format :
#' \href{http://www.ccg.unam.mx/~vinuesa/tlem09/docs/structure_doc.pdf}{STRUCTURE user manual}
#' @export
#' @author P. Savary
#' @examples
#' data("data_ex_genind")
#' loci_names <- levels(data_ex_genind@loc.fac)
#' pop_names <- levels(data_ex_genind@pop)
#' ind_names <- row.names(data_ex_genind@tab)
#' path_in <- system.file('extdata', 'data_ex_str.txt',
#' package = 'graph4lg')
#' file_n <- file.path(tempdir(), "data_ex_str.txt")
#' file.copy(path_in, file_n, overwrite = TRUE)
#' str <- structure_to_genind(path = file_n, loci_names = loci_names,
#' pop_names = pop_names, ind_names = ind_names)
#' file.remove(file_n)
##################################
structure_to_genind <- function(path,
pop_names = NULL,
loci_names = NULL,
ind_names = NULL){
# Check whether 'path' is a character string
if(!is.character(path)){
stop("You must specify the file path as a character string input 'path'.")
}
# Check whether individuals are regrouped in the data
if (any(duplicated(pop_names))){
warning("At least 1 population appears twice in 'pop_names' vector.
You should regroup individuals from the same population.")
}
# Read the text file
data_str <- utils::read.table(file = path,
sep = "\t")
# If the first line does not correspond to the first individual
if(stringr::str_sub(data_str[1, ], 1, 2) != "1 "){
# Get the loci names from the first line
loci_names <- unlist(strsplit(as.character(data_str[1, ]), split = " "))
# Replace '.' by '_' in the loci names
loci_names <- gsub(loci_names,
pattern = "\\.",
replacement = "_")
# Re-open the data skipping the first line and with another separator
data_str <- utils::read.table(file = path, sep = "", skip = 1)
# If the first line corresponds to the first individual
} else {
# Check whether loci names are given
if (!is.vector(loci_names)){
stop("You must specify the names of the loci in a character vector
because input file does not include them.")
}
# Re-open the data with another separator
data_str <- utils::read.table(file = path, sep = "")
# Check whether there are as many loci names in 'loci_names' than
# in the data
if (length(loci_names)!= (ncol(data_str)-2)){
stop("The length of 'loci_names' is not equal to the number of loci
in the input file.")
}
# Replace '.' by '_' in the loci names
loci_names <- gsub(loci_names,
pattern = "\\.",
replacement = "_")
}
# Give column names to the data
col_names <- unlist(c("ind", "pop", loci_names))
colnames(data_str) <- col_names
# Check whether there are as many populatons in 'pop_names' than in the data
if (is.vector(pop_names)){
ind.pop <- as.numeric(table(data_str$pop))
if(length(pop_names)!=length(ind.pop)){
stop("The length of 'pop_names' is not equal to population number
in the input file.")
}
# Give population names to the individuals
data_str$pop <- rep(pop_names, times = ind.pop)
}
# Check whether there are as many individuals in 'ind_names' than in the data
if (is.vector(ind_names)){
if(length(ind_names) != (nrow(data_str)/2)){
stop("The length of 'ind_names' is not equal to individual number
in the input file.")
}
# Give individual names to the individuals
data_str$ind <- rep(ind_names, each = 2)
}
# Check whether there are at least 5 individuals in every population
if(length(unique(as.numeric(table(data_str[, 'pop'])) < 11)) == 2){
warning("There are populations with less than 5 individuals")
}
# Split the data in two: even rows and uneven rows
# (every individual genotype is displayed on two consecutive rows)
data_uneven <- data_str[seq(1, nrow(data_str), 2), ]
data_even <- data_str[-seq(1, nrow(data_str), 2), ]
# Merge the data by separating them with "/"
data_union <- data_even
for (i in 3:ncol(data_union)){
data_union[, i] <- paste(data_even[, i], "/",
data_uneven[, i],
sep = "")
}
# Replace '-9/-9' by 'NA/NA'
data_union[, 3:ncol(data_union)] <- lapply(data_union[, 3:ncol(data_union)],
gsub,
pattern = '-9/-9',
replacement = 'NA/NA')
# Add row names
row.names(data_union) <- data_union$ind
# Remove the 'ind' column
data_union <- data_union[, -which(colnames(data_union) == "ind")]
# Convert data_union into a 'loci' object and then into a 'genind' object
data_genind <- pegas::loci2genind(pegas::as.loci(data_union,
col.pop = 1,
allele.sep = "/"),
ploidy = 2,
na.alleles = "NA")
return(data_genind)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.