################################################################################
#' File Converter
#'
#' \code{read.pcadapt} converts genotype matrices or files to an appropriate
#' format readable by \code{pcadapt}. For a file as input, you can choose to
#' return either a matrix or convert it in bed/bim/fam files.
#' For a matrix as input, this returns a matrix.
#'
#' @param input A genotype matrix or a character string specifying the name of
#' the file to be converted. Matrices should use NAs to encode missing values.
#' To encode missing values in 'pcadapt' and 'lfmm' files, 9s should be used.
#' @param type A character string specifying the type of data to be converted
#' from. Converters from 'vcf' and 'ped' formats are not maintained anymore;
#' if you have any issue with those, please use PLINK >= 1.9 to convert them
#' to the 'bed' format.
#' @param type.out Either a bed file or a standard R matrix.
#' If the input is a matrix, then the output is automatically a matrix
#' (so that you don't need to specify this parameter).
#' If the input is a bed file, then the output is also a bed file.
#' @param allele.sep a vector of characters indicating what delimiters are used
#' in VCF files. By default, only "|" and "/" are recognized.
#' So, this argument is only useful for \code{type = "vcf"}.
#' @param local.env deprecated argument.
#' @param ploidy deprecated argument.
#' @param pop.sizes deprecated argument.
#' @param blocksize deprecated argument.
#'
#' @importFrom utils read.table
#'
#' @export
#'
read.pcadapt <- function(input,
type = c("pcadapt", "lfmm", "vcf", "bed", "ped",
"pool", "example"),
## only when input is a file path
type.out = c("bed", "matrix"),
## only when type == "vcf"
allele.sep = c("/", "|"),
## deprecated arguments
pop.sizes, ploidy, local.env, blocksize) {
# In version 3.1.0, arguments 'local.env' and 'blocksize' have been removed
if (!missing(local.env)) warning("Argument 'local.env' is deprecated.")
if (!missing(blocksize)) warning("Argument 'blocksize' is deprecated.")
# In version 4.0.0, arguments 'ploidy' and 'pop.sizes' have been removed
if (!missing(pop.sizes)) warning("Argument 'pop.sizes' is deprecated.")
if (!missing(ploidy)) warning("Argument 'ploidy' is deprecated.")
type <- match.arg(type)
if (any(class(input) == "character")) {
file2other(input, type, match.arg(type.out), match.arg(allele.sep))
} else if (any(class(input) %in% c("matrix", "data.frame", "array"))) {
if (!missing(type.out)) warning("Argument 'type.out' is not used with matrices.")
matrix2other(input, type)
} else {
stop("Input should be a file path or a matrix-like object.")
}
}
################################################################################
check_file_size <- function(file) {
if (file.size(file) > 2e9) {
stop(paste0("It seems that the input file is quite large.\n",
"For large 'vcf' or 'ped' files, ",
"please use PLINK (>= 1.9) to convert them to 'bed' format."))
}
}
################################################################################
file2other <- function(input, type.in, type.out, allele.sep) {
# Check if input exists
if ((type.in != "example") && !file.exists(input))
stop(paste("File", input, "does not exist."))
# File converter
is.pcadapt <- TRUE
if (type.in == "ped") {
check_file_size(input)
warning("Converter ped to pcadapt is deprecated. Please use PLINK for conversion to bed (and QC).")
tmp <- tempfile(fileext = ".pcadapt")
ped2pcadapt(input = input, output = tmp)
input <- tmp
} else if (type.in == "vcf") {
check_file_size(input)
warning("Converter vcf to pcadapt is deprecated. Please use PLINK for conversion to bed (and QC).")
tmp <- tempfile(fileext = ".pcadapt")
vcf2pcadapt(input = input, output = tmp, allele.sep = allele.sep)
input <- tmp
} else if (type.in == "lfmm") {
is.pcadapt <- FALSE
} else if (type.in == "example") {
input <- system.file("extdata", "geno3pops.pcadapt", package = "pcadapt")
} else if (type.in == "pool") {
return(structure(as.matrix(read.table(input)), class = "pcadapt_pool"))
} else if (type.in == "bed") {
p <- nrow(data.table::fread(sub("\\.bed$", ".bim", input)))
n <- nrow(data.table::fread(sub("\\.bed$", ".fam", input)))
return(structure(normalizePath(input), n = n, p = p, class = "pcadapt_bed"))
}
if (type.out == "matrix") {
mmap <- mmapcharr::mmapchar(input, code = CODE_012)
structure(`if`(is.pcadapt, t(mmap[]), mmap[]), class = "pcadapt_matrix")
} else if (type.out == "bed") {
writeBed(input, is.pcadapt)
} else {
stop("This 'type.out' is not supported.")
}
}
################################################################################
matrix2other <- function(input, type.in) {
if (!(ncol(input) > 0) || !(nrow(input) > 0))
stop("Invalid input genotype matrix.")
if (type.in == "lfmm") {
res <- as.matrix(input)
} else if (type.in == "pcadapt") {
res <- t(as.matrix(input))
} else if (type.in == "pool") {
return(structure(as.matrix(input), class = "pcadapt_pool"))
} else {
stop("Incorrect type.in for matrices.")
}
if (nrow(res) > ncol(res)) {
warning(sprintf("%s\n %s",
"You have more individuals than SNPs.",
"Are you sure of the type of your matrix? (pcadapt/lfmm)"))
}
storage.mode(res) <- "integer"
structure(res, class = "pcadapt_matrix")
}
################################################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.