Nothing
#' Count the number/proportion of present/missing sites in each sample
#'
#' @param data EIGENSTRAT data object.
#' @param prop Calculate the proportion instead of counts?
#' @param missing Count present SNPs or missing SNPs?
#'
#' @return A data.frame object with SNP counts/proportions.
#'
#' @examples
#' \dontrun{snps <- eigenstrat(download_data(dirname = tempdir()))
#'
#' present_count <- count_snps(snps)
#' missing_count <- count_snps(snps, missing = TRUE)
#'
#' present_proportion <- count_snps(snps, prop = TRUE)
#' missing_proportion <- count_snps(snps, missing = TRUE, prop = TRUE)
#' }
#'
#' @export
#' @import rlang
count_snps <- function(data, missing = FALSE, prop = FALSE) {
summary_fun <- ifelse(prop, mean, sum)
if (missing) {
fun <- function(x) as.integer(is.na(x))
col <- "missing"
} else {
fun <- function(x) as.integer(!is.na(x))
col <- "present"
}
geno <- read_geno(data)
result <- read_ind(data)
result[[col]] <- as.vector(t(dplyr::summarise_all(geno, list(~ summary_fun(fun(.))))))
result
}
# Run a specified ADMIXTOOLS command.
run_cmd <- function(cmd, par_file, log_file) {
system(paste(cmd, "-p", par_file, ">", log_file))
}
# Create either specified or a temporary directory.
get_dir <- function(dir_name = NULL) {
if (!is.null(dir_name)) {
dir.create(dir_name, showWarnings = FALSE)
} else {
dir_name <- tempdir()
}
path.expand(dir_name)
}
# Generate paths to the population file, parameter file and log file
# based on a specified directory.
get_files <- function(dir_name, prefix) {
directory <- get_dir(dir_name)
list(
pop_file = file.path(directory, paste0(prefix, ".pop")),
par_file = file.path(directory, paste0(prefix, ".par")),
log_file = file.path(directory, paste0(prefix, ".log"))
)
}
# Check that the provided object is of the required type
check_type <- function(x, type) {
if (!inherits(x, type)) {
stop(glue::glue("Object is not of the type {type}"), call. = FALSE)
}
}
# Check for the presence of a given set of labels in an 'ind' file.
# Fail if there a sample was not found.
check_presence <- function(labels, data) {
not_present <- setdiff(labels, suppressMessages(read_ind(data)$label))
if (length(not_present) > 0) {
stop("The following samples are not present in the data: ",
paste(not_present, collapse = ", "), call. = FALSE)
}
}
#' Download example SNP data.
#'
#' The data is downloaded to a temporary directory by default.
#'
#' @param dirname Directory in which to put the data (EIGENSTRAT trio
#' of snp/geno/ind files).
#'
#' @export
download_data <- function(dirname = tempdir()) {
dest <- file.path(dirname, "snps.tgz")
utils::download.file(
"https://bioinf.eva.mpg.de/admixr/snps.tar.gz",
destfile = dest,
method = "wget",
quiet = TRUE
)
system(paste0("cd ", dirname, "; tar xf ", dest, "; rm snps.tgz"))
file.path(dirname, "snps", "snps")
}
# this looks incredibly hacky, but seems to be a solution to my R CMD check
# "missing global variable" NOTE woes caused by dplyr code (the following
# are not actually global variables)
utils::globalVariables(
names = c("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO",
"FORMAT", "chrom", "pos", "snp_id", "ref", "alt", "gen_dist",
"sample_id", "name", "target", ".", "start", "end",
"model", "noutgroups", "outgroups", "pattern", "pvalue"),
package = "admixr")
#' Pipe operator
#'
#' Added via usethis::use_pipe().
#'
#'
#' @name %>%
#' @rdname pipe
#' @keywords internal
#' @export
#' @importFrom magrittr %>%
#' @usage lhs \%>\% rhs
NULL
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.