#' Extract data produced by SLiM
#'
#' This function takes output produced from a \code{slimr_script} which uses
#' \code{\link{r_output}}, and converts it into a \code{tibble}
#'
#' @param output Character vector produced from SLiM run.
#'
#' @return A \code{tibble} with four columns:
#' \describe{
#' \item{generation}{A vector of generations where output was produced.}
#' \item{name}{Names of the output data.}
#' \item{expression}{The SLiM expression used to generate the output.}
#' \item{data}{The raw data output from SLiM as a character vector.}
#' }
#' @export
#'
#' @examples
#' if(slim_is_avail()) {
#' test_sim <- slim_script(
#' slim_block_init_minimal(),
#' slim_block_add_subpops(1, 100),
#' slim_block(1, 20, late(), {
#' r_output(p1$size(), "MS", do_every = 10)
#' })
#' ) %>%
#' slim_run(simple = TRUE)
#' slim_extract_output_data(test_sim$output)
#' }
slim_extract_output_data <- function(output) {
slim_process_output(output, data_only = TRUE)
}
#' Convert output data from a slimr_results object to a tibble
#'
#' This function tries to automate the process of converting
#' output in the \code{slimr_results} object returned by
#' \code{\link{slim_run}} function into usable data in the
#' form of a \code{tibble}. If this process fails you will
#' end up with the data as a character string, and you will
#' have to manually convert this into something you can use.
#'
#' @param dat A \code{slimr_results} object to extract data from.
#' You can alternatively specify the \code{output_data} from the \code{slimr_results}
#' directly in this parameter.
#' @param generations For what generations do you want to extract data?
#' Default is all generations that have data. The special value \code{0}
#' can be used to specify just the most recent generation in the data.
#'
#' @return A tibble with three columns, name: the name of the outputs,
#' generation: the generations of the outputs, and data: the outputs as converted data.
#' This final column will be a list column, where each element will usually
#' be a vector or a tibble, depending on what kind of data was returned
#' by the simulation.
#'
#' @export
#' @examples
#' if(slim_is_avail()) {
#' test_sim <- slim_script(
#' slim_block_init_minimal(),
#' slim_block_add_subpops(1, 100),
#' slim_block(1, 20, late(), {
#' r_output(sim.outputFull(), "out", do_every = 10)
#' })
#' ) %>%
#' slim_run()
#' slim_results_to_data(test_sim)
#' }
slim_results_to_data <- function(dat, generations = NULL) {
if(inherits(dat, "slimr_results")) {
dat <- dat$output_data
}
if(!is.null(generations)) {
if(generations == 0) {
generations <- max(dat$generation, na.rm = TRUE)
}
dat <- dat %>%
dplyr::filter(.data$generation %in% generations)
}
res_dat <- dat %>%
dplyr::mutate("ord" := seq_len(dplyr::n())) %>%
dplyr::group_by(.data$type, .data$expression) %>%
dplyr::group_modify(~slim_extract_to_data(.x, unlist(.y$type[1]))) %>%
dplyr::ungroup() %>%
dplyr::arrange(.data$ord) %>%
dplyr::select(-"ord")
return(res_dat)
}
slim_extract_to_data <- function(dat, type) {
if(type == "slim_output") {
first_line <- readr::read_lines(dat$data[[1]],
n_max = 1L) %>%
stringr::str_split(" ") %>%
.[[1]]
output_type <- first_line[c(3, 4)]
output_type <- output_type[which(stringr::str_detect(output_type, "[:upper:]"))]
res <- switch(output_type,
A = purrr::map(purrr::transpose(dat),
~slim_extract_full(.x,
type = "full_individual",
join = FALSE)),
GS = purrr::map(purrr::transpose(dat),
~slim_extract_genome(.x,
type = "full",
join = FALSE))
)
} else {
res <- slim_to_Rob(dat$data, type)
}
#print(dplyr::glimpse(res))
dat <- dat %>%
dplyr::mutate("data" := res)
if(type == "slim_nucleotides") {
assert_package("Biostrings", install = "BiocManager::install")
if(any(stringr::str_detect(dat$name, "_subpops"))) {
name_name <- dat$name[stringr::str_which(dat$name, "_subpops")][1]
seq_name <- stringr::str_remove(name_name, "_subpops")
subpops <- dat %>%
dplyr::filter(.data$name == name_name) %>%
dplyr::pull(.data$data)
} else {
seq_name <- dat$name[1]
subpops <- rep("1", length(dat$data[1]))
}
seq_dat <- dat %>%
dplyr::filter(.data$name == seq_name)
dna <- purrr::map(seq_dat %>%
dplyr::pull(.data$data),
~ Biostrings::DNAStringSet(.x))
dat <- seq_dat %>%
dplyr::mutate("data" := dna,
"subpops" = subpops)
}
if(type == "slim_nucleotides_both") {
assert_package("Biostrings", install = "BiocManager::install")
if(any(stringr::str_detect(dat$name, "_subpops"))) {
name_name <- dat$name[stringr::str_which(dat$name, "_subpops")][1]
seq_name <- stringr::str_remove(name_name, "_subpops")
subpops <- dat %>%
dplyr::filter(.data$name == name_name) %>%
dplyr::pull(.data$data)
subpops <- rep(subpops, each = 2)
} else {
seq_name <- dat$name[1]
subpops <- NULL
}
seq_dat <- dat %>%
dplyr::filter(.data$name == seq_name)
dna <- purrr::map(seq_dat %>%
dplyr::pull(.data$data),
~ Biostrings::DNAStringSet(.x))
dat <- seq_dat %>%
dplyr::mutate("data" := dna,
"subpops" = subpops)
}
if(type == "slim_snp") {
if(any(stringr::str_detect(dat$name, "_subpops"))) {
name_name <- dat$name[stringr::str_which(dat$name, "_subpops")][1]
seq_name <- stringr::str_remove(name_name, "_subpops")
subpops <- dat %>%
dplyr::filter(.data$name == name_name) %>%
dplyr::pull(.data$data)
} else {
seq_name <- dat$name[1]
subpops <- NULL
}
pos_dat <- dat %>%
dplyr::filter(.data$name == paste0(seq_name, "_pos")) %>%
dplyr::pull(.data$data)
seq_dat <- dat %>%
dplyr::filter(.data$name == seq_name)
snp <- purrr::map(seq_dat %>%
dplyr::pull(.data$data),
~ slim_make_snp_matrix(.x))
dat <- seq_dat %>%
dplyr::mutate("data" := snp,
"pos" := pos_dat,
"subpops" = subpops)
}
dat
}
slim_to_Rob <- function(dat, type) {
dat_res <- stringr::str_split(dat, " ")
# dat_res <- switch(type,
# )
dat_res
}
slim_make_snp_matrix <- function(dat) {
ninds <- as.numeric(dat[1])
nsnps <- as.numeric(dat[2])
snp_mat <- matrix(as.numeric(as.logical(dat[c(-1, -2)])),
nrow = ninds * 2,
ncol = nsnps,
byrow = TRUE)
snp_mat <- rowsum(snp_mat,
rep(1:ninds, each = 2),
reorder = FALSE)
snp_mat
}
#' Extract Elements from SLiM's outputFull()
#'
#' @param output_full A character vector where each element is the result of a
#' call to \code{outputFull()} in SLiM
#' @param type Which type of data to return: "mutations", "individuals", "genomes", "coordinates",
#' "sexes", or "ages"
#' @param join If asking for multiple output type, should they be joined into one tibble (\code{join = TRUE})
#' or left as separate tibbles returned in a list (\code{join = FALSE})?
#' @param expand_mutations If asking for "genomes" output, should mutations be expanded into their own column (\code{expand_mutations = TRUE})
#' or left as a vector of mutation ids in a list column (\code{expand_mutations = FALSE})?
#'
#' @return A tibble
#' @export
#'
#' @examples
#' if(slim_is_avail()) {
#' test_sim <- slim_script(
#' slim_block_init_minimal(mutation_rate = 1e-6),
#' slim_block_add_subpops(1, 100),
#' slim_block(1, 20, late(), {
#' r_output(sim.outputFull(), "out", do_every = 10)
#' })
#' ) %>%
#' slim_run()
#' slim_extract_full(test_sim$output_data, type = "mutations")
#' }
slim_extract_full <- function(output_full, type = c("mutations", "individuals",
"genomes", "coordinates",
"sexes", "ages", "full_individual"), join = TRUE,
expand_mutations = FALSE) {
if(length(type) == 1) {
type_orig <- match.arg(type)
} else {
type_orig <- type
}
if(length(type_orig) == 1) {
if(type_orig == "full_individual") {
type <- c("individuals", "genomes", "mutations")
}
} else {
type <- type[type != "full_individual"]
}
#type[type %in% c("coordinates", "sexes", "ages")] <- "individuals"
get_one <- function(type, data, generation) {
purrr::map2_dfr(data, generation,
~slim_extract_full_one(.x, type, expand_mutations, generation = .y)) %>%
dplyr::select("generation", dplyr::everything())
}
dat <- purrr::map(type,
~get_one(.x, output_full$data, output_full$generation))
if(join) {
## not the ideal solution but I don't have time to check which
## variable name to join by for every join in the code right now
## it is on my to do list though
suppressMessages(
dat <- purrr::reduce(dat,
~dplyr::left_join(.x, .y))
)
class(dat) <- c("slimr_outputFull_joined",
class(dat))
} else {
names(dat) <- type
class(dat) <- c("slimr_outputFull_list",
class(dat))
}
if(length(dat) == 1) {
dat <- dat[[1]]
}
dat
}
slim_extract_full_one <- function(string, type, expand_mutations, generation) {
if(stringr::str_detect(string, "Ancestral sequence:")) {
ancs <- TRUE
} else {
ancs <- FALSE
}
if(type %in% c("coordinates", "sexes", "ages")) {
new_type <- "individuals"
} else {
new_type <- type
}
dat <- switch(new_type,
metadat = stringr::str_match(string,
stringr::regex("^(?:(.*?))\nPopulations:",
dotall = TRUE))[ , 2],
populations = stringr::str_match(string,
stringr::regex("Populations:\n(?:(.*?))\nMutations:",
dotall = TRUE))[ , 2],
mutations = stringr::str_match(string,
stringr::regex("Mutations:\n(?:(.*?))\nIndividuals:",
dotall = TRUE))[ , 2],
individuals = stringr::str_match(string,
stringr::regex("Individuals:\n(?:(.*?))\nGenomes:",
dotall = TRUE))[ , 2],
genomes = if(ancs) {
stringr::str_match(string,
stringr::regex("Genomes:\n(?:(.*?))\nAncestral sequence:",
dotall = TRUE))[ , 2]
} else {
stringr::str_match(string,
stringr::regex("Genomes:\n(?:(.*?))$",
dotall = TRUE))[ , 2] #%>%
# stringr::str_replace_all(stringr::regex("(^p[:digit:]+\\:[:digit:]+[:blank:]+[:upper:])([:blank:]+)(.*?)$",
# multiline = TRUE),
# '\\1\\2"\\3"')
},
NULL
)
if(stringr::str_detect(dat, "\n", negate = TRUE)) {
dat <- paste(dat, "\n")
}
dat <- switch(type,
metadat = dplyr::tibble(metadata = dat),
populations = suppressWarnings(readr::read_table2(dat,
col_types = "cicd",
col_names = c("subpop", "n_ind",
"sex_type", "sex_ratio"))),
mutations = suppressWarnings(readr::read_table2(dat,
col_types = "iiciddciic",
col_names = c("mut_id", "unique_mut_id",
"mut_type", "chrome_pos",
"selection", "dominance",
"subpop", "first_gen",
"prevalence",
"nucleotide"))),
individuals = suppressWarnings(readr::read_table2(dat,
col_types = "ccccc",
col_names = c("unique_ind_id", "sex",
"gen_id_1", "gen_id_2",
"extra"))) %>%
tidyr::separate("unique_ind_id", c("pop_id", "ind_id"), sep = ":", remove = FALSE) %>%
tidyr::pivot_longer(c("gen_id_1", "gen_id_2"),
names_prefix = "gen_id_",
names_to = "genome_num",
values_to = "gen_id"),
coordinates = suppressWarnings(readr::read_table2(dat,
col_types = "ccccddd",
col_names = c("unique_ind_id", "sex",
"gen_id_1", "gen_id_2",
"x", "y", "z"))) %>%
tidyr::separate("unique_ind_id", c("pop_id", "ind_id"), sep = ":", remove = FALSE),
genomes = suppressWarnings(read_with_excess(dat,
col_types = "ccc",
col_names = c("gen_id", "gen_type",
"mut_list"))) %>%
dplyr::mutate("mut_list" := stringr::str_split(.data$mut_list, " ")),
NULL
)
if(expand_mutations) {
if(type == "genomes") {
dat <- dat %>%
tidyr::unnest_longer(col = "mut_list",
values_to = "mut_id",
indices_include = FALSE) %>%
dplyr::mutate("mut_id" := as.integer(.data$mut_id)) %>%
tidyr::drop_na("mut_id")
}
} else {
if(type == "genomes") {
dat <- dat %>%
dplyr::mutate("mut_list" := purrr::map(.data$mut_list,
as.integer))
}
}
dat %>%
dplyr::mutate("generation" := generation)
}
#' Extract Elements from SLiM's output() for genomes
#'
#' @param output A character vector where each element is the result of a
#' call to \code{genomes.output()} in SLiM
#' @param type Which type of data to return: "mutations", or "genomes" or
#' both.
#' @param join If asking for multiple output type, should they be joined into one tibble (\code{join = TRUE})
#' or left as separate tibbles returned in a list (\code{join = FALSE})?
#' @param expand_mutations If asking for "genomes" output, should mutations be expanded into their own column (\code{expand_mutations = TRUE})
#' or left as a vector of mutation ids in a list column (\code{expand_mutations = FALSE})?
#'
#' @return A tibble
#' @export
#'
#' @examples
#' if(slim_is_avail()) {
#' test_sim <- slim_script(
#' slim_block_init_minimal(mutation_rate = 1e-6),
#' slim_block_add_subpops(1, 100),
#' slim_block(1, 20, late(), {
#' r_output(p1.genomes.output(), "out", do_every = 10)
#' })
#' ) %>%
#' slim_run()
#' slim_extract_genome(test_sim$output_data, type = "mutations")
#' }
slim_extract_genome <- function(output, type = c("mutations",
"genomes",
"full"),
join = TRUE,
expand_mutations = FALSE) {
if(length(type) == 1) {
type_orig <- match.arg(type)
} else {
type_orig <- type
}
if(length(type_orig) == 1) {
if(type_orig == "full") {
type <- c("genomes", "mutations")
}
} else {
type <- type[type != "full"]
}
if("genomes" %in% type) {
type <- c("metadat", type)
}
get_one <- function(type, data, generation) {
purrr::map2_dfr(data, generation,
~slim_extract_genome_one(.x, type, expand_mutations, generation = .y)) %>%
dplyr::select("generation", dplyr::everything())
}
dat <- purrr::map(type,
~get_one(.x, output$data, output$generation))
if("metadat" %in% type) {
GS <- as.integer(stringr::str_match(dat[[1]]$metadata[1],
stringr::regex("GS ([:digit:]+)"))[ , 2])
genomes <- dplyr::tibble(gen_id = paste0("p*:", rep(1:GS)))
dat <- dat[-1]
dat[[1]] <- dplyr::left_join(genomes, dat[[1]], by = "gen_id")
}
if(join) {
suppressMessages(
dat <- purrr::reduce(dat,
~dplyr::left_join(.x, .y))
)
class(dat) <- c("slimr_genome_output_joined",
class(dat))
} else {
names(dat) <- type
class(dat) <- c("slimr_genome_output_list",
class(dat))
}
if(length(dat) == 1) {
dat <- dat[[1]]
}
dat
}
slim_extract_genome_one <- function(string, type, expand_mutations, generation) {
dat <- switch(type,
metadat = stringr::str_match(string,
stringr::regex("^(?:(.*?))\nMutations:",
dotall = TRUE))[ , 2],
mutations = stringr::str_match(string,
stringr::regex("Mutations:\n(?:(.*?))\nGenomes:",
dotall = TRUE))[ , 2],
genomes = stringr::str_match(string,
stringr::regex("Genomes:\n(?:(.*?))$",
dotall = TRUE))[ , 2] %>%
stringr::str_replace_all(stringr::regex("(^p[:digit:]+\\:[:digit:]+[:blank:]+[:upper:])([:blank:]+)(.*?)$",
multiline = TRUE),
'\\1\\2"\\3"'),
NULL
)
if(!is.na(dat)){
if(stringr::str_detect(dat, "\n", negate = TRUE)) {
dat <- paste(dat, "\n")
}
dat <- switch(type,
metadat = dplyr::tibble(metadata = dat),
mutations = suppressWarnings(readr::read_table2(dat,
col_types = "iiciddcii",
col_names = c("mut_id", "unique_mut_id",
"mut_type", "chrome_pos",
"selection", "dominance",
"subpop", "first_gen",
"prevalence",
"nucleotide"))),
genomes = suppressWarnings(read_with_excess(dat,
col_types = "ccc",
col_names = c("gen_id", "gen_type",
"mut_list"))) %>%
dplyr::mutate("mut_list" := stringr::str_split(.data$mut_list, " ")),
NULL
)
} else {
dat <- switch(type,
metadat = dplyr::tibble(metadata = dat),
mutations = dplyr::tibble("mut_id" = NA,
"unique_mut_id" = NA,
"mut_type" = NA,
"chrome_pos" = NA,
"selection" = NA,
"dominance" = NA,
"subpop" = NA,
"first_gen" = NA,
"prevalence" = NA,
"nucleotide" = NA),
genomes = suppressWarnings(read_with_excess(dat,
col_types = "ccc",
col_names = c("gen_id", "gen_type",
"mut_list"))) %>%
dplyr::mutate("mut_list" := stringr::str_split(.data$mut_list, " ")),
NULL
)
}
if(expand_mutations) {
if(type == "genomes") {
dat <- dat %>%
tidyr::unnest_longer(col = "mut_list",
values_to = "mut_id",
indices_include = FALSE) %>%
dplyr::mutate("mut_id" := as.integer(.data$mut_id))
}
} else {
if(type == "genomes") {
dat <- dat %>%
dplyr::mutate("mut_list" := purrr::map(.data$mut_list,
as.integer))
}
}
dat %>%
dplyr::mutate("generation" := generation)
}
slim_extract_individuals <- function(output_data, format = c("genlight", "tibble")) {
format <- match.arg(format)
if(inherits(output_data, "slimr_results")) {
output_data <- output_data$output_data
}
dat <- purrr::pmap_dfr(list(output_data$expression, output_data$data, output_data$generation),
~slim_extract_individuals_one(..1, ..2, ..3)) %>%
dplyr::select("generation", dplyr::everything())
if(format == "genlight") {
assert_package("adegenet")
alleles <- dat %>%
dplyr::select(dplyr::all_of(c("ind_id", "mut_id"))) %>%
dplyr::group_by(.data$ind_id, .data$mut_id)
dat_gen <- methods::new("genlight", gen = dat %>%
dplyr::select(dplyr::starts_with("i", ignore.case = FALSE)) %>%
as.matrix() %>%
t(),
position = dat$POS,
loc.names = dat$MID,
other = dat %>%
dplyr::select("generation", "S":"MULTIALLELIC"))
adegenet::pop(dat_gen) <- dat$pop_id
dat <- dat_gen
}
dat
}
slim_extract_individuals_one <- function(expression, data, generation) {
if(any(stringr::str_detect(expression, "outputVCF"))) {
dat <- read_VCF(data)
} else {
if(any(stringr::str_detect(expression, "outputFull"))) {
dat <- slim_extract_full(dplyr::tibble(generation = generation,
expression = expression,
data = data),
c("individuals", "genomes", "mutations"),
join = TRUE, expand_mutations = TRUE)
} else {
rlang::abort(glue::glue("slimr currently doesn't support extracting individual data produced by {expression}"))
}
}
dat
}
read_VCF <- function(string) {
all_lines <- readr::read_lines(string)
meta <- stringr::str_detect(all_lines, stringr::regex("^#"))
meta_rle <- rle(meta)
n_pos <- sum(meta_rle$values)
dat_indices <- matrix(cumsum(meta_rle$lengths),
nrow = n_pos, byrow = TRUE)
meta_indices <- cbind(c(1L, dat_indices[-n_pos, 2] + 1L), dat_indices[ , 1] - 1L)
pops <- stringr::str_split(all_lines[meta_indices[ , 1]], " ") %>%
purrr::map_chr(4)
data <- purrr::map2_dfr(purrr::array_branch(dat_indices, 1), pops,
~readr::read_tsv(all_lines[.x[1]:.x[2]]) %>%
dplyr::rename_at(dplyr::vars(dplyr::starts_with("i", ignore.case = FALSE)),
function(z) paste0(.y, "_", z))) %>%
dplyr::mutate("INFO" := purrr::map(.data$INFO,
~eval(rlang::parse_expr(paste0("c(",
stringr::str_replace_all(.x, c(";" = ",", "MULTIALLELIC" = "MULTIALLELIC = 1")),
")"))))) %>%
tidyr::unnest_wider(col = .data$INFO) %>%
dplyr::mutate("MULTIALLELIC" := ifelse(is.na(.data$MULTIALLELIC), 0, .data$MULTIALLELIC)) %>%
dplyr::mutate_at(dplyr::vars(dplyr::starts_with("p", ignore.case = FALSE)),
~stringr::str_split(.x, "\\|", simplify = TRUE) %>%
apply(2, as.numeric) %>% rowSums())
data
}
#' Extract data into a genlight object
#'
#' @param x \code{slimr_results} object containing data generated by \code{r_output}
#' using \code{outputFull()} in SLiM
#' @param ... Arguments passed to or from other methods.
#' @return A \code{genlight} object
#' @export
#' @examples
#' if(slim_is_avail()) {
#' test_sim <- slim_script(
#' slim_block_init_minimal(mutation_rate = 1e-6),
#' slim_block_add_subpops(1, 100),
#' slim_block(1, 20, late(), {
#' r_output(sim.outputFull(), "out", do_every = 10)
#' })
#' ) %>%
#' slim_run()
#' if(require("adegenet", quietly = TRUE)) {
#' plot(slim_extract_genlight(test_sim$output_data))
#' }
#' }
slim_extract_genlight <- function(x, ...) {
UseMethod("slim_extract_genlight", x)
}
#' @export
slim_extract_genlight.slimr_results_coll <- function(x, name = NULL, by = NULL, ...) {
if(is.null(names(x))) {
names(x) <- paste0("rep_", seq_along(x))
}
furrr::future_imap_dfr(x, ~ slim_extract_genlight(.x, name = name, by = by, ...) %>%
dplyr::mutate(rep = .y),
.options = furrr::furrr_options(seed = TRUE))
}
#' @export
slim_extract_genlight.slimr_results <- function(x, name = NULL, by = NULL, ...) {
assert_package("adegenet")
output_data <- x$output_data
if(!is.null(name)) {
output_data <- output_data %>%
dplyr::filter(.data$name == !!name)
}
if(!is.null(by)) {
output_data_grouped <- output_data %>%
dplyr::group_by(!!!rlang::syms(by))
data_split <- output_data_grouped %>%
dplyr::group_split() %>%
purrr::map(~{class(.x) <- c("slimr_output_data",
class(.x)); .x})
gls <- purrr::map(data_split,
~slim_extract_genlight(.x))
dat <- dplyr::group_keys(output_data_grouped) %>%
dplyr::mutate(genlight = gls)
} else {
dat <- slim_extract_genlight(output_data)
}
dat
}
#' @export
slim_extract_genlight.slimr_output_data <- function(x, ...) {
assert_package("adegenet")
alleles <- mut_dat <- NULL
output_full <- x %>%
dplyr::filter(stringr::str_detect(expression, "outputFull"))
output_VCF <- x %>%
dplyr::filter(stringr::str_detect(expression, "outputVCF"))
output_GS <- x %>%
dplyr::filter(stringr::str_detect(expression, "\\.output\\("))
if(nrow(output_full) == 0 && nrow(output_VCF) == 0 && nrow(output_GS) == 0) {
rlang::abort("You must have output generated by outputFull(), outputVCF(), or output() to extract a genlight object.")
}
if(nrow(output_full) > 0) {
c(alleles, mut_dat) %<-% slim_extract_genlight_tibble_full(output_full)
dat_gen_full <- methods::new("genlight", gen = alleles %>%
dplyr::select(-dplyr::any_of(c("generation",
"pop_id",
"ind_id"))) %>%
as.matrix(),
position = mut_dat$chrome_pos,
loc.names = mut_dat$unique_mut_id,
ind.names = paste0("generation_", alleles$generation,
alleles$pop_id, ":", alleles$ind_id),
other = mut_dat %>%
dplyr::select(dplyr::all_of(c("mut_type", "prevalence"))))
adegenet::pop(dat_gen_full) <- alleles$pop_id
} else {
dat_gen_full <- NULL
}
if(nrow(output_VCF) > 0) {
rlang::abort("Sorry, VCF output not yet supported.")
#dat_VCF <- slim_extract_genlight_tibble_VCF(output_VCF)
# dat_gen_VCF <- methods::new("genlight", gen = alleles %>%
# dplyr::select(-.data$pop_id, -.data$ind_id) %>%
# as.matrix(),
# position = mut_dat$chrome_pos,
# loc.names = mut_dat$unique_mut_id,
# ind.names = paste0(alleles$pop_id, ":", alleles$ind_id),
# other = mut_dat %>%
# dplyr::select(.data$mut_type, .data$prevalence))
# adegenet::pop(dat_gen_VCF) <- alleles$pop_id
} else {
dat_gen_VCF <- NULL
}
if(nrow(output_GS) > 0) {
c(alleles, mut_dat) %<-% slim_extract_genlight_tibble_GS(output_GS)
dat_gen_GS <- methods::new("genlight", gen = alleles %>%
dplyr::select(-"ind_id") %>%
as.matrix(),
position = mut_dat$chrome_pos,
loc.names = mut_dat$unique_mut_id,
ind.names = alleles$ind_id,
other = mut_dat %>%
dplyr::select(dplyr::all_of(c("mut_type", "prevalence"))))
} else {
dat_gen_GS <- NULL
}
if(!is.null(dat_gen_full)) {
if(!is.null(dat_gen_VCF)) {
if(!is.null(dat_gen_GS)) {
dat <- list(full = dat_gen_full, VCF = dat_gen_VCF,
GS = dat_gen_GS)
} else {
dat <- list(full = dat_gen_full, VCF = dat_gen_VCF)
}
} else {
if(!is.null(dat_gen_GS)) {
dat <- list(full = dat_gen_full, GS = dat_gen_GS)
} else {
dat <- dat_gen_full
}
}
} else {
if(!is.null(dat_gen_VCF)) {
if(!is.null(dat_gen_GS)) {
dat <- list(VCF = dat_gen_VCF, GS = dat_gen_GS)
} else {
dat <- dat_gen_VCF
}
} else {
if(!is.null(dat_gen_GS)) {
dat <- dat_gen_GS
} else {
dat <- list()
}
}
}
dat
}
slim_extract_genlight_tibble_full <- function(output_full) {
inds <- slim_extract_full(output_full,
c("individuals",
"genomes"),
join = TRUE,
expand_mutations = TRUE)
muts <- slim_extract_full(output_full,
"mutations")
suppressMessages(
alleles <- inds %>%
dplyr::left_join(muts %>% dplyr::select(dplyr::all_of(c("mut_id", "unique_mut_id",
"generation")))) %>%
dplyr::select(dplyr::all_of(c("generation",
"pop_id",
"ind_id",
"unique_mut_id"))) %>%
dplyr::mutate("present" := 1) %>%
dplyr::group_by(.data$generation,
.data$pop_id,
.data$ind_id,
.data$unique_mut_id) %>%
dplyr::summarise("count" := sum(.data$present), .groups = "drop") %>%
dplyr::sample_frac()
)
suppressMessages(
mut_dat <- alleles %>%
dplyr::ungroup() %>%
dplyr::left_join(muts %>%
dplyr::select(dplyr::all_of(c("unique_mut_id",
"chrome_pos",
"mut_type",
"prevalence"))),
relationship = "many-to-many") %>%
dplyr::group_by(.data$unique_mut_id,
.data$chrome_pos,
.data$mut_type) %>%
dplyr::summarise("prevalence" := mean(.data$prevalence),
.groups = "drop")
)
alleles <- alleles %>%
tidyr::pivot_wider(id_cols = c("generation", "pop_id", "ind_id"),
names_from = "unique_mut_id",
values_from = "count",
values_fill = 0)
list(alleles = alleles, mut_dat = mut_dat)
}
slim_extract_genlight_tibble_GS <- function(output_GS) {
inds <- slim_extract_genome(output_GS,
c("genomes"),
join = TRUE,
expand_mutations = TRUE)
ind_df <- dplyr::tibble(gen_id = unique(inds$gen_id)) %>%
dplyr::mutate(ind_id = rep(1:(dplyr::n() / 2), each = 2))
suppressMessages(
inds <- inds %>%
dplyr::left_join(ind_df)
)
muts <- slim_extract_genome(output_GS,
"mutations")
suppressMessages(
alleles <- inds %>%
dplyr::left_join(muts %>% dplyr::select(dplyr::all_of(c("mut_id", "unique_mut_id")))) %>%
dplyr::select(dplyr::all_of(c("ind_id", "unique_mut_id"))) %>%
dplyr::mutate("present" := 1) %>%
dplyr::group_by(.data$ind_id, .data$unique_mut_id) %>%
dplyr::summarise("count" := sum(.data$present), .groups = "drop") %>%
dplyr::sample_frac()
)
suppressMessages(
mut_dat <- alleles %>%
dplyr::select("unique_mut_id") %>%
dplyr::distinct() %>%
dplyr::left_join(muts %>%
dplyr::select(dplyr::all_of(c("unique_mut_id",
"chrome_pos",
"mut_type",
"prevalence"))))
)
mut_dat <- mut_dat %>%
dplyr::filter(!is.na(.data$unique_mut_id))
alleles <- alleles %>%
#dplyr::filter(!is.na(unique_mut_id)) %>%
tidyr::pivot_wider(id_cols = c("ind_id"),
names_from = "unique_mut_id",
values_from = "count",
values_fill = 0) %>%
dplyr::select(-"NA")
list(alleles = alleles, mut_dat = mut_dat)
}
read_with_excess <- function(string, col_names, col_types) {
txt <- readr::read_lines(string)
ncol <- length(col_names)
split_it <- stringr::str_split_fixed(txt, " ", ncol) %>%
as.data.frame() %>%
stats::setNames(col_names) %>%
readr::type_convert(col_types) %>%
dplyr::as_tibble()
return(split_it)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.