#' @title Prepare and summarise the assignment results from GENODIVE
#' @description Import assignment results from GENODIVE.
#' Current version needs results from both statistics, the home likelihood
#' and likelihood ratio.
#' @param assignment.lmax The file with likelihood ratio assignment results
#' from GENODIVE.
#' @param assignment.lhome The file with home likelihood assignment results
#' from GENODIVE.
#' @param lmax.migrant.skip The number of lines to skip before the migrant
#' info in the likelihood ratio assignment results. Usually = 11.
#' @param lmax.number.migrant The number of migrant detected
#' in the likelihood ratio assignment results.
#' @param lmax.skip The number of lines to skip before the individuals info
#' in the likelihood ratio assignment results.
#' @param lhome.migrant.skip The number of lines to skip before
#' the migrant info in the home likelihood assignment results. Usually = 11.
#' @param lhome.number.migrant The number of migrant detected
#' in the home likelihood assignment results.
#' @param lhome.skip The number of lines to skip before the individuals info
#' in the home likelihood assignment results.
#' @param sites.levels An optional character string with your sites names in
#' the same order as the pop levels.
#' @param pop.labels The pop label to identify your sampling sites.
#' @param pop.levels An optional character string with your populations ordered.
#' @param pop.id.start The start of your population id
#' in the name of your individual sample.
#' @param pop.id.end The end of your population id
#' in the name of your individual sample.
#' @param number.individuals The number of individuals analysed.
#' @param number.pop The number of populations analysed.
#' @references Meirmans PG, Van Tienderen PH (2004) genotype and genodive:
#' two programs for the analysis of genetic diversity of asexual organisms.
#' Molecular Ecology Notes, 4, 792-794.
#' @import dplyr
#' @import readr
#' @importFrom stringr str_sub
#' @export
#' @rdname assignment_genodive
#' @author Thierry Gosselin \email{thierrygosselin@@icloud.com}
assignment_genodive <- function(assignment.lmax, assignment.lhome,
lmax.migrant.skip,
lmax.number.migrant,
lmax.skip,
lhome.migrant.skip,
lhome.number.migrant,
lhome.skip,
sites.levels,
pop.labels,
pop.levels,
pop.id.start,
pop.id.end,
number.individuals,
number.pop) {
Individuals <- NULL
Current <- NULL
Inferred <- NULL
Lik_max <- NULL
Lik_home <- NULL
Lik_ratio <- NULL
Threshold <- NULL
STATUS <- NULL
IND <- NULL
N_CORRECTED <- NULL
DISCRIMINATE <- NULL
STATISTICS <- NULL
POUR <- NULL
migrants_lmax <- read_delim(
assignment.lmax,
delim = "\t",
skip = lmax.migrant.skip,
n_max = lmax.number.migrant,
col_names = c("Individuals", "Current", "Inferred", "Lik_max", "Lik_home", "Lik_ratio", "Threshold"),
progress = interactive(),
col_types = "ccciiii")
# View(migrants_lmax)
migrants_lhome <- read_delim(
assignment.lhome,
delim = "\t",
skip = lhome.migrant.skip,
n_max = lhome.number.migrant,
col_names = c("Individuals", "Current", "Inferred", "Lik_max", "Lik_home", "Lik_ratio", "Threshold"),
progress = interactive(),
col_types = "ccciiii")
# View(migrants_lhome)
# Create a new vector with a unindified pop label
pop.levels.lhome <- c(pop.levels, "?")
# create a new vector to assign the class of the column during the import
col.types <- stri_join("ccciii", stri_dup("i", times = number.pop), sep = "") # ccciii are default, integer are added based on the number of populations
assignment_lmax <- read_delim(
assignment.lmax,
delim = "\t",
skip = lmax.skip,
n_max = number.individuals,
col_names = c("Individuals", "Current", "Inferred", "Lik_max", "Lik_home", "Lik_ratio",
paste(pop.levels, sep = ",")),
progress = interactive(),
col_types = col.types) %>%
mutate(
Current = str_sub(Current, pop.id.start, pop.id.end),
Inferred = str_sub(Inferred, pop.id.start, pop.id.end),
Current = factor(stri_replace_all_fixed(Current, sites.levels, pop.labels, vectorize_all = F), levels = pop.levels, ordered =T),
Inferred = factor(stri_replace_all_fixed(Inferred, sites.levels, pop.labels, vectorize_all = F), levels = pop.levels, ordered =T)
)%>%
mutate(
STATUS = ifelse(Individuals %in% migrants_lmax$Individuals, "migrant",
ifelse(Current != Inferred, "error", "resident")),
STATUS = factor(STATUS, levels = c("resident", "migrant", "error"), ordered = T),
Inferred = ifelse(STATUS == "migrant" & Current == Inferred, "?", as.character(Inferred)),
Inferred = factor(Inferred, levels = pop.levels.lhome, ordered = T)
) %>%
group_by(Current, Inferred, STATUS) %>%
summarise(IND = as.numeric(n())) %>%
group_by(Current) %>%
mutate(
N_CORRECTED = as.numeric(sum(IND) - sum(IND[STATUS == "migrant"])),
POUR = as.numeric(round((IND/N_CORRECTED), 2))
) %>%
ungroup() %>%
mutate(
DISCRIMINATE = POUR * 100,
DISCRIMINATE = ifelse(as.character(Current) == as.character(Inferred), DISCRIMINATE, ""),
STATISTICS = rep("Likelihood ratio", n())
)
# View(assignment_lmax)
# names(assignment_lmax)
# levels(assignment_lmax$STATUS)
# class(assignment_lmax$Current)
# class(assignment_lmax$Inferred)
# levels(assignment_lmax$Current)
# levels(assignment_lmax$Inferred)
assignment_lhome <- read_delim(
assignment.lhome,
delim = "\t",
skip = lhome.skip,
n_max = number.individuals,
col_names = c("Individuals", "Current", "Inferred", "Lik_max", "Lik_home", "Lik_ratio",
paste(pop.levels, sep = ",")),
progress = interactive(),
col_types = col.types) %>%
mutate(
Current = str_sub(Current, pop.id.start, pop.id.end),
Inferred = str_sub(Inferred, pop.id.start, pop.id.end),
Current = factor(stri_replace_all_fixed(Current, sites.levels, pop.labels, vectorize_all = F), levels = pop.levels.lhome, ordered =T),
Inferred = factor(stri_replace_all_fixed(Inferred, sites.levels, pop.labels, vectorize_all = F), levels = pop.levels.lhome, ordered =T)) %>%
mutate(
STATUS = ifelse(Individuals %in% migrants_lhome$Individuals, "migrant",
ifelse(Current != Inferred, "error", "resident")),
STATUS = factor(STATUS, levels = c("resident", "migrant", "error"), ordered = T),
Inferred = ifelse(STATUS == "migrant" & Current == Inferred, "?", as.character(Inferred)),
Inferred = factor(Inferred, levels = pop.levels.lhome, ordered = T)
) %>%
group_by(Current, Inferred, STATUS) %>%
summarise(IND = as.numeric(n())) %>%
group_by(Current) %>%
mutate(
N_CORRECTED = as.numeric(sum(IND) - sum(IND[STATUS == "migrant"])),
POUR = as.numeric(round((IND/N_CORRECTED), 2))
) %>%
ungroup() %>%
mutate(
DISCRIMINATE = POUR * 100,
DISCRIMINATE = ifelse(as.character(Current) == as.character(Inferred), DISCRIMINATE, ""),
STATISTICS = rep("Home likelihood", n())
)
# View(assignment_lhome)
# names(assignment_lhome)
# invert the vector for figure will be easier to inspect
pop.levels.inverted <- rev(pop.levels)
# Bind the 2 datasets
assignment.summary <- assignment_lhome %>%
rbind(assignment_lmax) %>%
ungroup() %>%
mutate(
Current = factor(Current, levels = pop.levels.inverted, ordered = T),
Inferred = factor(Inferred, levels = pop.levels.lhome, ordered = T),
STATUS = factor(STATUS, levels = c("resident", "migrant", "error"), ordered = T),
STATISTICS = factor(STATISTICS, levels = c("Home likelihood", "Likelihood ratio"), ordered = T)
)
}
#' @title Cleveland dot plot figure of assignement results.
#' @description GGPLOT2 Cleveland dot plot figure of assignment results.
#' The figure will need some work in Adobe Illustrator or similar sofware.
#' @param assignment.summary The assignment summary file created
#' with assignment_genodive function.
#' @import ggplot2
#' @references Meirmans PG, Van Tienderen PH (2004) genotype and genodive:
#' two programs for the analysis of genetic diversity of asexual organisms.
#' Molecular Ecology Notes, 4, 792-794.
#' @author Thierry Gosselin \email{thierrygosselin@@icloud.com}
#' @export
#' @rdname figure_assignment
plot_assignment <- function(assignment.summary) {
Individuals <- NULL
Current <- NULL
Inferred <- NULL
STATUS <- NULL
IND <- NULL
DISCRIMINATE <- NULL
POUR <- NULL
ggplot(assignment.summary, aes(x = Inferred, y = Current, size = (POUR*100)))+
# geom_jitter(shape = 21, alpha = 0.5, aes(fill = STATUS), position = position_jitter(width = 0.05))+
geom_point(shape = 21, alpha = 0.5, aes(fill = STATUS))+
geom_text(aes(y = as.numeric(Current)-sqrt(POUR)/3, label = IND), vjust = 0.9, colour = "grey60", size = 3)+ # with 3 colors
geom_text(aes(y = as.numeric(Current), label = DISCRIMINATE), vjust = 0.5, colour = "black", size = 4, face = "bold")+
scale_fill_manual(name = "Assignment", values = c("darkgreen", "blue", "darkred"))+ # with 3 categories
# scale_fill_manual(name = "Assignment", values = c("darkgreen", "blue"))+ # with 2 categories
scale_size_area(guide = FALSE, max_size = 15)+
labs(x = "Inferred population")+
labs(y = "Current population")+
theme_bw()+
theme(
legend.position = "bottom",
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed"),
axis.title.x = element_text(size = 10, family = "Helvetica", face = "bold"),
axis.text.x = element_text(size = 10, family = "Helvetica", face = "bold"),
axis.title.y = element_text(size = 10, family = "Helvetica", face = "bold"),
axis.text.y = element_text(size = 10, family = "Helvetica", face = "bold"))+
facet_wrap(~ STATISTICS, scales = "free_x")
}
#' @title Classic stacked bar plot figure of assignement results.
#' @description GGPLOT2 stacked bar plot figure of assignment results.
#' @param assignment.summary The assignment summary file created
#' with assignment_genodive function.
#' @param pop.levels An optional character string with your populations ordered.
#' @references Meirmans PG, Van Tienderen PH (2004) genotype and genodive:
#' two programs for the analysis of genetic diversity of asexual organisms.
#' Molecular Ecology Notes, 4, 792-794.
#' @author Thierry Gosselin \email{thierrygosselin@@icloud.com}
#' @export
#' @rdname plot_assignment_stacked_bar
plot_assignment_stacked_bar <- function(assignment.summary, pop.levels) {
Current <- NULL
Inferred <- NULL
STATUS <- NULL
IND <- NULL
assignment_summary_stacked <- assignment.summary %>%
mutate(Current = factor(Current, levels = pop.levels, ordered = T)) %>%
arrange(STATUS)
ggplot(assignment_summary_stacked, aes(x = Current, y = IND, fill = factor(STATUS)))+
geom_bar(stat = "identity", position = "fill")+
scale_y_continuous(breaks = c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0), expand = c(0, 0))+ #in order for the line not to expand beyond the graph!
scale_fill_manual(name = "Assignment",
breaks = c("resident", "migrant", "error"),
values = c("darkgreen", "blue", "darkred"))+ # with 3 categories
labs(x = "Sampling sites")+
labs(y = "Proportion")+
facet_wrap(~ STATISTICS, nrow = 1, ncol = 2)+
theme(
panel.background = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
axis.title.x = element_text(size = 14, family = "Helvetica", face = "bold"),
axis.title.y = element_text(size = 14, family = "Helvetica", face = "bold"),
axis.text.x = element_text(size = 12, family = "Helvetica", face = "bold"),
axis.line = element_line(),
axis.line.y = element_line(linetype = "solid", size = 0.5, colour = "grey50"),
axis.line.x = element_blank(),
legend.title = element_text(size = 12, family = "Helvetica", face = "bold"),
legend.text = element_text(size = 12, family = "Helvetica", face = "bold"),
strip.text.y = element_text(angle = 0, size = 12, family = "Helvetica", face = "bold"),
strip.text.x = element_text(size = 12, family = "Helvetica", face = "bold")
)
} # Figure function: assignment stacked bar function
#' @name plot_assignment_dlr
#' @title Assignment plot of genotype likelihood.
#' @description Create a figure similar to Paetkau's et al. (2004) Fig 6.
#'
#' @param data The output assignment file (home likelihood or
#' likelihood ratio statistics) from GENODIVE.
#' @param l.skip (integer) The number of lines to skip before the individuals info
#' in GenoDive assignment results (see Vignette).
#' @param number.individuals (integer) The number of individuals analysed.
#' @param number.pop (integer) The number of populations analysed.
#' @param pop.id.start (Optional) The start of your population id
#' in the name of your individual sample. Your individuals are identified
#' in this form : SPECIES-POPULATION-MATURITY-YEAR-ID = CHI-QUE-ADU-2014-020,
#' then, \code{pop.id.start} = 5. If you didn't name your individuals
#' with the pop id in it, use the \code{strata} argument.
#' @param pop.id.end (Optional) The end of your population id
#' in the name of your individual sample. Your individuals are identified
#' in this form : SPECIES-POPULATION-MATURITY-YEAR-ID = CHI-QUE-ADU-2014-020,
#' then, \code{pop.id.end} = 7. If you didn't name your individuals
#' with the pop id in it, use the \code{strata} argument.
#' @param strata (optional) A tab delimited file with 2 columns with header:
#' \code{INDIVIDUALS} and \code{STRATA}. Default: \code{strata = NULL}.
#' The \code{STRATA} column is used here as the populations id of your sample.
#' @param pop.levels A character string with your sampling sites or populations
#' in the order you prefer (for tables and figure).
#' @param pop.labels (optional) A character string for your populations labels.
#' If you need to rename sampling sites in \code{pop.levels} or combined sites/pop
#' into a different names, here is the place.
#' @param POPA First population to compare.
#' @param POPB Second population to compare (with A).
#' @param dlr (optional) Character string with Dlr value.
#' @param x.dlr (optional) Position to the x-axis of the Dlr value.
#' @param y.dlr (optional) Position to the y-axis of the Dlr value.
#' @param fst (optional) Character string with Fst value.
#' @param x.fst (optional) Position to the x-axis of the Fst value.
#' @param y.fst (optional) Position to the y-axis of the Fst value.
#' @param filename (optional) Name of the figure written
#' in the working directory.
#' @param plot.width (optional) Width in cm of the figure.
#' @param plot.height (optional) height in cm of the figure.
#' @param plot.dpi (optional) Number of dpi for the figure (e.g 600).
#' @return A list with the assignment table and the assignment plot.
#' @import dplyr
#' @import readr
#' @import lazyeval
#' @import stringi
#' @export
#' @rdname plot_assignment_dlr
#' @references Paetkau D, Slade R, Burden M, Estoup A (2004)
#' Genetic assignment methods for the direct, real-time estimation of migration
#' rate: a simulation-based exploration of accuracy and power.
#' Molecular Ecology, 13, 55-65.
#' @references Meirmans PG, Van Tienderen PH (2004) genotype and genodive:
#' two programs for the analysis of genetic diversity of asexual organisms.
#' Molecular Ecology Notes, 4, 792-794.
#' @author Thierry Gosselin \email{thierrygosselin@@icloud.com}
# required to pass the R CMD check and have 'no visible binding for global variable'
if (getRversion() >= "2.15.1") {
utils::globalVariables(
c('INDIVIDUALS', 'Current', 'Inferred', 'Lik_max', 'Lik_home', 'Lik_ratio',
'POP_ID', 'RATIO', 'DLR', 'DLR_RELATIVE')
)
}
plot_assignment_dlr <- function(data,
l.skip,
number.individuals,
number.pop,
pop.id.start,
pop.id.end,
pop.levels,
pop.labels,
strata,
POPA, POPB,
dlr, x.dlr, y.dlr,
fst, x.fst, y.fst,
filename,
plot.width, plot.height, plot.dpi) {
if (missing(data)) stop("GenoDive file missing")
if (missing(strata)) strata <- NULL
if (missing(pop.id.start)) pop.id.start <- NULL
if (missing(pop.id.end)) pop.id.end <- NULL
if (is.null(strata) & is.null(pop.id.start) & is.null(pop.id.end)) {
stop("pop.id.start and pop.id.end or strata arguments are required to
identify your populations")
}
if (missing(pop.labels)) pop.labels <- pop.levels
if (missing(filename)) filename <- NULL
# import and modify the assignment file form GenoDive-------------------------
assignment <- read_delim(
data,
delim = "\t",
skip = l.skip,
n_max = number.individuals,
col_names = TRUE,
progress = interactive(),
col_types = stri_join("cccddd", stri_dup("d", times = number.pop), sep = "")) %>%
select(-c(Current, Inferred, Lik_max, Lik_home, Lik_ratio))
if (is.null(strata)){
header <- names(assignment)
header.sites <- header[2:(1+number.pop)]
header.sites.clean <- stri_sub(header.sites, pop.id.start, pop.id.end)
header.pop <- stri_replace_all_fixed(header.sites.clean, pop.levels, pop.labels, vectorize_all = FALSE)
new.header <- c("INDIVIDUALS", header.pop)
colnames(assignment) <- new.header
assignment <- assignment %>%
mutate(
POP_ID = stri_sub(INDIVIDUALS, pop.id.start, pop.id.end),
POP_ID = factor(stri_replace_all_fixed(POP_ID, pop.levels, pop.labels, vectorize_all = FALSE), levels = unique(pop.labels), ordered = TRUE),
POP_ID = droplevels(POP_ID),
INDIVIDUALS = as.character(INDIVIDUALS)
)
} else { # strata provided
strata.df <- read_tsv(file = strata, col_names = TRUE, col_types = "cc") %>%
rename(POP_ID = STRATA)
header.pop <- as.character(unique(strata.df$POP_ID))
new.header <- c("INDIVIDUALS", header.pop)
colnames(assignment) <- new.header
assignment <- assignment %>%
mutate(INDIVIDUALS = as.character(INDIVIDUALS)) %>%
left_join(strata.df, by = "INDIVIDUALS") %>%
mutate(POP_ID = factor(POP_ID, levels = unique(pop.labels), ordered =TRUE))
}
assignment <- assignment %>%
filter_(interp(~ POP_ID == as.name(POPA) | POP_ID == as.name(POPB)))
x_title <- stri_join("Log (genotype likelihood) population: ", POPA, sep = "")
y_title <- stri_join("Log (genotype likelihood) population: ", POPB, sep = "")
assignment.plot <- ggplot(assignment, aes_string(x = POPA, y = POPB)) +
geom_point(aes(fill = POP_ID, shape = POP_ID), na.rm = T, alpha = 0.8, size = 4) +
geom_abline(slope = 1) +
# scale_x_continuous(name = x_title, limits = c(-2700, -2000))+
# scale_y_continuous(name = y_title, limits = c(-2700, -2000))+
scale_shape_manual(values = c(21, 24))+
scale_fill_manual(values=c("black", NA))+
labs(x = x_title)+
labs(y = y_title)+
theme(
# legend.position = "none",
axis.title.x = element_text(size = 10, family = "Helvetica", face = "bold"),
axis.title.y = element_text(size = 10, family = "Helvetica", face = "bold"),
legend.title = element_text(size = 10, family = "Helvetica", face = "bold"),
legend.text = element_text(size = 10, family = "Helvetica", face = "bold"),
strip.text.y = element_text(angle = 0, size = 10, family = "Helvetica", face = "bold")
)
if (missing(dlr)) {
assignment.plot <- assignment.plot
} else {
assignment.plot <- assignment.plot + annotate("text", x = x.dlr, y = y.dlr,
label = dlr, colour = "black")
}
if (missing(fst)) {
assignment.plot <- assignment.plot
} else {
assignment.plot <- assignment.plot + annotate("text", x = x.fst, y = y.fst,
label = fst, colour = "black")
}
if (missing(filename)) {
saving <- "Saving was not selected..."
} else {
saving <- paste("Saving the figure was selected, the filename:",
filename, sep = " ")
ggsave(filename, width = plot.width, height = plot.height,
dpi = plot.dpi, units = "cm", useDingbats = F)
}
res <- list()
res$assignment <- assignment
res$assignment.plot <- assignment.plot
invisible(cat(sprintf(
"%s\n
Working directory:
%s",
saving, getwd()
)))
return (res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.