#' Combine txt files generated by GeneMapper
#'
#' @param folder_name a folder where all the .txt files
#' generated by GeneMapper are stored.
#' @param export_name the name of the exported csv file.
#' @param export_option whether to export a csv file (option "csv")
#' or a data.frame in the R Environment (option "R").
#'
#' @return a single csv file or a data.frame
#' @export
#'
#' @author Nikolaos Tourvas
#' @description This function allows the user to quickly convert multiple txt
#' files of microsatellite genotyping data from GeneMapper to a csv or a data.frame.
#' It was tested with txt files produced by GeneMapper v4.0 up to v6.0.
#'
#' @examples
#' \dontrun{
#'
#' # Export csv file for use in GenAlEx
#' genemapper2df(folder_name = "...", export_name = "my_data", export_option = "csv")
#'
#' # Create genind object
#' df <- genemapper2df(folder_name = "...", export_option = "R")
#' obj <- loci2genind(as.loci(df))
#'
#' # For genind objects you will probably want to import population data as well.
#' # This can be done in the following way.
#'
#' strata(obj) <- strata_df #provide a data.frame with population strata (e.g. Year, Plot)
#' setPop(obj) <- ~Year/Plot #set the appropriate population hierachy for the analysis
#' ## Note that you can change population hierarchy between analyses using the previous command.
#' }
#' @import dplyr
#' @import tidyr
#' @importFrom magrittr %>%
genemapper2df <- function(folder_name,
export_name = "genemapper_export.csv",
export_option = "csv") {
# function
# if Allele.2 spot is empty but Allele.1 is not,
# then copy Allele.1 to Allele.2
replace.empty <- function(x) {
x %>%
mutate(Allele.2.edit =
case_when(
is.na(Allele.2) ~ Allele.1,
is.numeric(Allele.2) ~ Allele.2
)
)
}
temp <- list.files(path = folder_name,
pattern = "*.txt",
full.names = TRUE)
df <- lapply(temp, read.csv, sep = "\t",
na.strings = "")
# Check if all input files are tab delimited
for (i in 1:length(df)){
if(ncol(df[[i]]) < 3) {
stop(call. = FALSE,
"The input file is probably not tab delimited for marker",
unique(df[[i]]$Marker))
}
}
# Check if all alleles are coded as numbers
for (i in 1:length(df)){
if(!is.numeric(df[[i]]$Allele.1)) {
stop(call. = FALSE,
"An allele is not coded as a number in the txt file of marker ",
unique(df[[i]]$Marker))
} else if (!is.numeric(df[[i]]$Allele.2)) {
stop(call. = FALSE,
"An allele is not coded as a number in the txt file of marker ",
unique(df[[i]]$Marker))
}
}
## TODO: make check that marker names do not have spaces
# sort data.frame based on Sample.File column
df <- lapply(df, arrange, Sample.File)
df <- lapply(df, select,
Sample.File, Marker, Allele.1, Allele.2)
df_edit <- lapply(df, replace.empty)
df_edit <- lapply(df_edit, select,
Allele.1, Allele.2.edit)
# vector of marker names
markers <- list()
for (i in 1:length(df)){
markers[[i]] <- unique(df[[i]]$Marker)
}
markers <- unlist(markers)
markers <- rep(markers, each=2)
# merge multiple data.frames inside the list
# into one data.frame
df_2col <- do.call(cbind, df_edit)
colnames(df_2col) <- markers # add marker names
# add ind names column
ind_names <- df[[1]]$Sample.File
ind_names <- gsub(pattern = ".fsa",
replacement = "",
x = ind_names)
# df_final <- df_final %>%
# add_column(Genotype = ind_names,
# .before = 1)
rownames(df_2col) <- ind_names
df_2col[is.na(df_2col)] <- 0
if (export_option == "csv") {
write.csv(x = df_2col, file = export_name)
} else if (export_option == "R") {
N<-ncol(df_2col)
n=N/2
A<-seq(1,N, by=2)
B<-seq(2,N, by=2)
for (i in 1:n){
locus<-(paste(df_2col[,A[i]],"/",df_2col[,B[i]],sep=""))
if (exists("geno")) geno<-data.frame(geno,locus) else
geno<-as.data.frame(locus)
}
rownames(geno) <- rownames(df_2col) # ind names
markers <- colnames(df_2col)[c(TRUE, FALSE)]
colnames(geno) <- markers
return(geno)
} else {
stop(call. = FALSE,
'The export_option parameter should take one of the following values
"csv" or "R" ')
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.