Nothing
#' NestTablesXL() function
#'
#' \code{\link{NestTablesXL}} reads the R lists output by the HpltFind() function
#' and translates them to an Excel workbook for more convenient evaluation of the
#' inferred haplotypes and curation of unresolved and incongruent sequences. The
#' workbook contains separate tabs for each nest in the data set and provides an
#' overview of the genotypes of the samples in each nest and the inferred
#' haplotypes.
#'
#' If you publish data or results produced with MHCtools, please cite both of
#' the following references:
#' Roved, J. 2022. MHCtools: Analysis of MHC data in non-model species. Cran.
#' Roved, J., Hansson, B., Stervander, M., Hasselquist, D., & Westerdahl, H. 2022.
#' MHCtools - an R package for MHC high-throughput sequencing data: genotyping,
#' haplotype and supertype inference, and downstream genetic analyses in non-model
#' organisms. Molecular Ecology Resources. https://doi.org/10.1111/1755-0998.13645
#'
#' @param nest_table is a table containing the sample names of parents and
#' offspring in each nest. This table should be organized so that the
#' individual names are in the first column (Sample_ID), and the nest number
#' is in the second column (Nest). For each nest, the first two rows should be
#' the parents, followed immediately by the offspring in the subsequent rows,
#' and then followed by the next nest, and so on. It is assumed that nests are
#' numbered consecutively beginning at 1. Please use the same table as was used
#' to generate the haplotypes using HpltFind().
#' @param seq_table seq_table is a sequence table as output by the 'dada2'
#' pipeline, which has samples in rows and nucleotide sequence variants in
#' columns. Please use the same table as was used to generate the haplotypes
#' using HpltFind().
#' @param filepath is a user defined path to the folder where the output files
#' from the HpltFind() function have been saved.
#' @param path_out is a user defined path to the folder where the output file
#' will be saved.
#' @return An Excel workbook with individual tabs for each nest in nest_table.
#' Each tab contains a binary (logical) occurrence matrix with the samples from
#' each nest in columns and sequences (inherited from seq_table) in rows. The
#' order of the samples is derived from nest_table, with parents in the two
#' leftmost columns. Each tab also lists the putative haplotypes inferred by the
#' HpltFind() function and provides lists of unresolved sequences in haplotypes,
#' sequences with unidentified decent (i.e., present in parents but not in
#' offspring), sequences not assigned to haplotypes, and sequences with
#' unidentified origin (i.e., present in offspring but not in parents).
#' Note: NestTablesXL() will overwrite any existing file with the output file
#' name in path_out.
#' @seealso \code{\link{HpltFind}}; for more information about 'dada2' visit
#' <https://benjjneb.github.io/dada2/>
#' @examples
#' nest_table <- nest_table
#' seq_table <- sequence_table_hplt
#' filepath <- system.file("extdata/HpltFindOut/", package="MHCtools")
#' path_out <- tempdir()
#' NestTablesXL(nest_table, seq_table, filepath, path_out)
#' @importFrom "openxlsx" "addWorksheet" "createWorkbook" "saveWorkbook"
#' "setColWidths" "writeData" "writeDataTable"
#' @export
NestTablesXL <- function(nest_table, seq_table, filepath, path_out) {
# Create an Excel workbook
wb <- createWorkbook()
# Get the file names of the .Rds output generated by the HpltFind function
file_names <- dir(filepath)
# Sort the file names by nest number
file_names <- file_names[order(as.numeric(gsub("[^0-9]", "", file_names)))]
# Define the sequence names
seq_names <- colnames(seq_table)
# Define the number of nests in the data set
No_nests <- max(nest_table$Nest)
for (i in 1:No_nests) {
# Create a new sheet in the workbook
addWorksheet(wb, sheetName = paste(i))
# Create a vector for the names of the samples in nest i
Nest_samples <- factor()
# Get the names of the samples in nest i
Nest_samples <- nest_table[nest_table[,"Nest"]==i,"Sample_ID"]
# Create a vector for the names of all the sequences found in any sample from nest i
Seqs_nest <- factor()
# Create a list that will contain vectors with the names of the sequences that are found in each sample
Seqs <- vector("list", length(Nest_samples))
# Get the names of the sequences that are found in each sample in nest i
for (j in 1:length(Nest_samples)) {
# Fetch column numbers for the sequences in sample j in the sequence table
z <- which(seq_table[paste(Nest_samples[j]),] > 0)
# Enter the names of the sequences in sample j into the list Seqs
Seqs[[j]] <- seq_names[z]
# Enter the names of the sequences in sample j into Seqs_nest
for (Seq in Seqs[[j]]) {
# If Seq is not present in Seqs_nest, append Seq to Seqs_nest
if (Seq %in% Seqs_nest == FALSE) {
Seqs_nest <- append(Seqs_nest, Seq)
}
}
}
# Order the sequence names in Seqs_nest
Seqs_nest <- sort(Seqs_nest)
# Create a list that will contain vectors with the presence/absense of sequences for each sample
Seqs_occ <- vector("list", length(Nest_samples))
# Assign the presence/absence of sequences for each sample to Seqs_occ
for (j in 1:length(Nest_samples)) {
Seqs_occ[[j]] <- numeric(length(Seqs_nest))
for (k in 1:length(Seqs_nest)) {
# If Seqs_nest[k] is present in Seqs[[j]], assign 1 to Seqs_occ[[j]][k]
# else assign 0
if (Seqs_nest[k] %in% Seqs[[j]] == TRUE) {
Seqs_occ[[j]][k] <- 1
} else {
Seqs_occ[[j]][k] <- 0
}
}
}
# Create an occurrence matrix showing presence/absense of sequences in samples in nest i
Occ_matrix <- data.frame(sapply(Seqs_occ,c))
rownames(Occ_matrix) <- Seqs_nest
colnames(Occ_matrix) <- Nest_samples
# Add this as a table in the worksheet
writeDataTable(wb, i, Occ_matrix, startCol = 1, startRow = 1, rowNames = TRUE, colNames = TRUE)
# Change column width
setColWidths(wb, i, cols = 1:15, widths = 12)
# Load the nest lists from the output of HpltFind
Nest_i <- readRDS(file.path(filepath, file_names[i]))
# Add "Haplotypes" as title in the worksheet
writeData(wb, i, x = as.character("Haplotypes"), xy = c(1, (3+length(Seqs_nest))))
# Add the haplotypes to the worksheet below the occurrence matrix
Hplt_1 <- data.frame(Nest_i$Putative_haplotypes[[1]], stringsAsFactors=FALSE)
colnames(Hplt_1) <- paste0(Nest_samples[1], "A")
writeDataTable(wb, i, Hplt_1, startRow = (4+length(Seqs_nest)), startCol = 1, rowNames = FALSE, colNames = TRUE)
Hplt_2 <- data.frame(Nest_i$Putative_haplotypes[[2]], stringsAsFactors=FALSE)
colnames(Hplt_2) <- paste0(Nest_samples[1], "B")
writeDataTable(wb, i, Hplt_2, startRow = (4+length(Seqs_nest)), startCol = 2, rowNames = FALSE, colNames = TRUE)
Hplt_3 <- data.frame(Nest_i$Putative_haplotypes[[3]], stringsAsFactors=FALSE)
colnames(Hplt_3) <- paste0(Nest_samples[2], "A")
writeDataTable(wb, i, Hplt_3, startRow = (4+length(Seqs_nest)), startCol = 3, rowNames = FALSE, colNames = TRUE)
Hplt_4 <- data.frame(Nest_i$Putative_haplotypes[[4]], stringsAsFactors=FALSE)
colnames(Hplt_4) <- paste0(Nest_samples[2], "B")
writeDataTable(wb, i, Hplt_4, startRow = (4+length(Seqs_nest)), startCol = 4, rowNames = FALSE, colNames = TRUE)
# Add "Unresolved sequences" as title in the worksheet
writeData(wb, i, x = as.character("Unresolved sequences"), xy = c(6, (3+length(Seqs_nest))))
# Add the unresolved sequences to the worksheet
Unr_seqs_1 <- data.frame(Nest_i$Unresolved_seqs[[1]], stringsAsFactors=FALSE)
colnames(Unr_seqs_1) <- paste(Nest_samples[1])
writeDataTable(wb, i, Unr_seqs_1, startRow = (4+length(Seqs_nest)), startCol = 6, rowNames = FALSE, colNames = TRUE)
Unr_seqs_2 <- data.frame(Nest_i$Unresolved_seqs[[2]], stringsAsFactors=FALSE)
colnames(Unr_seqs_2) <- paste(Nest_samples[2])
writeDataTable(wb, i, Unr_seqs_2, startRow = (4+length(Seqs_nest)), startCol = 7, rowNames = FALSE, colNames = TRUE)
# Add "Unidentified descent" as title in the worksheet
writeData(wb, i, x = as.character("Unidentified descent"), xy = c(9, (3+length(Seqs_nest))))
# Add the haplotype sequences not identified in any offspring to the worksheet
Hplt_1_inc <- data.frame(Nest_i$Incongruent_seqs[[1]], stringsAsFactors=FALSE)
colnames(Hplt_1_inc) <- paste0(Nest_samples[1], "A")
writeDataTable(wb, i, Hplt_1_inc, startRow = (4+length(Seqs_nest)), startCol = 9, rowNames = FALSE, colNames = TRUE)
Hplt_2_inc <- data.frame(Nest_i$Incongruent_seqs[[2]], stringsAsFactors=FALSE)
colnames(Hplt_2_inc) <- paste0(Nest_samples[1], "B")
writeDataTable(wb, i, Hplt_2_inc, startRow = (4+length(Seqs_nest)), startCol = 10, rowNames = FALSE, colNames = TRUE)
Hplt_3_inc <- data.frame(Nest_i$Incongruent_seqs[[3]], stringsAsFactors=FALSE)
colnames(Hplt_3_inc) <- paste0(Nest_samples[2], "A")
writeDataTable(wb, i, Hplt_3_inc, startRow = (4+length(Seqs_nest)), startCol = 11, rowNames = FALSE, colNames = TRUE)
Hplt_4_inc <- data.frame(Nest_i$Incongruent_seqs[[4]], stringsAsFactors=FALSE)
colnames(Hplt_4_inc) <- paste0(Nest_samples[2], "B")
writeDataTable(wb, i, Hplt_4_inc, startRow = (4+length(Seqs_nest)), startCol = 12, rowNames = FALSE, colNames = TRUE)
# Add "Not assigned to haplotypes" as title in the worksheet
writeData(wb, i, x = as.character("Not assigned to haplotypes"), xy = c(14, (3+length(Seqs_nest))))
# Add the sequences found in parents that were not assigned to a haplotype to the worksheet
Pinc_1 <- data.frame(Nest_i$Incongruent_seqs$Parents[[1]], stringsAsFactors=FALSE)
colnames(Pinc_1) <- paste(Nest_samples[1])
writeDataTable(wb, i, Pinc_1, startRow = (4+length(Seqs_nest)), startCol = 14, rowNames = FALSE, colNames = TRUE)
Pinc_2 <- data.frame(Nest_i$Incongruent_seqs$Parents[[2]], stringsAsFactors=FALSE)
colnames(Pinc_2) <- paste(Nest_samples[2])
writeDataTable(wb, i, Pinc_2, startRow = (4+length(Seqs_nest)), startCol = 15, rowNames = FALSE, colNames = TRUE)
# Add "Unidentified origin" as title in the worksheet
writeData(wb, i, x = as.character("Unidentified origin"), xy = c(17, (3+length(Seqs_nest))))
# Add the sequences found in offspring, but not identified in any of the parents to the worksheet
for (j in 1:(length(Nest_samples)-2)) {
Cinc <- data.frame(Nest_i$Incongruent_seqs$Chicks[[j]], stringsAsFactors=FALSE)
colnames(Cinc) <- paste(Nest_samples[(2+j)])
writeDataTable(wb, i, Cinc, startRow = (4+length(Seqs_nest)), startCol = (16+j), rowNames = FALSE, colNames = TRUE)
}
}
# Save the workbook to a file
saveWorkbook(wb, file=paste0(path_out, "/Nest_tables_",c(format(Sys.Date(),"%Y%m%d")),".xlsx"), overwrite=TRUE)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.