#' A function that writes headers for the filtered FASTA database
#' @param i index of the sequence in the FASTA file that must be changed
#' @export
writeHeaders <- function(i){
paste(database_filt[i,"superkingdom"], database_filt[i, "kingdom"],
database_filt[i,"phylum"], database_filt[i,"class"], database_filt[i, "order"],
database_filt[i,"family"], database_filt[i,"genus"], database_filt[i, "species"], sep=";")
}
#' A function to remove all non-ITS2 sequences and change any synonyms, both specified by CSVs
#' @param database_path Path of taxonomy database
#' @param fasta_path The path of the DADA2 formatted reference taxonomy database
#' @param to_remove_path Path of a CSV file containing names of sequences to remove
#' @param synonyms_path Path of a CSV specifying horse parasite synonym and official Lichtenfels 2008 name
#' @export
filter_and_remove_synonyms <- function(database_path, fasta_path, to_remove_path, synonyms_path) {
date <- gsub("-", "_", Sys.Date())
to_remove <- data.frame(lapply(read.csv(to_remove_path, header = F), as.character), stringsAsFactors = FALSE)
colnames(to_remove) <- c("accn", "sequence")
fasta <- readDNAStringSet(fasta_path)
synonyms <- data.frame(lapply(read.csv(synonyms_path), as.character), stringsAsFactors=FALSE)
filter_indices <- which(database$accn %in% to_remove$accn)
database_filt <- as.data.frame(database[-filter_indices,])
fasta_filt <- fasta[-filter_indices]
database_filt$speciesOld <- database_filt$species #Used to check if anything has changed
database_filt$genusOld <- database_filt$genus #Also to check if genera changed
which(!database_filt$species == database_filt$speciesOld) #Should be 0, haven't changed anything yet
which(!database_filt$genus == database_filt$genusOld) #Same as above
#Change to official species names, and if species name is changed then also change genus name
database_filt$species <- mapvalues(database_filt$species, from=synonyms$synonym_species_name, to=synonyms$species_name_used_by_Lichtenfels_2008, warn_missing = F)
changed_species_indices <- which(!database_filt$species == database_filt$speciesOld)
#Change genus name
database_filt$genus <- mapvalues(database_filt$genus, from=synonyms$synonym_genus_name, to=synonyms$genus_name_used_by_Lichtenfels_2008, warn_missing = F)
changed_genus_indices <- which(!database_filt$genusOld == database_filt$genus) #many genera changed?
which(!database_filt$species == database_filt$speciesOld)
which(!database_filt$genus == database_filt$genusOld)
database_filt$speciesOld <- NULL ; database_filt$genusOld <- NULL
database_filt$species <- sub(" ", "_", database_filt$species)
#writexl::write_xlsx(database_filt, paste0("database_nr_v0_9_3_species_names_updated_", date,".xlsx"))
synonym_indices <- c(changed_species_indices, changed_genus_indices)
newHeaders <- unlist(lapply(synonym_indices, FUN=writeHeaders))
fasta_filt@ranges@NAMES[synonym_indices] <- newHeaders
fasta_filt@ranges@NAMES[synonym_indices]
writeXStringSet(fasta_filt, paste0("dada2_nr_v0_9_3_species_names_updated_", date,".fasta"))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.