#' Copy a Random Subset of Files to a New Directory for Manual Vetting
#'
#' \code{manual_vet_extractor} copies a randomly selected subset of files in a
#' project to a new folder to facilitate manual vetting. Only observations with
#' a non-missing \code{Species} value are considered, and the original WAV
#' files are copied, never moved.
#'
#' For each species the number of files copied is the number of matching
#' observations multiplied by \code{percentage}, rounded down. A species with
#' too few observations may therefore yield an empty folder.
#'
#' If any observation cannot be matched to a WAV file in \code{WAV_directory},
#' nothing is copied. The user is asked whether they want the list of
#' unmatched file names; answering "y" returns that list, anything else raises
#' an error.
#'
#' @family Manual Vetting Functions
#'
#' @param data_path Character. Path to an existing RData file for the data set
#'   you wish to manually vet.
#' @param WAV_directory Character, the parent directory containing original WAV
#'   files.
#' @param save_directory Character, the destination where selected files will
#'   be copied to. One sub-folder per species is created inside it; the
#'   directory itself is created if it does not exist yet.
#' @param species_list Character, a single species or list in the form of four
#'   character species codes used by SonoBat, e.g. "Epfu". Defaults to non
#'   Ontario SAR: \code{c("Epfu", "Labo", "Laci", "Lano")}.
#' @param percentage Number between 0 and 1. What proportion of files do you
#'   wish to extract. Defaults to 0.05 - 5 percent.
#' @param no_manual Logical vector, defaults to \code{FALSE}. If \code{TRUE} the
#'   function will ignore files that have already been assigned an ID.
#' @param fast_import Logical, defaults to \code{FALSE}. Passed unchanged to the
#'   internal helper that lists the files in \code{WAV_directory}.
#'
#' @return Called for its side effect of creating one folder per species inside
#'   \code{save_directory} and copying the sampled files into it; returns
#'   \code{NULL} invisibly. If WAV files are missing and the user asks for the
#'   report, a character vector of the unmatched file names is returned
#'   instead.
#'
#' @examples
#' \dontrun{
#'  manual_vet_extractor("raw_data_project.RData",
#'                       "C:/Folder/Folder/File_Folder",
#'                       "C:/Folder/Folder/Vetting_Folder")
#' }
#' @export
manual_vet_extractor <- function(data_path, WAV_directory, save_directory,
                                 species_list = c("Epfu", "Labo", "Laci", "Lano"),
                                 percentage = 0.05, no_manual = FALSE,
                                 fast_import = FALSE) {
  .check_data_path(data_path)
  if (!is.numeric(percentage) || length(percentage) != 1L ||
      is.na(percentage) || percentage < 0 || percentage > 1) {
    stop("`percentage` must be a single number between 0 and 1.",
         call. = FALSE)
  }

  dataset <- .location_subsetter(data_path)
  if (isTRUE(no_manual)) {
    # Drop observations that already carry a manual ID.
    dataset <- dataset[is.na(dataset$Species.Manual.ID), ]
  }
  # Drop observations without a species assignment.
  dataset <- dataset[!is.na(dataset$Species), ]

  message("Matching observations to WAV files.")
  WAV_directory_files <- .get_file_list(WAV_directory, fast_import)
  # Left join: every observation is kept, Full.Path is NA when no WAV matched.
  dataset2 <- merge(dataset, WAV_directory_files,
                    by = "File.Name", all.x = TRUE)

  unmatched <- is.na(dataset2$Full.Path)
  if (any(unmatched)) {
    message(
      "Error: cannot locate WAV files for some observations in the specified WAV_directory.\n",
      "Would you like to see a list of observations that cannot be located?"
    )
    report <- readline(prompt = "y/n:")
    if (report %in% c("y", "Y")) {
      message("Ending due to missing WAV files, no file have been copied for manual vetting.")
      return(dataset2$File.Name[unmatched])
    }
    stop("Ending due to missing WAV files, no file have been copied for manual vetting.")
  }

  for (species in species_list) {
    message(paste("Copying", species, "vetting files to output directory."))
    species_directory <- file.path(save_directory, species)
    if (!dir.exists(species_directory)) {
      # recursive = TRUE so a not-yet-existing save_directory is created too.
      dir.create(species_directory, recursive = TRUE)
    }

    candidate_rows <- which(dataset2$Species == species)
    n_to_copy <- floor(length(candidate_rows) * percentage)
    # Sample positions, not values: sample(k, n) with a single number k draws
    # from 1:k, which would pick rows belonging to other species whenever
    # exactly one row matches.
    chosen_rows <- candidate_rows[sample.int(length(candidate_rows), n_to_copy)]

    copied <- file.copy(dataset2$Full.Path[chosen_rows], species_directory)
    if (!all(copied)) {
      warning(sum(!copied), " ", species,
              " file(s) could not be copied to ", species_directory,
              " (they may already exist there).", call. = FALSE)
    }
  }
  invisible(NULL)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.