Nothing
#' @title Check field names of a range model metadata list against conventions
#'
#' @description Identify nonstandard fields
#'
#' @details
#' See Examples.
#'
#' @param rmm a range model metadata list
#' @param cutoff_distance number of allowed different characters to match standardized names
#' @param returnData logical. If FALSE, the function will return the (possibly) corrected rmm object. If TRUE, the function will return a data.frame containing information on incorrect names.
#' @param interactiveCorrections logical. If TRUE, the user will be prompted to indicate whether the proposed correction should be accepted, thereby modifying the `rmm` object. If FALSE, suggestions will just be printed to the screen and users can edit them manually.
#' @examples
#' rmm <- rmmTemplate() # Make an empty template
#' rmm$dataPrep$biological$taxonomicHarmonization$taxonomy_source <- "The Plant List"
#' # Add a new, non-standard field
#' rmm.1 <- rmmCheckName(rmm)
#' # Checking the names should identify the new, non-standard field we've added ("taxonomy_source")
#'
#'
#' @return Either an rmm list object (`returnData=FALSE`) or a data.frame containing information on possible name errors (`returnData=TRUE`).
#' @author Cory Merow <cory.merow@@gmail.com>, Brian Maitner <bmaitner@@gmail.com>,
#' @note Names returned by this check may be either incorrectly named or correctly named but missing from the data dictionary.
# @seealso
# @references
# @aliases - a list of additional topic names that will be mapped to
# this documentation when the user looks them up from the command
# line.
#' @family check
#' @import stats
#' @import utils
#' @export
rmmCheckName <- function(rmm,
cutoff_distance = 3,
returnData = F,
interactiveCorrections=FALSE ){
list_elements <- utils::capture.output(rmm)
list_elements <- list_elements[grep(pattern = "$",
x = list_elements,fixed = T)] #remove elements that aren't field names
#Now, we need to purge the non-terminal list elements.
#Solution (probably not optimal):
#Identify elements that are completely contained within another element (but are not identical to that element).
#Any of these will not be terminal element names
terminal <- lapply(X = list_elements,FUN = function(x){
if(length(grep(pattern = x,x = unique(list_elements),fixed = T))>1){
output<-FALSE
}else{
output<-TRUE
}
return(output)
})
terminal <- unlist(terminal)
list_elements <- list_elements[terminal]
rm(terminal)
dd <- utils::read.csv(system.file("extdata/dataDictionary.csv",
package='rangeModelMetadata'),
stringsAsFactors=FALSE)
dd_names <- NULL
for(i in 1:nrow(dd)){
val_i <- dd[i,][unique(c(grep(pattern = "field",x = colnames(dd)),
grep(pattern = "entity",x = colnames(dd)))) ]#The complicate indexing ensures that id additional fields (eg field4,field5) are added things won't break
val_i <- val_i[which(!is.na(val_i))]
val_i <- paste("$",paste(val_i,collapse = "$"),sep = "")
dd_names <- c(dd_names,val_i)
}
rm(i,val_i)
#i=26,12
name_check_df <- as.data.frame((matrix(ncol = 5, nrow = length(list_elements))))
colnames(name_check_df) <- c("exact_match","partial_match",
"partial_match_suggestions","corrected_name",
"not_matched")
for(i in 1:length(list_elements)){
element_i <- list_elements[i]
if(element_i%in%dd_names){
name_check_df$exact_match[i] <- element_i}else{ #if name is valid, else:
min_dist <- min(adist(x = element_i,y = dd_names))
if(min_dist <= cutoff_distance){
name_check_df$partial_match[i] <- element_i
name_check_df$partial_match_suggestions[i] <-
dd_names[which.min(adist(x = element_i, y = dd_names))]
# prompt
message("\n\n")
message("\n Element name '",element_i,
"' not found in data dictionary", "!\n Did you mean: '",
name_check_df$partial_match_suggestions[i],"'? " ) # prompt
if(interactiveCorrections){
message("Type 'y' or 'n'.")
take <- scan(n = 1, quiet = TRUE, what = 'raw')
if (take == 'y' | take == 'Y') {
#rename list element_i to the partial match suggestion
el_i<-paste("rmm",element_i,sep = "")
bad_name <- unlist(strsplit(el_i,"$",fixed = T))[length(unlist(strsplit(el_i,"$",fixed = T)))]
parent_i <- paste(unlist(strsplit(el_i,"$",fixed = T))[1:length(unlist(strsplit(el_i,"$",fixed = T)))-1],collapse = "$")
exp_i <- paste("names(",parent_i,")[",which(names(eval(parse(text = parent_i)))==bad_name),"]
<- '",unlist(strsplit(name_check_df$partial_match_suggestions[i],split = "$",fixed = T))[length(unlist(strsplit(name_check_df$partial_match_suggestions[i],split = "$",fixed = T)))],"'",sep = "")
eval(parse(text = exp_i))
name_check_df$corrected_name[i] <- dd_names[which.min(adist(x = element_i,y = dd_names))]
}
} # end interactive corrections
#end prompt
}
if(min_dist > cutoff_distance){
name_check_df$not_matched[i] <- element_i
}
}#if name is NOT valid exactly
}#i loop
if(nrow(name_check_df) > 0){
# if(length(which(!is.na(name_check_df$exact_match)))>0 ){
# message(paste("The following names appear accurate:", sep = "",collapse = "\n"))
# message(paste("\n",paste(name_check_df$exact_match[which(!is.na(name_check_df$exact_match))],collapse = "\n"), sep = "" ))
# message(noquote("\n "))
# }
if(length(which(!is.na(name_check_df$corrected_name))) >0 ){
message(noquote("\n"))
message("The following names were corrected:\n")
message(paste("\n",paste(name_check_df$partial_match[which(!is.na(name_check_df$corrected_name))],collapse = "\n"), sep = "" ))
message(noquote("\n "))
}
if(length(which(!is.na(name_check_df$partial_match) & is.na(name_check_df$corrected_name)))>0 ){
message("The following names are similar to suggested names, please verify:\n")
message(paste(paste(name_check_df$partial_match[which(!is.na(name_check_df$partial_match) & is.na(name_check_df$corrected_name) )],collapse = "\n"), sep = "" ))
message(paste("\nSuggested alternatives include: \n",paste(name_check_df$partial_match_suggestions[which(!is.na(name_check_df$partial_match_suggestions) & is.na(name_check_df$corrected_name))],collapse = "\n"), sep = "" ))
#message(paste(paste(name_check_df$partial_match_suggestions[which(!is.na(name_check_df$partial_match_suggestions))],collapse = "\n"), sep = "" ))
message(noquote("\n "))
}
if(length(which(!is.na(name_check_df$not_matched)))>0 ){
message(paste("The following names are not similar to any suggested names, please verify that these are accurate:\n", sep = "" ))
message(paste(paste(name_check_df$not_matched[which(!is.na(name_check_df$not_matched))],collapse = "\n"), sep = "" ))
message(noquote("\n "))
}
if(returnData==TRUE){
return(name_check_df)
}
if(returnData==FALSE){return(rmm)}
}#overall fx
}
####################################################################
##############################################################
##############################################################
#' @title Check values of a range model metadata list against commonly used values
#' @export
#'
#' @description Identify nonstandard values
#'
#' @details
#' See Examples.
#' @param rmm a range model metadata list
#' @param cutoff_distance The maximum allowable similarity (Levenshtein (edit) distance) for use in fuzzy matching.
#' @param returnData Should a dataframe containing information on matched and unmatched values be returned? Default is FALSE
#'
#' @examples
#' rmm <- rmmTemplate() #First, we create an empty rmm template
#' rmm$data$environment$variableNames <- c("bio1", "bio 2", "bio3", "cromulent")
#' #We add 3 of the bioclim layers, including a spelling error (an extra space) in bio2,
#' # and a word that is clearly not a climate layer, 'cromulent'.
#' rmmCheckValue(rmm = rmm)
#' #Now, when we check the values, we see that bio1 and bio2 are reported as exact matches,
#' #while 'bio 2' is flagged as a partial match with a suggested value of 'bio2',
#' # and cromulent is flagged as not matched at all.
#' #If we'd like to return a dataframe containing this information in a perhaps more useful format:
#' rmmCheckValue_output <- rmmCheckValue(rmm = rmm, returnData = TRUE)
#'
#' @return Text describing identical, similar and non-matched values for rmm entities with suggested values. If returnData = T, a dataframe is returned containing 5 columns: field (the rmm entity), exact_match (values that appear correct), partial_match (values that are partial_match to common values), not_matched( values that are dissimilar from accepted values), partial_match_suggestions (suggested values for partial_match values).
#' @author Cory Merow <cory.merow@@gmail.com>, Brian Maitner <bmaitner@@gmail.com>,
#' @note Names returned by this check may be either incorrectly named or correctly named but missing from the data dictionary.
# @seealso
# @references
# @aliases - a list of additional topic names that will be mapped to
# this documentation when the user looks them up from the command
# line.
#' @family check
#' @export
rmmCheckValue <- function(rmm, cutoff_distance = 3, returnData = F ){
dd <- utils::read.csv(system.file("extdata/dataDictionary.csv",
package='rangeModelMetadata'),
stringsAsFactors=FALSE)
dd_constrained <- dd[which(dd$constrainedValues!='NULL'),]
#For all fields with either a kinda or yes in the valuesConstrained field, check values against those in datadictionary
#Split values into a "exact_match", "partial_match", and "not partial_match" set.
#Print each set of values along with a note.
value_check_df <- as.data.frame((matrix(ncol=5,nrow = nrow(dd_constrained))))
colnames(value_check_df) <- c("field","exact_match","partial_match",
"partial_match_suggestions","not_matched")
for(i in 1:nrow(dd_constrained)){
dd_i <- dd_constrained[i,][c("field1",'field2','field3','entity')]
if(length(which(is.na(dd_i))) > 0){
dd_i <- paste(dd_i[-which(is.na(dd_i))],collapse = "$")
}else{ dd_i <- paste(dd_i,collapse = "$")}
value_check_df[i,1] <- dd_i<-paste("rmm",dd_i,sep = "$")
constrained_values <- dd_constrained$constrainedValues[i]
constrained_values <- unlist(strsplit(x = constrained_values,split = "; "))
value_i <- eval(parse(text=dd_i))
if(!is.null(eval(parse(text=dd_i)))){
for(j in 1:length(eval(parse(text=dd_i)))){
element_j <- eval(parse(text=dd_i))[j]
if(element_j %in% constrained_values){
value_check_df$exact_match[i] <-
paste(na.omit(c(value_check_df$exact_match[i],element_j)),
sep = "; ",collapse = "; ")}else{
#get the distance between the value and the potential values.
min_dist <- min(adist(x = element_j,y = constrained_values))
if(min_dist <= cutoff_distance){
constrained_values[which.min(adist(x = element_j,y = constrained_values))]
value_check_df$partial_match[i] <- paste(na.omit(c(value_check_df$partial_match[i],element_j)),sep = "; ",collapse = "; ")
value_check_df$partial_match_suggestions[i] <- paste(na.omit(c(value_check_df$partial_match_suggestions[i],constrained_values[which.min(adist(x = element_j,y = constrained_values))])),sep = "; ",collapse = "; ")
}
if(min_dist>cutoff_distance){
value_check_df$not_matched[i] <-
paste(na.omit(c(value_check_df$not_matched[i],element_j)),
sep = "; ",collapse = "; ")
}else{
value_check_df$partial_match[i] <-
paste(na.omit(c(value_check_df$not_matched[i],element_j)),
sep = "; ",collapse = "; ")
}
#take the partial_matchst value,
#or if distance > something, label it as "not_matched"
}#if element is not in suggested values
}#j loop
}#if the value is not null
}
#Print stuff
#Remove fields consisting of all NAs
value_check_df <- value_check_df[!apply(is.na(value_check_df[,2:5]),1,all),]
if(nrow(value_check_df) > 0){
for(r in 1:nrow(value_check_df)){
message('\n==========================================\n')
message(noquote(paste("For the field ",value_check_df$field[r],
collapse = "\n",sep = "")) )
message(noquote("\n "))
if(!is.na(value_check_df$exact_match[r])){
message(noquote(paste("The following entries appear accurate:\n" )))
message(noquote(paste("\n",value_check_df$exact_match[r])))
message(noquote("\n "))
}
if(!is.na(value_check_df$partial_match[r])){
message(noquote(paste0("The following entries are similar to suggested values, please verify:\n" )))
message(noquote(paste(value_check_df$partial_match[r])))
message(noquote(paste0( "\n\nSuggested alternatives include: \n",
value_check_df$partial_match_suggestions[r] )))
message(noquote("\n "))
}
if(!is.na(value_check_df$not_matched[r])){
message(noquote(paste("The following entries are not similar to any suggested values, please verify that these are accurate:\n" )))
message(noquote(paste(value_check_df$not_matched[r])))
message(noquote("\n "))
}
}
}else{ #if there are values to check
message(noquote("There are no suggested fields to verify in this rmm object."))
message(noquote("\n "))
} #if there are no values to check
if(returnData){
return(value_check_df)
}
}#end of fx
####################################################################
####################################################################
####################################################################
#' @title Check for missing fields
#'
#' @description Identify obligate fields that are missing
#'
#' @details
#' See Examples.
#'
#' @param rmm a range model metadata list
#' @param family The rmm family to check the rmm against
#'
#' @examples
#' rmm <- rmmTemplate() # Make an empty template
#'
#'
#' @return A vector of names that are missing from the rmm object.
#' @author Cory Merow <cory.merow@@gmail.com>, Brian Maitner <bmaitner@@gmail.com>,
# @seealso
# @references
# @aliases - a list of additional topic names that will be mapped to
# this documentation when the user looks them up from the command
# line.
#' @family check
#' @import stats
#' @import utils
#' @export
rmmCheckMissingNames <- function(rmm, family=c("base")){
list_elements <- capture.output(rmm)
list_elements <- list_elements[grep(pattern = "$",x = list_elements,fixed = T)] #remove elements that aren't field names
#Now, we need to purge the non-terminal list elements.
#Solution (probably not optimal):
#Identify elements that are completely contained within another element (but are not identical to that element).
#Any of these will not be terminal element names
terminal <- lapply(X = list_elements,FUN = function(x){
if(length(grep(pattern = x,x = unique(list_elements),fixed = T))>1){
output<-FALSE
}else{
output<-TRUE
}
return(output)
})
terminal <- unlist(terminal)
list_elements <- list_elements[terminal]
dd <- utils::read.csv(system.file("extdata/dataDictionary.csv",
package='rangeModelMetadata'),
stringsAsFactors=FALSE)
keep <- unique(unlist(lapply(family,function(fam){grep(fam,dd$family)})))
dd_ob <- dd[keep,]
obligate_names <- NULL
for(i in 1:nrow(dd_ob)){
val_i <- dd_ob[i,][unique(c(grep(pattern = "field",x = colnames(dd_ob)),
grep(pattern = "entity",x = colnames(dd_ob)))) ]#The complicate indexing ensures that id additional fields (eg field4,field5) are added things won't break
val_i <- val_i[which(!is.na(val_i))]
val_i <- paste("$",paste(val_i,collapse = "$"),sep = "")
obligate_names<-c(obligate_names,val_i)
}
missing_names <- obligate_names[which(!obligate_names%in%list_elements)]
#list_elements[which(!list_elements%in%obligate_names)]
if(length(missing_names)==0){message("All obligate field names are present.")}
return(missing_names)
}
####################################################################
####################################################################
####################################################################
#' @title Check an rmm object for empty fields
#'
#' @description Identify empty fields in an rmm object and classify these into obligate and optional fields.
#'
#' @details
#' See Examples.
#'
#' @param rmm a range model metadata list
#' @param family an rmm family, "base" by default
#'
#' @examples
#' #First, make an empty rmm object:
#' rmm <- rmmTemplate()
#' #Next, we check for emtpy fields:
#' empties1 <- rmmCheckEmpty(rmm = rmm)
#' #If looks like there are quite a few empty obligate fields. Let's populate a few:
#' rmm$data$occurrence$taxon <- "Acer rubrum"
#' rmm$data$environment$variableNames <- "Bio1"
#' #Now, if we run rmmCheckEmpty again, we see there are 2 fewer empty, obligate fields
#' empties2 <- rmmCheckEmpty(rmm = rmm)
#'
#'
#' @return A dataframe containing empty fields labeled as obligate, optional, or suggested.
#' @author Cory Merow <cory.merow@@gmail.com>, Brian Maitner <bmaitner@@gmail.com>,
# @seealso
# @references
# @aliases - a list of additional topic names that will be mapped to
# this documentation when the user looks them up from the command
# line.
#' @family check
#' @export
#CM: 7/11/18: note that I removed the optional and suggested options as we don't have those any more.
rmmCheckEmpty <- function(rmm, family = c('base')){
list_elements <- capture.output(rmm)
list_elements <- list_elements[grep(pattern = "$",x = list_elements,fixed = T)] #remove elements that aren't field names
#Now, we need to purge the non-terminal list elements.
#Solution (probably not optimal):
#Identify elements that are completely contained within another element (but are not identical to that element).
#Any of these will not be terminal element names
terminal <- lapply(X = list_elements, FUN = function(x){
if(length(grep(pattern = x,x = unique(list_elements),fixed = T))>1){
output <- FALSE}else{output<-TRUE}
return(output)
})
terminal <- unlist(terminal)
list_elements <- list_elements[terminal]
rm(terminal)
dd <- utils::read.csv(system.file("extdata/dataDictionary.csv",
package='rangeModelMetadata'),
stringsAsFactors=FALSE)
#Identify which fields are empty
nulls <- NULL
for(i in 1:length(list_elements)){
# eval(parse(text = paste("rmm",list_elements[i],sep = "",collapse = "")))
if(is.null(eval(parse(text = paste("rmm",list_elements[i],
sep = "",collapse = ""))))){
output<-TRUE
}else{output<-FALSE}
nulls<-cbind(nulls,output) }
rm(i,output)
empties <- list_elements[nulls]
output_data <- as.data.frame(matrix(ncol = 4,nrow = length(empties)))
colnames(output_data) <- c("Empty_field","Obligate","Suggested","Optional")
output_data$Empty_field <- empties
dd <- utils::read.csv(system.file("extdata/dataDictionary.csv",
package='rangeModelMetadata'),
stringsAsFactors=FALSE)
dd_names <- NULL
for(i in 1:nrow(dd)){
val_i <- dd[i,][unique(c(grep(pattern = "field",x = colnames(dd)),
grep(pattern = "entity",x = colnames(dd)))) ]#The complicate indexing ensures that id additional fields (eg field4,field5) are added things won't break
val_i <- val_i[which(!is.na(val_i))]
val_i <- paste("$",paste(val_i,collapse = "$"),sep = "")
dd_names<-c(dd_names,val_i)
}
rm(i,val_i)
#CM: check the use case issue
for(fam in family){
output_data$Obligate[which(output_data$Empty_field %in%
dd_names[which(dd$family==fam)])] <- 1
}
# output_data$Obligate[which(output_data$Empty_field %in%
# dd_names[which(dd[family]==1)])]<-1
# output_data$Optional[which(output_data$Empty_field%in%
# dd_names[which(dd[family]==0)])]<-1
#output_data$Suggested[which(output_data$Empty_field%in%dd_names[which(dd[family]==2)])]<-1 Add this once we figure out how to label suggested fields
#If there are missing obligate values, warn the user
if(sum(na.omit(output_data$Obligate))>0){
message('===================================\n')
message(paste("There are ",sum(na.omit(output_data$Obligate)), "empty obligate fields:\n" ))
message(paste(output_data$Empty_field[which(output_data$Obligate==1)],sep = ", ",collapse = "\n"))
message("\n")
}
message('===================================\n')
if(sum(na.omit(output_data$Suggested))>0){
message(paste("There are ",sum(na.omit(output_data$Suggested)), "empty suggested fields." ))
message("\n")
}
message('===================================\n')
if(sum(na.omit(output_data$Optional))>0){
message(paste("There are ",sum(na.omit(output_data$Optional)), "empty optional fields." ))
message("\n")
}
if(nrow(output_data)==0){
message("All fields are populated.")
message("\n")
}
# CM 7/12/18 temporarily toss suggested and optional until/if we use
output_data=output_data[,1:2]
return(output_data)
}
#############################################################################
#############################################################################
#############################################################################
#' @title Remove NULL entries range model metadata list
#'
#' @description Check if fields are NULL in a range model metadata list and toss
#'
#' @details
#' See Examples.
#'
#' @param rmm a range model metadata list
#'
#' @examples
#' # see vignette('rmm_vignette')
#' @return printout to the console
#' @author Cory Merow <cory.merow@@gmail.com>, Brian Maitner <bmaitner@@gmail.com>,
# @note
# @seealso
# @references
# @aliases - a list of additional topic names that will be mapped to
# this documentation when the user looks them up from the command
# line.
#' @family check
#' @export
# would better if this didn't have the quotes in the names, but this is fine for viewing
rmmCleanNULLs <- function(rmm){
# from https://stackoverflow.com/questions/26539441/remove-null-elements-from-list-of-lists/26540063
is.NullOb <- function(x) is.null(x) | all(sapply(x, is.null))
## Recursively step down into list, removing all such objects
rmNullObs <- function(x) {
x <- Filter(Negate(is.NullOb), x)
lapply(x, function(x) if (is.list(x)) rmNullObs(x) else x)
}
rmNullObs(rmm)
}
##################################
#' @title Run a final check of an rmm object
#'
#' @description Check an rmm object for non-standard and missing values and fields
#'
#' @details
#' See Examples.
#'
#' @param rmm a range model metadata list
#' @param family The rmm family to check the rmm against
#'
#' @examples
#' rmm <- rmmTemplate() # Make an empty template
#' rmmCheckFinalize(rmm)
#'
#'
#' @return Prints feedback to point out possible errors.
#' @author Cory Merow <cory.merow@@gmail.com>, Brian Maitner <bmaitner@@gmail.com>,
# @seealso
# @references
# @aliases - a list of additional topic names that will be mapped to
# this documentation when the user looks them up from the command
# line.
#' @family check
#' @import stats
#' @import utils
#' @export
rmmCheckFinalize<-function(rmm,family = c('base')){
names <- rmmCheckName(rmm,returnData = TRUE)
values <- rmmCheckValue(rmm = rmm,returnData = TRUE)
missing_names <- rmmCheckMissingNames(rmm,family = family)
empty_values <- rmmCheckEmpty(rmm = rmm,family = family)
if(length(na.omit(values$partial_match))==0 & length(na.omit(values$not_matched))==0 & #All values are exactly matched
length(missing_names)==0 & #No names are missing
length(na.omit(names$partial_match))==0 & length(na.omit(names$not_matched))==0 & #All names are exactly matched
sum(na.omit(empty_values$Obligate))==0
){
message("Everything looks good!")
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.