#===============================================================================
#
# gen_wrapped_bdprob.R
#
#===============================================================================
compute_PU_spp_table_attributes_by_spp <- function (PU_IDs_for_one_spp_as_df,
dep_set)
{
num_pu_this_spp = length (PU_IDs_for_one_spp_as_df)
num_unique_pu_this_spp = length (unique (PU_IDs_for_one_spp_as_df))
num_pu_in_dep_set = sum (PU_IDs_for_one_spp_as_df %in% dep_set)
return (data.frame (num_pu_this_spp, num_unique_pu_this_spp, num_pu_in_dep_set))
}
#===============================================================================
# Hack to quiet CHECK command for data frame column names that CHECK flags
# as "no visible binding for global variable".
# This is the officially sanctioned hack for doing this, e.g., see
# https://github.com/STAT545-UBC/Discussion/issues/451
# https://github.com/tidyverse/magrittr/issues/29
# http://r.789695.n4.nabble.com/globalVariables-td4593980.html
if(getRversion() >= "2.15.1") utils::globalVariables(c("spp_ID", "."))
#===============================================================================
#' Validate the wrapping of a bdproblem
#'
#' Check a wrapping distribution to see that it will generate a legal wrapped
#' problem.
#'
#-------------------------------------------------------------------------------
#' @param extra_abund blah
#' @param dep_set_PUs_eligible blah
#' @param PU_spp_table blah
#' @param dep_set blah
#'
#' @return TRUE if wrap is valid, otherwise quits before end of function
#' export
#-------------------------------------------------------------------------------
validate_wrap <- function (extra_abund,
dep_set_PUs_eligible,
PU_spp_table,
dep_set)
{
# Apply compute_PU_spp_table_attributes_by_spp() function to PU_spp_table by spp_ID group
# Form of this call is taken from:
# http://www.milanor.net/blog/dplyr-do-tips-for-using-and-programming/
values_by_spp =
PU_spp_table %>%
dplyr::group_by (spp_ID) %>%
dplyr::do (compute_PU_spp_table_attributes_by_spp (PU_IDs_for_one_spp_as_df = .$PU_ID,
dep_set))
num_extra_spp = length (extra_abund)
# Rules that PU_spp_table for wrapped spp must satisfy to be valid:
num_test_rules = 6
ok = rep (FALSE, num_test_rules)
# Rule 1: At least one occurrence of each species ID must be on a PU in the dep set
ok [1] = all (values_by_spp$num_pu_in_dep_set > 0)
if (! ok[1])
{
message ("\nWrapping rule 1 violation:")
cat ("\nRule 1: At least one occurrence of each species ID must be on a PU in the dep set",
"\nnum_extra_spp = ", num_extra_spp,
"\nsum (values_by_spp$num_pu_in_dep_set > 0) = ",
sum (values_by_spp$num_pu_in_dep_set > 0),
"\nwhich (values_by_spp$num_pu_in_dep_set <= 0) = '",
which (values_by_spp$num_pu_in_dep_set <= 0), "'\n")
print (values_by_spp$num_pu_in_dep_set)
}
# Rule 2: If dep set not eligible, then one and only one occurrence of the species can be in dep set
ok [2] = TRUE
if (! dep_set_PUs_eligible)
ok [2] = all (values_by_spp$num_pu_in_dep_set == 1)
if (! ok[2])
{
message ("\nWrapping rule 2 violation:")
cat ("\nRule 2: If dep set not eligible, then one and only one occurrence of the species can be in dep set",
"\nnum_extra_spp = ", num_extra_spp,
"\nsum (values_by_spp$num_pu_in_dep_set == 1) = ",
sum (values_by_spp$num_pu_in_dep_set == 1),
"\nwhich (values_by_spp$num_pu_in_dep_set != 1) = '",
which (values_by_spp$num_pu_in_dep_set != 1), "'\n")
print (values_by_spp$num_pu_in_dep_set)
}
# Rule 3: No PU can occur more than once within a species
ok [3] = all (values_by_spp$num_pu_this_spp == values_by_spp$num_unique_pu_this_spp)
if (! ok[3])
{
cts_not_same = which (values_by_spp$num_pu_this_spp !=
values_by_spp$num_unique_pu_this_spp)
message ("\nWrapping rule 3 violation:")
cat ("\nRule 3: No PU can occur more than once within a species",
"\nduplicates at index(s): '", cts_not_same, "'")
cat ("\nvalues_by_spp$num_pu_this_spp = \n")
print (values_by_spp$num_pu_this_spp)
cat ("\nvalues_by_spp$num_unique_pu_this_spp = \n")
print (values_by_spp$num_unique_pu_this_spp)
}
# Rule 4: All species must occur in result table
ok [4] = (num_extra_spp == length (values_by_spp$spp_ID))
if (! ok[4])
{
message ("\nWrapping rule 4 violation:")
cat ("\nRule 4: All species must occur in result table",
"\nnum_extra_spp = ", num_extra_spp,
"\nlength (values_by_spp$spp_ID) = ", length (values_by_spp$spp_ID))
}
# Rule 5: All species must occur the number of times specified in their abundance
# Had to use isTRUE() here because all.equal doesn't seem to return
# a boolean. For some reason, it returns a string saying
# something like "Mean relative difference: ..." when they
# don't match. When they do match, it does return TRUE though...
ok [5] = isTRUE (all.equal (extra_abund, values_by_spp$num_unique_pu_this_spp))
if (! ok[5])
{
message ("\nWrapping rule 5 violation:")
cat ("\nRule 5: All species must occur the number of times specified in their abundance")
if (length (extra_abund) == length (values_by_spp$num_unique_pu_this_spp))
{
indices_of_mismatches = which (extra_abund != values_by_spp$num_unique_pu_this_spp)
cat ("\nindices_of_mismatches = '", indices_of_mismatches, "'")
}
cat ("\nextra_abund = \n")
print (extra_abund)
cat ("\nvalues_by_spp$num_unique_pu_this_spp = \n")
print (values_by_spp$num_unique_pu_this_spp)
}
# Rule 6: Total number of lines in the result table must equal total number of occurrences
ok [6] = (sum (extra_abund) == nrow (PU_spp_table))
if (! ok[6])
{
message ("\nWrapping rule 6 violation:")
cat ("\nRule 6: Total number of lines in the result table must equal total number of occurrences",
"\ntotal num spp = sum (extra_abund) = ", sum (extra_abund),
"\ntotal num lines in result table = ", nrow (PU_spp_table))
cat ("\nextra_abund = \n")
print (extra_abund)
cat ("\nPU_spp_table = \n")
print (PU_spp_table)
}
all_ok = all (ok)
if (! all_ok)
{
cat ("\nRule(s) ", which (!ok),
" violated in wrapped species PU_spp_table.\n")
stop_bdpg()
}
return (all_ok)
}
#===============================================================================
# Need to do some sanity checks on various wrap parameters
# to make sure that they can't generate nonsense or a crash
#
# Just a dummy placeholder routine until I know what needs checking.
#' Do sanity checks on parameters for wrapping a biodiversity problem
#'
#' Currently just a placeholder...
#-------------------------------------------------------------------------------
do_sanity_checks <- function ()
{
cat ("\n\nDummy sanity check for wrap_...().\n\n")
}
#===============================================================================
#-------------------------------------------------------------------------------
#' Create set of planning units eligible for adding species into
#'
#' When wrapping a species distribution around a Xu biodiversity problem,
#' the first instance of any new species is added to the solution set of the
#' original Xu problem that is being wrapped. However, if more than one
#' instance of the new species is being added in the wrapped problem, the
#' extra instances could go in a) just the new planning units outside the
#' original Xu problem added for the wrapper, or b) in both the original
#' solution set and the newly added planning units. This function builds
#' that target set of planning units based on the value of the
#' dep_set_PUs_eligible flag passed to it.
#'
#-------------------------------------------------------------------------------
#' @param Xu_dep_set integer vector of planning unit IDs for the solution set
#' of the original Xu problem being wrapped around
#' @param extra_PUs integer vector of plannning unit IDs being added to the
#' original Xu problem to make the wrapped problem
#' @param dep_set_PUs_eligible boolean indicating whether new species instances
#' can be added to the original Xu problem's solution set in addition to
#' the adding them to the newly added planning units; TRUE implies new
#' species instances can be added to both sets; FALSE implies new species
#' instances can only be added to the newly added planning units
#'
#' @return set of planning unit IDs where new species can be placed
#-------------------------------------------------------------------------------
create_eligible_PU_set <- function (Xu_dep_set,
extra_PUs,
dep_set_PUs_eligible
)
{
if (dep_set_PUs_eligible)
eligible_PUs = c(Xu_dep_set, extra_PUs)
else
eligible_PUs = extra_PUs
return (eligible_PUs)
}
#===============================================================================
#-------------------------------------------------------------------------------
#' Drop spp from abund dist if on too many or too few patches
#'
#' This function is primarily to get rid of species that occur on
#' only 0 or 1 patches, however, some distribution generator might also
#' generate species that are too common as well, so this routine will
#' axe those too.
#'
#' Abundances are expressed here as a vector of count values where each value
#' is the count of the number of patches a particular species occurs on.
#' For example, [1 1 1 2 6 6] would represent 3 species that occur
#' on exactly one patch, 1 species that occurs on 2 patches, and 2 species
#' that occur on 6 patches each. Calling this routine with a min_abund of 2
#' and a max_abund of 5 would return a 1 element vector, i.e., the vector [2].
#'
#' This doesn't really need to be a function since it's only one line
#' long, but I built an earlier version that was more verbose so that
#' I could monitor how many things were being cut out.
#' That version is in the git repository in case it needs to be
#' resurrected for some reason.
#'
#-------------------------------------------------------------------------------
#' @param rounded_abundances vector of abundances to be trimmed
#' @param min_abund lowest abundance to allow in the trimmed set
#' @param max_abund largest abundance to allow in the trimmed set
#'
#' @return vector of abundances whose values lie in the specified range
#-------------------------------------------------------------------------------
trim_abundances = function (rounded_abundances,
min_abund=2,
max_abund=.Machine$double.xmax
)
{
trimmed_rounded_abund_per_spp <-
rounded_abundances [(rounded_abundances <= max_abund) &
(rounded_abundances >= min_abund), drop=FALSE]
return (trimmed_rounded_abund_per_spp)
}
#===============================================================================
gen_raw_histogram_of_wrapped_dist <- function (Xu_PU_spp_table,
trimmed_rounded_abund_per_spp,
spp_col_name)
{
verbose_remove_base = FALSE # just for debugging now...
if (verbose_remove_base)
{
cat ("\n\nStarting ",
"gen_raw_histogram_of_wrapped_dist():")
cat ("\n Xu_PU_spp_table = \n")
show (Xu_PU_spp_table)
cat ("\n trimmed_rounded_abund_per_spp = \n")
show (trimmed_rounded_abund_per_spp)
cat ("\n spp_col_name = \n")
show (spp_col_name)
}
# Xu_PU_spp_table =
# PU_ID spp_ID
# 1 1 1
# 2 2 1
# 3 3 2
# 4 4 2
# ...
# 155 155 78
# 156 156 78
# 157 157 79
# 158 158 79
#
#
# trimmed_rounded_abund_per_spp =
# [1] 5 2 2 2 2 3 2 3 3 2 2 3 2 6 2 4 2 2 2 2 2 2 2 2 2 2 2 3 3 2 2 3 2 4 2 3 3
# [38] 2 3 2 2 2 3 4 2 3 3 2 2 2 2 3 2 2 2 2 3 2 4 2 3 3 2 2 3 4 2 2 2 2 2 3 2 3
# [75] 3 3 2 3 3 2 2 3 3 2 3 2 2 3 3 2 2 2 2 3 2 6 2 3 2 4 2 2 3 2 2 3 2 2 3 2 3
# [112] 4 4 2 2 3 3 2 2 4 2 2 3 3 2 2 3 3 2 3 3 2 2 2 5 2 3 2 2 2 4 2 3 3 2 2
#
# spp_col_name =
# [1] "spp_ID"
#-----------------------------------------------------------------------
# Count the number of occurrences of each species in the base problem.
#-----------------------------------------------------------------------
base_abund_by_spp = plyr::count (Xu_PU_spp_table [,spp_col_name])
if (verbose_remove_base)
{
cat ("\n\n base_abund_by_spp = \n")
show (base_abund_by_spp)
}
# base_abund_by_spp =
# x freq
# 1 1 2
# 2 2 2
# ...
# 78 78 2
# 79 79 2
#-----------------------------------------------------------------------
# Count how many times each number of occurrences appears in that set
# (e.g., how many species occur on 2 patches, on 3 patches, etc.).
# Should always be 2 for every species in the Xu base problem.
#-----------------------------------------------------------------------
base_abund_hist = plyr::count (base_abund_by_spp [,"freq"])
if (verbose_remove_base)
{
cat ("\n\n base_abund_hist = \n")
show (base_abund_hist)
}
# base_abund_hist =
# x freq
# 1 2 79
#-----------------------------------------------------------------------
# Do the same for the wrapping abundances.
# They are given as a vector of numbers of occurrences for each
# species in the wrapping distribution, but unlike the base problem
# abundances, no species ID has been assigned yet to these wrapping
# counts.
#-----------------------------------------------------------------------
wrapping_abund_hist = plyr::count (trimmed_rounded_abund_per_spp)
if (verbose_remove_base)
{
cat ("\n\n wrapping_abund_hist = \n")
show (wrapping_abund_hist)
}
# wrapping_abund_hist =
# x freq
# 1 2 86
# 2 3 46
# 3 4 10
# 4 5 2
# 5 6 2
#-----------------------------------------------------------------------
# There will probably be some different elements in the two histograms.
# For example, the original Xu histogram will only have one entry,
# i.e., the count for abundance = 2, since all species occur on
# exactly 2 patches. The wrapping histogram will probably (though
# not necessarily) have entries for other patch counts and may have
# a different value than the Xu histogram for the number of species
# occurring on 2 patches.
#
# We need to make a single histogram that merges the two sets of
# possible counts. The merge() function does this by making a single
# data.frame with a column for each of the 2 input histograms,
# labelling one column as x and the other as y. It adds a row for
# each matching entry in the inputs (e.g., a row for abundance = 2,
# with the x column giving the count for the wrapping histogram and
# the y column giving the count for the Xu histogram).
#
# It also adds a row for each entry in either input that doesn't
# appear in the other input (e.g., a row for every abundance other
# than 2 in the wrapping histogram when it's wrapping around an
# original Xu histogram that only has the value for abundance = 2).
#-----------------------------------------------------------------------
wrapped_extra_spp_abund_merge = merge (x=wrapping_abund_hist,
y=base_abund_hist,
by="x", all=TRUE)
if (verbose_remove_base)
{
cat ("\n\n wrapped_extra_spp_abund_merge = \n")
show (wrapped_extra_spp_abund_merge)
}
# wrapped_extra_spp_abund_merge =
# x freq.x freq.y
# 1 2 86 79
# 2 3 46 NA
# 3 4 10 NA
# 4 5 2 NA
# 5 6 2 NA
return (wrapped_extra_spp_abund_merge)
}
#===============================================================================
#' See whether wrapping species fully contain base species
#'
#' When wrapping a species distribution around a Xu biodiversity problem,
#' the optimizer may have stopped before finding a distribution that fully
#' encloses the base distribution, i.e., the wrapping distribution has fewer
#' species that occur on exactly 2 patches than the base distribution has.
#' In that case, the freq entry in the wrapping abundance distribution will
#' be negative. If the allow_imperfect_wrap flag is set to TRUE, then
#' set that histogram value to be 0 instead of the negative value, otherwise,
#' it's a fatal error and the program should stop.
#'
#' Note that the reason the value gets set to 0 instead of to the number of
#' species on 2 patches in the base distribution is that the histogram is
#' representing the total number of Extra species to be added that occur on
#' exactly 2 patches rather than the total number of species occurring on 2
#' patches in the entire combined distribution.
#'
#-------------------------------------------------------------------------------
#' @param final_wrapped_extra_spp_abund_hist data frame showing the number of
#' species to be added for each occurrence count, e.g., how many new species
#' to add that occur on exactly 2 patches, exactly 3 patches, etc.
#' @param allow_imperfect_wrap boolean indicating whether the proposed wrapping
#' distribution is allowed to not fully enclose the base distribution
#'
#' @return list containing the final wrapping abundance histogram and a boolean
#' flag indicating whether the wrap is imperfect or not
check_for_imperfect_wrap <- function (final_wrapped_extra_spp_abund_hist,
allow_imperfect_wrap)
{
# final_wrapped_extra_spp_abund_hist =
# abund freq
# 1 2 7 <<<<<----- perfect: wrap fully encloses base problem
# 2 3 46
# 3 4 10
# 4 5 2
# 5 6 2
# OR
# final_wrapped_extra_spp_abund_hist =
# abund freq
# 1 2 -20 <<<<<----- imperfect: does not fully enclose base problem
# 2 3 46
# 3 4 10
# 4 5 2
# 5 6 2
#browser()
wrap_is_imperfect = FALSE
num_neg_abund_freqs = sum (final_wrapped_extra_spp_abund_hist [,'freq'] < 0)
if (num_neg_abund_freqs > 0)
{
# Wrap is imperfect.
#
# There is at least one negative abundance frequency, i.e.,
# abundance level where the number of species with that abundance
# in the problem being wrapped is greater than in the wrapping
# wrapping distribution.
# For example, if it's not a perfect wrapping distribution,
# then there might only be 10 species that occur on exactly two
# PUs while the Xu base problem has 15 species occurring on
# exactly two PUs.
wrap_is_imperfect = TRUE
if (allow_imperfect_wrap)
{
error_or_warning_on_wrap_str = "- bdpg WARNING"
} else
{
error_or_warning_on_wrap_str = "- bdpg FATAL ERROR"
}
#-----------------------------------------------------------------------
# Write a warning message in 2 ways.
# First, write it using cat() so that it ends up in the console log.
# Second, write it using message() so that it shows up on the console
# in red to have more chance of making the user aware of it.
# Originally, I just used the message() call but it doesn't show up
# in the console log file (I think it goes to stderr or something.)
#-----------------------------------------------------------------------
msg_string = paste0 ("\n\nIMPERFECT WRAP ", error_or_warning_on_wrap_str,
"\n", num_neg_abund_freqs, " element of the wrapping distribution (freq.x) is smaller than",
"\nthe corresponding element in the wrapped distribution (freq.y).",
"\nThis would lead to a negative frequency for at least one level of abundance, ",
"\ni.e., a negative number of species having the given abundance.",
"\n\nwrapped_extra_spp_abund_merge = \n")
cat (msg_string)
message (msg_string)
print (final_wrapped_extra_spp_abund_hist)
if (allow_imperfect_wrap)
{
indices_of_negative_freqs = which (final_wrapped_extra_spp_abund_hist [,'freq'] < 0)
# In the current incarnation of problems, this should only
# occur for species that occur on exactly 2 patches.
# Set the count in the histogram to 0; don't set it to the
# number of species occuring on 2 patches in the base problem,
# since this histogram represents the number of species to
# be ADDED for the wrapped distribution. It doesn't
# represent the total resulting number of species on exactly
# 2 patches in the combined wrapping and base set.
final_wrapped_extra_spp_abund_hist [indices_of_negative_freqs, 'freq'] = 0
}
cat ("\nResulting final_wrapped_extra_spp_abund_hist = \n")
print (final_wrapped_extra_spp_abund_hist)
cat ("\n")
if (! allow_imperfect_wrap) stop_bdpg ("Fail on imperfect wrap of bdproblem")
} # end if - num_neg_abund_freqs > 0, i.e., imperfect wrap
###2018 02 28 branch### return (final_wrapped_extra_spp_abund_hist)
return (list (final_wrapped_extra_spp_abund_hist = final_wrapped_extra_spp_abund_hist,
wrap_is_imperfect = wrap_is_imperfect))
}
#===============================================================================
compute_final_wrapped_extra_spp_abund_hist <- function (wrapped_extra_spp_abund_merge,
allow_imperfect_wrap)
{
verbose_remove_base = FALSE # just for debugging now...
if (verbose_remove_base)
{
cat ("\n\nStarting ",
"compute_final_wrapped_extra_spp_abund_hist():")
cat ("\nallow_imperfect_wrap = ", allow_imperfect_wrap,
"\nwrapped_extra_spp_abund_merge = \n")
show (wrapped_extra_spp_abund_merge)
}
#browser()
# wrapped_extra_spp_abund_merge =
# x freq.x freq.y
# 1 2 86 79
# 2 3 46 NA
# 3 4 10 NA
# 4 5 2 NA
# 5 6 2 NA
#-----------------------------------------------------------------------
# Now we need to clean up this data frame so that NAs are replaced
# with 0's and so that any missing abundance values are added to
# the data.frame (e.g., if the highest abundance value was 10 but
# neither input histogram had any species that occurred on 3, 4, or
# 9 PUs).
#-----------------------------------------------------------------------
# Replace NA counts with 0s.
wrapped_extra_spp_abund_merge [is.na (wrapped_extra_spp_abund_merge)] = 0
if (verbose_remove_base)
{
cat ("\n\n After NA replacement with 0, wrapped_extra_spp_abund_merge = \n")
show (wrapped_extra_spp_abund_merge)
}
# After NA replacement with 0, wrapped_extra_spp_abund_merge =
# x freq.x freq.y
# 1 2 86 79
# 2 3 46 0
# 3 4 10 0
# 4 5 2 0
# 5 6 2 0
final_wrapped_extra_spp_abund_hist =
as.data.frame (cbind (wrapped_extra_spp_abund_merge [,"x"],
wrapped_extra_spp_abund_merge [, "freq.x"] -
wrapped_extra_spp_abund_merge [, "freq.y"]
))
names (final_wrapped_extra_spp_abund_hist) = c("abund","freq")
if (verbose_remove_base)
{
cat ("\n\n final_wrapped_extra_spp_abund_hist = \n")
show (final_wrapped_extra_spp_abund_hist)
}
# final_wrapped_extra_spp_abund_hist =
# abund freq
# 1 2 7
# 2 3 46
# 3 4 10
# 4 5 2
# 5 6 2
###2018 02 28 branch### final_wrapped_extra_spp_abund_hist =
final_wrapped_extra_spp_abund_hist_list =
check_for_imperfect_wrap (final_wrapped_extra_spp_abund_hist,
allow_imperfect_wrap)
###2018 02 28 branch### return (final_wrapped_extra_spp_abund_hist)
return (final_wrapped_extra_spp_abund_hist_list)
}
#===============================================================================
build_vec_of_extra_spp_and_their_abundances <- function (wrapped_extra_spp_abund_hist)
{
num_extra_spp = sum (wrapped_extra_spp_abund_hist [,"freq"])
# num_extra_spp =
# 767
extra_spp_abund = rep (NA, num_extra_spp)
# extra_spp_abund = logi [1:767] NA NA NA NA NA NA ...
num_abund_rows = length (wrapped_extra_spp_abund_hist[,"abund"])
# num_abund_rows =
# 5
verbose_remove_base = TRUE # just for debugging now...
if (verbose_remove_base)
{
cat ("\nIn build_vec_of_extra_spp_and_their_abundances() just before loop: ",
"\nnum_extra_spp = ", num_extra_spp,
"\nnum_abund_rows = ", num_abund_rows,
"\ninitial extra_spp_abund = \n")
print (extra_spp_abund)
}
start_idx = 1
for (cur_idx in 1:num_abund_rows)
{
if (wrapped_extra_spp_abund_hist [cur_idx, "freq"] > 0) # Should always be true, but just in case...
{
end_idx = start_idx + wrapped_extra_spp_abund_hist [cur_idx, "freq"] - 1
extra_spp_abund [start_idx:end_idx] = wrapped_extra_spp_abund_hist [cur_idx, "abund"]
if (verbose_remove_base)
{
cat ("\nAt bottom of cur_idx = ", cur_idx, ",
start_idx = ", start_idx,
"end_idx = ", end_idx,
"extra_spp_abund = \n")
print (extra_spp_abund)
}
start_idx = end_idx + 1
}
}
return (extra_spp_abund)
}
#===============================================================================
clean_up_wrapped_abund_dist <- function (wrapped_extra_spp_abund_merge,
allow_imperfect_wrap)
{
verbose_remove_base = TRUE # just for debugging now...
if (verbose_remove_base)
{
cat ("\n\nStarting ",
"clean_up_wrapped_abund_dist():")
cat ("\nallow_imperfect_wrap = \n", allow_imperfect_wrap,
"\nwrapped_extra_spp_abund_merge = \n")
show (wrapped_extra_spp_abund_merge)
}
#browser()
# wrapped_extra_spp_abund_merge =
# x freq.x freq.y
# 1 2 86 79
# 2 3 46 NA
# 3 4 10 NA
# 4 5 2 NA
# 5 6 2 NA
#-----------------------------------------------------------------------
# Now we need to clean up this data frame so that NAs are replaced
# with 0's and so that any missing abundance values are added to
# the data.frame (e.g., if the highest abundance value was 10 but
# neither input histogram had any species that occurred on 3, 4, or
# 9 PUs).
#-----------------------------------------------------------------------
###2018 02 28 branch### wrapped_extra_spp_abund_hist =
wrapped_extra_spp_abund_hist_list = ###2018 02 28 branch###
compute_final_wrapped_extra_spp_abund_hist (wrapped_extra_spp_abund_merge,
allow_imperfect_wrap)
wrapped_extra_spp_abund_hist = ###2018 02 28 branch###
wrapped_extra_spp_abund_hist_list$final_wrapped_extra_spp_abund_hist ###2018 02 28 branch###
#-----------------------------------------------------------------------
# Now build a vector of the abundances for just the extra species
# based on the histogram of abundances of just the extra species
# (i.e., the species in the wrapped distribution but not in the
# base problem's distribution.
#-----------------------------------------------------------------------
extra_spp_abund =
build_vec_of_extra_spp_and_their_abundances (wrapped_extra_spp_abund_hist)
###2018 02 28 branch### return (extra_spp_abund)
return (list (extra_spp_abund = extra_spp_abund, ###2018 02 28 branch###
wrap_is_imperfect = ###2018 02 28 branch###
wrapped_extra_spp_abund_hist_list$wrap_is_imperfect)) ###2018 02 28 branch###
}
#===============================================================================
# Remove base species abundances from wrapping distribution
#' Remove base problem's species from the wrapping problems set of species to
#' be distributed over the planning units
#'
#' When wrapping one distribution around another, the outside distribution
#' needs to contain the inside distribution as a proper subset, however,
#' when it's time to distribute the wrapping distribution over the landscape,
#' the inside distribution has already been distributed. That means that the
#' inside distribution needs to be removed from full wrapping distribution
#' before its instances are spread across the landscape. This function
#' strips that inside distribution out of the full wrapped distribution and
#' returns a set of abundances ready for spreading.
#'
#-------------------------------------------------------------------------------
#' @param Xu_PU_spp_table PU_spp table for original Xu problem being wrapped
#' around
#' @param trimmed_rounded_abund_per_spp vector of abundances of all species in
#' the full wrapped distribution, i.e., including the original Xu problem
#' abundances
#' @param spp_col_name string containing the name of the column in the PU_spp
#' table that contains the species IDs
#' @inheritParams std_param_defns
#'
#' @return vector of abundances in the wrapping abundance distribution that are
#' not also in the original Xu problem
#-------------------------------------------------------------------------------
remove_base_spp_abundances_from_wrapping_distribution <-
function (Xu_PU_spp_table,
trimmed_rounded_abund_per_spp,
spp_col_name,
allow_imperfect_wrap)
{
verbose_remove_base = FALSE # just for debugging now...
if (verbose_remove_base)
{
cat ("\n\nStarting ",
"remove_base_spp_abundances_from_wrapping_distribution():")
cat ("\n Xu_PU_spp_table = \n")
show (Xu_PU_spp_table)
cat ("\n trimmed_rounded_abund_per_spp = \n")
show (trimmed_rounded_abund_per_spp)
cat ("\n spp_col_name = \n")
show (spp_col_name)
cat ("\nallow_imperfect_wrap = \n")
show (allow_imperfect_wrap)
}
wrapped_extra_spp_abund_merge =
gen_raw_histogram_of_wrapped_dist (Xu_PU_spp_table,
trimmed_rounded_abund_per_spp,
spp_col_name)
###2018 02 28 branch### extra_spp_abund = clean_up_wrapped_abund_dist (wrapped_extra_spp_abund_merge,
extra_spp_abund_list = clean_up_wrapped_abund_dist (wrapped_extra_spp_abund_merge, ###2018 02 28 branch###
allow_imperfect_wrap)
###2018 02 28 branch### return (extra_spp_abund)
return (extra_spp_abund_list) ###2018 02 28 branch###
}
#===============================================================================
#' Create wrapping PU/species table
#-------------------------------------------------------------------------------
#' @param extra_abund vector
#' @param dep_set vector
#' @param eligible_set vector
#' @param use_testing_only_rand_seed boolean
#' @param testing_only_rand_seed integer
#'
#' @return PU_spp_table
#' @export
#'
#-------------------------------------------------------------------------------
create_wrapping_PU_spp_table <- function (extra_abund,
dep_set,
eligible_set,
# NOTE: These 2 args are ONLY for use when called by unit test
# routines that need to reproduce random values exactly.
# You should omit them in all other cases.
# See tests/testthat/test_gen_wrapped_bdprob.R for the
# only places where they are and should be used.
use_testing_only_rand_seed = FALSE,
testing_only_rand_seed = 17)
{
if (use_testing_only_rand_seed) set.seed (testing_only_rand_seed)
#-----------------------------------------------------------------------
# Create and initialize a table showing each occurrence of each extra
# species (i.e., wrapping species) on a particular PU, i.e., one
# PU_ID/spp_ID pair for each occurrence.
#-----------------------------------------------------------------------
num_extra_occurrences = sum (extra_abund)
num_extra_spp = length (extra_abund)
PU_spp_table = data.frame (PU_ID = rep (NA, num_extra_occurrences),
spp_ID = rep (NA, num_extra_occurrences))
#-----------------------------------------------------------------------
# Now, for each extra species (i.e., species in the wrapping set),
# randomly draw planning units where that species occurs in the
# eligible set of planning units.
# Then, load each of those occurrences into the PU_spp_table.
#-----------------------------------------------------------------------
cur_row = 1
for (cur_spp_idx in 1:num_extra_spp)
{
num_PUs_to_draw = extra_abund [cur_spp_idx]
#-------------------------------------------------------------------
# Draw the one mandatory PU in the dependent set for this species
# and add a PU_ID/spp_ID pair for it in the PU_spp_table.
#-------------------------------------------------------------------
cur_dep_set_PU = safe_sample (dep_set, 1, replace=FALSE)
PU_spp_table [cur_row, "PU_ID"] = cur_dep_set_PU
PU_spp_table [cur_row, "spp_ID"] = cur_spp_idx
cur_row = cur_row + 1
#-------------------------------------------------------------------
# If that PU was part of the overall eligible set to draw from
# (e.g., the eligible set was the union of the dependent set and
# the extra set rather than just being the extra set),
# remove it from the set of PUs eligible for any remaining draws.
#-------------------------------------------------------------------
idx_of_cur_dep_set_PU = which (eligible_set == cur_dep_set_PU)
cur_eligible_set =
if (length (idx_of_cur_dep_set_PU) > 0)
eligible_set [-idx_of_cur_dep_set_PU] else eligible_set
#-------------------------------------------------------------
# Now draw all remaining occurrences of the current species
# from the eligible set of PUs and load a PU_ID/spp_ID pair
# for each one into the PU_spp_table.
#-------------------------------------------------------------
num_PUs_to_draw = num_PUs_to_draw - 1
if (num_PUs_to_draw > 0)
{
extra_PUs_for_cur_spp = safe_sample (cur_eligible_set,
num_PUs_to_draw,
replace=FALSE)
end_row = cur_row + num_PUs_to_draw - 1
PU_spp_table [cur_row:end_row, "PU_ID"] = extra_PUs_for_cur_spp
PU_spp_table [cur_row:end_row, "spp_ID"] = cur_spp_idx
cur_row = end_row + 1
} # end if - num_PUs_to_draw
} # end for - cur_spp_idx
return (PU_spp_table)
}
#===============================================================================
#' Wrap abundances around eligible set
#'
#' Take a given distribution (e.g., from a Xu problem) and add more species
#' (and probably more planning units too) to the problem so that the new
#' problem has a specified distribution, but retains the original problem's
#' solution.
#'
#-------------------------------------------------------------------------------
#' @param dep_set integer vector
#' @param eligible_set integer vector
#' @param rounded_abund_per_spp integer vector
#' @param num_base_spp integer
#' @param Xu_PU_spp_table data frame
#' @param allow_imperfect_wrap boolean
#' @param min_allowed_abundance integer
#' @param PU_col_name character string
#' @param spp_col_name character string
#' @param use_testing_only_rand_seed boolean
#' @param testing_only_rand_seed integer
#'
#' @return Returns PU_spp_table data frame
#' @export
#-------------------------------------------------------------------------------
wrap_abundances_around_eligible_set <- function (
dep_set,
eligible_set,
rounded_abund_per_spp,
num_base_spp,
Xu_PU_spp_table,
allow_imperfect_wrap,
min_allowed_abundance = 2,
PU_col_name = "PU_ID",
spp_col_name = "spp_ID",
use_testing_only_rand_seed = FALSE,
testing_only_rand_seed = 17
)
{
cat ("\n\nStarting wrap_abundances_around_eligible_set()\n", sep='')
#-----------------------------------------------------------------------
# 2016 06 21 - BTL
# NEED TO REMOVE ANY SPP WHOSE ABUNDANCE IS 1,
# BECAUSE THEY MAKE THE PROBLEM MUCH EASIER, I.E., ALL PUs CONTAINING
# ANY SPECIES WITH ABUNDANCE = 1 MUST AUTOMATICALLY BE INCLUDED IN
# CORRECT SOLUTION.
# THEREFORE, ANY PROBLEM CAN AUTOMATICALLY BE REDUCED TO A DIFFERENT
# AND HARDER PROBLEM BY REMOVING ALL SINGLETON SPECIES.
# The same would apply to any other target value if the target
# was > 1. For example, if a species had a target of 3 and it only
# occurred on 3 patches or less, that species would need to be
# removed. This doesn't happen in these initial Xu problems since
# the generator currently only works for having all targets = 1.
#-----------------------------------------------------------------------
trimmed_rounded_abund_per_spp = trim_abundances (rounded_abund_per_spp,
min_abund=min_allowed_abundance)
plot (sort(trimmed_rounded_abund_per_spp, decreasing = TRUE))
#-----------------------------------------------------------------
# Build a vector containing the abundances of each of the extra
# species.
#-----------------------------------------------------------------
###2018 02 28 branch### extra_abund =
extra_abund_list = ###2018 02 28 branch###
remove_base_spp_abundances_from_wrapping_distribution (Xu_PU_spp_table,
trimmed_rounded_abund_per_spp,
spp_col_name,
allow_imperfect_wrap)
extra_abund = extra_abund_list$extra_spp_abund ###2018 02 28 branch###
wrap_is_imperfect = extra_abund_list$wrap_is_imperfect ###2018 02 28 branch###
PU_spp_table =
create_wrapping_PU_spp_table (extra_abund,
dep_set,
eligible_set,
# Only for unit tests.
# Normally just use default
# values from arg list above.
use_testing_only_rand_seed,
testing_only_rand_seed
)
cat ("\n\n--------------------------------------\n\n", "PU_spp_table = \n")# print (PU_spp_table) # usually too long to print...
print (head (PU_spp_table)) # usually too long to print entire table...
cat ("\n\n")
#-------------------------------------------------------------------
# Combine the PU_spp_table just created for the wrapping species
# with the PU_spp_table from the original problem.
# Since both the extra species and the base species started their
# species numbering at 1, need to offset the numbering of one or
# the other of these two groups. Will change the numbering of
# the extra species to begin just after the last of the base
# species.
# This is a fairly arbitrary choice, but it may help debugging
# sometime since the base species will have the same spp IDs in
# the wrapped set as in the base set.
#-------------------------------------------------------------------
PU_spp_table [, "spp_ID"] = PU_spp_table [, "spp_ID"] + num_base_spp
PU_spp_table = rbind (Xu_PU_spp_table, PU_spp_table)
cat ("\n\nAfter rbind(), PU_spp_table = \n")
print (head (PU_spp_table)) # usually too long to print entire table...
cat ("\n\n")
###2018 02 28 branch### return (PU_spp_table)
return (list (PU_spp_table = PU_spp_table, ###2018 02 28 branch###
wrap_is_imperfect = wrap_is_imperfect)) ###2018 02 28 branch###
} # end function - wrap_abundances_around_eligible_set
#===============================================================================
#-------------------------------------------------------------------------------
#' Wrap abundance distribution around Xu problem
#'
#' Note that this function doesn't care where you got the abundances,
#' i.e., they don't have to have come from the lognormal generator.
#' It can be any abundance set that you want, as long as it contains
#' at least as many species sitting on exactly 2 patches as the base
#' Xu problem has. It can have more than the Xu problem, but not less.
#'
#-------------------------------------------------------------------------------
#' @inheritParams std_param_defns
#' @param starting_dir character string
#' @param rounded_abundances DESCRIPTION.
#' @param Xu_bdprob DESCRIPTION.
#' @param dep_set_PUs_eligible DESCRIPTION.
#' @param tot_num_PUs_in_landscape DESCRIPTION.
#' @param seed_value_for_search_list blah
#' @param allow_imperfect_wrap blah
#' @param search_outfile_name_base character string
#' @param search_outfile_name character string
#' @param wrap_prob_name_stem string
#' @param cor_dir_name_stem string
#' @return Returns a Xu_wrapped_bd_problem
#' @export
#-------------------------------------------------------------------------------
wrap_abundance_dist_around_Xu_problem <- function (starting_dir,
#compute_network_metrics_for_this_prob,
parameters,
rounded_abundances,
Xu_bdprob,
dep_set_PUs_eligible,
tot_num_PUs_in_landscape,
# seed_value_for_search,
seed_value_for_search_list,
allow_imperfect_wrap,
search_outfile_name_base,
search_outfile_name,
wrap_prob_name_stem = "wrap_prob",
cor_dir_name_stem = "cor"
)
{
#------------------------------------------------------------
# Get values for local variables to be used throughout the
# computations in this function.
#------------------------------------------------------------
Xu_nodes = Xu_bdprob@nodes
Xu_dep_set = get_dependent_node_IDs (Xu_nodes)
Xu_dep_and_indep_set = c(get_independent_node_IDs (Xu_nodes), Xu_dep_set)
largest_PU_ID = max (Xu_nodes$node_ID)
# extra_PUs = (Xu_bdprob@num_PUs + 1) : tot_num_PUs_in_landscape
extra_PUs = (largest_PU_ID + 1) : tot_num_PUs_in_landscape
eligible_PUs =
create_eligible_PU_set (Xu_dep_set, extra_PUs, dep_set_PUs_eligible)
#---------------------------------------------------------------------------
#---------------------------------------------------------------------------
#---------------------------------------------------------------------------
#---------------------------------------------------------------------------
#---------------------------------------------------------------------------
#---------------------------------------------------------------
# Given the rounded distribution of species abundances to
# wrap around the given Xu problem, do the wrap to generate a
# table of pairs of indices of PUs and species.
#---------------------------------------------------------------
###2018 02 28 branch### wrapped_PU_spp_indices =
wrapped_PU_spp_indices_list = ###2018 02 28 branch###
wrap_abundances_around_eligible_set (Xu_dep_set,
eligible_PUs,
rounded_abundances,
Xu_bdprob@num_spp, #Xu_tot_num_spp,
Xu_bdprob@PU_spp_pair_indices,
##FixPUsppPairIndices-2018-02-17## Xu_bdprob@cor_PU_spp_pair_indices,
allow_imperfect_wrap,
min_allowed_abundance = 2,
Xu_bdprob@PU_col_name,
Xu_bdprob@spp_col_name)
wrapped_PU_spp_indices = wrapped_PU_spp_indices_list$PU_spp_table ###2018 02 28 branch###
wrap_is_imperfect = wrapped_PU_spp_indices_list$wrap_is_imperfect ###2018 02 28 branch###
#----------------------------------
wrapped_PU_vector = wrapped_PU_spp_indices [, Xu_bdprob@PU_col_name]
unique_wrapped_PUs = sort (unique (wrapped_PU_vector))
#-------------------------------------------------------------------
# Highest PU_ID may not be occupied, so old counts like
# wrapped_highest_PU_ID and wrapped_num_PUs are wrong.
# This is especially important because these are the numbers that
# drive the dimensions of the bpm and costs and
# the length of marxan boolean solution indicator vectors.
#-------------------------------------------------------------------
wrapped_highest_PU_ID = tot_num_PUs_in_landscape #max (unique_wrapped_PUs)
wrapped_num_PUs = tot_num_PUs_in_landscape #length (unique_wrapped_PUs)
if (wrapped_num_PUs != wrapped_highest_PU_ID)
{
cat ("\n\nXu_bdprob@num_PUs = \n", Xu_bdprob@num_PUs)
cat ("\nsetdiff (unique_wrapped_PUs, 1:wrapped_highest_PU_ID) = \n")
print (setdiff (unique_wrapped_PUs, 1:wrapped_highest_PU_ID))
missing_PU_IDs = setdiff (1:wrapped_highest_PU_ID, unique_wrapped_PUs)
cat ("\nMissing ", length (missing_PU_IDs),
" PU_IDs = setdiff (1:wrapped_highest_PU_ID, unique_wrapped_PUs) = \n")
print (setdiff (1:wrapped_highest_PU_ID, unique_wrapped_PUs))
cat ("\n")
message (paste0 ("\n\nwrapped_num_PUs (", wrapped_num_PUs,
") != wrapped_highest_PU_ID (", wrapped_highest_PU_ID,
")")
)
if (min (missing_PU_IDs) > Xu_bdprob@num_PUs)
{
message (paste0 ("\nProbably just didn't draw some PU_IDs in ",
"extra set when placing wrapping species, since ",
"lowest missing PU_ID (", min (missing_PU_IDs),
" is larger than highest PU_ID in base problem (",
Xu_bdprob@num_PUs, ").\n\n"))
} else
{
stop_bdpg (paste0 ("\nlowest missing PU_ID (", min (missing_PU_IDs),
" is <= highest PU_ID in base problem (",
Xu_bdprob@num_PUs, ").\n\n"))
}
}
used_extra_PUs = setdiff (unique_wrapped_PUs, Xu_dep_and_indep_set)
num_used_extra_PUs = length (used_extra_PUs)
# unused_extra_PUs = setdiff (extra_PUs, used_extra_PUs)
num_extra_PUs = length (extra_PUs)
extra_nodes =
data.frame (node_ID = extra_PUs, #used_extra_PUs,
group_ID = rep (NA, num_extra_PUs), #num_used_extra_PUs),
dependent_set_member = rep (FALSE, num_extra_PUs)) #num_used_extra_PUs))
wrapped_nodes = rbind (Xu_bdprob@nodes, extra_nodes)
all_PU_IDs = 1:wrapped_highest_PU_ID
#----------------------------------
cat ("\n\nJust after loading wrapped_nodes:\n")
#browser()
# nodes = data.frame (node_ID = node_IDs,
# group_ID = group_IDs,
# dependent_set_member = dependent_set_members)
#----------------------------------
wrapped_spp_vector = wrapped_PU_spp_indices [, Xu_bdprob@spp_col_name]
unique_wrapped_spp = sort (unique (wrapped_spp_vector))
wrapped_num_spp = length (unique_wrapped_spp)
wrapped_highest_spp_ID = max (unique_wrapped_spp)
if (wrapped_num_spp != wrapped_highest_spp_ID)
{
cat ("\n\nWARNING: wrapped_num_spp (", wrapped_num_spp,
") != wrapped_highest_spp_ID (", wrapped_highest_spp_ID, ")")
cat ("\n setdiff (unique_wrapped_spp, 1:wrapped_highest_spp_ID) = ",
setdiff (unique_wrapped_spp, 1:wrapped_highest_spp_ID))
cat ("\n setdiff (1:wrapped_highest_spp_ID, unique_wrapped_spp) = ",
setdiff (1:wrapped_highest_spp_ID, unique_wrapped_spp))
cat ("\n\n")
}
all_spp_IDs = 1:wrapped_highest_spp_ID
#---------------------------------------------------------------------------
#---------------------------------------------------------------------------
#---------------------------------------------------------------------------
#-----------------------------------------------------------
# Convert PU/spp data structure into other formats needed
# downstream.
#
# NOTE: I'm using wrapped_highest_???_ID instead of
# wrapped_num_??? in this call because I think
# that there may be runs where not all of the
# extra_PUs have something land on them.
# If that's the case, then the bpm adjacency
# matrix might end up being too small if it's
# dimensioned from the counts instead of from the
# highest ID number. Probably won't matter for
# spp, but I'm doing the same for species just
# in case.
#
# 2017 12 15 - BTL
# This reminds me that I need to note and test the
# things I assume to be true about each data
# structure, e.g., using assertions.
#-----------------------------------------------------------
cor_costs_all_equal_1 = vb (parameters$cor_costs_all_equal_1,
def_on_empty = TRUE, def = TRUE)
if (cor_costs_all_equal_1)
{
wrapped_PU_costs = get_default_identical_PU_costs (wrapped_highest_PU_ID) #wrapped_num_PUs)
} else
{
stop_bdpg ("No code for handling case where COR costs are something other than all 1.")
}
wrapped_bpm =
create_adj_matrix_with_spp_rows_vs_PU_cols (
wrapped_highest_spp_ID, #wrapped_num_spp,
wrapped_highest_PU_ID, #wrapped_num_PUs,
wrapped_PU_spp_indices, #wrapped_PU_spp_pair_indices,
wrapped_PU_costs, # only used if verifying solution
Xu_bdprob@spp_col_name,
Xu_bdprob@PU_col_name,
Xu_dep_set,
Xu_bdprob@correct_solution_vector_is_known)
#----------------------------------------------------------------------
# Now have a completed problem, so build the structure describing it
# for return to the caller.
#----------------------------------------------------------------------
wrapped_bdprob = new ("Xu_wrapped_bd_problem")
#------------------------------------------------------------------
# Assign a unique identifier to this newly generated problem.
# These IDs are useful when combining or adding error to
# problems so that you can identify exactly which problems
# were combined or used as a base when provenance might get
# confusing.
# Also save the UUID of the problem that's being wrapped around.
#------------------------------------------------------------------
wrapped_bdprob@UUID = uuid::UUIDgenerate()
cat ("\n\n>>>>> Creating wrapped_bdprob ", wrapped_bdprob@UUID, "\n")
wrapped_bdprob@UUID_of_base_problem_that_is_wrapped = Xu_bdprob@UUID
wrapped_bdprob@prob_is_ok = FALSE
wrapped_bdprob@rand_seed = seed_value_for_search_list$seed_value
cat ("\n@@@TRACKING rand_seed in wrap_abundance_dist_around_Xu_problem:: wrapped_bdprob@rand_seed = ", wrapped_bdprob@rand_seed, "\n")
wrapped_bdprob@R_internal_seed_array = seed_value_for_search_list$R_internal_seed_array
#----------
wrapped_bdprob@allow_imperfect_wrap = allow_imperfect_wrap
wrapped_bdprob@wrap_is_imperfect = wrap_is_imperfect
#*****************************************************************************************
# Possible bug (2017 02 09):
#
# THIS SECTION MAY BE AN ISSUE SINCE IT'S ABOUT SAVING THE PROBLEM GENERATION
# PARAMETERS IF THEY'RE KNOWN.
# IF WRAPPING AROUND A XU PROBLEM GENERATED FROM SCRATCH, THEN THOSE PARS ARE KNOWN.
# IF WRAPPING AROUND A XU PROBLEM READ FROM A XU BENCHMARK FILE, THEY'RE NOT KNOWN.
# OTHER GENERATION PARAMETERS THAT WE MIGHT WANT TO KNOW ARE THINGS ABOUT THE WRAPPING,
# E.G., WAS A LOGNORMAL THE WRAPPER AND IF SO, WHAT WERE ITS PARAMETERS?
# THIS ALL SUGGESTS THAT I PROBABLY NEED TO CREATE ANOTHER PROB_GEN_INFO CLASS.
# For the moment, I'll just copy in the entries from the problem that's being wrapped.
#*****************************************************************************************
# Replace old Xu information with an indirection.
# wrapped_bdprob@read_Xu_problem_from_Xu_file = Xu_bdprob@read_Xu_problem_from_Xu_file
# wrapped_bdprob@Xu_parameters = Xu_bdprob@Xu_parameters
### Is this even necessary? Is it ever used?
wrapped_bdprob@prob_type = Xu_bdprob@prob_type # "Xu_prob_gen_info_class"
### Should this be base_prob_gen_info instead of prob_gen_info?
wrapped_bdprob@prob_gen_info = Xu_bdprob@prob_gen_info
### Make a new class for wrap_gen_info?
### Also, need to add this slot to the wrap class definition itself?
### Would this ever be used? Maybe in generation of problem features in g15?
### wrapped_bdprob@wrap_gen_info = Xu_bdprob@wrap_gen_info
#*****************************************************************************************
wrapped_bdprob@prob_generator_params_known = Xu_bdprob@prob_generator_params_known
wrapped_bdprob@correct_solution_vector_is_known = Xu_bdprob@correct_solution_vector_is_known
wrapped_bdprob@dependent_node_IDs = Xu_bdprob@dependent_node_IDs # Added 2019 01 01 - BTL
##FixPUsppPairIndices-2018-02-17## wrapped_bdprob@cor_PU_spp_pair_indices = wrapped_PU_spp_indices
wrapped_bdprob@PU_spp_pair_indices = wrapped_PU_spp_indices
wrapped_bdprob@all_PU_IDs = all_PU_IDs
wrapped_bdprob@all_spp_IDs = all_spp_IDs
wrapped_bdprob@PU_col_name = Xu_bdprob@PU_col_name
wrapped_bdprob@spp_col_name = Xu_bdprob@spp_col_name
wrapped_bdprob@num_PUs = wrapped_num_PUs
wrapped_bdprob@num_spp = wrapped_num_spp
#-----------------------------------------------------------------------
# Since the problem is wrapped around the Xu problem,
# the correct optimum cost is the same as for the Xu problem.
# The vector of costs is longer though, since there are more PUs
# in the wrapped problem.
# At the moment, those PU costs are all dummied to 1 anyway.
# If that ever changes, then get_default_identical_PU_costs() is
# going to have to change.
#-----------------------------------------------------------------------
wrapped_bdprob@correct_solution_cost = Xu_bdprob@correct_solution_cost #correct_solution_cost
wrapped_bdprob@PU_costs = wrapped_PU_costs
wrapped_bdprob@nodes = wrapped_nodes
wrapped_bdprob@bpm = wrapped_bpm
#-------------------------------------------------------------
# Quit if there are any duplicate edges/spp in the problem.
#
# NOTE: 2017 02 07 - BTL
# This was done for the basic Xu problem, but should
# it still be done for the wrapped problem.
# I don't think it was necessary for correctness
# in the base problem, more likely for increasing
# the difficulty.
# I'll leave the check in here for now, but may want
# to change this later or else change the underlying
# algorithm that generates the wrapping so that it
# doesn't generate any duplicates if it is
# generating them now.
#-------------------------------------------------------------
# 2017 02 14 - BTL
# This started crashing the whole R session when it found 1
# duplicate link, so I'm commenting it out for now.
#-------------------------------------------------------------
# see_if_there_are_any_duplicate_links (wrapped_bpm,
# wrapped_bdprob@num_spp)
#----------------------------------------
# Create directories for this problem.
#----------------------------------------
wrapped_bdprob@obj_type_str = "RSprob"
wrapped_bdprob@cor_or_app_str = Xu_bdprob@cor_or_app_str
wrapped_bdprob@basic_or_wrapped_or_comb_str = "Wrap"
wrapped_bdprob@file_name_prefix =
paste (wrapped_bdprob@obj_type_str,
wrapped_bdprob@cor_or_app_str,
wrapped_bdprob@basic_or_wrapped_or_comb_str,
sep='-')
create_RSprob_dir_and_subdirs (starting_dir, # parameters$fullOutputDir_NO_slash, # usually parameters$fullOutputDir_NO_slash
wrapped_bdprob)
#-----------------------------------------------------------------
# Tracking of details of the search for a wrapping distribution
# were written to a file in the top directory of the tzar run
# before the final directory for the wrapped problem was known.
# Move search tracking file to the newly created dir of the
# wrapped problem.
#-----------------------------------------------------------------
if (file.exists (search_outfile_name))
{
file.rename (search_outfile_name,
file.path (get_RSprob_path_topdir (wrapped_bdprob,
starting_dir),
search_outfile_name_base))
}
#-----------------------------------------------------------------
# Compute and save the distribution and network metrics for the
# problem.
#-----------------------------------------------------------------
# Summarize and plot graph and distribution structure information.
wrapped_bdprob@final_link_counts_for_each_node =
summarize_and_plot_graph_and_distribution_structure_information (
##FixPUsppPairIndices-2018-02-17## wrapped_bdprob@cor_PU_spp_pair_indices,
wrapped_bdprob@PU_spp_pair_indices,
"COR",
wrapped_bdprob@all_PU_IDs, #####!!!!!#####all_correct_node_IDs,
get_RSprob_path_plots (wrapped_bdprob, starting_dir),
wrapped_bdprob@spp_col_name,
wrapped_bdprob@PU_col_name,
wrapped_bdprob@presences_col_name
)
#----------------------------
# Compute network metrics.
#----------------------------
wrapped_bdprob =
init_object_graph_data (
wrapped_bdprob,
starting_dir,
parameters$compute_network_metrics,
parameters$compute_network_metrics_wrapped_COR,
parameters$use_igraph_metrics,
parameters$use_bipartite_metrics,
parameters$bipartite_metrics_to_use)
wrapped_bdprob@combined_err_label = "01-NO_err"
#------------------------------------------------------------
# Everything seems to have worked.
# Save the bdprob to disk as a first cut at how to archive
# and retrieve problems in general.
# This particular bit of code may disappear later on, once
# it's clearer how to archive.
#------------------------------------------------------------
wrapped_bdprob@prob_is_ok = TRUE
wrapped_bdprob = save_rsprob (wrapped_bdprob, starting_dir)
save_rsprob_results_data (wrapped_bdprob, starting_dir, parameters$run_id)
return (wrapped_bdprob) # end function - wrap_abundance_dist_around_Xu_problem
}
#===============================================================================
#-------------------------------------------------------------------------------
#' Generate a correct wrapped Xu biodiversity problem from a correct base problem
#'
#' Wrap a given distribution around a given Xu problem's distribution.
#' That is, add more planning units and species to the flat Xu distribution's
#' set of PUs and species so that the Xu distribution is a proper subset of
#' the larger distribution.
#'
#' This is intended as a way of embedding the Xu problem's unrealistic
#' species distribution (i.e., every species occurs on exactly 2 patches)
#' inside a more realistic distribution, but one that has exactly the same
#' correct solution set as the base Xu problem.
#'
#'@section Restrictions on wrapping distribution:
#' At the moment, the only wrapping distribution that there is code for
#' generating is the lognormal distribution. However, the basic idea allows
#' for ANY distribution where the base set of Xu species and planning units
#' is a proper subset of the final distribution.
#'
#' To enhance the capabilities of this routine to allow other distributions,
#' you would just have to
#'
#' \itemize{
#' \item{provide some kind of option(s) to choose what kind
#' of wrapping distribution to use}
#' \item{provide a function to generate that wrapping distribution}
#' \item{replace the call to find_lognormal_to_wrap_around_Xu() with
#' the new function}
#'}
#'
#' One caveat is that all species that occur on one and only one planning unit
#' are removed from the distribution. This is because those species would
#' automatically require their planning unit to be included in the final
#' solution and therefore, make the problem simpler for the optimizer.
#'
#-------------------------------------------------------------------------------
#' @inheritParams std_param_defns
#' @param starting_dir character string
#' @param base_bdprob a correct Xu_bd_problem whose correct solution vector is
#' known
#'
#' @return Returns a correct wrapped biodiversity problem
#' @export
#' @importFrom assertthat assert_that
#-------------------------------------------------------------------------------
gen_wrapped_bdprob_COR <- function (starting_dir,
#compute_network_metrics_for_this_prob,
parameters,
base_bdprob)
{
if (options_are_legal_for_single_bdprob_WRAP (parameters))
{
#---------------------------------
# Control parameters from user.
#---------------------------------
desired_Xu_spp_frac_of_all_spp = parameters$desired_Xu_spp_frac_of_all_spp
solution_frac_of_landscape = parameters$solution_frac_of_landscape
desired_max_abundance_frac = parameters$desired_max_abundance_frac
dep_set_PUs_eligible = value_or_FALSE_if_null (parameters$dep_set_PUs_eligible)
add_one_to_lognormal_abundances = value_or_FALSE_if_null (parameters$add_one_to_lognormal_abundances)
max_search_iterations = parameters$max_search_iterations
#-----------------------
# Derived parameters.
#-----------------------
# Choose total landscape size so that correct solution size takes
# up the fraction of the landscape specified in the related input
# parameter.
num_nodes_in_base_solution = base_bdprob@prob_gen_info@Xu_parameters@derived_params@num_dependent_set_nodes
tot_num_PUs_in_landscape = round (num_nodes_in_base_solution /
solution_frac_of_landscape)
# For now, verify that the fraction is correct rather than
# making an external test in testthat since the calculation
# was changed to fix a bug (commit 58418956 - 2021 05 23).
actual_solution_frac_of_landscape =
round (num_nodes_in_base_solution / tot_num_PUs_in_landscape, 2)
cat ("\nactual_solution_frac_of_landscape = ", actual_solution_frac_of_landscape,
", desired solution_frac_of_landscape = ", solution_frac_of_landscape, sep='')
# assert_that (actual_solution_frac_of_landscape == solution_frac_of_landscape)
cat ("\n\n================================================================================\n\n")
cat ("\n=====> In gen_wrapped_bdprob_COR")
cat ("\n\n----------------------------------------------------------------------\n\n")
cat ("\n\n=====> base_bdprob = \n")
print (base_bdprob)
cat ("\n\n----------------------------------------------------------------------\n\n")
cat ("\n\n=====> base_bdprob@prob_gen_info = \n")
print (base_bdprob@prob_gen_info)
cat ("\n\n----------------------------------------------------------------------\n\n")
cat ("\n\n=====> base_bdprob@prob_gen_info@Xu_parameters = \n")
print (base_bdprob@prob_gen_info@Xu_parameters)
cat ("\n\n----------------------------------------------------------------------\n\n")
cat ("\n\n=====> base_bdprob@prob_gen_info@Xu_parameters@derived_params = \n")
print (base_bdprob@prob_gen_info@Xu_parameters@derived_params)
cat ("\n\n----------------------------------------------------------------------\n\n")
cat ("\n\n=====> base_bdprob@prob_gen_info@Xu_parameters@derived_params@num_dependent_set_nodes = \n")
print (base_bdprob@prob_gen_info@Xu_parameters@derived_params@num_dependent_set_nodes)
cat ("\n\n----------------------------------------------------------------------\n\n")
cat ("\n=====> num_nodes_in_base_solution = ", num_nodes_in_base_solution)
cat ("\n=====> tot_num_PUs_in_landscape = ", tot_num_PUs_in_landscape)
cat ("\n=====> actual_solution_frac_of_landscape = ", actual_solution_frac_of_landscape)
cat ("\n=====> solution_frac_of_landscape = ", solution_frac_of_landscape)
stopifnot (actual_solution_frac_of_landscape == solution_frac_of_landscape)
cat ("\n\n================================================================================\n\n")
search_outfile_name_base = "wrap_search_outfile.csv"
search_outfile_name = file.path (starting_dir,
search_outfile_name_base)
#-----------------------------------------------------
# Set random seed if parameters specify doing that.
#-----------------------------------------------------
seed_value_for_search_list =
set_new_or_forced_rand_seed_if_necessary (is_rsrun = FALSE,
is_rsprob = TRUE,
parameters,
cor_or_app_str = "COR",
basic_or_wrapped_or_comb_str = "WRAP",
location_string = "Start of wrap_abundance_dist_around_Xu_problem(),COR,WRAP")
cat ("\n@@@TRACKING rand_seed in gen_wrapped_bdprob_COR:: seed_value_for_search_list$seed_value = ", seed_value_for_search_list$seed_value, "\n")
#-----------------------------------------------------------
# Search for a set of lognormal parameters that fit the
# user's constraints while including the base Xu problem.
#-----------------------------------------------------------
rounded_abundances =
find_lognormal_to_wrap_around_Xu (base_bdprob,
parameters,
desired_Xu_spp_frac_of_all_spp,
solution_frac_of_landscape,
desired_max_abundance_frac,
# seed_value_for_search,
seed_value_for_search_list$seed_value,
max_search_iterations,
add_one_to_lognormal_abundances,
search_outfile_name)
#--------------------------------------------------------------
# Wrap the generated lognormal around the Xu base problem.
#
# Note that the wrap_abundance_dist_around_Xu_problem()
# function doesn't care where you got the abundances, i.e.,
# they don't have to have come from the lognormal generator.
#
# It can be any abundance set that you want, as long as it
# contains at least as many species sitting on exactly
# 2 patches as the base Xu problem has.
#
# It can have more species that sit on exactly 2 patches
# than the base Xu problem, but not less.
#--------------------------------------------------------------
wrapped_bdprob_COR =
wrap_abundance_dist_around_Xu_problem (starting_dir,
#compute_network_metrics_for_this_prob,
#parameters$compute_network_metrics_wrapped_COR,
parameters,
rounded_abundances,
base_bdprob,
dep_set_PUs_eligible,
tot_num_PUs_in_landscape,
# seed_value_for_search,
seed_value_for_search_list,
value_or_FALSE_if_null (parameters$allow_imperfect_wrap),
search_outfile_name_base,
search_outfile_name)
} # end if - options_are_legal_for_single_bdprob_WRAP
return (wrapped_bdprob_COR)
}
#===============================================================================
#-------------------------------------------------------------------------------
#' Check whether options for wrapping have legal values
#'
#-------------------------------------------------------------------------------
#' @inheritParams std_param_defns
#'
#' @return boolean with TRUE if options are legal and FALSE otherwise
#-------------------------------------------------------------------------------
options_are_legal_for_single_bdprob_WRAP <- function (parameters)
{
#----------------------------------------------------------------------
# Make sure that the base problem for the multiproblem is not one of
# Xu's benchmark problems read in from a file, since they do not
# contain the correct solution set. They only contain the correct
# solution cost.
#----------------------------------------------------------------------
if (value_or_FALSE_if_null (parameters$read_Xu_problem_from_Xu_bench_file))
{
stop_bdpg (paste0 ("\n\nParameter read_Xu_problem_from_Xu_file is TRUE.",
"\nCannot wrap around Xu problem read from file ",
"because correct solution IDs ",
"\nare not given with the file.",
"\nOnly the correct cost is given.",
"\nQuitting.\n\n")
)
}
#----------------------------------------------------------------------
# Base problem is not a Xu benchmark problem, so try to do the wrap
# now.
# At the moment, the only kind of wrap that's available is the
# lognormal, so check to make sure that is the type that has been
# requested. If not, then fail. Otherwise, do the lognormal wrap
# now.
#----------------------------------------------------------------------
if (! value_or_FALSE_if_null (parameters$wrap_lognormal_dist_around_Xu))
{
#-------------------------------------------------------------
# Wrap request is not for a lognormal distribution, so fail.
#-------------------------------------------------------------
stop_bdpg (paste0 ("\n\nwrap_lognormal_dist_around_Xu is not set to TRUE. ",
"\n It is currently the only defined wrap function.\n")
)
}
return (TRUE)
}
#===============================================================================
#-------------------------------------------------------------------------------
#' Generate a single wrapped biodiversity problem from a given base problem
#'
#-------------------------------------------------------------------------------
#' @inheritParams std_param_defns
#' @param bdprob_to_wrap a correct Xu_bd_problem whose correct solution vector is
#' known
#'
#' @return Returns a correct wrapped biodiversity problem
#' @export
#-------------------------------------------------------------------------------
gen_single_bdprob_WRAP <- function (bdprob_to_wrap, parameters, starting_dir)
{
# starting_dir =
# file.path (normalizePath (parameters$full_output_dir_with_slash))
# compute_network_metrics_for_this_prob =
# value_or_FALSE_if_null (parameters$compute_network_metrics_wrapped_COR)
WRAP_prob =
gen_wrapped_bdprob_COR (starting_dir,
#compute_network_metrics_for_this_prob,
parameters,
bdprob_to_wrap)
return (WRAP_prob)
}
#===============================================================================
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.