#===============================================================================
#
# gen_lognormal_overlay.R
#
# Functions to generate a vector of counts of PUs to be occupied by a set
# of species so that the distribution of counts is roughly lognormal.
# The intention is to add these species to the species already generated
# by the Xu problem generator so that the resulting combined distribution
# is roughly lognormal.
#
# When the distributions are specified here, they are to be given the number
# of species that have been generated by the Xu problem generator as a
# count of the number of species to occur on exactly 2 PUs. The generator
# here will try to build a lognormal distributions of counts that contains
# that given number of species occurring on exactly 2 PUs. Then, when this
# distribution is returned to the problem generator, that code can add
# occurrences for all of the other species in the distribution that occur
# on some number of PUs other than 2 PUs and use the existing Xu species
# for the ones generated here as occurring on exactly 2 PUs.
#
# This generator is guaranteed not to return less than the given number
# of "species on exactly 2 PUs" species. However, it's possible that
# it will return more than that. This is not a problem though, because
# these extra species that occur on exactly 2 PUs can be treated like any
# of the other species that occur on some other PU count than 2.
#
#-------------------------------------------------------------------------------
#
# Various Rmd files of notes generated for exploring lognormal to build the
# functions in this file are in:
# .../ProblemDifficulty/ProbDiff_Notes/LognormalAbundanceDistributionBuilding
#
#-------------------------------------------------------------------------------
#
# History
# - 2016 04 05 - Created - BTL.
#
#===============================================================================
#===============================================================================
# Hack to quiet CHECK command for data frame column names that CHECK flags
# as "no visible binding for global variable".
# This is the officially sanctioned hack for doing this, e.g., see
# https://github.com/STAT545-UBC/Discussion/issues/451
# https://github.com/tidyverse/magrittr/issues/29
# http://r.789695.n4.nabble.com/globalVariables-td4593980.html
if(getRversion() >= "2.15.1") utils::globalVariables(c("x"))
#===============================================================================
#-------------------------------------------------------------------------------
# plot_hist_and_normal_curve_for_sampled_lognormal_data ()
#
# Function to plot a histogram for a sampled lognormal distribution and
# then lay over it, the curve of the normal distribution that it is
# approximating. (Since it's a lognormal distribution, taking the
# log of the sampled points should produce a normal distribution.)
#
#-------------------------------------------------------------------------------
plot_hist_and_normal_curve_for_sampled_lognormal_data =
function (rounded_abundances, meanlog, sdlog)
{
# Plot histogram of sampled lognormal data.
hist (log (rounded_abundances), freq=FALSE, breaks=20)
# Overlay a curve for the normal distribution
# that the sample should approximate.
curve (dnorm (x, mean=meanlog, sd=sdlog), add=TRUE, lwd=2)
plot (sort(rounded_abundances, decreasing=TRUE))
} # end function - plot_hist_and_normal_curve_for_sampled_lognormal_data ()
#===============================================================================
#-------------------------------------------------------------------------------
#' Generate rounded abundances
#'
#' Function to generate a sampled lognormal distribution whose values are
#' rounded to produce a vector of counts of the number of PUs that each
#' species will occupy.
#'
#' No zero counts are allowed, so any species that was assigned a 0 count in
#' the lognormal generation is removed from the returned vector.
#' This means that the number of species returned may be less than the value
#' of tot_num_spp_after_wrapping specified in the inputs.
#'
#-------------------------------------------------------------------------------
#' @param tot_num_spp_after_wrapping integer
#' @param meanlog numeric
#' @param sdlog numeric
#' @param add_one_to_abundances boolean
#' @param plot_rounded_abundances boolean
#'
#' @return Returns a vector with one count for each species
#-------------------------------------------------------------------------------
gen_rounded_abundances = function (tot_num_spp_after_wrapping, meanlog, sdlog,
add_one_to_abundances,
plot_rounded_abundances=FALSE)
{
# Note that "abundance" is used here to mean the number of PUs
# that a species occurs on.
abundances = rlnorm (tot_num_spp_after_wrapping, meanlog = meanlog, sdlog = sdlog)
rounded_abundances = round (abundances) # Returns a vector.
#browser()
#---------------------------------------------------------------------
# rlnorm() often generates a huge number of species that occur on
# just 1 patch, which will probably make the problems much easier
# since every patch containing a species that only occurs once will
# have to be included in the solution.
# So, give the option to add one to all of the patch counts as a
# way of shifting the distribution slightly.
#---------------------------------------------------------------------
if (add_one_to_abundances)
rounded_abundances = rounded_abundances + 1
num_spp_on_exactly_2_patches = length (which (rounded_abundances == 2))
max_abundance_ct = max (rounded_abundances)
abundance_data = list()
abundance_data$rounded_abundances = rounded_abundances
abundance_data$num_spp_on_exactly_2_patches = num_spp_on_exactly_2_patches
abundance_data$max_abundance_ct = max_abundance_ct
if (plot_rounded_abundances)
plot_hist_and_normal_curve_for_sampled_lognormal_data (rounded_abundances,
meanlog,
sdlog)
return (abundance_data)
} # end function - gen_rounded_abundances ()
#===============================================================================
#
#-------------------------------------------------------------------------------
# gen_lognormal ()
#
# No longer used?
#
# Function to repeatedly generate a sampled lognormal distribution
# whose values are within specified bounds in terms of the largest
# element value (max_max_abundance) and the maximum allowed number
# of individual elements whose value is exactly 2 (max_frac_spp_on_2_PUs).
#
#-------------------------------------------------------------------------------
gen_lognormal = function (tot_num_spp_after_wrapping,
max_frac_spp_on_2_PUs, max_max_abundance,
min_meanlog = 0.5, max_meanlog = 1.5,
min_sdlog = 0.5, max_sdlog = 1.0,
add_one_to_abundances,
max_iterations
)
{
cat ("\nAt start of gen_lognormal()",
": max_frac_spp_on_2_PUs = ", max_frac_spp_on_2_PUs,
", max_max_abundance = ", max_max_abundance,
"\n")
frac_of_spp_that_are_on_2_PUs = .Machine$double.xmax #1000000 #Inf
max_abundance_ct = .Machine$double.xmax #1000000 #Inf
cur_iteration = 0
#-----------------------------------------------------------------------
# Draw abundance distributions to try to find a distribution where:
# - frac_of_spp_that_are_on_2_PUs <= max_frac_spp_on_2_PUs
# AND
# - max_abundance_ct <= max_abundance_ct
# May not be possible, so also set a limit on the number of iterations
# of trying.
#-----------------------------------------------------------------------
while ((frac_of_spp_that_are_on_2_PUs > max_frac_spp_on_2_PUs) |
(max_abundance_ct > max_max_abundance))
{
cur_iteration = cur_iteration + 1
cat ("\n\n================================================================================",
"\n\niteration: ", cur_iteration, "\n\n")
if (cur_iteration > max_iterations)
stop_bdpg (paste0 ("\n\ngen_lognormal() exceeded max_iterations = ",
max_iterations,
" without finding a distribution satisfying: \n",
"\tfrac_of_spp_that_are_on_2_PUs <= max_frac_spp_on_2_PUs = ",
max_frac_spp_on_2_PUs,
"\tmax_abundance_ct <= max_abundance_ct = ", max_abundance_ct,
"\n\n"
))
# Randomly draw a new meanlog and sdlog and try them as the basis
# of a lognormal distribution.
meanlog = runif (1, min=min_meanlog, max=max_meanlog)
sdlog = runif (1, min=min_sdlog, max=max_sdlog)
abundance_data = gen_rounded_abundances (tot_num_spp_after_wrapping,
meanlog, sdlog,
add_one_to_abundances,
plot_rounded_abundances=FALSE)
rounded_abundances = abundance_data$rounded_abundances
num_spp_on_exactly_2_patches = abundance_data$num_spp_on_exactly_2_patches
max_abundance_ct = abundance_data$max_abundance_ct
sorted_rounded_abundances = sort (rounded_abundances, decreasing=TRUE) # Returns a vector.
cat ("\n\nsorted_rounded_abundances = \n")
print (sorted_rounded_abundances)
# 2016 06 21 - BTL
# Have moved this trimming of 0's into a new function trim_abundances()
# that takes care of trimming in a more general way that also allows
# trimming of 1's and trimming of species that are too abundant.
# nonexistant_spp = which (rounded_abundances == 0)
# num_spp_on_0_patches = length (nonexistant_spp)
#
# frac_of_spp_that_are_on_2_PUs =
# num_spp_on_exactly_2_patches / (tot_num_spp_after_wrapping - num_spp_on_0_patches)
#
# cat ("\n\nnum_spp_on_exactly_2_patches = ", num_spp_on_exactly_2_patches,
# "\nnum_spp_on_0_patches = ", num_spp_on_0_patches,
# "\nfrac_of_spp_that_are_on_2_PUs = ", frac_of_spp_that_are_on_2_PUs,
# "\nmax_abundance_ct = ", max_abundance_ct,
# "\nnonexistant_spp = \n")
# print (nonexistant_spp)
# cat ("\n")
} # end while - good distribution not found
cat ("\n\n")
# 2016 06 21 - BTL
# see comment above about trimming...
# if (num_spp_on_0_patches > 0)
# {
# rounded_abundances_without_nonexistant_spp = rounded_abundances [-nonexistant_spp]
# } else
# {
# rounded_abundances_without_nonexistant_spp = rounded_abundances
# }
# return (rounded_abundances_without_nonexistant_spp)
return (rounded_abundances)
} # end function - gen_lognormal ()
#===============================================================================
#
calculate_mu <- function (num_PUs_per_spp_ie_rarity, num_spp_with_given_num_PUs, sigma, plusOrMinus1)
{
cat ("\n\nIn calculate_mu(): num_PUs_per_spp_ie_rarity = ", num_PUs_per_spp_ie_rarity,
", num_spp_with_given_num_PUs = ", num_spp_with_given_num_PUs,
", sigma = ", sigma,
", plusOrMinus1 = ", plusOrMinus1,
"\n\n")
error_string = ""
if (num_PUs_per_spp_ie_rarity < 1)
error_string <-
paste0 (error_string, "num_PUs_per_spp_ie_rarity = ", num_PUs_per_spp_ie_rarity,
". Must be >= 1.\n")
if (num_spp_with_given_num_PUs < 1)
error_string <-
paste0 (error_string, "num_spp_with_given_num_PUs = ",
num_spp_with_given_num_PUs,
". Must be >= 1.\n")
sigma_lower_bound <-
0.398942 / (num_spp_with_given_num_PUs * num_PUs_per_spp_ie_rarity)
if (sigma <= sigma_lower_bound)
error_string <-
paste0 (error_string, "sigma = ", sigma,
". Must be > sigma_lower_bound = ", sigma_lower_bound,
"\n")
if (!((plusOrMinus1 == 1) | (plusOrMinus1 == -1)))
error_string <-
paste0 (error_string, "plusOrMinus1 = ",
plusOrMinus1, ". Must = 1 or -1.\n")
if (nchar (error_string) > 0) stop_bdpg (error_string) else
{
return (log (num_PUs_per_spp_ie_rarity) +
plusOrMinus1 * 1.41421 *
sqrt ( (sigma^2) *
log (2.50663 *
num_spp_with_given_num_PUs *
num_PUs_per_spp_ie_rarity*sigma
)
)
)
}
}
#===============================================================================
#
# EF ()
#
# Evaluation function (EF) to be used by an optimizer to search for a
# (meanlog, sdlog) pair that generates a lognormal distribution as close as
# possible to the specified number of species on 2 PUs and largest abundance
# count.
#
# The constraints are as follows:
#
# - number of species occurring on exactly 2 PUs must be >= to the
# number given in min_num_spp_on_2_PUs. It is not allowed to be
# less than that min.
# - The minimum used is the number of species in the original
# Xu problem.
# - The distance is calculated as:
# (num_spp_on_exactly_2_patches - min_num_spp_on_2_PUs)
# -----------------------------------------------------
# min_num_spp_on_2_PUs
#
# - the species in the generated lognormal distribution that occurs on
# the highest number of patches is
# - less than or equal to the specified max_max_abundance value
# - as close as possible to the target_max_abundance_ct
# - It is allowed to be less than, greater than, or equal to the
# specified target, but it is penalized proportionally to
# its distance from that target.
# - The distance is calculated as:
# abs (max_abundance_ct - target_max_abundance_ct)
# ------------------------------------------------
# target_max_abundance_ct
#
# The EF values each of the two constraints equally by minimizing the
# maximum of the two values, so it can't get a good score by focusing
# on just one of the criteria.
#
#
#--------------------------------------------------------------------
# It may be useful to keep track of the values visited during the
# optimization so that they can be saved for possible later use as
# part of a big lookup table in place of running an optimizer.
# So, allow specifying an output file (which will later by specified
# in the yaml file) and the column headings for the file.
# If the value given for the outfile is NULL, however, the values
# will not be written out.
#--------------------------------------------------------------------
#===============================================================================
#-------------------------------------------------------------------------------
#' Evaluation Function to be passed to optim()
#'
#'Note that the "Local Variable Structures and examples" section below was
#'not labelled as belonging to a function "EF". It was labelled as belonging
#'to a function "fn". I'm assuming that the variables actually belong to EF
#'because "fn" is the argument name that EF gets assigned to when passed to
#'optim().
#'
#-------------------------------------------------------------------------------
#' @param seed_value integer
#' @param tot_num_spp_after_wrapping integer
#' @param min_num_spp_on_2_PUs integer
#' @param max_max_abundance integer
#' @param target_max_abundance_ct integer
#' @param add_one_to_abundances boolean
#' @param outfile character string
#' @param mean_sd_pair 2 element numeric vector containing the mean and
#' standard deviation to use for the lognormal
#'
#' @return Returns numeric score from evaluating the given lognormal; the
#' closer to 0 the better
#' @export
#-------------------------------------------------------------------------------
EF <- function (
seed_value,
tot_num_spp_after_wrapping,
min_num_spp_on_2_PUs,
max_max_abundance,
target_max_abundance_ct,
add_one_to_abundances,
outfile,
mean_sd_pair)
{
cat ("\n\nStarting EF:",
"\n seed_value = ", seed_value,
"\n tot_num_spp_after_wrapping = ", tot_num_spp_after_wrapping,
"\n min_num_spp_on_2_PUs = ", min_num_spp_on_2_PUs,
"\n max_max_abundance = ", max_max_abundance,
"\n target_max_abundance_ct = ", target_max_abundance_ct,
"\n add_one_to_abundances = ", add_one_to_abundances,
"\n mean_sd_pair [1] = ", mean_sd_pair [1],
"\n mean_sd_pair [2] = ", mean_sd_pair [2],
"\n", sep='')
#set.seed (seed_value)
abundance_data = gen_rounded_abundances (tot_num_spp_after_wrapping,
mean_sd_pair [1],
mean_sd_pair [2],
add_one_to_abundances,
plot_rounded_abundances=FALSE)
num_spp_on_exactly_2_patches = abundance_data$num_spp_on_exactly_2_patches
max_abundance_ct = abundance_data$max_abundance_ct
# cat ("\n\n num_spp_on_exactly_2_patches = ", num_spp_on_exactly_2_patches, sep='')
# cat ("\n max_abundance_ct = ", max_abundance_ct, sep='')
# cat ("\n target_max_abundance_ct = ", target_max_abundance_ct, sep='')
warn_on_constraint_violation = TRUE
if (warn_on_constraint_violation)
{
if (num_spp_on_exactly_2_patches < min_num_spp_on_2_PUs)
{
cat ("\n\n EF returning large value due to constraint violation ",
"in lognormal generator: ",
"\n num_spp_on_exactly_2_patches (", num_spp_on_exactly_2_patches,
") < min_num_spp_on_2_PUs (", min_num_spp_on_2_PUs, ")\n", sep='')
}
if (max_abundance_ct > max_max_abundance)
{
cat ("\n\n EF returning large value due to constraint violation ",
"in lognormal generator: ",
"\n max_abundance_ct (", max_abundance_ct,
") > max_max_abundance (", max_max_abundance, ")\n", sep='')
}
}
double_diff_penalty_exponent = 5 # Should be an input option.
double_diff = abs (num_spp_on_exactly_2_patches - min_num_spp_on_2_PUs)
if (num_spp_on_exactly_2_patches < min_num_spp_on_2_PUs) # More heavily penalize undershooting num on 2 patches
{
# Adding 1 to the double_diff because if double_diff has
# an absolute value of 1, raising it to a power will not
# change its value.
double_diff_penalty = (double_diff + 1) ^ double_diff_penalty_exponent
double_diff = double_diff + double_diff_penalty
}
max_abundance_penalty_exponent = 2 # Should be an input option.
max_abundance_diff = abs (max_abundance_ct - target_max_abundance_ct)
if (max_abundance_ct > max_max_abundance)
{
max_abundance_penalty =
(max_abundance_ct - max_max_abundance) ^
max_abundance_penalty_exponent
max_abundance_diff = max_abundance_diff + max_abundance_penalty
}
double_frac = double_diff / min_num_spp_on_2_PUs
max_abundance_frac = max_abundance_diff / target_max_abundance_ct
# cat ("\n double_frac = ", double_frac, sep='')
# cat ("\n max_abundance_frac = ", max_abundance_frac, sep='')
score = max (double_frac, max_abundance_frac)
# if ((num_spp_on_exactly_2_patches < min_num_spp_on_2_PUs) |
# (max_abundance_ct > max_max_abundance))
# {
# score = .Machine$double.xmax #1000000 #Inf
#
# }
# if ((num_spp_on_exactly_2_patches < min_num_spp_on_2_PUs) |
# (max_abundance_ct > max_max_abundance))
# {
# score = .Machine$double.xmax #1000000 #Inf
#
# } else
# {
# double_diff = num_spp_on_exactly_2_patches - min_num_spp_on_2_PUs
# max_abundance_diff = abs (max_abundance_ct - target_max_abundance_ct)
#
# # cat ("\n double_diff = ", double_diff, sep='')
# # cat ("\n max_abundance_diff = ", max_abundance_diff, sep='')
#
# double_frac = double_diff / min_num_spp_on_2_PUs
# max_abundance_frac = max_abundance_diff / target_max_abundance_ct
#
# # cat ("\n double_frac = ", double_frac, sep='')
# # cat ("\n max_abundance_frac = ", max_abundance_frac, sep='')
#
# score = max (double_frac, max_abundance_frac)
# }
# Keep track of search iteration number.
# Doing this through a global variable since it's hard or impossible
# to update its value on each EF call inside the generic R optimizer
# function.
# EF_num <<- EF_num + 1
EF_num = getOption ("bdpg.EF_num") + 1
options (bdpg.EF_num = EF_num)
# If echoing EF value to an output file, do it now.
if (! is.null (outfile))
{
out_record = paste (EF_num,
score,
mean_sd_pair [1],
mean_sd_pair [2],
double_frac,
max_abundance_frac,
min_num_spp_on_2_PUs,
target_max_abundance_ct,
max_max_abundance,
seed_value,
tot_num_spp_after_wrapping,
add_one_to_abundances,
sep=',')
cat (out_record, file=outfile, sep="\n", append=TRUE)
}
cat ("\nEF RETVAL ", EF_num, ": ", score, sep='')
return (score)
} # end function - EF ()
#===============================================================================
#-------------------------------------------------------------------------------
#' Search for approximating lognormal
#'
#' Function to search for a lognormal that comes close to meeting the
#' constraints specified in the argument list, i.e.,
#' \enumerate{
#' \item{to generate roughly the given number of species but never more
#' than the given number (tot_num_spp_after_wrapping)}
#' \item{to come close to the given number of species that occur on
#' exactly 2 PUs without ever falling below that minimum value
#' (min_num_spp_on_2_PUs)}
#' \item{to have the most abundant generated species have roughly
#' the target value (target_max_abundance_ct), but never exceed
#' the given max value (max_max_abundance).}
#'}
#'
#-------------------------------------------------------------------------------
#' @param seed_value random seed to pass to optim function
#' @param tot_num_spp_after_wrapping number of species to generate in distribution
#' @param min_num_spp_on_2_PUs minimum number of species that occur on exactly 2 patches in distribution
#' @param max_max_abundance largest maximum abundance allowed in distribution
#' @param target_max_abundance_ct desired maximum abundance in distribution
#' @param initial_meanlog initial value of lognormal's meanlog in search for distribution
#' @param initial_sdlog initial value of lognormal's sdlog in search for distribution
#' @param max_iterations maximum number of iterations allowed for search
#' @param add_one_to_abundances boolean flag to indicate whether to add 1 to all abundances
#' @param outfile output file
#'
#' @return Returns the best lognormal found in the given maximum number
#' of search iterations (max_iterations). The value returned is a structure
#' containing the following information about the best lognormal curve found:
#' $meanlog - the meanlog used in specifying the curve, e.g., 1.035326
#' $sdlog - the sdlog used in specifying the curve, e.g., 0.9283901
#' $score - the EF score for the curve, i.e., how well does it fit the
#' given constraints (where smaller is better), e.g., 0.1
#' $abundance_data - a structure containing specifics of the curve, e.g.,
#' $abundance_data$rounded_abundances - a vector of the number of PUs
#' each species occurs on, e.g.,
#' [1] 2 4 3 2 7 3 8 15 5 5 9 6 10 4 13 4 7 4 6 6 6 2 2 14 3 9 4 5 5
#' [30] 2 2 2 2 1 2 3 2 2 4 3 5 3 3 10 17 4 15 3 6 2 5 28 6 4 2 10 2 3
#' [59] 8 23 4 2 5 2 4 4 3 8 8 3 6 2 2 1 4 3 1 5 3 16 2 6 9 3 2 5 7
#' [88] 4 3 4 6 2 4 3 3 6 2 3 5 3
#' $abundance_data$num_spp_on_exactly_2_patches - number of species
#' that occur on exactly
#' 2 PUs, e.g., 22
#' $abundance_data$max_abundance_ct - number of PUs that the most
#' abundant species occurs on,
#' e.g., 28
#' @export
#'
#-------------------------------------------------------------------------------
# Other search algorithm possibilities:
#
# Pragmatic Programming Techniques
# Monday, January 14, 2013
# Optimization in R
# http://horicky.blogspot.com.au/2013/01/optimization-in-r.html
#
# Optimising a Noisy Objective Function
# http://www.exegetic.biz/blog/2013/07/optimising-a-noisy-objective-function/
# Posted by Andrew Collier on 16 July 2013.
#
# DEoptim {DEoptim}
# http://www.inside-r.org/packages/cran/DEoptim/docs/DEoptim
# Differential Evolution Optimization
# Package:
# DEoptim
# Version:
# 2.2-3
# Description
# Performs evolutionary global optimization via the Differential Evolution algorithm.
# Usage
# DEoptim(fn, lower, upper, control = DEoptim.control(), ..., fnMap=NULL)
#-------------------------------------------------------------------------------
search_for_approximating_lognormal <- function (
seed_value,
tot_num_spp_after_wrapping,
min_num_spp_on_2_PUs,
max_max_abundance,
target_max_abundance_ct,
initial_meanlog,
initial_sdlog,
max_iterations,
add_one_to_abundances,
outfile
)
{
cat ("\n\nStart of search_for_approximating_lognormal:",
"\n seed_value = ", seed_value,
"\n tot_num_spp_after_wrapping = ", tot_num_spp_after_wrapping,
"\n min_num_spp_on_2_PUs = ", min_num_spp_on_2_PUs,
"\n max_max_abundance = ", max_max_abundance,
"\n target_max_abundance_ct = ", target_max_abundance_ct,
"\n initial_meanlog = ", initial_meanlog,
"\n initial_sdlog = ", initial_sdlog,
"\n max_iterations = ", max_iterations,
"\n add_one_to_abundances = ", add_one_to_abundances,
"\n outfile = ", outfile,
"\n\n"
)
# If echoing EF value to an output file,
# write the column headers to the outfile.
if (! is.null (outfile))
{
cat (paste0 ("\nEF_num,score,meanlog,sdlog,double_frac,",
"max_abundance_frac,min_num_spp_on_2_PUs,",
"target_max_abundance_ct,max_max_abundance,",
"seed_value,tot_num_spp_after_wrapping,add_one_to_abundance",
sep=''),
file=outfile, sep="\n")
}
#-------------------------------------------------------------------
# Run the optimizer.
# Hints on how to use optim() derived from text and code at:
# http://machinelearningmastery.com/convex-optimization-in-r/
#-------------------------------------------------------------------
result <- optim (par = c(initial_meanlog, initial_sdlog),
EF,
control=c(maxit=max_iterations),
seed_value = seed_value,
tot_num_spp_after_wrapping = tot_num_spp_after_wrapping,
min_num_spp_on_2_PUs = min_num_spp_on_2_PUs,
max_max_abundance = max_max_abundance,
target_max_abundance_ct = target_max_abundance_ct,
add_one_to_abundances = add_one_to_abundances,
outfile = outfile
)
#set.seed (seed_value)
abundance_data = gen_rounded_abundances (tot_num_spp_after_wrapping,
result$par [1],
result$par [2],
add_one_to_abundances)
lognormal_result = list (meanlog = result$par [1],
sdlog = result$par [2],
score = result$value,
abundance_data = abundance_data
)
cat ("\n\n=============> FINAL RESULT <=============\n\n")
cat ("\nscore = ", result$value,
", meanlog = ", result$par [1], ", sdlog = ", result$par [2],
"\nmin_num_spp_on_2_PUs = ", min_num_spp_on_2_PUs,
", abundance_data$num_spp_on_exactly_2_patches = ",
abundance_data$num_spp_on_exactly_2_patches,
"\ntarget_max_abundance_ct = ", target_max_abundance_ct,
", max_max = ", max_max_abundance,
", abundance_data$max_abundance_ct = ",
abundance_data$max_abundance_ct,
"\n\n", sep='')
show_abundance_data = FALSE # Should be an input option or gone altogether...
if (show_abundance_data)
{
print (lognormal_result)
cat ("\n")
}
return (lognormal_result)
} # end function - search_for_approximating_lognormal ()
#===============================================================================
#-------------------------------------------------------------------------------
#' Find lognormal to wrap around Xu problem
#'
#' Uses optimizer to search for parameters of a lognormal distribution that
#' will have at least as many species occurring on exactly 2 patches as the
#' given Xu problem and meet the other curve shape criteria given in the
#' argument list.
#'
#-------------------------------------------------------------------------------
#' @param Xu_bdprob a Xu_bd_problem object
#' @param desired_Xu_spp_frac_of_all_spp numeric
#' @param solution_frac_of_landscape numeric
#' @param desired_max_abundance_frac numeric
#' @param seed_value_for_search integer
#' @param max_search_iterations integer
#' @param add_one_to_lognormal_abundances boolean
#' @param search_outfile_name character string
#' @inheritParams std_param_defns
#'
#' @return Returns numeric vector of abundances
#-------------------------------------------------------------------------------
find_lognormal_to_wrap_around_Xu = function (Xu_bdprob, parameters,
desired_Xu_spp_frac_of_all_spp,
solution_frac_of_landscape,
desired_max_abundance_frac,
seed_value_for_search,
max_search_iterations,
add_one_to_lognormal_abundances,
search_outfile_name)
{
cat ("\n\n>>>>>>>>>>>>>>>>>>>>>> ABOUT TO find_lognormal_to_wrap_around_Xu() <<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n")
#---------------------------------------------------------------------------
# Parameters related to and/or derived from the Xu problem.
Xu_tot_num_spp = Xu_bdprob@num_spp # all spp in Xu are on 2 PUs
Xu_tot_num_PUs = Xu_bdprob@num_PUs # num dep nodes + num indep nodes
Xu_nodes = Xu_bdprob@nodes
Xu_dep_set = get_dependent_node_IDs (Xu_nodes)
num_Xu_dep_set_nodes = length (Xu_dep_set)
#---------------------------------------------------------------------------
# Derived control parameters.
min_num_spp_on_2_PUs = Xu_tot_num_spp # Try for all 2's occurring only in the dependent set
tot_num_PUs_in_landscape = round (num_Xu_dep_set_nodes / solution_frac_of_landscape)
if (tot_num_PUs_in_landscape < Xu_tot_num_PUs)
{
warning (paste0 ("\n\ntot_num_PUs_in_landscape (",
tot_num_PUs_in_landscape, ") < Xu_tot_num_PUs (",
Xu_tot_num_PUs, "). ",
# "\nResetting tot_num_PUs_in_landscape to ",
# "Xu_tot_num_PUs, ",
# "\nwhich resets solution_frac_of_landscape from ",
# solution_frac_of_landscape, " to ",
# num_Xu_dep_set_nodes / Xu_tot_num_PUs,
"\n\n"
))
# solution_frac_of_landscape = num_Xu_dep_set_nodes / Xu_tot_num_PUs
}
#-----------------------------------------------------------------------
# 2018 12 26 - BTL
# Replacing num_spp_to_generate with tot_num_spp_after_wrapping
# because num_spp_to_generate is ambiguous, i.e., is it the number
# of new species to add in the wrap or is it the total number of
# species in the final wrapped distribution including the original
# Xu species? The correct answer is the total number of species
# in the final wrap, but the code that was here doesn't do that.
#
# I supposedly fixed a bug in this back on 2018 11 08 in commit
# de06807e "Fix bug in computing num_spp_to_generate for wrap".
# Instead of fixing it, I made it calculate the number of new species
# to add in the wrap (i.e., excluding the original Xu species),
# however, it needs to be the total number of species in the final
# wrapped distribution. This led to fewer species in the wrapped
# distribution, which still worked in the sense that it didn't
# usuall crash the wrapping code, but it made the fraction of Xu
# species in the final distribution slightly off.
# However, when the desired_Xu_spp_frac_of_all_spp was 50%, it did
# crash the wrapping code because it meant that the lognormal search
# was trying to fit a lognormal that had exactly the same number of
# total points as points on exactly 2 PUs.
#
# So, I'm changing the name to make it clearer exactly what the
# aim of this computation is and changing the computation itself so
# that it gets to that aim instead of the incorrect aim that was
# introduced in the November "bug fix". Hopefully, this time it
# really IS a fix. To that end, I'll also add a validation test
# after the wrap is built and in that test, make sure that the
# desired fraction is met.
#-----------------------------------------------------------------------
# OLD (INCORRECT) CODE that calculated num new spp to add:
# num_spp_to_generate =
# round (Xu_tot_num_spp * (1 - desired_Xu_spp_frac_of_all_spp) /
# desired_Xu_spp_frac_of_all_spp)
#-----------------------------------------------------------------------
# CORRECT CODE:
tot_num_spp_after_wrapping = round (Xu_tot_num_spp / desired_Xu_spp_frac_of_all_spp)
#-----------------------------------------------------------------------
max_abundance_frac = min (1.0,
max (0,
desired_Xu_spp_frac_of_all_spp,
desired_max_abundance_frac))
target_max_abundance_ct = round (tot_num_PUs_in_landscape *
max_abundance_frac) # Desired max number of patches to allow any species to occur on.
max_max_abundance_ct = tot_num_PUs_in_landscape # Max number of patches to allow any species to occur on.
#---------------------------------------------------------------------------
# Try to find a good starting point for the search algorithm.
#
# Calculate some initial start points for the optimization based on
# choosing an sdlog value and solving for mu given that sdlog.
# Hopefully, this will get you to a start point that is somewhere
# near a decent solution.
# If the calculated value is illegal (which is checked by the
# calculate_mu() routine) or if the optimization fails or returns
# a large EF value, then try some other combinations of sigma and
# either the bottom rarity (i.e., spp on exactly 2 PUs) or the
# top rarity (i.e., spp on the max allowed abundance ct or less).
#
# AT THE MOMENT, I'M JUST TRYING ONE OF THESE TO SEE IF THINGS RUN.
# 2016 05 28 - BTL.
num_PUs_per_spp_ie_rarity <- 2
num_spp_with_given_num_PUs <- min_num_spp_on_2_PUs
sigma <- 0.1
plusOrMinus1 <- 1
mu <- calculate_mu (num_PUs_per_spp_ie_rarity,
num_spp_with_given_num_PUs,
sigma,
plusOrMinus1)
initial_meanlog_for_search <- mu
initial_sdlog_for_search <- sigma
#---------------------------------------------------------------------------
# Need to do a couple of quick sanity checks on those parameters
# to make sure that they can't generate nonsense or a crash?
# Just a dummy placeholder routine until I know what needs checking.
do_sanity_checks ()
#---------------------------------------------------------------------------
# Ready to search for a useful lognormal distribution, i.e.,
# one that approximately fits the desired attributes passed in.
# EF_num <<- 0
options (bdpg.EF_num = 0)
lognormal_search_results =
search_for_approximating_lognormal (
seed_value_for_search,
tot_num_spp_after_wrapping,
min_num_spp_on_2_PUs,
max_max_abundance_ct,
target_max_abundance_ct,
initial_meanlog_for_search,
initial_sdlog_for_search,
max_search_iterations,
add_one_to_lognormal_abundances,
search_outfile_name
)
rounded_abundances = lognormal_search_results$abundance_data$rounded_abundances
#browser()
plot (sort(rounded_abundances, decreasing = TRUE))
#max_abund = max (rounded_abundances)
#normalized_abundances_xxx = rounded_abundances / max_abund
#plot (normalized_abundances_xxx)
#-------------------------------------------------------------------
# Make sure that the in the final wrapped distribution,
# the original Xu species make up the desired fraction of all
# the species together (since there was a bug in that before).
# Creating some intermediate variables here that arent' strictly
# necessary but make it easier to test and to generate error msg.
#-------------------------------------------------------------------
tot_num_spp_in_wrapped_spp_set = length (rounded_abundances)
Xu_spp_frac_of_full_wrapped_spp_set =
Xu_tot_num_spp / tot_num_spp_in_wrapped_spp_set
if (abs (Xu_spp_frac_of_full_wrapped_spp_set -
desired_Xu_spp_frac_of_all_spp) > 0.000001)
{
stop_bdpg (paste0 ("\nXu_spp_frac_of_full_wrapped_spp_set (",
Xu_spp_frac_of_full_wrapped_spp_set,
") must equal desired_Xu_spp_frac_of_all_spp (",
desired_Xu_spp_frac_of_all_spp, ").\n"))
}
return (rounded_abundances)
} # end function - find_lognormal_to_wrap_around_Xu ()
#===============================================================================
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.