R/BioGeoBEARS_classes_v1.R

Defines functions BioGeoBEARS_model_defaults get_perEvent_probs define_BioGeoBEARS_model_object BioGeoBEARS_model_object_to_init_params BioGeoBEARS_model_object_to_est_params BioGeoBEARS_model_object_to_params_lower BioGeoBEARS_model_object_to_params_upper params_into_BioGeoBEARS_model_object merge_words_nonwords calc_linked_params_BioGeoBEARS_model_object define_BioGeoBEARS_run read_dispersal_multipliers_fn read_times_fn read_distances_fn read_area_of_areas_fn read_areas_allowed_fn prune_states_list readfiles_BioGeoBEARS_run check_BioGeoBEARS_run define_tipranges_object

Documented in BioGeoBEARS_model_defaults BioGeoBEARS_model_object_to_est_params BioGeoBEARS_model_object_to_init_params BioGeoBEARS_model_object_to_params_lower BioGeoBEARS_model_object_to_params_upper calc_linked_params_BioGeoBEARS_model_object check_BioGeoBEARS_run define_BioGeoBEARS_model_object define_BioGeoBEARS_run define_tipranges_object get_perEvent_probs merge_words_nonwords params_into_BioGeoBEARS_model_object prune_states_list read_area_of_areas_fn read_areas_allowed_fn read_dispersal_multipliers_fn read_distances_fn readfiles_BioGeoBEARS_run read_times_fn

require("ape")
require("rexpokit")
require("cladoRcpp")


#######################################################
# Defining Classes
#######################################################
# Based on advice here:
# http://stackoverflow.com/questions/7368262/how-to-properly-document-s4-class-slots-using-roxygen2




# Set up a default BioGeoBEARS model
# continuous parameters
# implements LAGRANGE model


#######################################################
# BioGeoBEARS_model_defaults
#######################################################
#' Set up a default BioGeoBEARS model object
#' 
#' What it says.
#' 
#' @param minval_anagenesis Minimum value above zero for d, e, a, b parameters.
#' @param minval_cladogenesis Minimum value above zero for j, v, etc.
#' @param maxval Maximum value for d, e, a
#' @return \code{param_table} Return the parameter table object
#' @export
#' @seealso \code{\link[base]{rbind}}
#' @note Go BEARS!
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @bibliography /Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_refs.bib
#'   @cite Matzke_2012_IBS
#' @examples
#' test=1
BioGeoBEARS_model_defaults <- function(minval_anagenesis=1e-15, minval_cladogenesis=1e-5, maxval=5)
	{
	defaults='
	minval_anagenesis = 1e-15
	minval_cladogenesis = 1e-5
	maxval = 5
	'
	
	param_data_starter = as.data.frame(matrix(data=0, nrow=1, ncol=7), stringsAsFactors=FALSE)
	names(param_data_starter) = c("type", "init", "min", "max", "est", "note", "desc")
	param_table = NULL
	param_names = NULL
	
	# For a parameter on a branch (anagenesis)
	
	# dispersal rate
	param_name = "d"
	param_data = param_data_starter
	param_data$type = "free"
	param_data$init = 0.01
	param_data$min = 0 + minval_anagenesis
	param_data$max = maxval - minval_anagenesis
	param_data$est = param_data$init
	param_data$note = "works"
	param_data$desc = "anagenesis: rate of 'dispersal' (range expansion)"
	names(param_data) = names(param_data_starter)
	param_table = rbind(param_table, param_data)
	rownames(param_table) = (param_names = c(param_names, param_name))
	param_table
	
	# extinction rate
	param_name = "e"
	param_data = param_data_starter
	param_data$type = "free"
	param_data$init = 0.01
	param_data$min = 0 + minval_anagenesis
	param_data$max = maxval - minval_anagenesis
	param_data$est = param_data$init
	param_data$note = "works"
	param_data$desc = "anagenesis: rate of 'extinction' (range contraction)"
	names(param_data) = names(param_data_starter)
	param_table = rbind(param_table, param_data)
	rownames(param_table) = (param_names = c(param_names, param_name))
	param_table

	# between-ranges transition rate (anagenic; i.e., with a standard morphology model)
	param_name = "a"
	param_data = param_data_starter
	param_data$type = "fixed"
	param_data$init = 0.0
	param_data$min = 0 + minval_anagenesis
	param_data$max = maxval - minval_anagenesis
	param_data$est = param_data$init
	param_data$note = "works"
	param_data$desc = "anagenesis: rate of range-switching (i.e. for a standard char.)"
	names(param_data) = names(param_data_starter)
	param_table = rbind(param_table, param_data)
	rownames(param_table) = (param_names = c(param_names, param_name))
	param_table

	# branch-length exponent (0: all branches=1; 1: use branchlengths; 2: use branchlengths^2, not meaningful)
	param_name = "b"
	param_data = param_data_starter
	param_data$type = "fixed"
	param_data$init = 1.0
	param_data$min = 0 + minval_anagenesis
	param_data$max = 1 - minval_anagenesis
	param_data$est = param_data$init
	param_data$note = "non-stratified only"
	param_data$desc = "anagenesis: exponent on branch lengths"
	names(param_data) = names(param_data_starter)
	param_table = rbind(param_table, param_data)
	rownames(param_table) = (param_names = c(param_names, param_name))
	param_table

	# Exponent on distance (dist^-1 = inverse distance weighting; dist^-0 = 1 (equal probs); dist^-2 = exponential dispersal decay)
	param_name = "x"
	param_data = param_data_starter
	param_data$type = "fixed"
	param_data$init = 0.0
	param_data$min = -10
	param_data$max = 10
	param_data$est = param_data$init
	param_data$note = "works"
	param_data$desc = "anagenesis: exponent on distance (modifies d, j, a)"
	names(param_data) = names(param_data_starter)
	param_table = rbind(param_table, param_data)
	rownames(param_table) = (param_names = c(param_names, param_name))
	param_table

	# Exponent on extinction risk with area (area^-0, equiprobable extinction; area^-1, prob(ext) is inverse to area; area^-2, prob declines exponentially)
	param_name = "u"
	param_data = param_data_starter
	param_data$type = "fixed"
	param_data$init = 0.0
	param_data$min = -10
	param_data$max = 10
	param_data$est = param_data$init
	param_data$note = "works"
	param_data$desc = "anagenesis: exponent on extinction risk with area (modifies e)"
	names(param_data) = names(param_data_starter)
	param_table = rbind(param_table, param_data)
	rownames(param_table) = (param_names = c(param_names, param_name))
	param_table

	
	# For a parameter at a branch point (cladogenesis)
	# (these are slightly different as I sometimes get errors running optim()/optimx() when 0 is 
	#  included as a possibility)
	param_name = "j"
	param_data = param_data_starter
	param_data$type = "fixed"
	param_data$init = 0.0
	param_data$min = 0 + minval_cladogenesis
	param_data$max = 1
	param_data$est = param_data$init
	param_data$note = "works"
	param_data$desc = "cladogenesis: relative per-event weight of jump dispersal"
	names(param_data) = names(param_data_starter)
	param_table = rbind(param_table, param_data)
	rownames(param_table) = (param_names = c(param_names, param_name))
	param_table

	param_name = "ysv"
	param_data = param_data_starter
	param_data$type = "3-j"
	param_data$init = 3 - minval_cladogenesis
	param_data$min = 0 + minval_cladogenesis
	param_data$max = 3
	param_data$est = param_data$init
	param_data$note = "works"
	param_data$desc = "cladogenesis: y+s+v"
	names(param_data) = names(param_data_starter)
	param_table = rbind(param_table, param_data)
	rownames(param_table) = (param_names = c(param_names, param_name))
	param_table


	param_name = "ys"
	param_data = param_data_starter
	param_data$type = "ysv*2/3"
	param_data$init = 2 - minval_cladogenesis
	param_data$min = 0 + minval_cladogenesis
	param_data$max = 2
	param_data$est = param_data$init
	param_data$note = "works"
	param_data$desc = "cladogenesis: y+s"
	names(param_data) = names(param_data_starter)
	param_table = rbind(param_table, param_data)
	rownames(param_table) = (param_names = c(param_names, param_name))
	param_table

	param_name = "y"
	param_data = param_data_starter
	param_data$type = "ysv*1/3"
	param_data$init = 1
	param_data$min = 0 + minval_cladogenesis
	param_data$max = 1
	param_data$est = param_data$init
	param_data$note = "works"
	param_data$desc = "cladogenesis: relative per-event weight of sympatry (range-copying)"
	names(param_data) = names(param_data_starter)
	param_table = rbind(param_table, param_data)
	rownames(param_table) = (param_names = c(param_names, param_name))
	param_table

	param_name = "s"
	param_data = param_data_starter
	param_data$type = "ysv*1/3"
	param_data$init = 1
	param_data$min = 0 + minval_cladogenesis
	param_data$max = 1
	param_data$est = param_data$init
	param_data$note = "works"
	param_data$desc = "cladogenesis: relative per-event weight of subset speciation"
	names(param_data) = names(param_data_starter)
	param_table = rbind(param_table, param_data)
	rownames(param_table) = (param_names = c(param_names, param_name))
	param_table

	param_name = "v"
	param_data = param_data_starter
	param_data$type = "ysv*1/3"
	param_data$init = 1
	param_data$min = 0 + minval_cladogenesis
	param_data$max = 1
	param_data$est = param_data$init
	param_data$note = "works"
	param_data$desc = "cladogenesis: relative per-event weight of vicariant speciation"
	names(param_data) = names(param_data_starter)
	param_table = rbind(param_table, param_data)
	rownames(param_table) = (param_names = c(param_names, param_name))
	param_table



	# Maxent parameters (prob of smaller offspring | size)
	param_name = "mx01"
	param_data = param_data_starter
	param_data$type = "fixed"
	param_data$init = 0.0001
	param_data$min = 0.0001
	param_data$max = 0.9999
	param_data$est = param_data$init
	param_data$note = "works"
	param_data$desc = "cladogenesis: controls range size of smaller daughter"
	names(param_data) = names(param_data_starter)
	param_table = rbind(param_table, param_data)
	rownames(param_table) = (param_names = c(param_names, param_name))
	param_table

	param_name = "mx01j"
	param_data = param_data_starter
	param_data$type = "mx01"
	param_data$init = param_table[param_data$type,"init"]
	param_data$min = 0.0001
	param_data$max = 0.9999
	param_data$est = param_data$init
	param_data$note = "works"
	param_data$desc = "cladogenesis: controls range size of smaller daughter"
	names(param_data) = names(param_data_starter)
	param_table = rbind(param_table, param_data)
	rownames(param_table) = (param_names = c(param_names, param_name))
	param_table

	param_name = "mx01y"
	param_data = param_data_starter
	param_data$type = "mx01"
	param_data$init = param_table[param_data$type,"init"]
	param_data$min = 0.0001
	param_data$max = 0.9999
	param_data$est = param_data$init
	param_data$note = "works"
	param_data$desc = "cladogenesis: controls range size of smaller daughter"
	names(param_data) = names(param_data_starter)
	param_table = rbind(param_table, param_data)
	rownames(param_table) = (param_names = c(param_names, param_name))
	param_table

	param_name = "mx01s"
	param_data = param_data_starter
	param_data$type = "mx01"
	param_data$init = param_table[param_data$type,"init"]
	param_data$min = 0.0001
	param_data$max = 0.9999
	param_data$est = param_data$init
	param_data$note = "works"
	param_data$desc = "cladogenesis: controls range size of smaller daughter"
	names(param_data) = names(param_data_starter)
	param_table = rbind(param_table, param_data)
	rownames(param_table) = (param_names = c(param_names, param_name))
	param_table

	param_name = "mx01v"
	param_data = param_data_starter
	param_data$type = "mx01"
	param_data$init = param_table[param_data$type,"init"]
	param_data$min = 0.0001
	param_data$max = 0.9999
	param_data$est = param_data$init
	param_data$note = "works"
	param_data$desc = "cladogenesis: controls range size of smaller daughter"
	names(param_data) = names(param_data_starter)
	param_table = rbind(param_table, param_data)
	rownames(param_table) = (param_names = c(param_names, param_name))
	param_table

	# Prob distribution on range sizes at root
	param_name = "mx01r"
	param_data = param_data_starter
	param_data$type = "fixed"
	param_data$init = 0.5
	param_data$min = 0.0001
	param_data$max = 0.9999
	param_data$est = param_data$init
	param_data$note = "no"
	param_data$desc = "root: controls range size probabilities of root"
	names(param_data) = names(param_data_starter)
	param_table = rbind(param_table, param_data)
	rownames(param_table) = (param_names = c(param_names, param_name))
	param_table


	# Mean frequency of truly sampling OTU of interest
	param_name = "mf"
	param_data = param_data_starter
	param_data$type = "fixed"
	param_data$init = 0.1
	param_data$min = 0 + minval_anagenesis
	param_data$max = 1 - minval_anagenesis
	param_data$est = param_data$init
	param_data$note = "yes"
	param_data$desc = "mean frequency of truly sampling OTU of interest"
	names(param_data) = names(param_data_starter)
	param_table = rbind(param_table, param_data)
	rownames(param_table) = (param_names = c(param_names, param_name))
	param_table


	# Detection probability (per true sample of OTU of interest)
	param_name = "dp"
	param_data = param_data_starter
	param_data$type = "fixed"
	param_data$init = 1
	param_data$min = 0 + minval_anagenesis
	param_data$max = 1 - minval_anagenesis
	param_data$est = param_data$init
	param_data$note = "yes"
	param_data$desc = "detection probability per true sample of OTU of interest"
	names(param_data) = names(param_data_starter)
	param_table = rbind(param_table, param_data)
	rownames(param_table) = (param_names = c(param_names, param_name))
	param_table

	# False detection probability (per true sample of non-OTU of interest)
	param_name = "fdp"
	param_data = param_data_starter
	param_data$type = "fixed"
	param_data$init = 0
	param_data$min = 0 + minval_anagenesis
	param_data$max = 1 - minval_anagenesis
	param_data$est = param_data$init
	param_data$note = "yes"
	param_data$desc = "false detection of OTU probability per true taphonomic control sample"
	names(param_data) = names(param_data_starter)
	param_table = rbind(param_table, param_data)
	rownames(param_table) = (param_names = c(param_names, param_name))
	param_table


	
	return(param_table)
	}




#######################################################
# get_perEvent_probs
#######################################################
#' Get the per-event probabilities at cladogenesis
#' 
#' At a cladogenesis event, a large number of events are possible. The simplest way to
#' compute these is just to assign some weight to each event, then sum all the events
#' and divide by the sum to get the probabilities.  More complex schemes can be 
#' imagined, but these are fairly pointless as they would all break down once
#' e.g. distance-dependence, user-specified connectivities, etc., are imposed.
#' 
#' In addition, one could imagine trying to assign total probabilities to each category of
#' event, but each row of the cladogenesis matrix may have a different count of the different
#' types of events (one row may have 1 y event and 2 j events; another row may have 4 j, 2 v, and
#' 2 s, and 0 y events; etc.).
#' 
#' One thing that IS meaningful is the per-event weight, i.e. the values that
#' the program is using for j, v, y, and s.  These ARE meaningful, as long as
#' they are forced to sum to some value (default 4).  This ensures that they are
#' identifiable (otherwise, j,v,y,s=1 and j,v,y,s=2 would be the same model).
#' 
#' This function calculates the per-event weight as a proportion of some total
#' weight, e.g. default 1.  If the optim result was j=0, s=1, y=1, v=1, the \code{get_perEventprobs()}
#' result would be 0, 0.333, 0.333, 0.333.
#' 
#' @param params_table The \code{params_table} from a \code{BioGeoBEARS_model_object}.
#' @param sumval Default=1.
#' @param plotwhat Default "est", use "init" to get the initial starting values instead.
#' @return \code{wts} Return the per-event weights
#' @export
#' @seealso \code{\link[base]{rbind}}
#' @note Go BEARS!
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @bibliography /Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_refs.bib
#'   @cite Matzke_2012_IBS
#' @examples
#' 
#' # default DEC+J model
#' BioGeoBEARS_run_object = define_BioGeoBEARS_run()
#' BioGeoBEARS_run_object$BioGeoBEARS_model_object@@params_table
#' params_table = BioGeoBEARS_run_object$BioGeoBEARS_model_object@@params_table
#' params_table
#' 
#' get_perEvent_probs(params_table)
#' 
#' 
#' # DEC+J model
#' BioGeoBEARS_run_object = define_BioGeoBEARS_run()
#' BioGeoBEARS_run_object$BioGeoBEARS_model_object@@params_table["j","type"] = "free"
#' BioGeoBEARS_run_object$BioGeoBEARS_model_object@@params_table["j","init"] = 1
#' BioGeoBEARS_run_object$BioGeoBEARS_model_object@@params_table["j","est"] = 1
#' 
#' BioGeoBEARS_run_object$BioGeoBEARS_model_object@@params_table
#' 
#' BioGeoBEARS_run_object$BioGeoBEARS_model_object = 
#' calc_linked_params_BioGeoBEARS_model_object(
#' BioGeoBEARS_model_object=BioGeoBEARS_run_object$BioGeoBEARS_model_object, 
#' update_init=TRUE)
#' 
#' 
#' BioGeoBEARS_run_object$BioGeoBEARS_model_object@@params_table
#' params_table = BioGeoBEARS_run_object$BioGeoBEARS_model_object@@params_table
#' 
#' get_perEvent_probs(params_table)
#' 
#'
get_perEvent_probs <- function(params_table, sumval=1, plotwhat="est")
	{
	j = params_table["j", plotwhat]
	y = params_table["y", plotwhat]
	s = params_table["s", plotwhat]
	v = params_table["v", plotwhat]
	
	jysv_val = j+y+s+v
	
	j = j / jysv_val * sumval
	y = y / jysv_val * sumval
	s = s / jysv_val * sumval
	v = v / jysv_val * sumval
	
	vals = c(j, y, s, v)
	tmpmat = matrix(vals, nrow=1)
	wts = adf2(tmpmat)
	names(wts) = c("j", "y", "s", "v")
	
	return(wts)
	}






#######################################################
# BioGeoBEARS_model
#######################################################
#' An object of class BioGeoBEARS_model holding the model inputs
#'
#'@section Slots: 
#'  \describe{
#'    \item{\code{df}:}{Data.frame of class \code{"numeric"}, containing data from df}
#'  }
#'
#' @note Go BEARS!
#' @name BioGeoBEARS_model 
#' @rdname BioGeoBEARS_model-class
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @seealso \code{\link{define_tipranges_object}}, \code{\link{getareas_from_tipranges_object}},
#' \code{\link[cladoRcpp]{areas_list_to_states_list_old}}, \code{\link{areas_list_to_states_list_new}},
#' \code{\link{tipranges_to_tip_condlikes_of_data_on_each_state}}
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @bibliography /Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_refs.bib
#'   @cite Matzke_2012_IBS
#' @examples
#' tipranges_object = define_tipranges_object()
#' tipranges_object
#'
setClass(Class="BioGeoBEARS_model", representation=representation(params_table="data.frame"))

#setClass(Class="BioGeoBEARS_model", representation=representation(params_table="data.frame"), contains="numeric")


#setClass(Class="BioGeoBEARS_model", representation=representation(params_table="data.frame"), prototype=BioGeoBEARS_model_defaults())


#setClass("tipranges", representation(df="data.frame"),
#    contains = "numeric"
#)


#######################################################
# define_BioGeoBEARS_model_object
#######################################################
#' Define a BioGeoBEARS_model class and object
#' 
#' Class \code{BioGeoBEARS_model} is an extension of the \code{\link{data.frame}} class.  It is used for holding
#' discrete geographic range data for the tips on a phylogeny. Geographic ranges are represented with bit 
#' encoding (0/1) indicating absence or presence in each possible area.
#' 
#' This is just a data.frame with:
#' rows = taxanames\cr
#' columns = area names\cr
#' cells = 0/1 representing empty/occupied\cr
#' 
#' @param minval_anagenesis Minimum value above zero for d, e, a, b parameters.
#' @param minval_cladogenesis Minimum value above zero for j, v, etc.
#' @param maxval Maximum value for d, e, a
#' @return \code{BioGeoBEARS_model_object} The BioGeoBEARS_model object, of class \code{BioGeoBEARS_model}
#' @export
#' @seealso \code{\link[cladoRcpp]{areas_list_to_states_list_old}}, \code{\link{areas_list_to_states_list_new}}
#' @note Go BEARS!
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @bibliography /Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_refs.bib
#'   @cite Matzke_2012_IBS
#' @examples
#' testval=1
#' BioGeoBEARS_model_object = define_BioGeoBEARS_model_object()
#' BioGeoBEARS_model_object
#' define_BioGeoBEARS_model_object()
define_BioGeoBEARS_model_object <- function(minval_anagenesis=1e-15, minval_cladogenesis=1e-5, maxval=5)
	{
	# Define the BioGeoBEARS_model class;

	#BioGeoBEARS_model_object = new("BioGeoBEARS_model", df=tmpdf)
	BioGeoBEARS_model_object = new("BioGeoBEARS_model", params_table=BioGeoBEARS_model_defaults(minval_anagenesis, minval_cladogenesis, maxval))
	BioGeoBEARS_model_object
	
	# you can get the dataframe with
	# BioGeoBEARS_model_object@df

	return(BioGeoBEARS_model_object)
	}



#######################################################
# BioGeoBEARS_model_object_to_init_params
#######################################################
#' Produce initial parameters from a BioGeoBEARS model object
#' 
#' What it says.
#' 
#' @param BioGeoBEARS_model_object The BioGeoBEARS_model object, of class \code{BioGeoBEARS_model}
#' @return \code{params} parameter vector
#' @export
#' @seealso \code{\link[BioGeoBEARS]{define_BioGeoBEARS_model_object}}
#' @note Go BEARS!
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @bibliography /Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_refs.bib
#'   @cite Matzke_2012_IBS
#' @examples
#' test=1
BioGeoBEARS_model_object_to_init_params <- function(BioGeoBEARS_model_object)
	{
	free_params_TF = BioGeoBEARS_model_object@params_table$type == "free"
	num_free_params = sum(free_params_TF, na.rm=TRUE)
	
	params = rep(NA, num_free_params)
	
	params = BioGeoBEARS_model_object@params_table$init[free_params_TF]
	
	return(params)	
	}



#######################################################
# BioGeoBEARS_model_object_to_est_params
#######################################################
#' Extract estimated parameters from a BioGeoBEARS model object
#' 
#' What it says.
#' 
#' @param BioGeoBEARS_model_object The BioGeoBEARS_model object, of class \code{BioGeoBEARS_model}
#' @return \code{params} parameter vector
#' @export
#' @seealso \code{\link[BioGeoBEARS]{define_BioGeoBEARS_model_object}}
#' @note Go BEARS!
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @bibliography /Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_refs.bib
#'   @cite Matzke_2012_IBS
#' @examples
#' test=1
BioGeoBEARS_model_object_to_est_params <- function(BioGeoBEARS_model_object)
	{
	free_params_TF = BioGeoBEARS_model_object@params_table$type == "free"
	num_free_params = sum(free_params_TF, na.rm=TRUE)
	
	params = rep(NA, num_free_params)
	
	params = BioGeoBEARS_model_object@params_table$est[free_params_TF]
	
	return(params)	
	}




#######################################################
# BioGeoBEARS_model_object_to_params_lower
#######################################################
#' Produce the lower limit on the parameters from a BioGeoBEARS model object
#' 
#' What it says.
#' 
#' @param BioGeoBEARS_model_object The BioGeoBEARS_model object, of class \code{BioGeoBEARS_model}
#' @return \code{params} parameter vector
#' @export
#' @seealso \code{\link[BioGeoBEARS]{define_BioGeoBEARS_model_object}}
#' @note Go BEARS!
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @bibliography /Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_refs.bib
#'   @cite Matzke_2012_IBS
#' @examples
#' test=1
BioGeoBEARS_model_object_to_params_lower <- function(BioGeoBEARS_model_object)
	{
	free_params_TF = BioGeoBEARS_model_object@params_table$type == "free"
	num_free_params = sum(free_params_TF, na.rm=TRUE)
	
	params = rep(NA, num_free_params)
	
	params = BioGeoBEARS_model_object@params_table$min[free_params_TF]
	
	return(params)	
	}


#######################################################
# BioGeoBEARS_model_object_to_params_upper
#######################################################
#' Produce the upper limit on the parameters from a BioGeoBEARS model object
#' 
#' What it says.
#' 
#' @param BioGeoBEARS_model_object The BioGeoBEARS_model object, of class \code{BioGeoBEARS_model}
#' @return \code{params} parameter vector
#' @export
#' @seealso \code{\link[BioGeoBEARS]{define_BioGeoBEARS_model_object}}
#' @note Go BEARS!
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @bibliography /Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_refs.bib
#'   @cite Matzke_2012_IBS
#' @examples
#' test=1
BioGeoBEARS_model_object_to_params_upper <- function(BioGeoBEARS_model_object)
	{
	free_params_TF = BioGeoBEARS_model_object@params_table$type == "free"
	num_free_params = sum(free_params_TF, na.rm=TRUE)
	
	params = rep(NA, num_free_params)
	
	params = BioGeoBEARS_model_object@params_table$max[free_params_TF]
	
	return(params)	
	}


#######################################################
# params_into_BioGeoBEARS_model_object
#######################################################
#' Feed modified parameters back into a BioGeoBEARS model object
#' 
#' What it says.
#' 
#' @param BioGeoBEARS_model_object The BioGeoBEARS_model object, of class \code{BioGeoBEARS_model}
#' @param params parameter vector
#' @return \code{BioGeoBEARS_model_object} The BioGeoBEARS_model object, of class \code{BioGeoBEARS_model}
#' @export
#' @seealso \code{\link[BioGeoBEARS]{define_BioGeoBEARS_model_object}}
#' @note Go BEARS!
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @bibliography /Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_refs.bib
#'   @cite Matzke_2012_IBS
#' @examples
#' test=1
params_into_BioGeoBEARS_model_object <- function(BioGeoBEARS_model_object, params)
	{
	free_params_TF = BioGeoBEARS_model_object@params_table$type == "free"
	num_free_params = sum(free_params_TF, na.rm=TRUE)
	
	BioGeoBEARS_model_object@params_table$est[free_params_TF] = params
	
	return(BioGeoBEARS_model_object)
	}

#######################################################
# merge_words_nonwords
#######################################################
#' Merge lists of words and nonwords (numbers) that may be of different length
#' 
#' Utility function.
#' 
#' @param words A list of words
#' @param nonwords A list of nonwords
#' @return \code{sentence} A text string.
#' @export
#' @seealso \code{\link[BioGeoBEARS]{define_BioGeoBEARS_model_object}}
#' @note Go BEARS!
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @bibliography /Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_refs.bib
#'   @cite Matzke_2012_IBS
#' @examples
#' test=1
merge_words_nonwords <- function(words, nonwords)
	{
	if (length(nonwords) == ((length(words) + 1)))
		{
		words = c(words, "")
		}
	
	wordsmat = cbind(nonwords, words)
	paste1 = apply(X=wordsmat, MARGIN=1, FUN=paste, sep="", collapse="")
	paste1
	
	sentence = paste(paste1, sep="", collapse="")
	return(sentence)
	}






#######################################################
# calc_linked_params_BioGeoBEARS_model_object
#######################################################
#' Update parameters that are deterministic functions of free parameters
#' 
#' This function updates the linked parameters (which are listed as neither "fixed" nor
#' "free" in \code{params_table$type}; i.e., they are equations which are calculated from #' the fixed and free parameters,
#' which should have already been set by other functions).
#'
#' \code{params_table$type} is typically stored in: \code{BioGeoBEARS_run_object$BioGeoBEARS_model_object@@params_table}.
#' 
#' @param BioGeoBEARS_model_object The BioGeoBEARS_model object, of class \code{BioGeoBEARS_model}
#' @param update_init If \code{TRUE}, put the estimates into the initial values in the \code{params_table}.
#' Default: \code{FALSE}.
#' @return \code{BioGeoBEARS_model_object} Updated version of the BioGeoBEARS_model
#' object, of class \code{BioGeoBEARS_model}.
#' @export
#' @seealso \code{\link[BioGeoBEARS]{define_BioGeoBEARS_model_object}} \code{\link[BioGeoBEARS]{define_BioGeoBEARS_run}}
#' @note Go BEARS!
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @bibliography /Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_refs.bib
#'   @cite Matzke_2012_IBS
#' @examples
#' # Define a BioGeoBEARS run object
#' BioGeoBEARS_run_object = define_BioGeoBEARS_run()
#' BioGeoBEARS_run_object$BioGeoBEARS_model_object@@params_table
#' 
#' # Set 'j' to be free, i.e. as in a DEC+J model (adding jump dispersal
#' # to the LAGRANGE DEC model)
#' BioGeoBEARS_run_object$BioGeoBEARS_model_object@@params_table["j","type"] = "free"
#' BioGeoBEARS_run_object$BioGeoBEARS_model_object@@params_table["j","init"] = 0.25
#' BioGeoBEARS_run_object$BioGeoBEARS_model_object@@params_table["j","est"] = 0.25
#' 
#' # Display result
#' BioGeoBEARS_run_object$BioGeoBEARS_model_object@@params_table
#' 
#' # Update the other parameters
#' BioGeoBEARS_run_object$BioGeoBEARS_model_object = 
#' calc_linked_params_BioGeoBEARS_model_object(
#' BioGeoBEARS_model_object=BioGeoBEARS_run_object$BioGeoBEARS_model_object)
#' 
#' # Display result
#' BioGeoBEARS_run_object$BioGeoBEARS_model_object@@params_table
#'
calc_linked_params_BioGeoBEARS_model_object <- function(BioGeoBEARS_model_object, update_init=FALSE)
	{
	# identify free parameters
	linked_TF1 = BioGeoBEARS_model_object@params_table$type != "free"

	# identify fixed parameters
	linked_TF2 = BioGeoBEARS_model_object@params_table$type != "fixed"

	# the other parameters (linked parameters) are calculated from the fixed and free parameters
	# which should have already been set
	linked_TF = (linked_TF1 + linked_TF2) == 2
	
	# If none are linked, skip
	if (sum(linked_TF) == 0)
		{
		return(BioGeoBEARS_model_object)
		}
	
	linked_types = unique(BioGeoBEARS_model_object@params_table$type[linked_TF])
	
	# Get the list of variables
	mstr = paste(rownames(BioGeoBEARS_model_object@params_table), sep="", collapse="|")
	
	for (ll in 1:length(linked_types))
		{
		# Match this type
		linked_type = linked_types[ll]
		this_type_TF = BioGeoBEARS_model_object@params_table$type == linked_type
		
		# Get the formula
		tmp_formula = linked_type
		
		# Extract the words
		words = regmatches(x=tmp_formula, m=gregexpr(mstr, tmp_formula), invert=FALSE)[[1]]
		nonwords = regmatches(x=tmp_formula, m=gregexpr(mstr, tmp_formula), invert=TRUE)[[1]]
		
		
		# Get the values for each word
		tmpfun <- function(word, params_table)
			{
			return(params_table[word, "est"])
			}
		vals = sapply(X=words, FUN=tmpfun, params_table=BioGeoBEARS_model_object@params_table)

		# Make the formula, with numbers put in
		new_formula = merge_words_nonwords(vals, nonwords)
		new_formula
		
		# Calculate the new value
		newval = NA
		cmdstr = paste("newval = ", new_formula, sep="")
		eval(parse(text=cmdstr))
		
		# Input it into the matched cells
		BioGeoBEARS_model_object@params_table$est[this_type_TF] = newval
		
		# Also update the initial values, if desired
		if (update_init == TRUE)
			{
			BioGeoBEARS_model_object@params_table$init = BioGeoBEARS_model_object@params_table$est
			BioGeoBEARS_model_object@params_table$init[this_type_TF] = newval
			}
		
		}
	return(BioGeoBEARS_model_object)
	}




# Define a MAXIMUM LIKELIHOOD search

#######################################################
# define_BioGeoBEARS_run
#######################################################
#' Define a maximum likelihood search, perhaps stratified
#' 
#' Set up the inputs object for an ML search.  See parameter descriptions for defaults.
#' 
#' @param abbr Text abbreviation of run, e.g. "default"
#' @param description Text description of run, e.g. "defaults"
#' @param BioGeoBEARS_model_object Default is \code{define_BioGeoBEARS_model_object()}
#' @param trfn The filename of the phylogenetic tree, in NEWICK format (\url{http://evolution.genetics.washington.edu/phylip/newicktree.html}).  
#' Tipnames should match the names in geogfn.  See \code{\link[ape]{read.tree}} in APE for reading in phylogenetic trees. Default "Psychotria_5.2.newick"
#' @param geogfn A PHYLIP-style file with geographic range data (see \code{\link{getranges_from_LagrangePHYLIP}}) for each tipname. This is the same format
#' used by C++ LAGRANGE (\cite{SmithRee2010_CPPversion}). Default "Psychotria_geog.data"
#' @param timesfn Filename for the stratified times.
#' @param distsfn Filename for the changing distances.
#' @param dispersal_multipliers_fn Filename for the changing hard-coded dispersal multipliers
#' @param area_of_areas_fn Filename for the area of each area
#' @param areas_allowed_fn Filename for the allowed connections between areas for single-species ranges.
#' @param detects_fn Filename for the counts of detections of OTUs of interest. See \code{\link{calc_obs_like}}.
#' @param controls_fn Filename for the counts of taphonomic controls (which INCLUDE the OTUs of interest). See \code{\link{calc_obs_like}}.
#' @param max_range_size The maximum rangesize, in number of areas.  Having a smaller maximum range size means that you can have more areas (the size of the
#' state space is greatly reduced; see \code{\link[cladoRcpp]{numstates_from_numareas}}.
#' @param states_list A list of the possible states/geographic ranges, in 0-based index form.
#' @param force_sparse Should sparse matrix exponentiation be used?  Default \code{FALSE}, which means dense matrix exponentiation is always used. 
#' If \code{NA}, the program will
#' use sparse matrix exponentiation for transition matrices above rank 128 (size 128x128).  NOTE: Sparse matrix exponentiation seems to give correlated, but 
#' not exact, results, and these errors may accumulate.  Presumably the problems become less with larger matrices, but I have not explored this in detail.
#' @param use_detection_model If TRUE, use the detection model (with parameters mf, dp, and fdp) and counts of detections and counts of taphonomic controls to 
#' calculate the \code{tip_condlikes_of_data_on_each_state}.
#' @param print_optim If TRUE (default), print the optimization steps as ML estimation progresses.
#' @param tmpwd The working directory in which the input and output files will be placed. Default is \code{\link[base]{getwd}}. This is stored 
#' mostly for future reference; users are responsible for manually navigating to the appropriate directory ahead of time, using \code{\link[base]{setwd}}.
#' @param num_cores_to_use If >1, parallel processing will be attempted. \bold{Note:} parallel processing via \code{library (parallel)} will work in Mac command-line
#' R, but not in Mac GUI \code{R.app}.
#' @param cluster_already_open If the user wants to distribute the matrix exponentiation calculations from all the branches across a number of processors/nodes on 
#' a cluster, specify the cluster here.  E.g. \code{cluster_already_open = makeCluster(rep("localhost",num_cores_to_use), type = "SOCK")}.  Note: this will work on 
#' most platforms, including Macs running R from command line, but will NOT work on Macs running the R GUI \code{R.app}, because parallel processing functions like
#' \code{MakeCluster} from e.g. \code{library(parallel)} for some reason crash R.app.  The program runs a check for R.app and will just run on 1 node if found. 
#' @param use_optimx If TRUE, use \code{optimx} rather that \code{optim}.
#' @param return_condlikes_table If \code{TRUE}, return the table of ALL conditional likelihood results, including at branch subsections
#' (only some should be used in calculating the final log-likelihood of the geography range data on the tree!)
#' @param calc_TTL_loglike_from_condlikes_table If TRUE, force making of the condlikes table, and use it to calculate the log-likelihood
#' (default=TRUE; matches LAGRANGE).
#' @param calc_ancprobs If \code{TRUE} (default), calculate and return the necessary pieces (uppass and downpass probs) for ancestral states.
#' @param fixnode If the state at a particular node is going to be fixed (e.g. for ML marginal ancestral states), give the node number.
#' @param fixlikes The state likelihoods to be used at the fixed node.  I.e. 1 for the fixed state, and 0 for the others.
#' @param speedup If \code{TRUE} (default), set the maximum number of iterations to \code{itnmax=50*(number of free parameters)}, instead of the
#' \code{optimx} default, 250.  Also set \code{optimx} \code{reltol} parameter to 0.001 (instead of the default, ~1e-8).
#' @return \code{inputs} Inputs for ML search.
#' @export
#' @seealso \code{\link{readfiles_BioGeoBEARS_run}}, \code{\link[BioGeoBEARS]{define_BioGeoBEARS_model_object}}, \code{\link[base]{setwd}}, \code{\link[base]{getwd}}
#' @note Go BEARS!
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @bibliography /Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_refs.bib
#'   @cite Matzke_2012_IBS
#' @examples
#' test=1
#' 
define_BioGeoBEARS_run <- function(abbr="default", description="defaults", BioGeoBEARS_model_object=define_BioGeoBEARS_model_object(), trfn="Psychotria_5.2.newick", geogfn="Psychotria_geog.data", timesfn=NA, distsfn=NA, dispersal_multipliers_fn=NA, area_of_areas_fn=NA, areas_allowed_fn=NA, detects_fn=NA, controls_fn=NA, max_range_size=NA, states_list=NULL, force_sparse=FALSE, use_detection_model=FALSE, print_optim=TRUE, num_cores_to_use=NA, cluster_already_open=FALSE, use_optimx=TRUE, return_condlikes_table=FALSE, calc_TTL_loglike_from_condlikes_table=TRUE, calc_ancprobs=TRUE, fixnode=NULL, fixlikes=NULL, speedup=TRUE, tmpwd=getwd())
	{
	inputs = list()
	
	inputs$abbr = abbr
	inputs$description = description
	inputs$BioGeoBEARS_model_object = BioGeoBEARS_model_object
	inputs$trfn = trfn
	inputs$geogfn = geogfn
	inputs$timesfn = timesfn
	inputs$distsfn = distsfn										# distance between areas, for dispersal ~ dist^x
	inputs$dispersal_multipliers_fn = dispersal_multipliers_fn		# hard-coded dispersal multiplier (or 0s/1s for constraints)
	inputs$area_of_areas_fn = area_of_areas_fn						# area of each areas (for extinction ~ area^u)
	inputs$areas_allowed_fn = areas_allowed_fn						# if ONLY connected areas are allowed
	inputs$detects_fn = detects_fn									# used only if use_detection_model==TRUE
	inputs$controls_fn = controls_fn								# used only if use_detection_model==TRUE
	inputs$max_range_size = max_range_size
	inputs$states_list = states_list
	inputs$force_sparse = force_sparse
	inputs$use_detection_model = use_detection_model
	inputs$print_optim = print_optim
	inputs$wd = tmpwd												# Store the working directory you are in
	inputs$num_cores_to_use = num_cores_to_use
	inputs$cluster_already_open = cluster_already_open
	inputs$use_optimx = use_optimx
	inputs$return_condlikes_table = return_condlikes_table
	inputs$calc_TTL_loglike_from_condlikes_table = calc_TTL_loglike_from_condlikes_table
	inputs$calc_ancprobs = calc_ancprobs
	inputs$fixnode = fixnode
	inputs$fixlikes = fixlikes
	inputs$speedup = speedup

	
	return(inputs)
	}


#######################################################
# BioGeoBEARS_run
#######################################################
#' An object of class BioGeoBEARS_run holding the model inputs
#'
#'
#'@section Slots: 
#'  \describe{
#'    \item{\code{list}:}{List of class \code{"list"}, containing inputs list from define_BioGeoBEARS_run}
#'  }
#'
#' @note Go BEARS!
#' @name BioGeoBEARS_run 
#' @rdname BioGeoBEARS_run-class
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @seealso \code{\link{define_tipranges_object}}, \code{\link{getareas_from_tipranges_object}},
#' \code{\link[cladoRcpp]{areas_list_to_states_list_old}}, \code{\link{areas_list_to_states_list_new}},
#' \code{\link{tipranges_to_tip_condlikes_of_data_on_each_state}}
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @bibliography /Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_refs.bib
#'   @cite Matzke_2012_IBS
#' @examples
#' test=1
#'
setClass(Class="BioGeoBEARS_run", representation=representation(inputs="list"), prototype=define_BioGeoBEARS_run())



# dispersal_multipliers file is just a list of distance matrices, separated by blank lines,
# from youngest to oldest
#######################################################
# read_dispersal_multipliers_fn
#######################################################
#' Read in the hard-coded dispersal multipliers from file
#' 
#' dispersal_multipliers file is just a list of distance matrices, separated by blank lines,
#' from youngest to oldest
#' 
#' @param inputs The inputs list
#' @param dispersal_multipliers_fn The dispersal multipliers filename.
#' @return \code{list_of_dispersal_multipliers_mats} A list object
#' @export
#' @seealso \code{\link[stats]{convolve}}
#' @note Go BEARS!
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @bibliography /Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_refs.bib
#'   @cite Matzke_2012_IBS
#' @examples
#' test=1
read_dispersal_multipliers_fn <- function(inputs=NULL, dispersal_multipliers_fn=NULL)
	{
	defaults='
	dispersal_multipliers_fn = "/Dropbox/_njm/__packages/BioGeoBEARS_setup/inst/extdata/examples/Psychotria_dists_stratified/Hawaii_KOMH_dispersal_multipliers.txt"
	'

	if (!is.null(inputs))
		{
		dispersal_multipliers_fn = inputs$dispersal_multipliers_fn
		}
	if (!is.null(dispersal_multipliers_fn))
		{
		dispersal_multipliers_fn = dispersal_multipliers_fn
		}
	
	
	tmplines = readLines(dispersal_multipliers_fn)
	list_of_dispersal_multipliers_mats = list()
	lnum = 1
	
	newmat = TRUE
	for (i in 1:length(tmplines))
		{

		if (tmplines[i] == "END")
			{
			return(list_of_dispersal_multipliers_mats)
			}
		
		if (tmplines[i] == "")
			{
			# You've hit the end of a matrix,
			# increment list and add
			tmpmat = as.matrix(tmprows)
			tmpmat = adf2(tmpmat)
			names(tmpmat) = nameslist

			list_of_dispersal_multipliers_mats[[lnum]] = tmpmat
			lnum = lnum + 1
			
			newmat = TRUE
			} else {
			if (newmat == TRUE)
				{
				nameslist = strsplit(tmplines[i], split="\t")[[1]]
				newmat = FALSE
				tmprows = NULL
				} else {
				tmprow = as.numeric(strsplit(tmplines[i], split="\t")[[1]])
				tmprows = rbind(tmprows, tmprow)
				}
			}
		}
	list_of_dispersal_multipliers_mats
	return(list_of_dispersal_multipliers_mats)
	}




# Timeperiods file is just a list of times, 1 per line, from
# youngest to oldest
#######################################################
# read_times_fn
#######################################################
#' Read in the stratification time breakpoints
#' 
#' The timeperiods file is just a list of times, 1 per line, from
#' youngest to oldest.
#' 
#' @param inputs The inputs list
#' @param timesfn The times filename.
#' @return \code{timeperiods} A list object
#' @export
#' @seealso \code{\link[stats]{convolve}}
#' @note Go BEARS!
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @bibliography /Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_refs.bib
#'   @cite Matzke_2012_IBS
#' @examples
#' test=1
read_times_fn <- function(inputs=NULL, timesfn=NULL)
	{
	defaults='
	timesfn = "/Dropbox/_njm/__packages/BioGeoBEARS_setup/inst/extdata/examples/Psychotria_dists_stratified/Hawaii_timeperiods.txt"
	'
	
	if (!is.null(inputs))
		{
		timesfn = inputs$timesfn
		}
	if (!is.null(timesfn))
		{
		timesfn = timesfn
		}
	
	timeperiods = as.numeric(readLines(timesfn))
	
	return(timeperiods)
	}




# Distances file is just a list of distance matrices, separated by blank lines,
# from youngest to oldest
#######################################################
# read_distances_fn
#######################################################
#' Read in the distances by time
#' 
#' Distances file is just a list of distance matrices, separated by blank lines,
#' from youngest to oldest.
#' 
#' @param inputs The inputs list
#' @param distsfn The distances filename.
#' @return \code{list_of_distances_mats} A list object
#' @export
#' @seealso \code{\link[stats]{convolve}}
#' @note Go BEARS!
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @bibliography /Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_refs.bib
#'   @cite Matzke_2012_IBS
#' @examples
#' test=1
read_distances_fn <- function(inputs=NULL, distsfn=NULL)
	{
	defaults='
	distsfn = "/Dropbox/_njm/__packages/BioGeoBEARS_setup/inst/extdata/examples/Psychotria_dists_stratified/Hawaii_KOMH_distances.txt"
	'

	if (!is.null(inputs))
		{
		distsfn = inputs$distsfn
		}
	if (!is.null(distsfn))
		{
		distsfn = distsfn
		}
	
	
	tmplines = readLines(distsfn)
	list_of_distances_mats = list()
	lnum = 1
	
	newmat = TRUE
	for (i in 1:length(tmplines))
		{
		print(tmplines[i])
		
		if (tmplines[i] == "END")
			{
			return(list_of_distances_mats)
			}
		
		if (tmplines[i] == "")
			{
			# You've hit the end of a matrix,
			# increment list and add
			tmpmat = as.matrix(tmprows)
			tmpmat = adf2(tmpmat)
			names(tmpmat) = nameslist

			# Correct distances if any are <= 0
			# (to ensure consistent behavior of the exponent, i.e. dist ^ -1*(x_exponent)
			tmpmat = as.matrix(tmpmat)
			diag(tmpmat) = NA
			minval = min(tmpmat, na.rm=TRUE)
 			if (minval <= 0)
 				{
 				stop("\n\nERROR: Minimum distance between regions must be > 0")
 				}
			
			list_of_distances_mats[[lnum]] = tmpmat
			lnum = lnum + 1
			
			newmat = TRUE
			} else {
			if (newmat == TRUE)
				{
				nameslist = strsplit(tmplines[i], split="\t")[[1]]
				newmat = FALSE
				tmprows = NULL
				} else {
				tmprow = as.numeric(strsplit(tmplines[i], split="\t")[[1]])
				tmprows = rbind(tmprows, tmprow)
				}
			}
		}
	list_of_distances_mats
	return(list_of_distances_mats)
	}


# area_areas file is just a list of distance matrices, separated by blank lines,
# from youngest to oldest

#######################################################
# read_area_of_areas_fn
#######################################################
#' Read in the area areas by time
#' 
#' area_areas file is just a list of distance matrices, separated by blank lines,
#' from youngest to oldest.
#' 
#' @param inputs The inputs list
#' @param area_of_areas_fn The area-of-areas filename.
#' @return \code{list_of_area_areas_mats} A list object
#' @export
#' @seealso \code{\link[stats]{convolve}}
#' @note Go BEARS!
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @bibliography /Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_refs.bib
#'   @cite Matzke_2012_IBS
#' @examples
#' test=1
read_area_of_areas_fn <- function(inputs=NULL, area_of_areas_fn=NULL)
	{
	defaults='
	area_areas_fn = "/Dropbox/_njm/__packages/BioGeoBEARS_setup/inst/extdata/examples/Psychotria_dists_stratified/Hawaii_KOMH_area_of_areas.txt"
	'

	if (!is.null(inputs))
		{
		area_of_areas_fn = inputs$area_of_areas_fn
		}
	if (!is.null(area_of_areas_fn))
		{
		area_of_areas_fn = area_of_areas_fn
		}
	
	
	tmplines = readLines(area_of_areas_fn)
	list_of_area_areas_mats = list()
	lnum = 1
	
	newmat = TRUE
	for (i in 1:length(tmplines))
		{
		if (tmplines[i] == "END")
			{
			return(list_of_area_areas_mats)
			}
		
		if (tmplines[i] == "")
			{
			# You've hit the end of a matrix,
			# increment list and add
			tmpmat = as.matrix(tmprows)
			tmpmat = adf2(tmpmat)
			names(tmpmat) = nameslist

			# Correct distances if any are <= 0
			# (to ensure consistent behavior of the exponent, i.e. dist ^ -1*(x_exponent)
			tmpmat = as.matrix(tmpmat)
			minval = min(tmpmat, na.rm=TRUE)
 			if (minval <= 0)
 				{
 				stop("\n\nERROR: Minimum area of a region must be > 0")
 				}


			list_of_area_areas_mats[[lnum]] = tmpmat
			lnum = lnum + 1
			
			newmat = TRUE
			} else {
			if (newmat == TRUE)
				{
				nameslist = strsplit(tmplines[i], split="\t")[[1]]
				newmat = FALSE
				tmprows = NULL
				} else {
				tmprow = as.numeric(strsplit(tmplines[i], split="\t")[[1]])
				tmprows = rbind(tmprows, tmprow)
				}
			}
		}
	list_of_area_areas_mats
	return(list_of_area_areas_mats)
	}





# areas_allowed file is just a list of distance matrices, separated by blank lines,
# from youngest to oldest

#######################################################
# read_areas_allowed_fn
#######################################################
#' Read in the area areas by time
#' 
#' areas_allowed file is just a list of 1/0 matrices, separated by blank lines,
#' from youngest to oldest. 1s represent allowed combinations of areas
#' 
#' @param inputs The inputs list
#' @param areas_allowed_fn The areas-allowed filename.
#' @return \code{list_of_areas_allowed_mats} A list object
#' @export
#' @seealso \code{\link[stats]{convolve}}
#' @note Go BEARS!
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @bibliography /Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_refs.bib
#'   @cite Matzke_2012_IBS
#' @examples
#' test=1
read_areas_allowed_fn <- function(inputs=NULL, areas_allowed_fn=NULL)
	{
	defaults='
	areas_allowed_fn = "/Dropbox/_njm/__packages/BioGeoBEARS_setup/inst/extdata/examples/Psychotria_dists_stratified/Hawaii_KOMH_areas_allowed.txt"
	'

	if (!is.null(inputs))
		{
		areas_allowed_fn = inputs$areas_allowed_fn
		}
	if (!is.null(areas_allowed_fn))
		{
		areas_allowed_fn = areas_allowed_fn
		}
	
	
	tmplines = readLines(areas_allowed_fn)
	list_of_areas_allowed_mats = list()
	lnum = 1
	
	newmat = TRUE
	for (i in 1:length(tmplines))
		{
		if (tmplines[i] == "END")
			{
			return(list_of_areas_allowed_mats)
			}
			
		if (tmplines[i] == "")
			{
			# You've hit the end of a matrix,
			# increment list and add
			tmpmat = as.matrix(tmprows)
			tmpmat = adf2(tmpmat)
			names(tmpmat) = nameslist

			list_of_areas_allowed_mats[[lnum]] = tmpmat
			lnum = lnum + 1
			
			newmat = TRUE
			} else {
			if (newmat == TRUE)
				{
				nameslist = strsplit(tmplines[i], split="\t")[[1]]
				newmat = FALSE
				tmprows = NULL
				} else {
				tmprow = as.numeric(strsplit(tmplines[i], split="\t")[[1]])
				tmprows = rbind(tmprows, tmprow)
				}
			}
		}
	list_of_areas_allowed_mats
	return(list_of_areas_allowed_mats)
	}


#######################################################
# Cut down the states list according to areas_allowed_mat
#######################################################
#######################################################
# prune_states_list
#######################################################
#' Cut down the states list according to areas_allowed_mat
#' 
#' Go through a list of states.  Remove states that represent areas disallowed according
#' to areas_allowed_mat.  It is assumed (crucial!) that the areas in the \code{states_list}, 
#' and in the \code{areas_allowed_mat}, have the same order.
#' 
#' @param states_list_0based_index A states_list, 0-based, e.g. from \code{\link[cladoRcpp]{rcpp_areas_list_to_states_list}}
#' @param areas_allowed_mat The matrix of area combinations allowed (represented by 1s)
#' @return \code{states_list_0based_index_new} A 0-based list of allowed states/ranges
#' @export
#' @seealso \code{\link[cladoRcpp]{rcpp_areas_list_to_states_list}}
#' @note Go BEARS!
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @bibliography /Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_refs.bib
#'   @cite Matzke_2012_IBS
#' @examples
#' test=1
prune_states_list <- function(states_list_0based_index, areas_allowed_mat)
	{
	defaults='
	areas_allowed_fn = "/Dropbox/_njm/__packages/BioGeoBEARS_setup/inst/extdata/examples/Psychotria_dists_stratified/Hawaii_KOMH_areas_allowed.txt"
	tmpinputs = NULL
	tmpinputs$areas_allowed_fn = areas_allowed_fn
	list_of_areas_allowed_mats = read_areas_allowed_fn(inputs=tmpinputs)
	list_of_areas_allowed_mats
	areas_allowed_mat = list_of_areas_allowed_mats[[1]]
	areas_allowed_mat
	
	areas = c("K", "O", "M", "H")
	states_list_0based_index = rcpp_areas_list_to_states_list(areas)
	states_list_0based_index
	'
	
	
	# First, eliminate the null range (represented by NA) if present
	nums = 1:length(states_list_0based_index)
	NA_TF = is.na(states_list_0based_index)
	nonNA_range_nums = nums[NA_TF == FALSE]
	
	mini_func <- function(areas_0based, areas_allowed_mat)
		{
		areas_1based = areas_0based + 1
		
		tmpmat = areas_allowed_mat[areas_1based, areas_1based]
		
		if (is.null(dim(tmpmat)))
			{
			if (tmpmat == 1)
				{
				return(TRUE)
				} else {
				return(FALSE)
				}
			} else {
			if (sum(tmpmat[1,]) == ncol(tmpmat))
				{
				return(TRUE)
				} else {
				return(FALSE)				
				}
			}
		}


	match_TF = sapply(X=states_list_0based_index[nonNA_range_nums], FUN=mini_func, areas_allowed_mat=areas_allowed_mat)
	match_TF
	
	if (sum(NA_TF) == 1)
		{
		match_TF = c(TRUE, match_TF)
		} else {
		pass = 1
		# match_TF = match_TF
		}
	
	states_list_0based_index_new = states_list_0based_index[match_TF]


	# Print to show it
	areas_allowed_mat
	states_list_0based_index_new

	return(states_list_0based_index_new)
	}




#######################################################
# readfiles_BioGeoBEARS_run
#######################################################
#' Read in the extra input files, if any
#' 
#' This function reads input files for stratification, constraints, and detection, i.e.,
#' everything except the tree and geography files. E.g., \code{areas_allowed_fn} file is 
#' just a list of distance matrices, separated by blank lines,
#' from youngest to oldest.
#' 
#' @param inputs The inputs list
#' @return \code{inputs} The modified inputs list
#' @export
#' @seealso \code{\link{define_BioGeoBEARS_run}}, \code{\link{read_times_fn}}, 
#' \code{\link{read_distances_fn}}, \code{\link{read_dispersal_multipliers_fn}}, 
#' \code{\link{read_area_of_areas_fn}}, \code{\link{read_areas_allowed_fn}}, 
#' \code{\link{read_detections}}, \code{\link{read_controls}}
#' @note Go BEARS!
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @bibliography /Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_refs.bib
#'   @cite Matzke_2012_IBS
#' @examples
#' test=1
readfiles_BioGeoBEARS_run <- function(inputs)
	{
	# Read in all the specified input files
	if (is.character(inputs$timesfn))
		{
		inputs$timeperiods = read_times_fn(inputs)
		}
	
	if (is.character(inputs$distsfn))
		{
		inputs$list_of_distances_mats = read_distances_fn(inputs)
		}

	if (is.character(inputs$dispersal_multipliers_fn))
		{
		inputs$list_of_dispersal_multipliers_mats = read_dispersal_multipliers_fn(inputs)
		}	
		
	if (is.character(inputs$area_of_areas_fn))
		{
		inputs$list_of_area_of_areas = read_area_of_areas_fn(inputs)
		}	
				
	if (is.character(inputs$areas_allowed_fn))
		{
		inputs$list_of_areas_allowed_mats = read_areas_allowed_fn(inputs)
		}
	
	
	# If the detection model is turned on:
	if (inputs$use_detection_model == TRUE)
		{
		phy = read.tree(inputs$trfn)
		
		if (is.character(inputs$detects_fn))
			{
			inputs$detects_df = read_detections(inputs$detects_fn, OTUnames=NULL, areanames=NULL, tmpskip=0, phy=phy)
			}
		if (is.character(inputs$detects_fn))
			{
			inputs$controls_df = read_controls(inputs$controls_fn, OTUnames=NULL, areanames=NULL, tmpskip=0, phy=phy)
			}
		}
		
	return(inputs)
	}












#######################################################
# check_BioGeoBEARS_run
#######################################################
#' Check the inputs for various problems
#' 
#' Numerous subtle mistakes in the input files for a BioGeoBEARS run can cause the run 
#' to crash.  As I come across these, I am putting in error checks for them.
#'
#' Some include: 
#' 
#' - Trees with negative branchlengths (as produced sometimes by e.g. BEAST MCC consensus trees
#' (MCC = majority clade consensus).  These trees are always fully resolved, but the median node
#' heights can sometimes be behind the node position in the tree.  Users should fix this manually,
#' pathological results or crashes will result otherwise.
#'
#' - Trees with polytomies.  \code{BioGeoBEARS} (and \code{LAGRANGE}, and \code{DIVA}) assume a model where lineages 
#' bifurcate, and never multifurcate.  Users can convert multifurcating trees to bifurcating trees
#' with \code{APE}'s \code{\link[ape]{multi2di}} (they will have to decide what branchlength to use for the new branches; it should be small, 
#' but bigger than the minimum branchlength used to identify fossils hooks (as hooks are considered to be
#' anagenetic members of a lineage, and thus are connected to the tree without a cladogenesis event invoked).  
#' Users can then run their analysis several times on differently-resolved trees.  
#' 
#' NOTE: After the above correction, users may wish to correct the tip branchlengths
#' (or make some other adjustment) so that all the tips are at age 0 my before present, as in an ultrametric tree.
#' (However, note that trees with fossil tips are not ultrametric according to APE's \code{\link[ape]{is.ultrametric}}, 
#' even though they are time-scaled.  To make living (nonfossil) tips line up to zero, see \code{\link{average_tr_tips}} or the 
#' (different!) .  
#' They should be used with care.  Alternately,
#' a small amount of error in tip heights will make very little difference in the likelihood calculations (e.g. if some
#' tips are 0.1 my too high, but the tree spans 200 my), which would be an argument for not requiring perfection after the
#' (crucial) corrections of negative branchlengths, zero-branchlengths, and polytomies have been made.
#'
#' - Check for an absurdly large number of states.  I've set the limit at 500 (it starts getting slow around 200), users can
#' override with \code{allow_huge_ranges=TRUE}.
#' 
#' - Geography tipranges files should have same number of area labels as columns.
#' 
#' - Geography tipranges files should have same number of taxa as the tree, and with the (exact!!) same names.  This can be 
#' the source of many headaches, as different programs (\code{Mesquite}, etc.) treat spaces, periods, etc. in different ways, and 
#' re-write tipnames with/without quotes, underscores, etc.; and in my experience, my biologist colleagues find it very difficult
#' to guarantee that the tipnames in their tree and their data tables will match exactly.  The SAFEST approach is to NEVER use
#' these characters in tipnames or table names: space, comma, semicolon, dashes, parentheses, brackets, apostrophes or quote marks, or periods.  
#' Use ONLY letters, numbers, and underscores (_).  When plotting trees, \code{APE} automatically reads underscores as spaces, which
#' is nice for display.
#'
#' - There must be the same or more timeperiods than the other stratified items (distances matrices, etc.)
#' 
#' @param inputs The inputs list
#' @param allow_huge_ranges Default FALSE, which will stop the run if there are more than 500 states. If TRUE, this will just print a warning, and continue, at which point
#' you will wait for weeks or forever for the analysis to finish.  See \code{\link[cladoRcpp]{cladoRcpp}}'s \code{\link[cladoRcpp]{numstates_from_numareas}} function 
#' to calculate the size of the state space ahead of time, and links therein to see how the number of states scales with areas (2^number of areas, in an 
#' unconstrained analysis), how the size of the transition matrix you will be exponentiating scales (size = numstates * numstates), and the size of the 
#' ancestor/left-descendant/right-descendant cladogenesis matrix scales (numstates * numstates * numstates).  At 500 states, this is 500^3 = 125,000,000 
#' combinations of ancestor/left/right to check at every cladogenesis event, although \code{\link[cladoRcpp]{cladoRcpp}}'s tricks speed this up substantially.
#' @return \code{TRUE} if no errors found; otherwise a stop() is called.
#' @export
#' @seealso \code{\link{average_tr_tips}}, 
#' @note Go BEARS!
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @bibliography /Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_refs.bib
#'   @cite Matzke_2012_IBS
#' @examples
#' test=1
check_BioGeoBEARS_run <- function(inputs, allow_huge_ranges=FALSE)
	{
	# Load the tree
	tmptr = read.tree(inputs$trfn)
	
	# Make sure it exists
	if (exists("tmptr") == FALSE)
		{
		stoptxt = paste("\nFATAL ERROR in inputs: no readable Newick tree at:\n", inputs$trfn, "\n", sep="")
		cat(stoptxt)
		stop(stoptxt)
		}

	# Check for negative branchlengths
	blren_equal_below_0_TF = tmptr$edge.length <= 0
	if (sum(blren_equal_below_0_TF) > 0)
		{
		tmptxt = paste(tmptr$edge.length[blren_equal_below_0_TF], collapse=", ", sep="")
		stoptxt = paste("\nFATAL ERROR in average_tr_tips(): the output tree has branchlengths <= 0:\n", tmptxt, 
		"\nThis can sometimes happen in e.g. MCC (majority clade consensus) trees output by BEAST's TreeAnnotator.\nYou must fix the Newick file. See ?check_BioGeoBEARS_run for comments.\n", sep="")
		cat(stoptxt)
		stop(stoptxt)
		}

	# Check for polytomies
	if (is.binary.tree(tmptr) == FALSE)
		{
		stoptxt = paste("\nFATAL ERROR in inputs: tree not bifurcating, i.e. is.binary.tree(tmptr) returns FALSE.\n", 
		"\nYou must fix the Newick file. APE's multi2di() function is an option.  See ?check_BioGeoBEARS_run for comments.\n", sep="")
		cat(stoptxt)
		stop(stoptxt)
		}
	
	
	# Load the tipranges file
	tipranges = getranges_from_LagrangePHYLIP(inputs$geogfn)
	# Make sure it exists
	if (exists("tipranges") == FALSE)
		{
		stoptxt = paste("\nFATAL ERROR in inputs: no readable tipranges text file at:\n", inputs$geogfn, "\n", sep="")
		cat(stoptxt)
		stop(stoptxt)
		}

	# Read in all the specified input files
	tipranges_colnames_TF = is.na(colnames(tipranges@df))
	if (sum(tipranges_colnames_TF) > 0)
		{
		catstr = paste(colnames(tipranges@df), collapse=" ", sep="")
		stoptxt = paste("\nFATAL ERROR in inputs: tipranges area names (columns) have NAs:\n", catstr, 
		"\nThis probably means your input tipranges file is missing ", sum(tipranges_colnames_TF), " areaname(s).\n", sep="")
		cat(stoptxt)
		moref(inputs$geogfn)
		stop(stoptxt)
		}
	
	# Check that tipranges taxa names match tree taxa names
	tipnames = sort(tmptr$tip.label)
	geogtaxa = sort(rownames(tipranges@df))
	
	tipnames_in_geogfile_TF = tipnames %in% geogtaxa
	if ((sum(tipnames_in_geogfile_TF) == length(tipnames_in_geogfile_TF)) == FALSE)
		{
		tipnames_NOT_in_geogfile_TF = tipnames_in_geogfile_TF == FALSE
		numtips_not_in_geogfn = sum(tipnames_NOT_in_geogfile_TF)
		
		tips_not_in_geogfile = tipnames[tipnames_NOT_in_geogfile_TF]
		tips_not_in_geogfile_txt = paste(tips_not_in_geogfile, collapse=", ", sep="")
		
		if (length(geogtaxa) == length(tipnames))
			{
			match_TF = geogtaxa == tipnames
			match_TF_txt = paste(match_TF, collapse=" ", sep="")
			} else {
			match_TF_txt = paste("Cannot display TRUE/FALSE, as length(tipnames)=", length(tipnames), " & length(geogtaxa)=", length(geogtaxa), ".\n", sep="")
			}
		stoptxt = paste("\nFATAL ERROR in inputs: ", numtips_not_in_geogfn, " tree tips are not in the geographic ranges file.  These are:\n",
		tips_not_in_geogfile_txt, "\n",
		"TRUE/FALSE between sort(tmptr$tip.label)==sort(rownames(tipranges@df)):\n",
		match_TF_txt, "\n", sep="")
		cat(stoptxt)
		stop(stoptxt)
		}
	

	# Check that tree taxa names match tipranges taxa names
	tipnames = sort(tmptr$tip.label)
	geogtaxa = sort(rownames(tipranges@df))
	
	geogtaxa_in_treetips_TF = geogtaxa %in% tipnames
	if ((sum(geogtaxa_in_treetips_TF) == length(geogtaxa_in_treetips_TF)) == FALSE)
		{
		geogtaxa_NOT_in_treetips_TF = geogtaxa_in_treetips_TF == FALSE
		num_geogtaxa_not_in_treetips = sum(geogtaxa_NOT_in_treetips_TF)
		
		geogtaxa_not_in_treetips = geogtaxa(geogtaxa_NOT_in_treetips_TF)
		geogtaxa_not_in_treetips_txt = paste(geogtaxa_not_in_treetips, collapse=", ", sep="")
		
		if (length(geogtaxa) == length(tipnames))
			{
			match_TF = geogtaxa == tipnames
			match_TF_txt = paste(match_TF, collapse=" ", sep="")
			} else {
			match_TF_txt = paste("Cannot display TRUE/FALSE, as length(tipnames)=", length(tipnames), " & length(geogtaxa)=", length(geogtaxa), ".\n", sep="")
			}
		stoptxt = paste("\nFATAL ERROR in inputs: ", num_geogtaxa_not_in_treetips, " taxa in the geography file are not in the tree tips.  These are:\n",
		geogtaxa_not_in_treetips_txt, "\n",
		"TRUE/FALSE between sort(tmptr$tip.label)==sort(rownames(tipranges@df)):\n",
		match_TF_txt, "\n", sep="")
		cat(stoptxt)
		stop(stoptxt)
		}
	

	# Check that no tips have larger ranges than allowed by max, if there is a inputs$max_range_size specified
	if (!is.na(inputs$max_range_size))
		{
		max_tipsize = max(rowSums(tipranges@df))
		if (max_tipsize > inputs$max_range_size)
			{
			tips_too_big_TF = rowSums(tipranges@df) > inputs$max_range_size
			tipranges_too_big = tipranges@df[tips_too_big_TF, ]
			
			stoptxt = paste("\nFATAL ERROR in inputs: max_tipsize=", max_tipsize, " > inputs$max_range_size=", inputs$max_range_size, ". Examples:\n", sep="")
			for (i in 1:sum(tips_too_big_TF))
				{
				cat(tipranges_too_big[i,])
				cat("\n")
				}
			stop(stoptxt)
			}
		}



	# Check for OTUs with ZERO (0) ranges coded
	rangesize_is_ZERO_TF = (rowSums(tipranges@df)) == 0
	ranges_have_NAs_TF = is.na(unlist(tipranges@df))
	if ( (sum(rangesize_is_ZERO_TF) > 0) || (sum(ranges_have_NAs_TF) > 0) )
		{
		stoptxt = paste("\nFATAL ERROR in inputs: your tipranges file has NAs and/or tips with rangesize 0!\n See tipranges printed below:\n\n", sep="")
		cat(stoptxt)
		print(tipranges@df)
		stop(stoptxt)

		}


	# Check for absurdly huge states_list
	tmp_numareas = ncol(tipranges@df)
	if (!is.na(inputs$max_range_size))
		{
		max_tipsize = max(rowSums(tipranges@df))
		} else {
		max_tipsize = tmp_numareas
		}
	tmp_numstates1 = length(inputs$states_list)
	if (length(tmp_numstates1) == 0)
		{
		tmp_numstates1 = numstates_from_numareas(numareas=tmp_numareas, maxareas=max_tipsize, include_null_range=TRUE)
		}
	if (tmp_numstates1 > 500)
		{
		stoptxt = paste("\nYour setup has ", tmp_numstates1, " states (# states = # combinations of geographic ranges). This will be veerry slow. \n",
		"In check_BioGeoBEARS_run(), set allow_huge_ranges=TRUE to proceed, but you probably shouldn't bother.  See e.g. ?numstates_from_numareas.\n", sep="")
		cat(stoptxt)
		if (allow_huge_ranges == TRUE)
			{
			pass=1
			} else {
			stop(stoptxt)
			}
		}
	
	
	# If there is a times file, it should have already been read in 
	# (via readfiles_BioGeoBEARS_run())
	if (is.character(inputs$timesfn))
		{
		# Extract the times
		timevals = inputs$timeperiods
	
		if (is.character(inputs$distsfn))
			{
			if (length(inputs$list_of_distances_mats) < length(inputs$timeperiods))
				{
				stoptxt = paste("\nFATAL ERROR: fewer distances matrices than timeperiods.\n",
				"length(inputs$timeperiods)=", length(inputs$timeperiods), "\n",
				"length(inputs$list_of_distances_mats)=", length(inputs$list_of_distances_mats), "\n",
				sep="")
				cat(stoptxt)
				stop(stoptxt)
				}
			}

		if (is.character(inputs$dispersal_multipliers_fn))
			{
			if (length(inputs$list_of_dispersal_multipliers_mats) < length(inputs$timeperiods))
				{
				stoptxt = paste("\nFATAL ERROR: fewer manual dispersal multipliers matrices than timeperiods.\n",
				"length(inputs$timeperiods)=", length(inputs$timeperiods), "\n",
				"length(inputs$list_of_dispersal_multipliers_mats)=", length(inputs$list_of_dispersal_multipliers_mats), "\n",
				sep="")
				cat(stoptxt)
				stop(stoptxt)
				}
			}	
		
		if (is.character(inputs$area_of_areas_fn))
			{
			if (length(inputs$list_of_area_of_areas) < length(inputs$timeperiods))
				{
				stoptxt = paste("\nFATAL ERROR: fewer area-of-areas vectors than timeperiods.\n",
				"length(inputs$timeperiods)=", length(inputs$timeperiods), "\n",
				"length(inputs$list_of_area_of_areas)=", length(inputs$list_of_area_of_areas), "\n",
				sep="")
				cat(stoptxt)
				stop(stoptxt)
				}
			}	
				
		if (is.character(inputs$areas_allowed_fn))
			{
			if (length(inputs$list_of_areas_allowed_mats) < length(inputs$timeperiods))
				{
				stoptxt = paste("\nFATAL ERROR: fewer area-allowed matrices than timeperiods.\n",
				"length(inputs$timeperiods)=", length(inputs$timeperiods), "\n",
				"length(inputs$list_of_areas_allowed_mats)=", length(inputs$list_of_areas_allowed_mats), "\n",
				sep="")
				cat(stoptxt)
				stop(stoptxt)
				}
			}
		}		

	return(TRUE)
	}








#######################################################
# tipranges
#######################################################
#' The tipranges class
#'
#' This class holds geographic range data for each tip in a phylogeny.
#'
#' Geographic range data can be read into a tipranges class object with BioGeoBEARS functions, e.g. \code{define_tipranges_object} or
#' \code{getareas_from_tipranges_object}.
#'
#' Class \code{tipranges} is an extension of the \code{\link{data.frame}} class.  It is used for holding
#' discrete geographic range data for the tips on a phylogeny. Geographic ranges are represented with bit 
#' encoding (0/1) indicating absence or presence in each possible area.
#' 
#' This is just a data.frame with:
#' rows = taxanames\cr
#' columns = area names\cr
#' cells = 0/1 representing empty/occupied\cr
#'
#'@section Slots: 
#'  \describe{
#'    \item{\code{df}:}{Data.frame of class \code{"numeric"}, containing data from df}
#'  }
#'
#' @note Go BEARS!
#' @name tipranges 
#' @rdname tipranges-class
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @seealso \code{\link{define_tipranges_object}}, \code{\link{getareas_from_tipranges_object}},
#' \code{\link[cladoRcpp]{areas_list_to_states_list_old}}, \code{\link{areas_list_to_states_list_new}},
#' \code{\link{tipranges_to_tip_condlikes_of_data_on_each_state}}
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @bibliography /Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_refs.bib
#'   @cite Matzke_2012_IBS
#' @examples
#' tipranges_object = define_tipranges_object()
#' tipranges_object
#'
setClass("tipranges", representation(df="data.frame"),
    contains = "numeric"
)





#######################################################
# define_tipranges_object
#######################################################
#' Define a tipranges class and object
#' 
#' Class \code{tipranges} is an extension of the \code{\link{data.frame}} class.  It is used for holding
#' discrete geographic range data for the tips on a phylogeny. Geographic ranges are represented with bit 
#' encoding (0/1) indicating absence or presence in each possible area.
#' 
#' This is just a data.frame with:
#' rows = taxanames\cr
#' columns = area names\cr
#' cells = 0/1 representing empty/occupied\cr
#' 
#' @param tmpdf The user may input a \code{data.frame} holding the range data, if they like. Default is \code{NULL}, which
#' means the function will produce a temporary \code{data.frame} as an example.
#' @return \code{tipranges_object} The tipranges object, of class \code{tipranges}
#' @export
#' @seealso \code{\link{getareas_from_tipranges_object}}, \code{\link[cladoRcpp]{areas_list_to_states_list_old}},
#' \code{\link{areas_list_to_states_list_new}}, \code{\link{tipranges_to_tip_condlikes_of_data_on_each_state}}
#' @note Go BEARS!
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @bibliography /Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_refs.bib
#'   @cite Matzke_2012_IBS
#' @examples
#' testval=1
#' tipranges_object = define_tipranges_object()
#' tipranges_object
#'
define_tipranges_object <- function(tmpdf=NULL)
	{
	# Define the tipranges class;
	# Now done separately
	#tipranges_class = setClass("tipranges", representation(df="data.frame"))

	# Create an instance of the class
	
	# junk:
	## A class that extends the built-in data type "numeric"
	# setClass("numWithId", representation(id = "character"),
    #     contains = "numeric")
	

	
	# Make a simple default example df if needed
#	if (is.null(tmpdf))
#		{
		# Default tip ranges data
		areanames = c("A", "B", "C")
		tipnames = c("tip1", "tip2", "tip3")
		tip1_geog = c(1, 0, 0)
		tip2_geog = c(0, 1, 0)
		tip3_geog = c(0, 0, 1)
	
		# Make the temporary data frame
		tmpdf2 = cbind(tip1_geog, tip2_geog, tip3_geog)
		tmpdf2 = adf2(data.matrix(tmpdf2))
		names(tmpdf2) = areanames
		row.names(tmpdf2) = tipnames
#		}
	
	if (is.null(tmpdf))
		{
		tipranges_object = new("tipranges", df=tmpdf2)
		} else {
		tipranges_object = new("tipranges", df=tmpdf2)
		tipranges_object@df = tmpdf
		}

	# you can get the dataframe with
	# tipranges_object@df


	return(tipranges_object)
	}

Try the BioGeoBEARS package in your browser

Any scripts or data that you put into this service are public.

BioGeoBEARS documentation built on May 29, 2017, 8:36 p.m.