R/BioGeoBEARS_basics_v1.R

Defines functions sort_list_of_lists_of_numbers put_jcol_after_ecol order_tipranges_by_tr unscale_BGB_params scale_BGB_params process_optim nullsym_to_NA vfunc yfunc sfunc relative_probabilities_of_vicariants relative_probabilities_of_subsets get_probvals calcP_n calcZ_part symbolic_to_relprob_matrix_sp symbolic_cell_to_relprob_cell_sp symbolic_to_Q_matrix_exper symbolic_to_Q_matrix symbolic_cell_to_relprob_cell symbolic_to_P_matrix remove_null_rowcols_from_mat paste_rows_without_zeros meval make_relprob_nummatrix_sp1 make_relprob_txtmatrix_sp1 divide_probs_by_number_of_options_nums divide_probs_by_number_of_options_txt make_relprob_matrix_bi make_spmat_row expand.grid.jc expand.grid.alt size_species_matrix make_relprob_matrix_de make_dispersal_multiplier_matrix read_PHYLIP_data save_tipranges_to_LagrangePHYLIP states_list_indexes_to_areastxt getname tipranges_to_area_strings order_tipranges_by_tree_tips getranges_from_LagrangePHYLIP check_tipranges_for_allQs letter_strings_to_tipranges_df letter_string_to_binary binary_ranges_to_letter_codes binary_range_to_letter_code_list binary_range_to_letter_code_txt states_list_0based_to_ranges_txt_list areas_list_to_states_list_new tipranges_to_tip_condlikes_of_data_on_each_state get_tiplabel_ranges getareanames_from_tipranges_object getareas_from_tipranges_object prune_specimens_to_species normat

# source("/Dropbox/_njm/__packages/BioGeoBEARS_setup/BioGeoBEARS_basics_v1.R")

require("ape")
require("rexpokit")
require("cladoRcpp")




# axisPhylo() with more flexibility in labeling

#######################################################
# axisPhylo2
#######################################################
#' axisPhylo with more flexibility in labeling
#' 
#' Hacking \code{ape}'s \code{axisPhylo()} to make it more flexible.
#' 
#' Note: to have ages ending at, say, 2016 (for an analysis
#' by calendar year), just set minage to "-2016".  
#' This will produce e.g. "2013 2014 2015 2016" as
#' x-axis labels.
#' 
#' @param side The side to plot on (default 1, bottom)
#' @param roundlabels Number of digits to round to, if desired
#' @param minage Starting age, if desired
#' @param ... Additional arguments to standard functions
#' @return nothing
#' @export
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{https://code.google.com/p/lagrange/}
#' @examples
#' testval=1
#' 
axisPhylo2 <- function (side = 1, roundlabels=TRUE, minage=0, ...) 
	{
    lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv)
    if (lastPP$type %in% c("phylogram", "cladogram")) {
        if (lastPP$direction %in% c("rightwards", "leftwards")) {
            x <- pretty(lastPP$xx)
            if (lastPP$direction == "rightwards") 
                maxi <- max(lastPP$xx)
            else {
                maxi <- min(lastPP$xx)
                x <- -x
            }
        }
        else {
            x <- pretty(lastPP$yy)
            if (lastPP$direction == "upwards") 
                maxi <- max(lastPP$yy)
            else {
                maxi <- min(lastPP$yy)
                x <- -x
            }
        }
    }
    
    if (roundlabels == FALSE)
		{
	    label_nums = abs(minage+x)
	    } # END  if (roundlabels == FALSE)
	if (roundlabels == TRUE)
		{
		label_nums = round(abs(minage+x), digits=roundlabels)
		} # END if (roundlabels == TRUE)
    
	# Plot the axis labels
	axis_at = axis(side = side, at = c(maxi - x), labels=label_nums, ...)
	return(axis_at)
	}



#######################################################
# Matrix functions
#' Utility functions to help deal with matrices
#######################################################

#######################################################
# normat
#######################################################
#' Normalize a transition matrix
#' 
#' \code{normat} normalizes a square transition matrix, such that each row sums to 0, and 
#' the diagonal equals the negative of the sum of the rest of the cells in the row.  This 
#' matrix can then be exponentiated by values of \code{t} (time or another measure of branch
#' length) to produce transition probabilities for any given value of \code{t}.
#' 
#' See \cite{FosterIdiots} for a succinct summary of transition matrices and their exponentiation.
#' 
#' @param relative_matrix A square matrix giving the relative probabilities/weights of transitions.
#' @return \code{m} A Q matrix, i.e. normalized transition matrix (Qmat)
#' @export
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{https://code.google.com/p/lagrange/}
#'
#' @examples
#' testval=1
#' 
normat <- function(relative_matrix)
	{
	# make the rows sum to zero (not 1), as in here:
	# http://en.wikipedia.org/wiki/Substitution_model
	m = as.matrix(relative_matrix)
	
	diag(m) = 0
	rowsums = rowSums(m)
	diag(m) = -rowsums
	
	return(m)
	}



#######################################################
# prune_specimens_to_species
#######################################################
#' Take a tree and species names/geography table and produce a pruned tree and tipranges object
#' 
#' This function takes a tree and species names/geography table and produces a pruned tree 
#' and (optionally) a tipranges object.
#' 
#' Often, users will have an phylogeny where the tips/OTUs (operational taxonomic units) are 
#' specimens rather than species.  The analyses done by models like DEC, DEC+J, etc., in programs 
#' like LAGRANGE and BioGeoBEARS, assume as a core part of the model that species might occupy
#' more than one areas.  A phylogeny of specimens, then, would not be an appropriate input 
#' to these programs, as each single specimen can only be found in one region. The exception 
#' would occur when the researcher is confident that each species lives in only one region; in
#' that case, the specimen geography is representative of the species geography.
#' 
#' This function requires a table containing 
#'
#' (1) Column "OTUs": all tipnames in the input tree (often, 
#' original specimen/original OTU names) ); 
#' 
#' (2) Column "species": the corresponding species names;
#' 
#' (3) optionally, the geographic range inhabited by each specimen (column "region").  If 
#' an OTU has more than one geographic range in the original table, these should be 
#' split by "|".
#' 
#' (4) optionally, the specimen to use as the representative of each tree 
#' (marked with "yes")
#' 
#' When the pruning occurs, all tips belonging to the same species are cut, except the first.
#'
#' NOTE: Tips that should be cut because they are outgroups, or because they are geographically 
#' outside of your domain of analysis, should be represented in xls$region by "out_group" or "Out". These
#' will be cut from the final tree/geography table.
#' 
#' @param original_tr The input tree (an \code{\link[ape]{ape}} \code{\link[ape]{phylo}} object).
#' @param xls The input table (a \code{\link[base]{data.frame}})
#' @param group_name The name of the clade in the tree.  For use in plots and output files. Default="default".
#' @param titletxt Additional text for the plots. Default "".
#' @param areas_abbr An optional table, containing the abbreviations (e.g. letters) corresponding to each
#' region in xls$region. Default is NULL, in which case the program imposes A, B, C, D, etc. \code{areas_abbr}
#' must have column headings \code{abbr} and \code{letter}.
#' @param plot_intermediate If TRUE, the starting, ending, and intermediate stages of tree pruning are plotted.
#' @param pick_specimens_to_use If TRUE, use the column xls$specimen_to_use to
#' pick the specimen to represent the species (with "yes").  If the column, 0 "yes", or 
#' >1 "yes" are found, reverts to default behavior (the first specimen is kept)
#' @return The outputs are a \code{\link[base]{list}} with a pruned tree and, optionally, a tipranges object.
#' @export
#' @seealso \code{\link[ape]{drop.tip}}, \code{\link{define_tipranges_object}}, 
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @examples
#' testval=1
#' tipranges_object = define_tipranges_object()
#' tipranges_object
#'
#' areanames = getareas_from_tipranges_object(tipranges_object)
#' areanames
#'
prune_specimens_to_species <- function(original_tr, xls, group_name="default", titletxt="", areas_abbr=NULL,  plot_intermediate=TRUE, pick_specimens_to_use=FALSE)#, inputs="Robjects", outputs="Robjects")
	{
	runjunk='
	group_name="default"; titletxt=""; areas_abbr=NULL;  plot_intermediate=TRUE
	group_name="default"; titletxt=""; areas_abbr=NULL;  plot_intermediate=FALSE
	'
	
	# Check that input table has the correct column headings
	if ("OTUs" %in% names(xls) == FALSE)
		{
		stoptxt = "\nFATAL ERROR: input table 'xls' must have a column named 'OTUs'\n"
		cat(stoptxt)
		stop(stoptxt)
		}
	if ("species" %in% names(xls) == FALSE)
		{
		stoptxt = "\nFATAL ERROR: input table 'xls' must have a column named 'species'\n"
		cat(stoptxt)
		stop(stoptxt)
		}	
	
	#################################################################
	# CLEAN UP NAMES
	# Biologists love Excel but are often very sloppy about making sure their names in the 
	# table match the names in their tree.  This section fixes the more common mistakes.
	#################################################################
	
	# Convert all "_" to " "
	xls$OTUs = gsub(pattern="_", replacement=" ", x=xls$OTUs)
	xls$species = gsub(pattern="_", replacement=" ", x=xls$species)

	# Also remove all "'"
	xls$OTUs = gsub(pattern="'", replacement="", x=xls$OTUs)
	xls$species = gsub(pattern="'", replacement="", x=xls$species)


	# Before converting spaces to underscores, trim all beginning and ending spaces
	#require(gdata)	# for trim
	xls$OTUs = trim(xls$OTUs)
	xls$species = trim(xls$species)
	
	# Also in regions. Jesus, MVD!  What kind of crazy person has some cells 
	# say "Out" and others say "Out " ??????!???!?!?!?!?!
	if ("region" %in% names(xls) == TRUE)
		{
		xls$region = trim(xls$region)
		}

	# Convert all spaces to "_"
	xls$OTUs = gsub(pattern=" ", replacement="_", x=xls$OTUs)
	xls$species = gsub(pattern=" ", replacement="_", x=xls$species)

	# Load the tree file
	#nexfn = nexus_trfns[i]
	#original_tr = read.nexus(nexfn)

	# Cut trailing spaces and underscores from tip.labels
	# Also cut "'", which e.g. Mesquite inserts whenever an OTU name has spaces, but which
	# e.g. MrBayes won't read. (NEVER USE SPACES IN COMPUTER STUFF, PEOPLE!!)
	tmp_tipnames = original_tr$tip.label
	tmp_tipnames = gsub(pattern="'", replacement="", x=tmp_tipnames)
	tmp_tipnames = gsub(pattern="_", replacement=" ", x=tmp_tipnames)
	tmp_tipnames = trim(tmp_tipnames)
	tmp_tipnames = gsub(pattern=" ", replacement="_", x=tmp_tipnames)
	original_tr$tip.label = tmp_tipnames
	
	
	#######################################################
	# Now, go through the unique species in the XLS file,
	# for each, keep just one OTU, remove the rest
	#######################################################
	tr = original_tr	# copy the original tree

	if (plot_intermediate == TRUE)
		{
		plot(tr, cex=0.5)
		axisPhylo()
		#titletxt = paste(group_name, ": BEAST MCC tree", "\nNEXUS file='", nexfn, "'", sep="")
		titletxt = paste(group_name, titletxt, sep="")
		title(titletxt)
		}

	#######################################################
	# Error check
	#######################################################

	# Check for names matching tips in tree
	tipnames = sort(tr$tip.label)
	tablenames = sort(xls$OTUs)

	length(tipnames)
	length(tablenames)
	tipnames == tablenames

	# Find phylogeny tips not in tablenames
	tipnames_in_tablenames_TF = tipnames %in% tablenames
	tipnames_not_in_table = tipnames[tipnames_in_tablenames_TF == FALSE]

	# Find phylogeny tips not in tablenames
	tablenames_not_in_tipnames_TF = tablenames %in% tipnames
	tablenames_not_in_tips = tablenames[tablenames_not_in_tipnames_TF == FALSE]

	num_tips_nomatch = sum(tipnames_in_tablenames_TF == FALSE)
	num_table_nomatch = sum(tablenames_not_in_tipnames_TF == FALSE)

	if (num_tips_nomatch > 0)
		{
		cat("\nWARNING: Tip names not in table:\n", sep="")

		cat("length(tipnames)=", length(tipnames), "\n", sep="")
		cat("length(tablenames)=", length(tablenames), "\n", sep="")
		
		TF = tipnames == tablenames
		tmptxt = paste(as.character(TF), collapse=" ", sep="")
		cat("tipnames == tablenames:\n", tmptxt, "\n", sep="")

		cat("\ntipnames_not_in_table:\n", sep="")
		cat(tipnames_not_in_table, sep="\n")

		#cat(sort(tipnames), sep="\n")
		#cat(sort(tablenames), sep="\n")
		}

	if (num_table_nomatch > 0)
		{
		cat("\nWARNING: Table names not in tips:\n")
		cat("length(tipnames)=", length(tipnames), "\n", sep="")
		cat("length(tablenames)=", length(tablenames), "\n", sep="")
		
		TF = tipnames == tablenames
		tmptxt = paste(as.character(TF), collapse=" ", sep="")
		cat("tipnames == tablenames:\n", tmptxt, "\n", sep="")

		cat("\ntablenames_not_in_tips:\n", sep="")
		cat(tablenames_not_in_tips, sep="\n")
	
		#cat(sort(tipnames), sep="\n")
		#cat(sort(tablenames), sep="\n")
		}
	
	
	#######################################################
	# Cut the outgroups and taxa outside of your analysis 
	# (represented by "out_group" or "Out" or "out" or "Outgroup" or "outgroup")
	#######################################################
	if ("region" %in% names(xls) == TRUE)
		{
		# Get outgroups and cut; also cut blank ranges
		outgroups_TF = ((xls$region == "Out") + (xls$region == "out_group") + (xls$region == "out") + (xls$region == "outgroup") + (xls$region == "Outgroup") + (xls$region == "") + (xls$region == " ")) == 1
		outgroup_tips = xls$OTUs[outgroups_TF]
		outgroup_species = unique(xls$species[outgroups_TF])
		tmptxt = paste(outgroup_species, collapse=", ", sep="")
		cat("\nRemoving outgroups: ", tmptxt, sep="")

		# Drop the tips
		tr = drop.tip(phy=tr, tip=outgroup_tips)

		if (plot_intermediate == TRUE)
			{
			plot(tr, cex=0.5)
			title("Cutting outgroups")
			axisPhylo()
			}

		cat("...done.", sep="")
		
		# Also, remove from xls
		xls = xls[outgroups_TF==FALSE, ]
		}	
	
	# Get unique species
	species_uniq = unique(xls$species)
	species_uniq


	# Assemble geography data
	#dispersal_xlsfn = "__dispersal_matrix_v01.xlsx"
	#areas_abbr = read.xls(dispersal_xlsfn, sheet="area_codes")
	if ("region" %in% names(xls) == TRUE)
		{
		if (is.null(areas_abbr))
			{
			areas_split = sapply(X=xls$region, FUN=strsplit2, split="\\|")
			areas_unlisted = unlist(areas_split)
			abbr = sort(unique(areas_unlisted))
			letters = LETTERS[1:length(abbr)]
			areas_abbr = adf2(data.matrix(cbind(abbr, letters)))
			}
		}
	areas_abbr

	# Go through and drop all non-unique tips
	tmp_species_ranges_table = NULL
	for (j in 1:length(species_uniq))
		{
		#######################################################
		# Prune down the tree
		#######################################################
		# Collapsing species
		species_name = species_uniq[j]
		cat("\n\n\nCollapsing OTUs to species: ", species_name, "...", sep="")

		OTU_names_for_this_species_TF = xls$species == species_name
		OTU_names_for_this_species = xls$OTUs[OTU_names_for_this_species_TF]
	
		# The OTU to keep/rename
		if (pick_specimens_to_use == FALSE)
			{
			# Then just use the first one in the list
			OTU_to_keep = OTU_names_for_this_species[1]
			} else {
			if ("specimen_to_use" %in% names(xls) == FALSE)
				{
				errortxt = paste("\n\nWARNING in prune_specimens_to_species(): option pick_specimens_to_use=TRUE only works if\nxls has a column named xls$specimen_to_use. Defaulting to just use the first\nspecimen: '", OTU_names_for_this_species[1], "'\n", sep="")
				cat(errortxt)
				OTU_to_keep = OTU_names_for_this_species[1]
				} else {
				list_of_specimen_yes_nos = xls$specimen_to_use[OTU_names_for_this_species_TF]
				TF = list_of_specimen_yes_nos == "yes"
				
				# There should be one and only one "yes" per species; otherwise, default
				if (sum(TF) == 1)
					{
					OTU_to_keep = OTU_names_for_this_species[TF]
					} else {
					errortxt = paste("\n\nWARNING in prune_specimens_to_species(): something other than one 'yes' found in\nxls$specimen_to_use for species '", species_name, "'. Defaulting to just use the first\nspecimen: '", OTU_names_for_this_species[1], "'\n", sep="")
					cat(errortxt)
					OTU_to_keep = OTU_names_for_this_species[1]
					} # end if (sum(TF) == 1)
				} # end if ("specimen_to_use" %in% names(xls) == FALSE)
			} # end if (pick_specimens_to_use == FALSE)
	
		# Leave the first OTU, drop the rest
		OTU_names_for_this_species = OTU_names_for_this_species[-1]
	
		# Find the tips that match this species
		tr = drop.tip(phy=tr, tip=OTU_names_for_this_species)
	
		# Rename the kept tip
		tr$tip.label[tr$tip.label == OTU_to_keep] = species_name
	
		if (plot_intermediate == TRUE)
			{
			plot(tr, cex=0.5)
			title("Cutting redundant OTUs")
			axisPhylo()
			}
		cat("		...done.", sep="")



		if ("region" %in% names(xls) == TRUE)
			{
			#######################################################
			# Assemble geography data
			#######################################################
			region_codes = xls$region[OTU_names_for_this_species_TF]
			# Some tips have multiple, find with "|"
			region_codes_multiple_TF = grepl(pattern="\\|", x=region_codes)
	
	
			if (sum(region_codes_multiple_TF) > 0)
				{
				list_o_multiple_regions = NULL
				for (k in 1:sum(region_codes_multiple_TF))
					{
					tmp_regions_before_split = region_codes[region_codes_multiple_TF][k]
					tmp_regions_after_split = strsplit(tmp_regions_before_split, split="\\|")[[1]]
					list_o_multiple_regions = c(list_o_multiple_regions, c(tmp_regions_after_split))
					}
				region_codes = region_codes[region_codes_multiple_TF == FALSE]	# cut the ones that were split
				all_regions_after_splitting = c(region_codes, list_o_multiple_regions)		# add them, post-parsing
				} else {
				all_regions_after_splitting = region_codes
				}
	
			areas_for_this_sp = sort(unique(all_regions_after_splitting))
			tmp_letter_codes_for_this_species = NULL
			for (k in 1:length(areas_for_this_sp))
				{
				# Abbreviation for this unique area
				abbr_TF = areas_abbr$abbr == areas_for_this_sp[k]
		
				# Error check
				if (sum(abbr_TF) == 0)
					{
					stoptxt = cat("\nMAJOR WARNING!: the areaname '", areas_for_this_sp[k], "' of species '", species_uniq, "' is not found in your abbreviation table!\n", sep="")
					cat(stoptxt)
					#stop(stoptxt)
					}
		
				tmp_letter_code = areas_abbr$letter[abbr_TF]
				tmp_letter_codes_for_this_species = c(tmp_letter_codes_for_this_species, tmp_letter_code)
				}
	
			# Add to table
			range_letter_strs = paste(tmp_letter_codes_for_this_species, collapse="", sep="")
			tmprow = c(species_name, range_letter_strs)
			tmp_species_ranges_table = rbind(tmp_species_ranges_table, tmprow)
			} # End if regions
		} # end forloop
	
	
	
	if ("region" %in% names(xls) == TRUE)
		{
		tmp_species_ranges_table = dfnums_to_numeric(adf2(tmp_species_ranges_table))
		names(tmp_species_ranges_table) = c("species", "letterrange")

		tipranges = letter_strings_to_tipranges_df(letter_strings=tmp_species_ranges_table$letterrange, letter_codes_in_desired_order=areas_abbr$letter, tipnames_in_order=tmp_species_ranges_table$species)
		tipranges


		# Then, check the output
		tipnames1 = sort(tr$tip.label)
		tipnames2 = sort(rownames(tipranges@df))


		
		if (length(tipnames1) != length(tipnames2))
			{
			stoptxt = paste("\nFATAL ERROR: Your output tree and your output tipranges have different lengths:\nlength(tr$tip.label)=", length(tr$tip.label), ", length(rownames(tipranges@df))=", length(rownames(tipranges@df)), "\n", sep="")
			cat(stoptxt)
			stop(stoptxt)
			}
		
		TF = tipnames1 == tipnames2
		if (sum(TF) != length(TF))
			{
			stoptxt = paste("\nFATAL ERROR: Your output tree tips, and your output tipranges names, don't match:\n", sep="")
			cat(stoptxt)
			
			cat(tipnames1, ", ")
			cat(tipnames2, ", ")
			cat(TF, ", ")
			
			stop(stoptxt)
			}

		
		TF = rowSums(tipranges@df) == 0
		if (sum(TF) > 0)
			{
			stoptxt = paste("\nFATAL ERROR: tipranges contains ", sum(TF), " rows with 0 ranges:\n", sep="")
			cat(stoptxt)
			
			print(tipranges@df[TF, ])
			stop(stoptxt)
			}
		} # end if (sum(TF) != length(TF))

	#is.ultrametric(tr)









		print("Checkpoint #2")




	if (plot_intermediate == TRUE)
		{
		# Plot final tree
		titletxt = paste(group_name, ": Final pruned tree", sep="")
		plot(tr, cex=0.5)
		axisPhylo()
		title(titletxt)
		}



	pruning_results = list()
	if ("region" %in% names(xls) == TRUE)
		{
		pruning_results$tr = tr
		pruning_results$tipranges = tipranges
		} else {
		pruning_results$tr = tr
		}

	return(pruning_results)
	}









#######################################################
# Lagrange-related functions
#######################################################







#######################################################
# getareas_from_tipranges_object
#######################################################
#' Get the names of the areas in a tipranges object
#' 
#' This function extracts the names of the areas in a \code{tipranges} object.  Just a shortcut for 
#' \code{names(tipranges@@df)}.
#'
#' Duplicates \code{getareanames_from_tipranges_object} for back-compatibility.
#' 
#' @param tipranges An object of class \code{tipranges}.
#' @return \code{areanames}, a list of the names of the areas
#' @export
#' @seealso \code{\link{getareanames_from_tipranges_object}}, \code{\link{define_tipranges_object}}, \code{\link[cladoRcpp]{areas_list_to_states_list_old}},
#' \code{\link{areas_list_to_states_list_new}}, \code{\link{tipranges_to_tip_condlikes_of_data_on_each_state}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @examples
#' testval=1
#' tipranges_object = define_tipranges_object()
#' tipranges_object
#'
#' areanames = getareas_from_tipranges_object(tipranges_object)
#' areanames
#'
getareas_from_tipranges_object <- function(tipranges)
	{
	areanames = as.character(names(tipranges@df))
	return(areanames)
	}


#######################################################
# getareanames_from_tipranges_object
#######################################################
#' Get the names of the areas in a tipranges object
#' 
#' This function extracts the names of the areas in a \code{tipranges} object.  Just a shortcut for 
#' \code{names(tipranges@@df)}.
#' 
#' Duplicates \code{getareas_from_tipranges_object} which has a less clear name, but is
#' kept for back-compatibility.
#' 
#' @param tipranges An object of class \code{tipranges}.
#' @return \code{areanames}, a list of the names of the areas
#' @export
#' @seealso \code{\link{getareas_from_tipranges_object}}, \code{\link{define_tipranges_object}}, \code{\link[cladoRcpp]{areas_list_to_states_list_old}},
#' \code{\link{areas_list_to_states_list_new}}, \code{\link{tipranges_to_tip_condlikes_of_data_on_each_state}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @examples
#' testval=1
#' tipranges_object = define_tipranges_object()
#' tipranges_object
#'
#' areanames = getareas_from_tipranges_object(tipranges_object)
#' areanames
#'
getareanames_from_tipranges_object <- function(tipranges)
	{
	areanames = as.character(names(tipranges@df))
	return(areanames)
	}



#######################################################
# get_tiplabel_ranges
#######################################################
#' For each tip, get a text string of the areas in a tipranges object.
#' 
#' This function extracts the names of the areas in a \code{tipranges} object.  Just a shortcut for 
#' \code{names(tipranges@@df)}.
#' 
#' @param tipranges An object of class \code{tipranges}.
#' @param tr An ape phylo object.
#' @param sep Input to \code{\link[base]{paste}}.
#' @return \code{areanames}, a list of the names of the areas
#' @export
#' @seealso \code{\link{define_tipranges_object}}, \code{\link[cladoRcpp]{areas_list_to_states_list_old}},
#' \code{\link{areas_list_to_states_list_new}}, \code{\link{tipranges_to_tip_condlikes_of_data_on_each_state}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @examples
#' testval=1
#' tipranges_object = define_tipranges_object()
#' tipranges_object
#'
#' areanames = getareas_from_tipranges_object(tipranges_object)
#' areanames
#'
get_tiplabel_ranges <- function(tipranges, tr, sep="")
	{
	# Make sure the order matches the tr order
	tipranges = order_tipranges_by_tr(tipranges, tr)

	tmpdf = tipranges@df
	tmpnames = names(tmpdf)
	tmpnames_list = list()
	
	# TRUE/FALSE occupied
	tmpdf_TF = (tmpdf != 0)
	
	for (i in 1:nrow(tmpdf))
		{
		tmpnames_list[[i]] = c(tmpnames[tmpdf_TF[i,]])
		}
	
	tipranges_txt = sapply(FUN=paste, tmpnames_list, collapse="", sep=sep)
	return(tipranges_txt)
	}



#######################################################
# tipranges_to_tip_condlikes_of_data_on_each_state
#######################################################
#' Convert a tipranges object to the tip likelihoods
#' 
#' This function takes a tipranges object, and converts it to tip likelihoods for 
#' input into the likelihood calculations of \code{\link{calc_loglike_sp}}.
#' 
#' This (like LAGRANGE (\cite{ReeSmith2008}) and every other available program) assumes that the geographic
#' ranges at the tips are known with certainty.  Reality may be different, particularly
#' for sparsely-studied, scarce, or fossil taxa.  In such a case, a detection model 
#' is needed to specify the likelihood of the observation data under each possible
#' geographic range at the tips.
#'
#' Note that data likelihoods under this or that hypothesis are not the same thing as
#' probabilities.  E.g., with DNA, if sequencing machine says that the base could be either
#' A or C, but not G or T, then the likelihood of the data for that nucleotide position for
#' that species would be 1 1 0 0, not 0.5 0.5 0 0.  See Felsenstein (2004), p. 255, for more.
#' 
#' @param tipranges An object of class \code{tipranges}.
#' @param phy A phylogenetic tree (\code{\link[ape]{ape}} object of class \code{\link[ape]{phylo}})
#' @param states_list A complete list of the different states, of class \code{\link{list}}
#' form. If NULL (default), the states list is generated based on maxareas.
#' @param maxareas The maximum number of areas in a geographic range, if the user does 
#' @param include_null_range If states_list is NULL, when the states_list 
#' is generated, should the null range be included? Default TRUE.
#' @param useAmbiguities If TRUE, ambiguities ("?") are allowed in the geography input 
#' file. Any possible ranges/states that include the area with the question mark are 
#' given a tip likelihood of 1.
#' @trait A trait, in the format of class tipranges (i.e., 
#' loaded from a trait coded as a geography object). Default is NULL.
#' @return \code{tip_condlikes_of_data_on_each_state} For each tip/row, likelihood of that tip's data under each possible true geographic range (columns)
#' @export
#' @seealso \code{\link{define_tipranges_object}}, \code{\link{getareas_from_tipranges_object}}, 
#' \code{\link{areas_list_to_states_list_new}}, 
#' \code{\link[cladoRcpp]{areas_list_to_states_list_old}}, \code{\link{binary_ranges_to_letter_codes}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#'	 @cite Felsenstein2004
#' @examples
#' testval=1
#' # Define a tipranges object
#' tipranges_object = define_tipranges_object()
#' tipranges_object
#'
#' areanames = getareas_from_tipranges_object(tipranges_object)
#' areanames
#'
#' # Specify phylogeny to go with default tipranges object
#' newick_str = "((tip1:1,tip2:1):1,tip3:2):1;"
#' phy = read.tree(file="", text=newick_str)
#' 
#' # Here, we will assume the maximum range size is all areas, but it could be smaller
#' maxareas = length(areanames)
#' \dontrun{
#' states_list = areas_list_to_states_list_old(areas=areanames, include_null_range=TRUE, 
#' maxareas=maxareas)
#' states_list
#' }
#' 
#' states_list = areas_list_to_states_list_new(areas=areanames, include_null_range=TRUE, 
#' maxareas=maxareas)
#' states_list
#' 
#' tip_condlikes_of_data_on_each_state = tipranges_to_tip_condlikes_of_data_on_each_state(
#' tipranges=tipranges_object, phy=phy, states_list=states_list, maxareas=maxareas )
#' tip_condlikes_of_data_on_each_state
#' 
tipranges_to_tip_condlikes_of_data_on_each_state <- function(tipranges, phy, states_list=NULL, maxareas=length(getareas_from_tipranges_object(tipranges)), include_null_range=TRUE, useAmbiguities=FALSE, trait_as_tip_condlikes=NULL, allow_null_tips=FALSE)
	{
	default='
	tipranges
	phy
	maxareas=length(getareas_from_tipranges_object(tipranges))
	'
	#require(cladoRcpp)
	
	# Reorder the edge matrix into pruningwise order
	# This (may be) CRUCIAL!!
	#phy2 <- reorder(phy, "pruningwise")
	# NOTE: phy and phy2 have the tips in the same order, it's just the
	# edge matrix that gets reordered...
	
	#print(tipranges)
	
	areanames = getareas_from_tipranges_object(tipranges)
	areas_list_0based = seq(0, (length(areanames)-1), 1)
	#print(areanames)
	#print("stop here")
	
	if (is.null(states_list))
		{
		cat("Note: tipranges_to_tip_condlikes_of_data_on_each_state() is\n")
		cat("      creating 'states_list' automatically.\n")
		states_list_as_0based_area_numbers_wNULL_underscore = areas_list_to_states_list_new(areas=areas_list_0based, include_null_range=include_null_range, maxareas=maxareas)
		states_list_as_lists_of_letters = areas_list_to_states_list_new(areas=areanames, include_null_range=include_null_range, maxareas=maxareas)
		states_list = states_list_as_0based_area_numbers_wNULL_underscore
		}
	
	# If the states_list is using characters, convert to number
	TF = is.na(as.numeric(states_list[[length(states_list)]][1])) == TRUE
	print("Note: tipranges_to_tip_condlikes_of_data_on_each_state() is converting a states_list with (0-based) numbers to the equivalent areanames")
	if (TF == TRUE)
		{
		tmp_states_list = NULL
		tmpi = 1
		if (include_null_range == TRUE)
			{
			tmp_states_list[[tmpi]] = states_list[[tmpi]]
			tmpi = tmpi + 1
			}
		for (i in (include_null_range+1):(length(states_list)))
			{
			tmp_areas_for_state = NULL
			for (j in 1:length(states_list[[tmpi]]))
				{
				tmp_area_num_index = match(x=states_list[[tmpi]][j], table=areanames)
				tmp_area_num = areas_list_0based[tmp_area_num_index]
				tmp_areas_for_state = c(tmp_areas_for_state, tmp_area_num)
				}
			tmp_states_list[[tmpi]] = list(tmp_areas_for_state)
			tmpi = tmpi + 1
			}
		states_list = unlist(tmp_states_list, recursive=FALSE)
		}

	
	
	# Check for ranges greater than the maximum number of areas in states_list
	ranges_greater_than_maxareas_TF = sapply(states_list, length) > maxareas
	# Reduce states_list accordingly, if any are found
	if (sum(ranges_greater_than_maxareas_TF) > 0)
		{
		states_list = states_list[ranges_greater_than_maxareas_TF == FALSE]
		}


	
	tipranges_orig = tipranges
	# Make sure the letter code ranges and number code ranges are in the same order as the 
	# phylogeny tips
	tipranges_df_order = match(phy$tip.label, rownames(tipranges@df))

	tipranges_in_treeTip_order = tipranges@df[tipranges_df_order,]
	if (ncol(tipranges@df) == 1)
		{
		tmpmat = matrix(data=tipranges_in_treeTip_order, ncol=1)
		tmpdf = as.data.frame(tipranges_in_treeTip_order, stringsAsFactors=FALSE)
		names(tmpdf) = names(tipranges@df)
		row.names(tmpdf) = row.names(tipranges@df)[tipranges_df_order]
		tipranges_in_treeTip_order = tmpdf
		}
	
	tipranges@df = tipranges_in_treeTip_order
	
	if ((useAmbiguities == FALSE) || (sum(tipranges@df == "?") == 0))
		{
		# Get the letter code ranges for each binary state
		tipranges@df[tipranges@df == "?"] = 0
		letter_code_ranges = binary_ranges_to_letter_codes(tipranges=tipranges, areanames=areanames)
		number_code_ranges_0based = binary_ranges_to_letter_codes(tipranges=tipranges, areanames=areas_list_0based)
		#print(letter_code_ranges)
		names(letter_code_ranges) = NULL
		letter_code_ranges
		
	
		# Empty matrix of tip relative likelihoods
		tip_condlikes_of_data_on_each_state = matrix(data=0, nrow=length(phy$tip.label), ncol=length(states_list))

		#letter_code_ranges = letter_code_ranges[tipranges_df_order]
		#number_code_ranges_0based = number_code_ranges_0based[tipranges_df_order]
		
		# Collapse the states to text fields and compare
		allowed_states_list_0based_txt = sapply(X=states_list, FUN=paste, collapse="_", sep="")
		#print(allowed_states_list_0based_txt)
		#print(allowed_states_list_0based_txt)
		for (rownum in 1:nrow(tip_condlikes_of_data_on_each_state))
			{
			temp_state_txt = paste(letter_code_ranges[rownum][[1]], collapse="_", sep="")
			temp_state = paste(number_code_ranges_0based[rownum][[1]], collapse="_", sep="")
			
			# Check null range for test
			changeflag = FALSE
			if (temp_state == "")
				{
				temp_state = "NA"
				changeflag = TRUE
				}
			
			# See if this tipstate matches anything in the list
			state_match_TF = temp_state == allowed_states_list_0based_txt
			#state_match_TF = temp_state_txt == allowed_states_list_0based_txt
			
			# See if this tipstate matches anything in the list
			# If not...
			# Error check added 2015-04-03
			if (sum(state_match_TF) == 0)
				{
				error_txt = paste0("STOP ERROR in tipranges_to_tip_condlikes_of_data_on_each_state():\n\nYour geography file has a tip range which is not allowed in your specified state space (states_list).\n\nThe tip is: ", phy$tip.label[rownum], "\nThe 0-based areas in the range is/are: '", temp_state, "'\nThe letter codes in the range are: '", temp_state_txt, "'\n\n\nTry editing either the geography file, the areas_allowed or areas_adjacency matrices, or the states_list (non-stratified) / lists_of_states_lists_0based (stratified).\n\nOR, another thing that might work, if you have '?' data: BioGeoBEARS_run_object$useAmbiguities = TRUE\n\n")
				cat("\n\n")
				cat(error_txt)
				cat("\n")
				stop(error_txt)
				} # END if (sum(state_match_TF) == 0)

			if (changeflag == TRUE)
				{
				temp_state = ""
				changeflag = FALSE
				}

			
			tip_condlikes_of_data_on_each_state[rownum, state_match_TF] = 1
			} # END for (rownum in 1:nrow(tip_condlikes_of_data_on_each_state))
		# End tip likelihoods calculation when useAmbiguities==FALSE
		} else {
		# useAmbiguities == TRUE
		
		# In the add-fossils-randomly script, these changes are made to the geography file:
		#
		# "presence-only" means:
		#    areas with 1s are 1s
		#    areas with 0s are ?
		#
		# "absence-only" means:
		#    areas with 1s are ?
		#    areas with 0s are 0
		# 
		# "both" means:
		# use ranges like usual
		#

		# Get the basic likelihoods (no ambiguities)
		tipranges_noAmbig = tipranges
		Qs_mat_TF = tipranges_noAmbig@df == "?"
		
		someQs_TF = rowSums(Qs_mat_TF) > 0
		tipranges_noAmbig@df[Qs_mat_TF] = 0
		letter_code_ranges = binary_ranges_to_letter_codes(tipranges=tipranges_noAmbig, areanames=areanames)
		number_code_ranges_0based = binary_ranges_to_letter_codes(tipranges=tipranges_noAmbig, areanames=areas_list_0based)
		#print(letter_code_ranges)
		names(letter_code_ranges) = NULL
		letter_code_ranges
		
	
		# Empty matrix of tip relative likelihoods
		tip_condlikes_of_data_on_each_state_noAmbigs = matrix(data=0, nrow=length(phy$tip.label), ncol=length(states_list))

		#letter_code_ranges = letter_code_ranges[tipranges_df_order]
		#number_code_ranges_0based = number_code_ranges_0based[tipranges_df_order]
		
		# Collapse the states to text fields and compare
		allowed_states_list_0based_txt = sapply(X=states_list, FUN=paste, collapse="_", sep="")
		#print(allowed_states_list_0based_txt)
		#print(allowed_states_list_0based_txt)
		for (rownum in 1:nrow(tip_condlikes_of_data_on_each_state_noAmbigs))
			{
			temp_state_txt = paste(letter_code_ranges[rownum][[1]], collapse="_", sep="")
			temp_state = paste(number_code_ranges_0based[rownum][[1]], collapse="_", sep="")
			
			# See if this tipstate matches anything in the list
			state_match_TF = temp_state == allowed_states_list_0based_txt
			#state_match_TF = temp_state_txt == allowed_states_list_0based_txt
			
			# See if this tipstate matches anything in the list
			# If not...
			# Error check added 2015-04-03
			if (sum(state_match_TF) == 1)
				{
				tip_condlikes_of_data_on_each_state_noAmbigs[rownum, state_match_TF] = 1
				} # END if (sum(state_match_TF) == 0)

			} # END for (rownum in 1:nrow(tip_condlikes_of_data_on_each_state))

	
		#tipranges_orig
		tipranges_hard_0s_as_1s = tipranges
		tipranges_hard_1s = tipranges
		
		
		# ALL ?'s get all-1s for tip likelihoods
		
		
		# KNOWN ABSENCES FROM AREAS (HARD ZEROS)
		# ANY TIP WITH THESE AREAS GETS LIKELIHOOD 0
		# Get the letter code ranges for each binary state
		# RE-CODE the 0s as 1s
		tipranges_hard_0s_as_1s@df[tipranges_hard_0s_as_1s@df == "?"] = "z"
		tipranges_hard_0s_as_1s@df[tipranges_hard_0s_as_1s@df == "1"] = "z"
		tipranges_hard_0s_as_1s@df[tipranges_hard_0s_as_1s@df == "0"] = "1" # flip 0s to 1s
		tipranges_hard_0s_as_1s@df[tipranges_hard_0s_as_1s@df == "z"] = "0" # flip 1s and ?s to 0s

		# KNOWN PRESENCES FROM AREAS (HARD ZEROS)
		# ANY TIP WITH*OUT* THESE AREAS GETS LIKELIHOOD 0
		# Get the letter code ranges for each binary state
		tipranges_hard_1s@df[tipranges_hard_1s@df == "?"] = "0"
		tipranges_hard_1s@df[tipranges_hard_1s@df == "1"] = "1" # flip 1s to 1

		# Lists of areas that are DISALLOWED
		letter_code_ranges = binary_ranges_to_letter_codes(tipranges=tipranges_hard_0s_as_1s, areanames=areanames)
		names(letter_code_ranges) = NULL
		number_code_ranges_0based = binary_ranges_to_letter_codes(tipranges=tipranges_hard_0s_as_1s, areanames=areas_list_0based)
		letter_code_ranges

		# Make sure the letter code ranges are in the same order as the 
		# phylogeny tips
		#tipranges_df_order = match(phy$tip.label, rownames(tipranges_hard_0s_as_1s@df))
		#letter_code_ranges = letter_code_ranges[tipranges_df_order]
		#number_code_ranges_0based = number_code_ranges_0based[tipranges_df_order]



		# Lists of areas that are REQUIRED
		letter_code_ranges_hard_1s_as_0s = binary_ranges_to_letter_codes(tipranges=tipranges_hard_1s, areanames=areanames)
		names(letter_code_ranges_hard_1s_as_0s) = NULL
		number_code_ranges_0based_hard_1s_as_0s = binary_ranges_to_letter_codes(tipranges=tipranges_hard_1s, areanames=areas_list_0based)
		letter_code_ranges_hard_1s_as_0s

		# Make sure the letter code ranges are in the same order as the 
		# phylogeny tips
		#tipranges_df_order = match(phy$tip.label, rownames(tipranges_hard_0s_as_1s@df))
		#letter_code_ranges_hard_1s_as_0s = letter_code_ranges_hard_1s_as_0s[tipranges_df_order]
		#number_code_ranges_0based_hard_1s_as_0s = number_code_ranges_0based_hard_1s_as_0s[tipranges_df_order]


		
		# Empty matrix of tip relative likelihoods
		# HERE, set likelihoods to 1, then set some to 0
		tip_condlikes_of_data_on_each_state = matrix(data=1, nrow=length(phy$tip.label), ncol=length(states_list))

		# Here, the only meaningful thing is areas with 1s; exclude these states/ranges
		
		# Exclude the null range! (add it back in, if warranted, later on)
		tip_condlikes_of_data_on_each_state[,1] = 0

		# Quick function to find states in states_list that have disallowed areas
		return_TF_disallowed_areas_in_state <- function(state, disallowed_areas_0based)
			{
			TF = any(disallowed_areas_0based %in% state)
			return(TF)
			}

		# Quick function to find states in states_list that DON'T have REQUIRED areas
		return_TF_required_areas_not_found_in_state <- function(state, required_areas_0based)
			{
			TF = any((required_areas_0based %in% state) == FALSE)
			return(TF)
			}

		
		# Collapse the states to text fields and compare
		# 
		#states_list_txt = sapply(X=states_list, FUN=paste, collapse="_", sep="")
		for (rownum in 1:nrow(tip_condlikes_of_data_on_each_state))
			{
			#temp_states = letter_code_ranges[rownum][[1]]
			# temp_states: A 0-based list of area lists, by tip
			# states_list: A 0-based list of area lists, by state (column of the tip_condlikes matrix)
			disallowed_areas_0based = number_code_ranges_0based[rownum][[1]]
			
			# KNOWN ABSENCE AREAS -- set states with those areas to likelihood zero
			# Any state in states_list that has one or more of the 0-based areas in temp_states
			states_list 
			
			disallowed_TF = sapply(X=states_list, FUN=return_TF_disallowed_areas_in_state, disallowed_areas_0based=disallowed_areas_0based)
			tip_condlikes_of_data_on_each_state[rownum,disallowed_TF] = 0


			# KNOWN PRESENCE AREAS -- set states WITHOUT those areas to likelihood zero
			required_areas_0based = number_code_ranges_0based_hard_1s_as_0s[rownum][[1]]
			
			# Any state in states_list that has one or more of the 0-based areas in temp_states
			states_list 
			
			required_areas_missing_TF = sapply(X=states_list, FUN=return_TF_required_areas_not_found_in_state, required_areas_0based=required_areas_0based)
			tip_condlikes_of_data_on_each_state[rownum,required_areas_missing_TF] = 0

			
# 			if (length(temp_states) <= 0)
# 				{
# 				next()	# skip to the next loop
# 				}
# 			
# 			# E.g., temp_states = "A", "B"
# 			for (j in 1:length(temp_states))
# 				{
# 				temp_state = temp_states[j]
# 				#print(temp_state)
# 				state_match_TF = grepl(pattern=temp_state, x=states_list_txt)
# 				tip_condlikes_of_data_on_each_state[rownum, state_match_TF] = 0
# 				}
			} # END for (rownum in 1:nrow(tip_condlikes_of_data_on_each_state))
		

		# Check for rows with ALL Qs
		rows_with_allQs_TF = check_tipranges_for_allQs(tipranges, symbol_to_check="?")
		tip_condlikes_of_data_on_each_state[rows_with_allQs_TF, ] = 1

		# Also, set the null range (if it exists) to likelihood 0
		#state_match_TF = states_list_txt == "_"
		#tip_condlikes_of_data_on_each_state[, state_match_TF] = 0

		#print("printing 'tip_condlikes_of_data_on_each_state':")
		#print(tip_condlikes_of_data_on_each_state[1:5,])
		
		
		# Input any tip likelihoods for rows WITHOUT any ambiguities (no ?s)
		tip_condlikes_of_data_on_each_state[someQs_TF==FALSE, ] = tip_condlikes_of_data_on_each_state_noAmbigs[someQs_TF==FALSE, ]
		
		# But, if you have any rows that are actually null range (e.g. fossils), set those to likelihood c(1, 0, 0, ...)
		# Check for rows with ALL ZEROS (null ranges, i.e. fossils)
		if (include_null_range == TRUE)
			{
			rows_with_all0s_TF = check_tipranges_for_allQs(tipranges, symbol_to_check="0")
			#print(tipranges_orig)
			#print(rows_with_all0s_TF)
			state_match_TF = is.na(states_list)
			tip_condlikes_of_data_on_each_state[rows_with_all0s_TF, ][,state_match_TF] = 1
			}
		
		#print("printing 'tip_condlikes_of_data_on_each_state':")
		#print(tip_condlikes_of_data_on_each_state[1:5,])
		#stop()
		
		} # End tip likelihoods calculation when useAmbiguities==TRUE
	
	
	# If there is a trait, double-up the conditional likelihoods matrix
	# (or x3, or whatever) 
	if (is.null(trait_as_tip_condlikes) == FALSE)
		{
		tip_condlikes_of_data_on_each_state_ORIG = tip_condlikes_of_data_on_each_state
		ntrait_states = ncol(trait_as_tip_condlikes)
		ncols_orig = ncol(tip_condlikes_of_data_on_each_state)
		nrows_orig = nrow(tip_condlikes_of_data_on_each_state)
		
		new_tip_condlikes = matrix(data=0, nrow=nrows_orig, ncol=ntrait_states*ncols_orig)
		
		# Loop through the trait states
		for (traitstate in 1:ntrait_states)
			{
			startcol = ((traitstate-1) * ncols_orig) + 1
			endcol = (traitstate * ncols_orig) 
			new_tip_condlikes[,startcol:endcol] = tip_condlikes_of_data_on_each_state * trait_as_tip_condlikes[,traitstate]			
			} # END for (traitstate in 1:ntrait_states)
		
		# Use the new tip_condlikes
		tip_condlikes_of_data_on_each_state = new_tip_condlikes
		} # END if (is.null(trait_as_tip_condlikes) == FALSE)
	
	return(tip_condlikes_of_data_on_each_state)
	}




#######################################################
# areas_list_to_states_list_new
#######################################################
#' Convert a list of areas to a list of geographic ranges (states); R version
#' 
#' R version of areas_list_to_states_list_old, which makes use of \code{\link[cladoRcpp]{cladoRcpp}}'s
#' \code{\link[cladoRcpp]{rcpp_areas_list_to_states_list}}. 
#' 
#' This is the original R version of the function which converts a list of possible areas to
#' a list of all possible states (geographic ranges).  This gets slow for large numbers of areas.
#' 
#' The function is mostly replaced by \code{\link[cladoRcpp]{rcpp_areas_list_to_states_list}} 
#' in optimized code, but is still used in some places
#' for display purposes.
#' 
#' @param areas a list of areas (character or number; the function converts these to numbers, starting with 0)
#' @param maxareas maximum number of areas in this analyses
#' @param include_null_range \code{TRUE} or \code{FALSE}, should the \code{NULL} range be included in the possible states? (e.g., LAGRANGE default is yes)
#' @param split_ABC \code{TRUE} or \code{FALSE} If \code{TRUE} the output will consist of a list of lists (c("A","B","C"), c("A","B"), c("A","D"), etc.); 
#' if \code{FALSE}, the list of areas will be collapsed ("ABC", "AB", "AD", etc.).
#' @return \code{states_list} A list of the states.
#' @export
#' @seealso \code{\link{numstates_from_numareas}}, \code{\link{rcpp_areas_list_to_states_list}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{https://code.google.com/p/lagrange/}
#' @examples
#' areas = c("A","B","C")
#' areas_list_to_states_list_new(areas=areas, maxareas=length(areas), 
#' include_null_range=TRUE, split_ABC=TRUE)
#' areas_list_to_states_list_new(areas=areas, maxareas=length(areas), 
#' include_null_range=TRUE, split_ABC=FALSE)
#' areas_list_to_states_list_new(areas=areas, maxareas=length(areas), 
#' include_null_range=FALSE, split_ABC=TRUE)
#' areas_list_to_states_list_new(areas=areas, maxareas=length(areas), 
#' include_null_range=FALSE, split_ABC=FALSE)
#' areas_list_to_states_list_new(areas=areas, maxareas=2, 
#' include_null_range=TRUE, split_ABC=TRUE)
#' areas_list_to_states_list_new(areas=areas, maxareas=2, 
#' include_null_range=TRUE, split_ABC=FALSE)
#' areas_list_to_states_list_new(areas=areas, maxareas=2, 
#' include_null_range=FALSE, split_ABC=TRUE)
#' areas_list_to_states_list_new(areas=areas, maxareas=2, 
#' include_null_range=FALSE, split_ABC=FALSE)
#' areas_list_to_states_list_new(areas=areas, maxareas=1, 
#' include_null_range=TRUE, split_ABC=TRUE)
#' areas_list_to_states_list_new(areas=areas, maxareas=1, 
#' include_null_range=TRUE, split_ABC=FALSE)
#' areas_list_to_states_list_new(areas=areas, maxareas=1, 
#' include_null_range=FALSE, split_ABC=TRUE)
#' areas_list_to_states_list_new(areas=areas, maxareas=1, 
#' include_null_range=FALSE, split_ABC=FALSE)
#' 
areas_list_to_states_list_new <- function(areas=c("A","B","C"), maxareas=length(areas), include_null_range=TRUE, split_ABC=TRUE)
	{
	runjunk='
	areas = c("K", "O", "M", "H")
	maxareas=length(areas)
	include_null_range=FALSE
	split_ABC=FALSE
	'
	
	# Error trap
	if (maxareas > length(areas))
		{
		maxareas = length(areas)
		}
	
	
	# Initialize the states_list to the correct size
	#print(areas)
	#print(maxareas)
	#print(include_null_range)
	nstates = numstates_from_numareas(numareas=length(areas), maxareas=maxareas, include_null_range=include_null_range)
	#print(nstates)
	states_list = rep(NA, times=nstates)

	if (split_ABC == TRUE)
		{
		# Option #1: Don't split states
		# Add range combinations to the list
		states_list_area_indexes = rcpp_areas_list_to_states_list(areas=areas, include_null_range=include_null_range, maxareas=maxareas)
		
		# +1 to indexes
		states_list_area_indexes = lapply(X=states_list_area_indexes, FUN="+", 1)
		
		# convert to letters
		tmpfun <- function(x, abbr) { abbr[x] }
		states_list_areas = lapply(X=states_list_area_indexes, FUN=tmpfun, abbr=areas)
		states_list_areas
		
		# convert NA to "_"
		states_list_areas[is.na(states_list_areas)] = "_"
		return(states_list_areas)
		}


	if (split_ABC == FALSE)
		{
		# Option #1: Don't split states
		# Add range combinations to the list
		states_list_area_indexes = rcpp_areas_list_to_states_list(areas=areas, include_null_range=include_null_range, maxareas=maxareas)
		
		# +1 to indexes
		states_list_area_indexes = lapply(X=states_list_area_indexes, FUN="+", 1)
		
		# convert to letters
		tmpfun <- function(x, abbr) { abbr[x] }
		states_list_areas = lapply(X=states_list_area_indexes, FUN=tmpfun, abbr=areas)
		states_list_areas

	
		# convert NA to "_"
		states_list_areas[is.na(states_list_areas)] = "_"
		
		# Collapse the ranges
		states_list_areas = lapply(X=states_list_areas, FUN=paste, collapse="")
		states_list_areas
		
		return(states_list_areas)
		}

	return(stop("areas_list_to_states_list_new(): ERROR, you shouldn't reach this."))
	}
	

#######################################################
# states_list_0based_to_ranges_txt_list
#######################################################
#' Convert a 0-based list of range lists to a list of collapsed text for ranges
#' 
#' Converts e.g. list(c(NA), c(0), c(1), c(0,1), c(1,2)) to
#' c("_", "A", "AB", "BC")
#' 
#' @param state_indices_0based An R list object, where each list
#'        item is a range consisting of a vector of numbers, counting from 0,
#'        representing which areas are in the range. NA means null range. 
#'        E.g. \code{list(c(NA), c(0), c(1), c(0,1), c(1,2))}. Typically this
#'        list is produced by \code{\link[cladoRcpp]{rcpp_areas_list_to_states_list}}.
#' @param areanames
#' @return ranges_list The list of collapsed text representations of ranges 
#'                     (e.g. "ABC").
#' @export
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu}
#' @examples
#' test=1
#' areanames = c("K", "O", "M", "H")
#' state_indices_0based = list(c(NA), c(0), c(1), c(2), c(3), c(0,1),
#' c(0,2), c(0,3), c(1,2), c(1,3), c(2,3), c(0,1,2), c(0,1,3), c(1,2,3),
#' c(0,1,2,3))
#' ranges_list = states_list_0based_to_ranges_txt_list(state_indices_0based,
#' areanames)
#' ranges_list
#'
#' # areas = getareas_from_tipranges_object(tipranges)
#' max_range_size = 4
#' state_indices_0based = rcpp_areas_list_to_states_list(areas=areas, 
#' maxareas=max_range_size, include_null_range=TRUE)
#' ranges_list = states_list_0based_to_ranges_txt_list(state_indices_0based,
#' areanames)
#' ranges_list
#'
states_list_0based_to_ranges_txt_list <- function(state_indices_0based, areanames)
	{
	examples_txt='
	areanames = c("K", "O", "M", "H")
	state_indices_0based = list(c(NA), c(0), c(1), c(2), c(3), c(0,1),
	c(0,2), c(0,3), c(1,2), c(1,3), c(2,3), c(0,1,2), c(0,1,3), c(1,2,3),
	c(0,1,2,3))
	ranges_list = states_list_0based_to_ranges_txt_list(state_indices_0based,
	areanames)
	ranges_list
	# areas = getareas_from_tipranges_object(tipranges)
	max_range_size = 4
	state_indices_0based = rcpp_areas_list_to_states_list(areas=areas, 
	maxareas=max_range_size, include_null_range=TRUE)
	ranges_list = states_list_0based_to_ranges_txt_list(state_indices_0based,
	areanames)
	ranges_list
	' # END examples_txt
	
	
	states_list = state_indices_0based
	
	# Get the list of text ranges
	ranges_list = list()

	for (i in 1:length(states_list))
		{
		if (length(states_list[[i]]) == 1)
			{
			if (is.na(states_list[[i]])==TRUE) 
				{
				ranges_list[[i]] = "_"
				} else {
				ranges_list[[i]] = paste(areanames[ 1+states_list[[i]] ], sep="", collapse="")
				} # END if (is.na(states_list[[i]])==TRUE) 
			} else {
			ranges_list[[i]] = paste(areanames[ 1+states_list[[i]] ], sep="", collapse="")
			} # END if (length(states_list[[i]]) == 1)
		} # END for (i in 1:length(states_list))

	ranges_list
	
	return(ranges_list)
	} # END states_list_0based_to_ranges_txt()
	



#######################################################
# Binary ranges to letter codes
#######################################################

#######################################################
# binary_range_to_letter_code_txt
#######################################################
#' Convert binary presence/absence codes (1/0) to text area names
#' 
#' Given a row of a \code{tipranges} object, converts to the corresponding name(s), collapsed
#' into a string.  E.g., if the areas were \code{(A,B,C,D)}, and the tipranges row had \code{(1 0 1 0)}, the 
#' output statename would be "AC".
#' 
#' @param tipranges_row row of a \code{tipranges} object.
#' @param areanames a list of the names of the areas
#' @return \code{statename} The corresponding name(s), collapsed into a string
#' @export
#' @seealso \code{\link{binary_range_to_letter_code_list}}, \code{\link{tipranges_to_tip_condlikes_of_data_on_each_state}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{https://code.google.com/p/lagrange/}
#' @examples
#' testval=1
#' tipranges_row = c(1, 0, 1, 0)
#' areanames = c("A", "B", "C", "D")
#' statename = binary_range_to_letter_code_txt(tipranges_row, areanames)
#' statename
#' 
binary_range_to_letter_code_txt <- function(tipranges_row, areanames)
	{
	present_TF = tipranges_row == 1
	statename = paste(areanames[present_TF], collapse="", sep="")
	
	return(statename)
	}

#######################################################
# binary_range_to_letter_code_list
#######################################################
#' Convert binary presence/absence codes (1/0) to a list of text area names
#' 
#' Given a row of a \code{tipranges} object, converts to a list of the corresponding name(s).
#' E.g., if the areas were \code{(A,B,C,D)}, and the tipranges row had \code{(1 0 1 0)}, the 
#' output statename would be ("A","C").
#' 
#' @param tipranges_row row of a \code{tipranges} object.
#' @param areanames a list of the names of the areas
#' @return \code{list_of_areas_in_the_state} A list of the name(s) of the areas corresponding to the presence/absence coding in the row
#' @export
#' @seealso \code{\link{binary_ranges_to_letter_codes}}, \code{\link{letter_string_to_binary}}, \code{\link{letter_strings_to_tipranges_df}}, \code{\link{binary_range_to_letter_code_txt}}, \code{\link{tipranges_to_tip_condlikes_of_data_on_each_state}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @examples
#' testval=1
#' tipranges_row = c(1, 0, 1, 0)
#' areanames = c("A", "B", "C", "D")
#' list_of_areas_in_the_state = binary_range_to_letter_code_list(tipranges_row, 
#' areanames)
#' list_of_areas_in_the_state
#' 
binary_range_to_letter_code_list <- function(tipranges_row, areanames)
	{
	present_TF = tipranges_row == 1
	list_of_areas_in_the_state = c(areanames[present_TF])
	
	return(list_of_areas_in_the_state)
	}


#######################################################
# binary_ranges_to_letter_codes
#######################################################
#' Convert binary presence/absence codes (1/0) to a list of text area names
#' 
#' Given a row of a \code{tipranges} object, converts to a list of the corresponding statenames
#' for each row.
#' 
#' @param tipranges a \code{tipranges} object.
#' @param areanames a list of the names of the areas
#' @return \code{letter_code_ranges} A list of the states -- there will be as many states as there are rows/tips in \code{tipranges}.
#' Each state will be a list of area names.
#' @export
#' @seealso \code{\link{binary_range_to_letter_code_list}}, \code{\link{letter_string_to_binary}}, \code{\link{letter_strings_to_tipranges_df}}, \code{\link{tipranges_to_tip_condlikes_of_data_on_each_state}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{https://code.google.com/p/lagrange/}
#' @examples
#' testval=1
#' # Define a tipranges object
#' tipranges_object = define_tipranges_object()
#' tipranges_object
#'
#' areanames = getareas_from_tipranges_object(tipranges_object)
#' areanames
#'
#' letter_code_ranges = binary_ranges_to_letter_codes(tipranges=tipranges_object, 
#' areanames)
#' letter_code_ranges
#' 
binary_ranges_to_letter_codes <- function(tipranges, areanames)
	{
	letter_code_ranges = apply(X=tipranges@df, MARGIN=1, FUN=binary_range_to_letter_code_list, areanames=areanames)
	
	# DAMN, BUG DUE TO INCONSISTENT APPLY OUTPUT!!
	# 2017-06-23_NJM
	# Produces a matrix instead of a list if all ranges are the same length!
	# This then truncates ranges!
	if (is.matrix(letter_code_ranges) == TRUE)
		{
		letter_code_ranges = lapply(X=seq_len(ncol(letter_code_ranges)), FUN=function(i) letter_code_ranges[,i])
		}
	
	return(letter_code_ranges)
	}


#######################################################
# letter_string_to_binary
#######################################################
#' Convert ranges in the form of letters (A, AB, BFG, etc.) to binary state number codes
#' 
#' This function takes a letter string (e.g. ABD) and converts to binary encoding (e.g. 1101).
#' 
#' @param letter_string A string of letters (e.g. "ABD")
#' @param letter_codes_in_desired_order The letter codes in the desired order. The default keyword, "alphabet", uses the standard 26 capital letters;
#' the output binary codes will thus have 26 positions.  If the user inputs fewer letters here, or puts them in another order, those will be used.
#' @return \code{numcodes} A list with the binary codes.
#' @export
#' @seealso \code{\link{binary_ranges_to_letter_codes}}, \code{\link{binary_range_to_letter_code_list}}, \code{\link{letter_strings_to_tipranges_df}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @examples
#' testval=1
#' letter_string = "ABD"
#' letter_string_to_binary(letter_string, letter_codes_in_desired_order="alphabet")
#' 
#' letter_string = "ABD"
#' letter_string_to_binary(letter_string, 
#' letter_codes_in_desired_order=c("A","B","C","D","E","F"))
#' 
#' letter_string = "ABD"
#' letter_string_to_binary(letter_string, 
#' letter_codes_in_desired_order=strsplit("ABCDEF", split="")[[1]])
#' 
letter_string_to_binary <- function(letter_string, letter_codes_in_desired_order="alphabet")
	{
	tests='
	letter_string = "BGD"
	alphabet = c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z")
	letter_codes_in_desired_order = "alphabet"
	'
	
	if ( (length(letter_codes_in_desired_order)==1 ) && (letter_codes_in_desired_order == "alphabet"))
		{
		# Alphabet, just so we have it
		alphabet = c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z")
		letter_codes_in_desired_order = alphabet
		} 
	
	nums = seq(1, length(letter_codes_in_desired_order), by=1)
	
	letter_codes = strsplit(letter_string, split="")[[1]]
	
	# Start the numcodes as all zeros
	numcodes = rep(0, times=length(letter_codes_in_desired_order))
	
	# Check for which are in the full list
	codes_found_TF = letter_codes_in_desired_order %in% letter_codes
	numcodes[codes_found_TF] = 1
	
	return(numcodes)
	}
	

#######################################################
# Convert ranges in the form of letters (A, AB, BFG, etc.) to binary state number codes
# Apply to a vector of such strings, output a tipranges object
#######################################################
#######################################################
# letter_strings_to_tipranges_df
#######################################################
#' Convert ranges in the form of letters (A, AB, BFG, etc.) to a \code{tipranges} object
#' 
#' This function converts ranges in the form of concatenated letters (A, AB, BFG, etc.) to binary state number codes. 
#' Via \code{\link[base]{apply}}, this is done to each member of the entire input vector of strings.  It outputs \code{\link{tipranges}} object.
#' 
#' @param letter_strings A list of ranges in concatenated letter form ("A", "AB", "BFG", etc.)
#' @param letter_codes_in_desired_order The letter codes in the desired order. The default keyword, "alphabet", uses the standard 26 capital letters;
#' the output binary codes will thus have 26 positions.  If the user inputs fewer letters here, or puts them in another order, those will be used.
#' @param tipnames_in_order If given, the input tipnames will be applied as rownames in the tipranges object.  Default is \code{NULL}, which results in numbering the rows.
#' @return \code{tipranges} An object of class \code{tipranges}.
#' @export
#' @seealso \code{\link{letter_string_to_binary}}, \code{\link{binary_range_to_letter_code_list}}, \code{\link{binary_ranges_to_letter_codes}},  \code{\link{getranges_from_LagrangePHYLIP}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @examples
#' testval=1
#' letter_strings = c("A", "B", "C", "AB", "AC", "BC", "ABC")
#' letter_strings_to_tipranges_df(letter_strings)
#' 
#' letter_strings = c("A", "B", "C", "AB", "AC", "BC", "ABC")
#' letter_strings_to_tipranges_df(letter_strings, 
#' tipnames_in_order=paste("tip", seq(1,7), sep=""))
#' 
letter_strings_to_tipranges_df <- function(letter_strings, letter_codes_in_desired_order="alphabet", tipnames_in_order=NULL)
	{
	all_letters = paste(letter_strings, collapse="")
	all_letters_list = strsplit(all_letters, split="")[[1]]
	unique_letters = unique(all_letters_list)
	
	
	# Check if letter_codes should be default
	if ( (length(letter_codes_in_desired_order)==1 ) && (letter_codes_in_desired_order == "alphabet"))
		{
		# Alphabet, just so we have it
		alphabet = c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z")
		letter_codes_in_desired_order = alphabet[1: length(unique_letters)]
		} 
	
	# Make the binary ranges (columns = each species, rows = presence/absence in that region)
	#binary_ranges = sapply(X=xls$tipranges, FUN=letter_string_to_binary, letter_codes_in_desired_order=Webb_Ree_2011_letter_codes)
	binary_ranges = sapply(X=letter_strings, FUN=letter_string_to_binary, letter_codes_in_desired_order=letter_codes_in_desired_order)
	
	binary_ranges = base::t(binary_ranges)
	binary_ranges_df = adf2(binary_ranges)
	
	# Apply tipnames, if available
	if (!is.null(tipnames_in_order))
		{
		rownames(binary_ranges_df) = tipnames_in_order
		}
	
	# Column names are the letter codes for each region
	colnames(binary_ranges_df) = letter_codes_in_desired_order
	
	# Use function to put in well-defined object
	tipranges = define_tipranges_object(tmpdf=binary_ranges_df)
	
	return(tipranges)
	}


	

#######################################################
# check_tipranges_for_allQs
#######################################################
#' Checks row of a tipranges object for all-question-marks ("?")
#' 
#' Rows with geography data consisting of all "?" (i.e., no information) 
#' are NOT allowed in standard BioGeoBEARS analysis.  Presumably any 
#' specimen/fossil was observed somewhere, whether or not the complete 
#' range is known.  
#'
#' This function is used by getranges_from_LagrangePHYLIP(). 
#' 
#' @rowvals The values of a row in tipranges@df
#' @symbol_to_check The symbol to check for uniformity in the row. Default "?".
#' @return \code{TRUE} or \code{FALSE}
#' @export
#' @seealso \code{\link{allQs}}, \code{\link{getranges_from_LagrangePHYLIP}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{https://code.google.com/p/lagrange/}
#'	 @cite SmithRee2010_CPPversion
#' @examples
#' 
#' x = c("?", "?", "?", "?")
#' allQs(x)
#' 
check_tipranges_for_allQs <- function(tipranges, symbol_to_check="?")
	{
	TF = apply(X=tipranges@df, MARGIN=1, FUN=allQs, symbol_to_check=symbol_to_check)
	return(TF)
	}


#######################################################
# getranges_from_LagrangePHYLIP
#######################################################
#' Read a LAGRANGE PHYLIP-style file containing geographic ranges into a \code{tipranges} object
#' 
#' Given some geographic range data for tips in the Lagrange C++/PHYLIP format (\cite{SmithRee2010_CPPversion}), this function imports
#' the range data into a \code{tipranges}-class \code{data.frame} structure.
#' 
#' LAGRANGE C++ geographic range files are ASCII text files with the format:
#' 
#' \code{19	4 (A B C D)}\cr
#' \code{P_mariniana_Kokee2	1000}\cr
#' \code{P_mariniana_Oahu	0100}\cr
#' \code{P_mariniana_MauiNui	0010}\cr
#' \code{P_hawaiiensis_Makaopuhi	0001}\cr
#' \code{P_wawraeDL7428	1000}\cr
#' [...]\cr
#' \cr
#'
#' The first row specifies the number of taxa (here, 19), the number of areas (here, 4), and finally, the names/abbreviations of the areas.
#' The rest of the rows give the taxon names, followed by a tab and then the presence/absence in each range with 1s/0s.
#'
#' The file above is part of the geographic range data for the Hawaiian \emph{Psychotria} dataset used by \cite{ReeSmith2008}.
#'
#' @param lgdata_fn The LAGRANGE geographic data file to be read.
#' @param block_allQs Should a row with all "?" be blocked? Default \code{TRUE}.
#' @return \code{tipranges_object} An object of class \code{tipranges}
#' @export
#' @seealso \code{\link{define_tipranges_object}}, \code{\link{save_tipranges_to_LagrangePHYLIP}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{https://code.google.com/p/lagrange/}
#'	 @cite SmithRee2010_CPPversion
#' @examples
#' testval=1
#' 
#' # Get the example files directory
#' extdata_dir = np(system.file("extdata", package="BioGeoBEARS"))
#' # tmp hard code: 
#' # extdata_dir = "/Dropbox/_njm/__packages/BioGeoBEARS_setup/inst/extdata/"
#' # Set the filename (Hawaiian Psychotria from Ree & Smith 2008)
#' fn = np(paste(extdata_dir, "/Psychotria_geog.data", sep=""))
#' getranges_from_LagrangePHYLIP(lgdata_fn=fn)
#' 
getranges_from_LagrangePHYLIP <- function(lgdata_fn, block_allQs=TRUE)
	{
	# Make a tipranges instance
	#tipranges_object = define_tipranges_object()
	#tipranges_object@df = dfnums_to_numeric(tipranges_object@df)

	if (file.exists(lgdata_fn) == FALSE)
		{
		txt = paste0("STOP ERROR in getranges_from_LagrangePHYLIP(): file lgdata_fn='", lgdata_fn, "' was not found. Check your inputs and your working directory (wd). Use commands like:\n - 'getwd()' to get your current R working directory (wd)\n - '?setwd' to see how to set your R working directory\n - 'list.files()' to list the files in your current R working directory\n - and use 'system('open .')' to open your file browsing program from the R command line (this works on macs, at least). ")
		cat("\n\n")
		cat(txt)
		cat("\n\n")
		stop(txt)
		} # END if (file.exists(lgdata_fn) == FALSE)

	
	# Read the Lagrange geographic ranges data file
	tmp_blah = read_PHYLIP_data(lgdata_fn)
	
	# 2014-11-18_NJM Error check: do NOT allow duplicated area names
	area_names = colnames(tmp_blah)
	uniq_area_names = unique(area_names)
	if (length(uniq_area_names) != length(area_names))
		{
		# Count area names
		countnames = rep(0, times=length(uniq_area_names))
		for (i in 1:length(uniq_area_names))
			{
			TF = uniq_area_names[i] == area_names
			countnames[i] = sum(TF)
			}
		TF_gt_1 = countnames > 1
		
		error_txt = paste("\n\nSTOP ERROR in getranges_from_LagrangePHYLIP(): Duplicate area names in your geography text file.\n\nEach area name should be used only once. The counts of each name are printed below:\n\n", sep="")
		cat(error_txt)
		
		dup_names = adf2(cbind(uniq_area_names, countnames))
		names(dup_names) = c("area_name", "count")
		print(dup_names)
		
		cat("\n\n")
		error_txt2 = paste("STOP ERROR in getranges_from_LagrangePHYLIP(): Duplicate area names in your geography text file. Each area name should be used only once.", sep="")
		stop(error_txt2)
		} # END if (length(uniq_area_names) != length(area_names))
	
	
	# 2014-05-05_NJM: Changed to allow question marks
	#tmp_input = adf2(data.matrix(tmp_blah))
	tmp_input = adf2(tmp_blah)

	#nums_as_char = as.numeric(unlist(tmp_input))
	#tmpdf = adf2(matrix(data=nums_as_char, nrow=nrow(tmp_input), ncol=ncol(tmp_input), byrow=TRUE))
	#names(tmpdf) = names(tmp_input)
	#rownames(tmpdf) = rownames(tmp_input)
	#sum(tmpdf)
	#tmpdf = dfnums_to_numeric(tmp_input, printout=TRUE)
	#cls.df(tmpdf)
	
	# Put into a tipranges object
	tipranges_object = define_tipranges_object(tmpdf=tmp_input)
	# you can get the dataframe with
	# tipranges_object@df
	
	# 2014-05-05_NJM: Changed to allow question marks
	#tipranges_object@df = adf2(data.matrix(tipranges_object@df))
	tipranges_object@df = adf2(tipranges_object@df)
	rownames(tipranges_object@df) = rownames(tmp_blah)
	
	# Check for allQs (all question marks)
	TF = check_tipranges_for_allQs(tipranges_object)
	if ( (block_allQs==TRUE) && (any(TF)) )
		{
		errortxt = paste("\n\nERROR in getranges_from_LagrangePHYLIP():\n", 
						  "Your input geography file, '", lgdata_fn, "' has all '?'\n", 
						  "in these rows:\n\n", sep="")
		cat(errortxt)
		print(tipranges_object@df[TF, ])
		
		cat("\n\nPlease fix the geography file before proceeding.\n\n", sep="")
		stop("Stopping getranges_from_LagrangePHYLIP() on geography file error.")
		}
	
	return(tipranges_object)
	}



#######################################################
# order_tipranges_by_tree_tips
#######################################################
#' Reorder the rows in a \code{tipranges} object, to correspond to tree tips
#' 
#' The tipranges object, as read from a LAGRANGE/PHYLIP-style geography file, may not
#' have the species names as the same order as they are in the tips of the tree. This 
#' function allows the user to reorder them to match the tree
#'
#' @param tipranges An object of class \code{\link{tipranges}}.
#' @param tr A \code{\link[ape]{phylo}} tree object.
#' @return \code{tipranges} An object of class \code{tipranges}
#' @export
#' @seealso \code{\link{tipranges_to_area_strings}}, \code{\link{define_tipranges_object}}, \code{\link{save_tipranges_to_LagrangePHYLIP}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{https://code.google.com/p/lagrange/}
#'	 @cite SmithRee2010_CPPversion
#' @examples
#' testval=1
#' 
#' # Get the example files directory
#' extdata_dir = np(system.file("extdata", package="BioGeoBEARS"))
#' # tmp hard code: 
#' # extdata_dir = "/Dropbox/_njm/__packages/BioGeoBEARS_setup/inst/extdata/"
#' # Set the filename (Hawaiian Psychotria from Ree & Smith 2008)
#' 
#' trfn = np(paste(extdata_dir, "/Psychotria_5.2.newick", sep=""))
#' tr = read.tree(trfn)
#' 
#' fn = np(paste(extdata_dir, "/Psychotria_geog.data", sep=""))
#' tipranges1 = getranges_from_LagrangePHYLIP(lgdata_fn=fn)
#' tipranges1
#' 
#' # Reorder the tipranges object
#' tipranges2 = order_tipranges_by_tree_tips(tipranges1, tr)
#' tipranges2
#' 
order_tipranges_by_tree_tips <- function(tipranges, tr)
	{
	# Get the order of the tips
	matchnums = get_indices_where_list1_occurs_in_list2(list1=tr$tip.label, list2=rownames(tipranges@df))
	
	# Reorder the tipranges object
	tipranges@df = tipranges@df[matchnums,]
	
	return(tipranges)
	}




#######################################################
# tipranges_to_area_strings
#######################################################
#' Convert tipranges binary coding to range strings
#' 
#' This function converts the 0110-type format of the tipranges object into a list of strings
#' describing the geographic ranges.  E.g., 1100 becomes AB, 0111 become BCD (assuming the regions
#' are abbreviated A, B, C...).  Users can input their preferred abbreviations with \code{areaabbr}.
#' 
#' Note that you will HAVE to use \code{\link{order_tipranges_by_tree_tips}} on the tipranges object first,
#' to make sure the tipranges are in the correct order on the tree tips.
#'
#' @param tipranges An object of class \code{\link{tipranges}}.
#' @param areaabbr A vector of the abbreviations (preferably 1 character each).
#' @return \code{tiprange_names} A vector of strings.
#' @export
#' @seealso \code{\link{order_tipranges_by_tree_tips}}, \code{\link{define_tipranges_object}}, \code{\link{save_tipranges_to_LagrangePHYLIP}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{https://code.google.com/p/lagrange/}
#'	 @cite SmithRee2010_CPPversion
#' @examples
#' 
#' # Get the example files directory
#' extdata_dir = np(system.file("extdata", package="BioGeoBEARS"))
#' # tmp hard code: 
#' # extdata_dir = "/Dropbox/_njm/__packages/BioGeoBEARS_setup/inst/extdata/"
#' # Set the filename (Hawaiian Psychotria from Ree & Smith 2008)
#' 
#' trfn = np(paste(extdata_dir, "/Psychotria_5.2.newick", sep=""))
#' tr = read.tree(trfn)
#' 
#' fn = np(paste(extdata_dir, "/Psychotria_geog.data", sep=""))
#' tipranges1 = getranges_from_LagrangePHYLIP(lgdata_fn=fn)
#' tipranges1
#' tipranges_to_area_strings(tipranges=tipranges1, areaabbr=NULL)
#' tipranges_to_area_strings(tipranges=tipranges1, areaabbr=c("K", "O", "M", "H"))
#' 
#' # Reorder the tipranges object
#' tipranges2 = order_tipranges_by_tree_tips(tipranges1, tr)
#' tipranges2
#' tipranges_to_area_strings(tipranges=tipranges2, areaabbr=NULL)
#' tipranges_to_area_strings(tipranges=tipranges2, areaabbr=c("K", "O", "M", "H"))
#' 
tipranges_to_area_strings <- function(tipranges, areaabbr=NULL)
	{
	if (is.null(areaabbr))
		{
		areaabbr = names(tipranges@df)
		} else {
		names(tipranges@df) = areaabbr
		}
	
	tiprangesTF = tipranges@df == 1
	tiprange_names = apply(X=tiprangesTF, 1, getname, tiparea_names=areaabbr)
	
	
	return(tiprange_names)
	}



#######################################################
# getname
#######################################################
#' Collapse range abbreviations to strings
#' 
#' This is a utility function used by apply in \code{\link{tipranges_to_area_strings}}. It extracts
#' the present areas and concatenates the abbreviations for one row.
#'
#' @param TFrow A list of TRUE and FALSE
#' @param tiparea_names The names of each area
#' @param concat If TRUE (default), merge the areas in a state into a single string.
#' @param sep The sep argument for \code{\link[base]{paste}}.
#' @return \code{tiparea} A string.
#' @export
#' @seealso \code{\link{states_list_indexes_to_areastxt}}, \code{\link{order_tipranges_by_tree_tips}}, \code{\link{define_tipranges_object}}, \code{\link{save_tipranges_to_LagrangePHYLIP}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @examples
#' getname(TFrow=c(FALSE, TRUE, TRUE, FALSE), 
#' tiparea_names=c("K", "O", "M", "H"), sep="")
#' getname(TFrow=c(FALSE, TRUE, TRUE, FALSE), 
#' tiparea_names=c("K", "O", "M", "H"), sep="_")
#' 
getname <- function(TFrow, tiparea_names, concat=TRUE, sep="")
	{
	words = tiparea_names[TFrow]
	
	if (concat == TRUE)
		{
		tiparea = paste(words, sep=sep, collapse="")
		} else {
		tiparea = paste(words, sep=sep)
		}
	return(tiparea)
	}


#######################################################
# states_list_indexes_to_areastxt
#######################################################
#' States (ranges) lists to txt string of the areas
#' 
#' This is a utility function for converting are
#'
#' @param states_list A list of states, where each state consists of a list of areas.
#' @param areanames A list of areanamess.
#' @param counting_base Does states_list start indexing areas from 0 (default) or 1?
#' @param concat If TRUE (default), merge the areas in a state into a single string.
#' @param sep Character to merge on, as in \code{\link[base]{paste}}. Default "".
#' @return \code{tiparea} A string.
#' @export
#' @seealso \code{\link{getname}}, \code{\link{order_tipranges_by_tree_tips}}, \code{\link{define_tipranges_object}}, \code{\link{save_tipranges_to_LagrangePHYLIP}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @examples
#' test=1
#' 
#' areas = c("K", "O", "M", "H")
#' states2 = rcpp_areas_list_to_states_list(areas=areas)
#' areanames = c("K", "O", "M", "H")
#' counting_base=0
#' states_list_indexes_to_areastxt(states_list=states2, areanames, counting_base=0)
#' 
states_list_indexes_to_areastxt <- function(states_list, areanames, counting_base=0, concat=TRUE, sep="")
	{
	defaults='
	areas = c("K", "O", "M", "H")
	states2 = rcpp_areas_list_to_states_list(areas=areas)
	areanames = c("K", "O", "M", "H")
	counting_base=0
	states_list_indexes_to_areastxt(states_list=states2, areanames, counting_base=0)
	'
	# Increment by 1 if your state indexes are 0-based
	if (counting_base == 0)
		{
		states_list = sapply(X=states_list, FUN="+", 1)
		}
	
	# sapply getname through the list of lists of indexes
	areastxt = sapply(X=states_list, FUN=getname, tiparea_names=areanames, concat=concat, sep=sep)
	
	return(areastxt)
	}


#######################################################
# save_tipranges_to_LagrangePHYLIP
#######################################################
#' Save a tipranges object to a LAGRANGE PHYLIP-style file containing binary-encoded geographic ranges
#' 
#' Given some geographic range data for tips in the \code{tipranges} object, this function
#' exports them to an ASCII text file in the Lagrange C++/PHYLIP format (\cite{SmithRee2010_CPPversion}).  This file can then be
#' read by \code{\link{getranges_from_LagrangePHYLIP}}.
#' 
#' LAGRANGE C++ geographic range files are ASCII text files with the format:
#' 
#' \code{19	4 (A B C D)}\cr
#' \code{P_mariniana_Kokee2	1000}\cr
#' \code{P_mariniana_Oahu	0100}\cr
#' \code{P_mariniana_MauiNui	0010}\cr
#' \code{P_hawaiiensis_Makaopuhi	0001}\cr
#' \code{P_wawraeDL7428	1000}\cr
#' [...]\cr
#' \cr
#'
#' The first row specifies the number of taxa (here, 19), the number of areas (here, 4), and finally, the names/abbreviations of the areas.
#' The rest of the rows give the taxon names, followed by a tab and then the presence/absence in each range with 1s/0s.
#'
#' The file above is part of the geographic range data for the Hawaiian \emph{Psychotria} dataset used by \cite{ReeSmith2008}.
#' @param tipranges_object An object of class \code{tipranges}.
#' @param lgdata_fn The LAGRANGE geographic data file to be output.
#' @param areanames A list of the names of the areas.
#' @return \code{tipranges_object} An object of class \code{tipranges}
#' @export
#' @seealso \code{\link{define_tipranges_object}}, \code{\link{getranges_from_LagrangePHYLIP}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{https://code.google.com/p/lagrange/}
#'	 @cite SmithRee2010_CPPversion
#' @examples
#' testval=1
#' 
#' # Create an example tipranges object
#' tipranges = define_tipranges_object()
#' 
#' # See current directory
#' getwd()
#' 
#' \dontrun{
#' # Save the file
#' # Set the filename
#' fn = "example_tipranges.data"
#' save_tipranges_to_LagrangePHYLIP(tipranges_object=tipranges, lgdata_fn=fn)
#'
#' # Show the file
#' tmplines = scan(file=fn, what="character", sep="\n")
#' cat(tmplines, sep="\n")
#' 
#' # Again, with areanames
#' save_tipranges_to_LagrangePHYLIP(tipranges_object=tipranges, 
#' lgdata_fn=fn, areanames=c("area1","area2","area3"))
#'
#' # Show the file
#' tmplines = scan(file=fn, what="character", sep="\n")
#' cat(tmplines, sep="\n")
#' } # End dontrun
#' 
save_tipranges_to_LagrangePHYLIP <- function(tipranges_object, lgdata_fn="lagrange_area_data_file.data", areanames=colnames(tipranges_object@df))
	{
	# Extract the tipranges data
	tipranges_df = tipranges_object@df

	# If there are no area names, or if they are the wrong length, assign A B C D E etc...
	if (length(areanames) ==1)
		{
		if (areanames == "replace")
			{
			new_areanames = LETTERS[1:ncol(tipranges_df)]
			areanames_txt = paste(new_areanames, sep=" ")
			cat("\nNote: assigning '", areanames_txt, "' as area names.\n", sep="")
			} else {
			areanames_txt = paste(areanames, collapse=" ")
			}
		} else {
		# Not length 1
		if (length(areanames) != ncol(tipranges_df))
			{
			new_areanames = LETTERS[1:ncol(tipranges_df)]
			areanames_txt = paste(new_areanames, sep=" ")
			cat("\nNote: assigning '", areanames_txt, "' as area names.\n", sep="")
			} else {
			areanames_txt = paste(areanames, collapse=" ")
			}
		}


	
	# Collapse ranges to strings
	ranges_strings = apply(X=tipranges_df, MARGIN=1, FUN=paste, collapse="")
	
	# Get taxon names
	taxon_names = row.names(tipranges_df)
	
	ranges_table = cbind(taxon_names, ranges_strings)
	ranges_table_txt = apply(X=ranges_table, MARGIN=1, FUN=paste, collapse="\t")
	
	header_string = paste(nrow(tipranges_df), "\t", ncol(tipranges_df), "\t(", areanames_txt, ")", sep="")
	
	# Write the header string
	#write_lines_good(dtf=header_string, outfn=lgdata_fn, sepval="\n", tmpappend=FALSE)
	write.table(x=header_string, file=lgdata_fn, append=FALSE, quote=FALSE, sep="\n", row.names=FALSE, col.names=FALSE)

	# Write the rest
	#write_lines_good(dtf=ranges_table_txt, outfn=lgdata_fn, sepval="\n", tmpappend=TRUE)
	write.table(x=ranges_table_txt, file=lgdata_fn, append=TRUE, quote=FALSE, sep="\n", row.names=FALSE, col.names=FALSE)
	
	return(lgdata_fn)
	}



#######################################
# Read PHYLIP data
# This assumes data are interleaved, and that names are separated from data
# by a tab character; none of the 10-character limit stuff.
#
# Also assumes no ambiguity in characters
#######################################
#######################################################
# read_PHYLIP_data
#######################################################
#' Read a PHYLIP-format file
#' 
#' This assumes data are interleaved, and that names are separated from data
#' by a tab character; there is no 10-character limit on names.
#'
#' This function is a precursor to \code{\link{getranges_from_LagrangePHYLIP}}.
#' 
#' @param lgdata_fn The filename to read.
#' @param regionnames A list of the names of the areas. Only used if the names are NOT specified in the file.
#' @return \code{tmpdf} A \code{\link[base]{data.frame}} containing the data.
#' @export
#' @seealso \code{\link{getranges_from_LagrangePHYLIP}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @examples
#' testval=1
#' 
#' # Get the example files directory
#' extdata_dir = np(system.file("extdata", package="BioGeoBEARS"))
#' # tmp hard code: extdata_dir = "/Dropbox/_njm/__packages/BioGeoBEARS_setup/inst/extdata/"
#' # Set the filename (Hawaiian Psychotria from Ree & Smith 2008)
#' fn = np(paste(extdata_dir, "/Psychotria_geog.data", sep=""))
#'
#' # Read in the file
#' tmpdf = read_PHYLIP_data(lgdata_fn=fn, regionnames=NULL)
#' tmpdf
#' 
#' # Read in the file
#' tmpdf = read_PHYLIP_data(lgdata_fn=fn, 
#' regionnames=c("Kauai", "Oahu", "Maui-Nui","Big Island"))
#' tmpdf	# Note that regionnames are only 
#' # used if they are NOT specified in the file.
#' # But, you could put them on manually
#' names(tmpdf) = c("Kauai", "Oahu", "Maui-Nui","Big Island")
#' tmpdf
#' 
#' # This one has no area names
#' fn = np(paste(extdata_dir, "/Psychotria_geog_noAreaNames.data", sep=""))
#' tmpdf = read_PHYLIP_data(lgdata_fn=fn, 
#' regionnames=c("Kauai", "Oahu", "Maui-Nui","Big Island"))
#' tmpdf	# Note that regionnames are only 
#' # used if they are NOT specified in the file.
#' 
read_PHYLIP_data <- function(lgdata_fn, regionnames=NULL)
	{
	setup='
	lgdata_fn = "/Users/nickm/Desktop/__projects/_2011-07-15_Hannah_spider_fossils/_data/lagrange_for_nick2/palp_no_Lacun.data"
	'

	if (file.exists(lgdata_fn) == FALSE)
		{
		txt = paste0("STOP ERROR in read_PHYLIP_data(): file lgdata_fn='", lgdata_fn, "' was not found. Check your inputs and your working directory (wd). Use commands like:\n - 'getwd()' to get your current R working directory (wd)\n - '?setwd' to see how to set your R working directory\n - 'list.files()' to list the files in your current R working directory\n - and use 'system('open .')' to open your file browsing program from the R command line (this works on macs, at least). ")
		cat("\n\n")
		cat(txt)
		cat("\n\n")
		stop(txt)
		} # END if (file.exists(lgdata_fn) == FALSE)


	# Read the 1st line, split on whitespaces
	firstline = scan(lgdata_fn, what="character", nlines=1)
	
	# Parse the firstline
	ntips = as.numeric(firstline[1])
	nareas = as.numeric(firstline[2])
	
	#######################################################
	# If the length of the first line is > 2, parse that information 
	#######################################################
	if (length(firstline) > 2)
		{
		# Get region names	
		regionnames = firstline[3:length(firstline)]
		
		# Remove "(" and ")"
		regionnames = mapply(sub, pattern="\\(", replacement="", x=regionnames)
		regionnames = mapply(sub, pattern="\\)", replacement="", x=regionnames)
		names(regionnames) = NULL
		}
	
	# Make the data.frame
	tmpdf = matrix(data=NA, nrow=ntips, ncol=nareas)
	
	# Parse the remaining rows
	# Original:
	#tmplines = scan(lgdata_fn, what="character", sep="\t", skip=1)
	# 2016-05-08_more flexible:
	tmplines = scan(lgdata_fn, what="character", skip=1, strip.white=TRUE)
	tmplines = matrix(data=tmplines, ncol=2, byrow=TRUE)
	tmplines

	tipnames = tmplines[,1]
	areas_char = tmplines[,2]
	
	# Check for missing characters (e.g. 3 instead of 4)
	incorrect_numchars_TF = nchar(areas_char) != nareas
	
	if ( sum(incorrect_numchars_TF) > 0)
		{
		errortxt = paste("\n\nSTOP ERROR in read_PHYLIP_data():\n\nThe first line of data file '", lgdata_fn, "' says that you have ", ntips, " tips and ", nareas, " characters/columns/areas.\n\nHowever, ", sum(incorrect_numchars_TF), " rows have the wrong number of areas.\n\nPrinting these out:\n\n", sep="")
		cat(errortxt)
		
		rows_to_print = tmplines[incorrect_numchars_TF,]
		print(rows_to_print)
		
		cat("\n\n")
		
		errortxt = "STOPPING on ERROR in read_PHYLIP_data()."
		stop(errortxt)
		}
	
	areas_char2 = unlist(mapply(strsplit, x=areas_char, split=""))
	
	# Error check! Do the number of data
	# the number of species * number of areas?
	TF = length(areas_char2) == (ntips * nareas)
	if (TF == FALSE)
		{
		cat("\n\n===============================================\n")
		txt = paste0("STOP ERROR in BioGeoBEARS::read_PHYLIP_data(), which is usually called by BioGeoBEARS::getranges_from_LagrangePHYLIP(). These functions try to load your geographic areas file (which is basically in PHYLIP format).\n\nThe problem is that, in the first line of your geography data file, you specified that:\n\n* the number of species = ", ntips, "\n* the number of areas   = ", nareas, "\n\nThis means you should have (", ntips, " x ", nareas, ") = ", ntips * nareas, " entries (1s and 0s) in your geography matrix.  However, read_PHYLIP_data() only sees ", length(areas_char2), " of these.  Check your geography file in a plain-text editor.  Likely things to check:\n\n* Your number of species is incorrect\n* Your number of areas is incorrect\n* You have accidentally left out a 1 or 0 somewhere\n* (or included an extra area, dropped or duplicated a line somewhere etc.)\n\nPlease fix the error(s) in a plain-text editor, re-save the geography file, and re-run.\n\nThis long and detailed error message was brought to you by Nick, who has made these mistakes many times himself. Please have a nice day.")
		cat("\n\n")
		cat(txt)
		cat("\n===============================================\n\n")
		stop(txt)
		} # END if (TF == FALSE)
	
	
	areas_char3 = matrix(data=areas_char2, nrow=ntips, byrow=TRUE)


	# Store in a data.frame and return
	tmpdf = areas_char3
	tmpdf = adf(tmpdf)
	
	if ( is.null(regionnames) )
		{
		alphabet = c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z")
		regionnames	= alphabet[1:ncol(tmpdf)]
		colnames(tmpdf) = regionnames
		} else {
		names(tmpdf) = regionnames
		}
	row.names(tmpdf) = tipnames
	
	return(tmpdf)	
	}







#######################################################
# Biogeographic transition matrices
#######################################################


# Get the maximum rangesize for a given ancestral rangesize
# Now, go through, and make a list of the max minsize for each decsize
# e.g.
# 
# 	# Set the parameter controlling the size distribution of 
# 	# the smaller descendant species
# 	maxent01s_param = maxent_constraint_01
# 	maxent01v_param = maxent_constraint_01v
# 	maxent01j_param = maxent_constraint_01
# 	maxent01y_param = maxent_constraint_01
# 
# 	# Cladogenesis model inputs
# 	spPmat_inputs = NULL
# 	states_indices = states_list
# 
# 	# shorten the states_indices by 1 (cutting the 
# 	# null range state from the speciation matrix)
# 	if (include_null_range == TRUE)
# 		{
# 		states_indices[1] = NULL
# 		} # END if (include_null_range == TRUE)
# 	spPmat_inputs$l = states_indices
# 	l = states_indices
# 	spPmat_inputs$s = ys
# 	spPmat_inputs$v = v
# 	spPmat_inputs$j = j
# 	spPmat_inputs$y = ys
# 	spPmat_inputs$dmat = distances_mat
# 	spPmat_inputs$maxent01s_param = maxent01s_param
# 	spPmat_inputs$maxent01v_param = maxent01v_param
# 	spPmat_inputs$maxent01j_param = maxent01j_param
# 	spPmat_inputs$maxent01y_param = maxent01y_param
# 
# 	maxent01s = relative_probabilities_of_subsets(max_numareas=numareas, maxent_constraint_01=maxent01s_param, NA_val=0)
# 	maxent01v = relative_probabilities_of_vicariants(max_numareas=numareas, maxent_constraint_01=maxent01v_param, NA_val=0)
# 	maxent01j = relative_probabilities_of_subsets(max_numareas=numareas, maxent_constraint_01=maxent01j_param, NA_val=0)
# 	maxent01y = relative_probabilities_of_subsets(max_numareas=numareas, maxent_constraint_01=maxent01y_param, NA_val=0)
# 
# 	
# 	# Matrix of probs for each ancsize
# 	maxprob_as_function_of_ancsize_and_decsize = mapply(FUN=max, maxent01s, maxent01v, maxent01j, maxent01y, MoreArgs=list(na.rm=TRUE))
# 	maxprob_as_function_of_ancsize_and_decsize = matrix(data=maxprob_as_function_of_ancsize_and_decsize, nrow=nrow(maxent01s), ncol=ncol(maxent01s))
# 	maxprob_as_function_of_ancsize_and_decsize[maxprob_as_function_of_ancsize_and_decsize > 0] = 1
# 	maxprob_as_function_of_ancsize_and_decsize[maxprob_as_function_of_ancsize_and_decsize <= 0] = 0
# 	
# 	# Now, go through, and make a list of the max minsize for each decsize
# 	maxsize <- function (areasizes_possible_01)
# 		{
# 		max(1:sum(areasizes_possible_01 > 0, na.rm=TRUE))
# 		}
# 	# Now, go through, and make a list of the max minsize for each decsize
# 	max_minsize_as_function_of_ancsize = apply(X=maxprob_as_function_of_ancsize_and_decsize, MARGIN=1, FUN=maxsize)
# 	max_minsize_as_function_of_ancsize

#######################################################
# maxsize
#######################################################
#' Get the maximum rangesize for a given ancestral rangesize
#' 
#' This function returns the maximum descendant rangesize for a given ancestral rangesize, given a list of
#' 0/1 values specifying the possibility of each descendant rangesizes.
#' 
#' This is mostly a utility function used within \code{\link[base]{apply}} within other functions.
#' 
#' @param areasizes_possible_01 A list of 0/1 values, indicating whether an range of that size (rangesize = 1-based index = 1, 2, 3...) is possible (1) or not (0).
#' @return \code{max_number_of_areas} The maximum number of areas 
#' @export
#' @seealso \code{\link[base]{apply}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @examples
#' testval=1
#' areasizes_possible_01 = c(1,1,1,0,0)
#' maxsize(areasizes_possible_01)
#' 
maxsize <- function (areasizes_possible_01)
	{
	indexes_possible = 1:length(areasizes_possible_01)
	
	areasizes_possible_01_v2 = areasizes_possible_01 * indexes_possible
	max_number_of_areas = max(areasizes_possible_01_v2, na.rm=TRUE)
	
	return(max_number_of_areas)
	}




#######################################################
# default_states_list
#######################################################
#' Default input for a states_list
#' 
#' R CMD check limits the length of inputs to variables for functions; this is a 
#' workaround.
#' 
#' @return \code{states_list} The list of states
#' @export
#' @seealso \code{\link{make_dispersal_multiplier_matrix}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @examples
#' states_list = default_states_list()
#' 
default_states_list <- function ()
	{
	states_list=list("_", c("A"), c("B"), c("C"), c("A","B"), c("B","C"), c("A","C"), c("A","B","C"))
	
	return(states_list)
	}




#######################################################
# make_dispersal_multiplier_matrix
#######################################################
#' Make a default matrix of relative dispersal probabilities between areas
#' 
#' Given either a list of areas, or a list of states, this function provides a square
#' dispersal matrix giving the relative probability of dispersal between areas.  The function
#' fills in these dispersals probabilities with the value 1.  The user can then modify this
#' as desired.
#' dispersal_multipliers_matrix Default NULL
#' distances_mat Default NULL
#' x_exponent Default 0
#' 
#' If only a states list is given, the list of areas is calculated by getting \code{\link[base]{unique}} values from
#' the concatenated states list.
#' 
#' @param areas A list of areas; if \code{NULL}, the states list will be used.
#' @param states_list A list of states, where each state consists of a list of areas. A default example list is provided.
#' @param dispersal_multipliers_matrix Input dispersal_multipliers_matrix, which will
#' be multiplied by further modifiers. Default NULL, which will populate the matrix 
#' with 1s.
#' @param w_exponent Exponent on dispersal multipliers matrix. Default 1 (the dispersal multipliers
#' are not modified).
#' @param distances_mat Distances matrix (can be any distance 
#' measure the user likes, however). Default NULL.
#' @param x_exponent Exponent on distance. Default 0.
#' @param envdistances_mat Environmental distances matrix (can be any distance 
#' measure the user likes, however). Default NULL.
#' @param n_exponent Exponent on the environmental distances matrix. Default 0.
#' @return \code{dispersal_multiplier_matrix} A square matrix, with 1s for all cells.
#' @export
#' @seealso \code{\link{make_relprob_matrix_de}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#'	 @cite FosterIdiots
#' @examples
#' testval=1
#' make_dispersal_multiplier_matrix(areas=NULL, 
#' states_list=list("_", c("A"), c("B"), c("C"), 
#' c("A","B"), c("B","C"), c("A","C"), c("A","B","C")))
#' make_dispersal_multiplier_matrix(areas=c("A","B","C","D"))
#' 
make_dispersal_multiplier_matrix <- function(areas=NULL, states_list=default_states_list(), dispersal_multipliers_matrix=NULL, distances_mat=NULL, envdistances_mat=NULL, w_exponent=0, x_exponent=0, n_exponent=0)
	{
	defaults='
	areas=NULL
	states_list=list("_", c("A"), c("B"), c("C"), c("A","B"), c("B","C"), c("A","C"), c("A","B","C"))
	make_dispersal_multiplier_matrix()
	'
	
	if (is.null(areas))
		{
		if (is.null(states_list))
			{
			stop("ERROR: for make_dispersal_multiplier_matrix() you must specify either an areas list or a states_list")
			}
		
		tmpareas = unlist(states_list)
		areas = unique(tmpareas)
		# Remove null range
		areas = areas[areas != "_"]
		}
	
	# Make a dispersal matrix giving the relative
	# probabilities of dispersal between
	# each area (areas, not ranges)
	if (is.null(dispersal_multipliers_matrix))
		{
		numareas = length(areas)
		dispersal_multipliers_matrix = as.data.frame(matrix(data=rep(1, numareas*numareas), nrow=numareas, ncol=numareas, byrow=TRUE))
	
		names(dispersal_multipliers_matrix) = areas
		rownames(dispersal_multipliers_matrix) = areas
		}
	dispersal_multipliers_matrix = dispersal_multipliers_matrix ^ w_exponent
	
	# If the distances matrix is NOT null, multiply it by the distance^-x_exponent matrix
	if (!is.null(distances_mat))
		{
		# By default, x_exponent is 0, i.e., no effect
		prob_dispersal_by_distance = distances_mat ^ (1 * x_exponent)
		dispersal_multipliers_matrix = dispersal_multipliers_matrix * prob_dispersal_by_distance
		}

	# If the distances matrix is NOT null, multiply it by the distance^-x_exponent matrix
	if (!is.null(envdistances_mat))
		{
		# By default, x_exponent is 0, i.e., no effect
		prob_dispersal_by_distance = envdistances_mat ^ (1 * n_exponent)
		dispersal_multipliers_matrix = dispersal_multipliers_matrix * prob_dispersal_by_distance
		}

	
	return(dispersal_multipliers_matrix)
	}


#######################################################
# make_relprob_matrix_de
#######################################################
#' Make a relative dispersal probability matrix (in text form)
#' 
#' This function takes a list of states/geographic ranges, and makes a relative probability matrix
#' describing the probability of transition between each state.  These probabilities are described 
#' in terms of d, "dispersal" (actually range expansion)
#' and "extinction" (actually local extirpation, or range contraction), as done in the program 
#' \code{LAGRANGE} (\cite{ReeSmith2008}, \cite{SmithRee2010_CPPversion}).
#'
#' The output \code{\link[base]{data.frame}}, termed \code{dedf} (dedf=dispersal-extinction data.frame), contains the 
#' actual text of the formulas by which the transition probability matrix would be calculated.  E.g., the example calculates
#' the matrix corresponding to Equation 1 on p. 6 of Ree & Smith (2008).
#'
#' Note that the geographic range-change process described here is a continuous-time process, where the probability of change
#' is a function of branch length, and all transitions occur because of dispersal and extinction.  LAGRANGE also implements a 
#' cladogenesis model (thus DEC -- dispersal-extinction-cladogenesis) which describes an "instantaneous" process of geographic
#' range change at speciation/lineage-splitting events. \code{BioGeoBEARS} allows users to turn on, turn off, or otherwise customize
#' both the continuous-time model and the cladogenesis model.
#'
#' @param states_list A list of states, where each state consists of a list of areas. A default example list is provided.
#' @param split_ABC \code{TRUE} or \code{FALSE} If \code{TRUE} then each state/range in the input geographic ranges (\code{states_list}) 
#' will be split on the argument contained in \code{split}.
#' @param split The character to split on.
#' @param remove_simultaneous_events If \code{TRUE} (default, as in \code{LAGRANGE} and almost all phylogenetic Markov models), then it is assumed that 
#' all changes in geographic range along branches must happen one event at a time.  If \code{FALSE}, simultaneous events are not excluded;
#' this is not recommended. However, notably, a commonly-used biogeographic model (treating biogeography as a multistate discrete character in an ML
#' framework, where every species/lineage inhabits one and only one area at any point in time) effectively is invoking a simultaneous event: e.g., A->B is a 
#' simultaneous range gain and range loss, from the perspective of the dispersal-extinction framework.
#' @param add_multiple_Ds If \code{TRUE} (default, as in \code{LAGRANGE}), the probabilities of dispersal from each possible source area are added together.
#' @param dispersal_multiplier_matrix A user-provided dispersal multiplier matrix; the default is a matrix of 1s from \code{\link{make_dispersal_multiplier_matrix}}(states_list=states_list).
#' @return \code{dedf} The output \code{\link[base]{data.frame}}, termed \code{dedf} (dedf=dispersal-extinction data.frame), contains the 
#' actual text of the formulas by which the transition probability matrix would be calculated.
#' @export
#' @seealso \code{\link{make_dispersal_multiplier_matrix}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{https://code.google.com/p/lagrange/}
#'	 @cite FosterIdiots
#' @examples
#' testval=1
#' 
#' states_list = list("_", c("A"), c("B"), c("C"), c
#' ("A","B"), c("B","C"), c("A","C"), c("A","B","C"))
#' 
#' states_list = areas_list_to_states_list_new(
#' areas=c("A","B","C"), include_null_range=TRUE, split_ABC=TRUE)
#' states_list
#' 
#' dedf = make_relprob_matrix_de(states_list=states_list, 
#' split_ABC=FALSE, split="", remove_simultaneous_events=TRUE, 
#' add_multiple_Ds=TRUE, 
#' dispersal_multiplier_matrix=make_dispersal_multiplier_matrix(states_list=states_list))
#' 
#' dedf
#' 
make_relprob_matrix_de <- function(states_list=default_states_list(), split_ABC=FALSE, split="", remove_simultaneous_events=TRUE, add_multiple_Ds=TRUE, 
dispersal_multiplier_matrix=make_dispersal_multiplier_matrix(states_list))
	{
	defaults='
	states_list=list("_", c("A"), c("B"), c("C"), c("A","B"), c("B","C"), c("A","C"), c("A","B","C"))
	split_ABC=FALSE
	split=""
	add_multiple_Ds=TRUE
	remove_simultaneous_events=TRUE
	# Default is a matrix of 1s
	dispersal_multiplier_matrix=make_dispersal_multiplier_matrix()
	'
	
	
	# If split_ABC=TRUE, assume areas can be split on ""
	if (split_ABC == TRUE)
		{
		states_list = mapply(FUN=strsplit2, split=split, states_list)
		names(states_list) = NULL		
		}	
	
	# Make an empty matrix
	tmpmat = matrix(0, nrow=length(states_list), ncol=length(states_list))
	
	# Go through the rows and columns
	for (i in 1:length(states_list))
		{
		ancstates = states_list[[i]]
		for (j in 1:length(states_list))
			{
			decstates = states_list[[j]]
			
			# If the ancestor is null, the descendant prob is 0
			if ( (length(ancstates)==1) && (ancstates == "_") )
				{
				tmpmat[i,j] = 0
				next()
				}
			
			# If the states are all the same, "-"
			if ( (length(ancstates)==length(decstates)) && (all(decstates == ancstates)==TRUE) )
				{
				tmpmat[i,j] = "-"
				next()
				}
			
			# If the descendant state is null (global extinction)
			if ( (length(decstates)==1) && (decstates == "_") )
				{
				# This many local extinction events (e) are required
				events = rep("e", times=length(ancstates))
				tmpmat[i,j] = paste(events, collapse="")
				next()
				}
			
			
			# If none of the above apply (ending the loop with next()) then
			# put in ancestral & descendant states
			tmplist = NULL
			
			# If the ancestral state is missing from descendants -- local extinction
			
			# Make sure that ALL the decstates are within the ancstates
			# No new states when an extinction event is being considered!
			if ( all(decstates %in% ancstates) == TRUE)
				{
				for (m in ancstates)
					{
					if ( (m %in% decstates) == FALSE)
						{
						tmplist = c(tmplist, "e")
						}
					}
				tmpout = paste(tmplist, collapse="")
				tmpmat[i,j] = tmpout
				next()
				}

			# If the descendant state is missing from ancestors -- local dispersal
			# THE DESCENDANT MUST BE LONGER THAN THE ANCESTOR, 
			# AND (FOR NOW) ONLY ONE AREA CAN BE ADDED
			# SO ALL AREAS IN ANCESTOR MUST BE IN THE DESCENDANT
			max_rangesize_increase = 1
			if ( (all(decstates %in% ancstates) == FALSE) && (all(ancstates %in% decstates)) && (length(decstates) == (length(ancstates)+max_rangesize_increase)) )
				{
				tmplist = NULL
				for (m in decstates)
					{
					if ( (m %in% ancstates) == FALSE)
						{
						if (add_multiple_Ds == TRUE)
							{
							# Go through each ancestral state, multiply by the relevant dispersal probability
							
							these_dispersal_probs = dispersal_multiplier_matrix[ancstates,m]
							ds_with_these_dispersal_probs = sapply(X=these_dispersal_probs, FUN=paste, "*d", sep="")
							#ds_with_these_dispersal_probs
							
							ds_to_add = paste(ds_with_these_dispersal_probs, collapse="+", sep="")			
							tmplist = c(tmplist, ds_to_add)
							} else {
							# (Note: A distance matrix has little meaning in this context)
							tmplist = c(tmplist, "d")
							}
						}
					}
				tmpout = paste(tmplist, collapse="+")
				tmpmat[i,j] = tmpout
				}
			} # end j, columns, descendant states
		} # end i, rows, ancestral states
	
	
	# Remove multiple simultaneous events if desired (default: TRUE)
	if (remove_simultaneous_events == TRUE)
		{	
		for (i in 1:nrow(tmpmat))
			{
			for (j in 1:ncol(tmpmat))
				{
				# Remove e.g. "dd"
				word = tmpmat[i,j]
				if (grepl(pattern="dd", x=word) == TRUE)
					{
					tmpmat[i,j] = 0
					}
				if (grepl(pattern="ee", x=word) == TRUE)
					{
					tmpmat[i,j] = 0
					}
				}
			}

		#character_counts = nchar(tmpmat)
		#character_counts
		#tmpmat[character_counts > 1] = 0
		}
	
	
	# Show null range as "()"
	tmpnames = unlist(lapply(X=states_list, FUN=paste, collapse=""))
	tmpnames[tmpnames == ""] = "()"
	
	
	# Dispersal-extinction matrix as data.frame
	dedf = adf(tmpmat)
	names(dedf) = tmpnames
	row.names(dedf) = tmpnames
	dedf
	
	return(dedf)
	}



#######################################################
# size_species_matrix
#######################################################
#' Calculate the dimensions of the cladogenesis/speciation matrix
#' 
#' This function calculates the dimensions of the cladogenesis/speciation matrix describing the 
#' transition probabilities between ancestral geographic ranges and descendant geographic range pairs
#' on Left (L) and Right (R) branches.
#' 
#' Under a cladogenesis model of geographic range change, the model will
#' give the conditional probability of each possible combination of geographic ranges on the 
#' Left (L) and Right (R) descendant branches, conditional on a particular ancestral state.  A matrix
#' representing these transitions will have \code{numstates} ancestral states, and \code{numstates*numstates} possible 
#' descendant pairs.  Many of these will have 0 conditional probability under the model, but,
#' for visualization or experimental purposes it can be useful to display them all.
#' 
#' However, because \code{numstates = 2^numareas} under default conditions, and the number of cells the processor has to 
#' consider (without optimization tricks) is \code{numstates^3}, this transition matrix can very quickly become cumbersome
#' to explicitly calculate or display.  \code{size_species_matrix} allows the user to check this ahead of time.
#' 
#' See \code{\link[cladoRcpp]{numstates_from_numareas}} for the details of calculating \code{numstates}.
#'
#' At various points in \code{BioGeoBEARS} code, the text and numeric versions of the cladogenesis matrix are named \code{spmat} and \code{spPmat}, respectively.
#'
#' @param states_list A list of states, where each state consists of a list of areas. A default example list is provided.
#' @param printwarn If \code{printwarn>0} (\code{printwarn=1} by default), then print to screen a message describing the size of the cladogenesis matrix.
#' @return \code{spmat_dimensions} The dimensions of the cladogenesis matrix.
#' @export
#' @seealso \code{\link{make_relprob_matrix_de}}, \code{\link{make_spmat_row}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{https://code.google.com/p/lagrange/}
#' @examples
#' testval=1
#' spmat_dimensions = size_species_matrix(
#' states_list=list("_", c("A"), c("B"), c("C"), c("A","B"), 
#' c("B","C"), c("A","C"), c("A","B","C")), printwarn=1)
#' spmat_dimensions
#'
size_species_matrix <- function(states_list=default_states_list(), printwarn=1)
	{
	states_list = states_list[states_list != "_"]
	areas = unique(unlist(states_list))
	
	num_nonNull_states = length(states_list)
	
	if (printwarn > 0)
		{
		cat("\nNOTE: Your states_list has ", length(areas), " geographic areas (", areas, "), and ", num_nonNull_states, " states.\n")
		cat("        This means that at every speciation event in your tree, you have\n")
		cat("        ", num_nonNull_states, "x", num_nonNull_states, "=", num_nonNull_states*num_nonNull_states, " possible combinations of states in the two branches\n")
		cat("        just above every node (speciation event) in the tree.  This means you have to calculate\n")
		cat("        ", num_nonNull_states^3, " cells in the relative probability matrix describing the various possible scenarios.\n")
		}
	
	spmat_dimensions = c(num_nonNull_states, num_nonNull_states^2)
	
	return(spmat_dimensions)
	}







#######################################################
# expand.grid.alt
#######################################################
#' A faster version of expand.grid
#' 
#' This should be faster than \code{\link[base]{expand.grid}}, which "[c]reate[s] a data frame from all combinations of the supplied vectors or factors" (R documentation).  
#' 
#' The source of this function was this discussion thread: \url{http://stackoverflow.com/questions/10405637/use-outer-instead-of-expand-grid}
#' 
#' @param seq1 A sequence of elements
#' @param seq2 A sequence of elements
#' @return \code{matrix_of_combinations} A matrix of all the possible combinations.
#' @export
#' @seealso \code{\link[stats]{convolve}}, \code{\link{expand.grid}}, \code{\link{expand.grid.jc}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @examples
#' testval=1
#' seq1 = c("A","B","C")
#' seq2 = seq1
#' expand.grid(seq1,seq2)
#' expand.grid.alt(seq1,seq2)
#' expand.grid.jc(seq1,seq2)
#' 
expand.grid.alt <- function(seq1,seq2)
	{
	matrix_of_combinations = cbind(rep.int(seq1, length(seq2)), c(base::t(matrix(rep.int(seq2, length(seq1)), nrow=length(seq2)))))
	return(matrix_of_combinations)
	}

#######################################################
# expand.grid.jc
#######################################################
#' An even faster version of expand.grid
#' 
#' This should be faster than \code{\link[base]{expand.grid}}, which "[c]reate[s] a data frame from all combinations of the supplied vectors or factors" (R documentation).  
#' 
#' The source of this function was this discussion thread: \url{http://stackoverflow.com/questions/10405637/use-outer-instead-of-expand-grid}
#' 
#' @param seq1 A sequence of elements
#' @param seq2 A sequence of elements
#' @return \code{matrix_of_combinations} A matrix of all the possible combinations.
#' @export
#' @seealso \code{\link[stats]{convolve}}, \code{\link{expand.grid}}, \code{\link{expand.grid.jc}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @examples
#' testval=1
#' seq1 = c("A","B","C")
#' seq2 = seq1
#' expand.grid(seq1,seq2)
#' expand.grid.alt(seq1,seq2)
#' expand.grid.jc(seq1,seq2)
#' 
expand.grid.jc <- function(seq1,seq2)
	{
  matrix_of_combinations = cbind(Var1 = rep.int(seq1, length(seq2)), Var2 = rep.int(seq2, rep.int(length(seq1),length(seq2))))
	return(matrix_of_combinations)
	}







#######################################################
# make_spmat_row
#######################################################
#' Construct a (text) cell of the cladogenesis/speciation matrix
#' 
#' Given the identity of the states/geographic ranges on the left branch (\code{Lstates}), right branch (\code{Rstates}), and ancestral areas (\code{ancareas_txt_tmp}),
#' construct the (text version) of the row of transition probabilities.  This means that each nonzero cell gets a \emph{v} for a vicariance event, a \emph{y}
#' for a sympatric speciation/range-copying event, a \emph{j} for a founder-event/jump speciation event, and an \emph{s} for a sympatric-subset event.
#'
#' This function is utilized by \code{\link[base]{apply}} in other functions (e.g. ) in an attempt to speed up calculation over rows.  However, processing of
#' text formulas via \code{\link[base]{apply}} will
#' never be fast enough for large matrices; see \code{\link[cladoRcpp]{cladoRcpp}} for optimized functions.
#' 
#' This text-based matrix later gets evaluated by other functions to calculate the numerical probabilities.  I.e., if j=0 and the other forms of speciation have weights 
#' equal to each other, this is the \code{LAGRANGE} cladogenesis model.
#'
#' @param Lstates A string listing the possible left states, which will be split by \code{splitval}.
#' @param Rstates A string listing the possible right states, which will be split by \code{splitval}.
#' @param ancareas_txt_tmp A string listing the possible ancestral states, which will be split by \code{splitval}.
#' @param splitval The character to split on.
#' @param code_for_overlapping_subsets Hypothetically, there is no reason that a vicariance event could happen, e.g. ABC-->AB, BC.  This is disallowed in \code{LAGRANGE}
#' BioGeoBEARS defaults, and, if one is going to employ the construct of discrete areas in the first place, overlaps should probably be avoided.  But this parameter
#' will allow experimentation.  Here, \code{code_for_overlapping_subsets=NA} equals the default, and any other value means that overlapping vicariance events are
#' included, with a number describing the number of areas in the overlap.  Users could then manually convert this to a probability according to some function.
#' @return \code{returncell} The text specifying the type of transition.
#' @export
#' @seealso \code{\link{size_species_matrix}}, \code{\link{make_relprob_matrix_bi}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{https://code.google.com/p/lagrange/}
#' @examples
#' testval=1
#' 
make_spmat_row = function(Lstates, Rstates, ancareas_txt_tmp, splitval="", code_for_overlapping_subsets=NA)
	{
	Lareas = strsplit(Lstates, split=splitval)[[1]]
	Rareas = strsplit(Rstates, split=splitval)[[1]]
	
	#Lareas = strsplit(LR2cells[1], split=splitval)[[1]]
	#Rareas = strsplit(LR2cells[2], split=split)[[1]]
	ancareas = strsplit(ancareas_txt_tmp, split=splitval)[[1]]
	
	# OK, now apply the rules
	# First, rule out impossible cases
	allareas = unique(c(Lareas, Rareas))
	
	# Any cells with "_" states in ancestor or descendant are ruled out
	if ( any(ancareas == "_") == TRUE) 
		{
		returncell = 0
		return(returncell)
		}
	
	# These include any where ranges are lost in both lineages
	if (any( (ancareas %in% allareas) == FALSE) )
		{
		returncell = 0
		return(returncell)
		}
	
	# And any cases where ALL of the descendant ranges are NOT in anc
	if (all( ((allareas %in% ancareas) == FALSE) ))
		{
		returncell = 0
		return(returncell)
		}
	
	# And any cases where >1 descstates are not in anc
	# (The only way to add a range is with jump dispersal, in
	#  this formulation. Which I'm not sure if I believe, colonization
	#  range-addition could be very common compared to background
	#  shifts in range over geological time.  But whatevs.)
	new_states_in_daughters_TF = (allareas %in% ancareas) == FALSE
	num_newareas_in_daughters = sum(new_states_in_daughters_TF)
	
	maxnum_new_areas_at_speciation = 1
	if (num_newareas_in_daughters > maxnum_new_areas_at_speciation)
		{
		returncell = 0
		return(returncell)
		}
	
	# If both descendants are identical to the ancestor,
	# we have "perfect" sympatric speciation
	#
	# Lagrange allows sympatry only when range size = 1
	# 
	# Case: sympatry
	if ( (length(Lareas)==length(ancareas)) && (length(Lareas)==length(Rareas)) && all(Lareas==Rareas) && all(Lareas==ancareas) )
		{
		rangesize = length(ancareas)
		returncell = paste("y", rangesize, sep="")
		return(returncell)
		}
	
	
	# If one of the descendants is identical to the ancestor, 
	# we can have jump dispersal or subset speciation
	LRareas = list(Lareas, Rareas)
	
	
	# FIX LRareas!!!
	
	LR_identical_TF = c( ((length(ancareas)==length(LRareas[[1]])) &&  all(ancareas==LRareas[[1]]) ), ((length(ancareas)==length(LRareas[[2]])) &&  all(ancareas==LRareas[[2]])))
	if ( any(LR_identical_TF) )
		{
		# BUT: We have to make sure that the non-identical daughter
		# is either (a) a single-new-area or (b) an actual subset
		identical_daughter = list(Lareas, Rareas)[LR_identical_TF][[1]]
		nonidentical_daughter = list(Lareas, Rareas)[LR_identical_TF==FALSE][[1]]
		
						
		# Case: jump dispersal ("j"), founding a new lineage in a single region
		# Jump dispersal if 1 area identical and the other with a single new area
		rangesize_nonidentical_daughter = length(nonidentical_daughter)
		if ( (rangesize_nonidentical_daughter == 1) && ( all(nonidentical_daughter %in% identical_daughter) == FALSE) )
			{
			#returncell = paste(returncell, "+j", sep="")
			returncell = "j"
			return(returncell)
			}
		
		# If both are identical, that's sympatry ("y"), we've already done that
		
		# If one new species has a subset of the range, that's sympatry2 (subset of range, "s")
		
		# For now, we only allow subset speciation where the new species has a range of 1
		if ( (all(nonidentical_daughter %in% identical_daughter) == TRUE) )
			{
			returncell = paste("s", length(nonidentical_daughter), "_", length(identical_daughter), sep="")
			return(returncell)		
			} else {
			returncell = 0
			return(returncell)					
			}


		} # end where one branch has an identical subset
	
	
	# Vicariance ("v") can only happen when daughter states cover all of the ancestral states
	if (all(allareas %in% ancareas))
		{
		# Classic vicariance occurs where the descendant ranges add up to the
		# ancestral range, perfectly
		desc_states_in_order = sort(c(Lareas, Rareas))
		
		# If and only if this matches the ancareas can we have vicariance
		# (the length test avoids a warning message)
		if ((length(desc_states_in_order)==length(ancareas)) && all(ancareas == desc_states_in_order))
			{
			# Get the smaller of the two range sizes
			range_sizes = c(length(Lareas), length(Rareas))
			smaller_vicariant_rangesize = min(range_sizes)
			ancestral_rangesize = length(ancareas)
			
			# Add the smaller range size
			#returncell = "v"
			returncell = paste("v", smaller_vicariant_rangesize, "_", ancestral_rangesize, sep="")
			return(returncell)
			} else {
			# The remaining cases are basically vicariance but with overlapping subsets
			if (is.na(code_for_overlapping_subsets))
				{
				returncell = 0
				return(returncell)
				} else {
				# Calculate length of overlap
				numareas_overlapping_in_daughters = sum(Lareas %in% Rareas)
				
				# put e.g. "v?2", vicariance with 2 overlapping areas
				returncell = paste(code_for_overlapping_subsets, numareas_overlapping_in_daughters, sep="")
				return(returncell)
				}
			}
		}
	
	# Remaining cases (hopefully none) are left blank
	returncell = 0			
	} # end tmpfunc




#######################################################
# make_relprob_matrix_bi
#######################################################
#' Make a relative probability matrix for a single speciation (bifurcation) event
#' 
#' Given the identity of the states/geographic ranges on the left branch (\code{Lstates}), right branch (\code{Rstates}), and ancestral areas (\code{ancareas_txt_tmp}),
#' construct the (text version) of the row of transition probabilities.  This means that each nonzero cell gets a \emph{v} for a vicariance event, a \emph{y}
#' for a sympatric speciation/range-copying event, a \emph{j} for a founder-event/jump speciation event, and an \emph{s} for a sympatric-subset event.
#'
#' This function is utilized by \code{\link[base]{apply}} in other functions (e.g. ) in an attempt to speed up calculation over rows.  However, processing of
#' text formulas via \code{\link[base]{apply}} will
#' never be fast enough for large matrices; see \code{\link[cladoRcpp]{cladoRcpp}} for optimized functions.
#' 
#' This text-based matrix later gets evaluated by other functions to calculate the numerical probabilities.  I.e., if j=0 and the other forms of speciation have weights 
#' equal to each other, this is the \code{LAGRANGE} cladogenesis model.
#'
#' NOTE: This function is veeeeeeery slow, even for only 3 areas (i.e. \code{2^3=8} geographic ranges).  It is mostly useful for illustration. See \code{\link[cladoRcpp]{cladoRcpp}} for drastic improvements in calculating cladogenesis models.
#'
#' @param states_list A list of states, where each state consists of a list of areas. A default example list is provided.
#' @param split_ABC \code{TRUE} or \code{FALSE} If \code{TRUE} then each state/range in the input geographic ranges (\code{states_list}) 
#' will be split on the argument contained in \code{split}.
#' @param splitval The character to split on.
#' @param code_for_overlapping_subsets Hypothetically, there is no reason that a vicariance event could happen, e.g. ABC-->AB, BC.  This is disallowed in \code{LAGRANGE}
#' BioGeoBEARS defaults, and, if one is going to employ the construct of discrete areas in the first place, overlaps should probably be avoided.  But this parameter
#' will allow experimentation.  Here, \code{code_for_overlapping_subsets=NA} equals the default, and any other value means that overlapping vicariance events are
#' included, with a number describing the number of areas in the overlap.  Users could then manually convert this to a probability according to some function.
#' @param printwarn If \code{printwarn>0} (\code{printwarn=1} by default), then print to screen a message describing the size of the cladogenesis matrix.
#' @return \code{probmat} A matrix of strings, where each cell contains the parameters describing the conditional probability of that ancestor-->(Left descendant,Right descendant) range inheritance scenario.
#' @export
#' @seealso \code{\link{size_species_matrix}}, \code{\link{make_spmat_row}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{https://code.google.com/p/lagrange/}
#' @examples
#' testval=1
#' probmat = make_relprob_matrix_bi(states_list=list("_", 
#' c("A"), c("B"), c("C"), c("A","B"), c("B","C"), c("A","C"), 
#' c("A","B","C")), split_ABC=FALSE, splitval="", 
#' code_for_overlapping_subsets=NA, printwarn=1)
#' probmat
#' 
make_relprob_matrix_bi <- function(states_list=default_states_list(), split_ABC=FALSE, splitval="", code_for_overlapping_subsets=NA, printwarn=1)
	{
	defaults='
	states_list=list("_", c("A"), c("B"), c("C"), c("A","B"), c("B","C"), c("A","C"), c("A","B","C"))
	split_ABC=FALSE
	splitval=""
	code_for_overlapping_subsets=NA
	printwarn=1
	'
	
	# If split_ABC=TRUE, assume areas can be split on ""
	if (split_ABC == TRUE)
		{
		states_list = mapply(FUN=strsplit2, split=splitval, states_list)
		names(states_list) = NULL		
		}	
	
	# Remove any null state
	#cat("\n\nNote: make_relprob_matrix_bi() is about range change at the point event of
	#  speciation, therefore the geographic range of NULL is irrelevant, it is
	#  being deleted.\n\n")
	#states_list = states_list[unlist(lapply(X=states_list, FUN=is.null) == FALSE)]
	# Remove the "NULL" range of "_"
	states_list = states_list[states_list != "_"]
	# Instead, set the probability of any range combo with "_" in it to 0


	# Check for the size of this matrix
	if (printwarn > 0)
		{
		# Print the predicted size of the spmat for the user.
		size_species_matrix(states_list, printwarn)
		}


	
	# We need descendant states for each of two branches
	# (this does not include diagonal, same-state for some reason)
	# row1 = states on left branch just above node
	# row2 = states on right branch just above node
	
	# 	JUNK
	# 	if (length(states_list) >= 2)
	# 		{
	# 		desc_states_mat = combn(x=states_list, m=2)
	# 		} else {
	# 		desc_states_mat = matrix
	# 		}
	

	# Insert the sympatric events (e.g., A --> A,A)
	# comma: yes
	# sympatric_desc_names = sapply(X=states_list, FUN=paste, collapse=",")
	# comma: no
	sympatric_desc_names = sapply(X=states_list, FUN=paste, collapse="")

	len_states_list = length(states_list)	
	if (len_states_list > 1)
		{
		# Initialize blank matrices
		RL = matrix(NA, nrow=len_states_list^2, ncol=2)
		RL2 = matrix(NA, nrow=len_states_list^2, ncol=2)
		LR = matrix(NA, nrow=len_states_list^2, ncol=2)
		descstates_txt = rep(NA, len_states_list^2)
		
		# Right and Left descendants
		# RL, RL2, and LR have 2 columns; these are then 
		#RL = expand.grid(R=sympatric_desc_names, L=sympatric_desc_names)
		#RL = expand.grid.jc(seq1=sympatric_desc_names, seq2=sympatric_desc_names)
		#RL2 = sapply(FUN=as.character, RL)
		#LR = cbind(RL2[,2], RL2[,1])

		RL = expand.grid.jc(seq1=sympatric_desc_names, seq2=sympatric_desc_names)
		#RL2 = sapply(FUN=as.character, RL)
		LR = cbind(RL[,2], RL[,1])
		#descstates_txt = paste(RL2[,2], RL2[,1], collapse="|")
		descstates_txt = apply(LR, 1, paste, collapse="|")
	
		# comma: yes
		# ancstates_txt = sapply(states_list, FUN=paste, collapse=",")
		# comma: no
		ancstates_txt = sapply(states_list, FUN=paste, collapse="")
		} else {
		# Sympatry only allowed
		descstates_txt = paste(as.character(sympatric_desc_names), "|", as.character(sympatric_desc_names), sep="")
		ancstates_txt = as.character(sympatric_desc_names)
		LR = cbind(sympatric_desc_names, sympatric_desc_names)
		}
	
	# Now make a matrix describing the probabilities
	probmat = matrix("", nrow=length(ancstates_txt), ncol=length(descstates_txt))
	
	
	# APPLY HERE ALSO
	# Rows/ancestral states
	for (i in 1:length(ancstates_txt))
		{
		#ancstates = strsplit(ancstates_txt[i], split=",")[[1]]
		#ancstates = strsplit(ancstates_txt[i], split=splitval)[[1]]
		
		print(i)
		probmat[i,] = mapply(FUN=make_spmat_row, Lstates=LR[,1], Rstates=LR[,2], ancareas_txt_tmp=ancstates_txt[i], splitval=splitval)

		
		} # end ancstates
	
	probmat = adf2(probmat)
	names(probmat) = descstates_txt
	row.names(probmat) = ancstates_txt
	
	return(probmat)
	}




#######################################################
# divide_probs_by_number_of_options_txt
#######################################################
#' Divide each type of event by its frequency
#' 
#' In a speciation/cladogenesis matrix, the conditional probabilities of each row must sum to 
#' 1.  This function sums the number of events of each category and scales them accordingly.
#'
#' This function returns the strings, which can then be processed in other functions by e.g. find/replace or \code{\link[base]{eval}}.
#'
#' @param probmat A character matrix of probabilities in the form of formulas, not normalized by the sum of each row.
#' @return \code{probmat} A matrix of strings, where each cell contains the parameters describing the conditional probability of
#' that ancestor-->(Left descendant,Right descendant) range inheritance scenario.
#' @export
#' @seealso \code{\link{make_relprob_matrix_bi}}, \code{\link{divide_probs_by_number_of_options_nums}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @examples
#' testval=1
#' probmat = make_relprob_matrix_bi()
#' probmat
#' 
#' probmat2 = divide_probs_by_number_of_options_txt(probmat)
#' probmat2
#' 
#' 
divide_probs_by_number_of_options_txt <- function(probmat)
	{
	defaults='
	probmat = spmat
	'
	
	for (i in 1:nrow(probmat))
		{
		tmprow = unlist(probmat[i, ])
		
		# Group everything by literal match (commented out)
		#unique_transitions = unlist(unique(tmprow))

		# Just take the first letter, and uniquify that
		# (so that e.g. s1, s2, or v1, v2, are in the same category)
		# Group everything with the same first letter
		tmprow_letters = sapply(X=tmprow, FUN=substr, start=1, stop=1)
		unique_transitions = unlist(unique(tmprow_letters))
		
		# Remove zeros
		unique_transitions = unique_transitions[unique_transitions != "0"]
		unique_transitions
		
		# Go through the types of transitions, and weight by their frequency
		for (j in 1:length(unique_transitions))
			{
			# Group everything by literal match (commented out)
			#count_of_this_transition = sum(tmprow == unique_transitions[j])
			
			# Group everything with the same first letter
			count_of_this_transition = sum(tmprow_letters == unique_transitions[j])
			
			# If there is only one of this type of transition, do nothing
			if (count_of_this_transition == 1)
				{
				next()
				} else{
				# Otherwise, divide by its frequency in the row
				cells_to_change_TF = tmprow_letters == unique_transitions[j]
				
				# Group everything by literal match (commented out)
				#newtxt = paste(unique_transitions[j], "/", count_of_this_transition, sep="")
				# Group everything with the same first letter
				newtxt = paste(tmprow[cells_to_change_TF], "/", count_of_this_transition, sep="")
				tmprow[cells_to_change_TF] = newtxt
				}
			}		
		# Input the revised cells into the original probmat	
		probmat[i, ] = tmprow
		}
	return(probmat)
	}




#######################################################
# divide_probs_by_number_of_options_nums
#######################################################
#' Divide each type of event by its frequency, return calculated probabilities
#' 
#' In a speciation/cladogenesis matrix, the conditional probabilities of each row must sum to 
#' 1.  This function sums the number of events of each category and scales them accordingly.
#'
#' This function returns the calculated conditional probabilities.
#'
#' @param spPmat A matrix of numbers, where each cell contains the conditional probability of
#' that ancestor-->(Left descendant,Right descendant) range inheritance scenario.
#' @param probmat A matrix of text, describing each of the allowed range-inheritance events.
#' @return \code{spPmat} A matrix of numbers, where each cell contains the conditional probability of
#' that ancestor-->(Left descendant,Right descendant) range inheritance scenario.
#' @export
#' @seealso \code{\link{make_relprob_matrix_bi}}, \code{\link{divide_probs_by_number_of_options_txt}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @examples
#' testval=1
#' spmat = make_relprob_matrix_bi()
#' spmat
#' 
#' spmat1 = divide_probs_by_number_of_options_txt(spmat)
#' spmat1
#' 
#' 
#' probmat = spmat
#' spPmat = symbolic_to_relprob_matrix_sp(spmat, cellsplit="\\+", 
#' mergesym="*", ys=1, j=0, v=1, maxent_constraint_01=0.0001, 
#' maxent_constraint_01v=0.0001, max_numareas=3)
#' spPmat
#' probmat2 = divide_probs_by_number_of_options_nums(spPmat, probmat)
#' probmat2
#' 
#' probmat = spmat1
#' spPmat = symbolic_to_relprob_matrix_sp(spmat, cellsplit="\\+", 
#' mergesym="*", ys=1, j=0, v=1, maxent_constraint_01=0.0001, 
#' maxent_constraint_01v=0.0001, max_numareas=3)
#' spPmat
#' probmat3 = divide_probs_by_number_of_options_nums(spPmat, probmat)
#' probmat3
#' 
divide_probs_by_number_of_options_nums <- function(spPmat, probmat)
	{
	defaults='
	probmat = spmat
	'
	
	for (i in 1:nrow(probmat))
		{
		tmprow = unlist(probmat[i, ])

		# Group everything by literal match (commented out)
		#unique_transitions = unlist(unique(tmprow))

		# Just take the first letter, and uniquify that
		# (so that e.g. s1, s2, or v1, v2, are in the same category)
		# Group everything with the same first letter
		tmprow_letters = sapply(X=tmprow, FUN=substr, start=1, stop=1)
		unique_transitions = unlist(unique(tmprow_letters))
	
		# Remove zeros
		unique_transitions = unique_transitions[unique_transitions != "0"]
		unique_transitions
		
		# Go through the types of transitions, and weight by their frequency
		for (j in 1:length(unique_transitions))
			{
			# Group everything by literal match (commented out)
			#count_of_this_transition = sum(tmprow == unique_transitions[j])
			
			# Group everything with the same first letter
			count_of_this_transition = sum(tmprow_letters == unique_transitions[j])
			
			# If there is only one of this type of transition, do nothing
			if (count_of_this_transition == 1)
				{
				next()
				} else{
				# Otherwise, divide by its frequency in the row
				#cells_to_change_TF = tmprow == unique_transitions[j]

				# Otherwise, divide by its frequency in the row
				cells_to_change_TF = tmprow_letters == unique_transitions[j]

				
				#newtxt = paste(unique_transitions[j], "/", count_of_this_transition, sep="")
				#tmprow[cells_to_change_TF] = newtxt
				spPmat[i, cells_to_change_TF] = spPmat[i, cells_to_change_TF] / count_of_this_transition
				}
			}		
		# Input the revised cells into the original probmat	
		#probmat[i, ] = tmprow
		}
	
	# Re-normalize so rows sum to 1
	spPmat = spPmat / rowSums(spPmat)
	return(spPmat)
	}




# Convert a observed-speciation transition matrix to an
# unobserved-speciation transition matrix
#######################################################
# make_relprob_txtmatrix_sp1
#######################################################
#' Convert a observed-speciation transition matrix to an unobserved-speciation transition matrix (text version)
#'
#' Convert a cladogenesis/speciation transition matrix (specifying the probability of each Left/Right descendant range pair,
#' conditional on each ancestral state) of dimensions \code{numstates} by \code{numstates^2} to a square transition matrix of dimensions
#' \code{numstates} by \code{numstates}, representing the probability of a transition when only one daughter survives in the tree.
#' 
#' This matrix could be used to quantify the probability of range-change along a branch due to unobserved speciation events; all that would 
#' be required would be an estimate of the number of unobserved speciation events on the branch, and treating this as a Poisson process.
#' (Note: this assumes that the probability of either branch surviving is identical, which might not be the case. See the GeoSSE
#' (\cite{Goldberg_etal_2011_GeoSSE}) and ClaSSE (\cite{Goldberg_Igic_2012_ClaSSE}) for the beginnings of work on this, with 2 and 3
#' geographic areas, respectively.
#' 
#' @param probmat A matrix of text, describing each of the allowed range-inheritance events.  Assumes that column names are in the "A|B" format.
#' @param split The value to split Left/Right pairs on (e.g., "A|B" --> "A", "B")
#' @return \code{newmat} A new square matrix.
#' @export
#' @seealso \code{\link{make_relprob_matrix_bi}}, \code{\link{make_relprob_nummatrix_sp1}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{https://code.google.com/p/lagrange/}
#' \url{http://tigger.uic.edu/~eeg/code/code.html}
#'	 @cite Goldberg_etal_2011_GeoSSE
#'	 @cite Goldberg_Igic_2012_ClaSSE
#' @examples
#' testval=1
#' probmat = make_relprob_matrix_bi(states_list=list("_", c("A"), 
#' c("B"), c("C"), c("A","B"), c("B","C"), c("A","C"), c("A","B","C")), 
#' split_ABC=FALSE, splitval="", code_for_overlapping_subsets=NA, printwarn=1)
#' probmat
#' 
#' newmat = make_relprob_txtmatrix_sp1(probmat=probmat, split="\\\\|")
#' newmat
#' 
make_relprob_txtmatrix_sp1 <- function(probmat, split="\\|")
	{
	# In this case, at the unobserved speciation event, there is only one 
	# known descendant.  This could (hypothetically) be 
	# either the "left" or "right" branch of a speciation
	# event -- obviously the labels are arbitrary.
	#
	# The probability of any particular ancestor-descendant transition can
	# therefore be found by looking at the transition probabilities for the
	# species bifurcation matrix, and summing the probabilities resulting
	# in the same state for the left branch.
	
	# Get unique states
	tmpnames = names(probmat)
	states = unlist(lapply(tmpnames, strsplit2, split="\\|"))
	uniq_states = unique(states)
	uniq_states
	
	nr = dim(probmat)[1]
	nc = dim(probmat)[2]
	
	newmat = matrix(NA, nrow=nr, ncol=nr)
	
	for (i in 1:length(uniq_states))
		{
		u = uniq_states[i]
		startcol = (i-1)*nr + 1
		endcol = (i)*nr
		
		# Subset to just the columns for a particular state on 
		# the left branch
		tmpmat = probmat[,startcol:endcol]
		tmpmat
		
		# Paste each row together
		tmpcol = paste_rows_without_zeros(tmpmat)
		
		# Insert into matrix
		newmat[,i] = tmpcol
		}
	newmat = adf2(newmat)
	names(newmat) = uniq_states
	row.names(newmat) = uniq_states
	
	return(newmat)
	}


# Convert a observed-speciation transition matrix to an
# unobserved-speciation transition matrix
#######################################################
# make_relprob_nummatrix_sp1
#######################################################
#' Convert a observed-speciation transition matrix to an unobserved-speciation transition matrix (numeric version)
#'
#' Convert a cladogenesis/speciation transition matrix (specifying the probability of each Left/Right descendant range pair,
#' conditional on each ancestral state) of dimensions \code{numstates} by \code{numstates^2} to a square transition matrix of dimensions
#' \code{numstates} by \code{numstates}, representing the probability of a transition when only one daughter survives in the tree.
#' 
#' This matrix could be used to quantify the probability of range-change along a branch due to unobserved speciation events; all that would 
#' be required would be an estimate of the number of unobserved speciation events on the branch, and treating this as a Poisson process.
#' (Note: this assumes that the probability of either branch surviving is identical, which might not be the case. See the GeoSSE
#' (\cite{Goldberg_etal_2011_GeoSSE}) and ClaSSE (\cite{Goldberg_Igic_2012_ClaSSE}) for the beginnings of work on this, with 2 and 3
#' geographic areas, respectively.
#' 
#' @param probmat A matrix of text, describing each of the allowed range-inheritance events.  Assumes that column names are in the "A|B" format.
#' @param spPmat A matrix of numbers, where each cell contains the conditional probability of
#' that ancestor-->(Left descendant,Right descendant) range inheritance scenario.
#' @param split The value to split Left/Right pairs on (e.g., "A|B" --> "A", "B")
#' @return \code{newmat} A new square matrix.
#' @export
#' @seealso \code{\link{make_relprob_matrix_bi}}, \code{\link{make_relprob_txtmatrix_sp1}}, \code{\link{paste_rows_without_zeros}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{https://code.google.com/p/lagrange/}
#' \url{http://tigger.uic.edu/~eeg/code/code.html}
#'	 @cite Goldberg_etal_2011_GeoSSE
#'	 @cite Goldberg_Igic_2012_ClaSSE
#' @examples
#' testval=1
#' spmat = make_relprob_matrix_bi(states_list=list("_", c("A"), 
#' c("B"), c("C"), c("A","B"), c("B","C"), c("A","C"), c("A","B","C")), 
#' split_ABC=FALSE, splitval="", code_for_overlapping_subsets=NA, printwarn=1)
#' spmat
#' 
#' spPmat = symbolic_to_relprob_matrix_sp(spmat, cellsplit="\\\\+", 
#' mergesym="*", ys=1, j=0, v=1, maxent_constraint_01=0.0001, 
#' maxent_constraint_01v=0.0001, max_numareas=3)
#' spPmat
#' 
#' newmat = make_relprob_nummatrix_sp1(probmat=spmat, spPmat=spPmat, split="\\\\|")
#' newmat
#'
make_relprob_nummatrix_sp1 <- function(probmat, spPmat, split="\\|")
	{
	# In this case, at the unobserved speciation event, there is only one 
	# known descendant.  This could (hypothetically) be 
	# either the "left" or "right" branch of a speciation
	# event -- obviously the labels are arbitrary.
	#
	# The probability of any particular ancestor-descendant transition can
	# therefore be found by looking at the transition probabilities for the
	# species bifurcation matrix, and summing the probabilities resulting
	# in the same state for the left branch.
	
	# Get unique states
	tmpnames = names(probmat)
	states = unlist(lapply(tmpnames, strsplit2, split="\\|"))
	uniq_states = unique(states)
	uniq_states
	
	nr = dim(probmat)[1]
	nc = dim(probmat)[2]
	
	newmat = matrix(NA, nrow=nr, ncol=nr)
	
	for (i in 1:length(uniq_states))
		{
		u = uniq_states[i]
		startcol = (i-1)*nr + 1
		endcol = (i)*nr
		
		# Subset to just the columns for a particular state on 
		# the left branch
		#tmpmat = probmat[,startcol:endcol]
		tmpmat = spPmat[,startcol:endcol]
		tmpmat
		
		# Paste each row together
		tmpcol = apply(tmpmat, 1, paste, collapse="+")
		
		tmpcol_nums = sapply(X=tmpcol, FUN=meval)
		
		# Insert into matrix
		newmat[,i] = tmpcol_nums
		}
	newmat = adf2(newmat)
	names(newmat) = uniq_states
	row.names(newmat) = uniq_states
	
	return(newmat)
	}



#######################################################
# meval
#######################################################
#' \code{eval()} function for use in \code{sapply}
#' 
#' \code{meval} is a wrapper for \code{\link{eval}}, to allow use in \code{sapply}.
#' 
#' This is an attempt to speed up the use of \code{\link{eval}}; in general use of \code{\link{eval}} to 
#' convert a text version of a transition matrix to a numeric version with probabilities
#' is a poor, slow choice; but it can be useful for examples and display purposes.
#' 
#' See \code{\link[cladoRcpp]{cladoRcpp}} for fast C++ implementations of transition matrix setup.
#' 
#' @param equation_txt The text of the equation to run \code{\link{eval}} on -- e.g., from a cell of a text-based transition matrix.
#' @return \code{outval} The numeric result of \code{\link{eval}}.
#' @export
#' @seealso \code{\link[stats]{convolve}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @examples
#' testval=1
#' d = 0.1
#' equation_txt = "1*d+1*d"
#' meval(equation_txt)
meval <- function(equation_txt)
	{
	outval = eval(parse(text = equation_txt))
	return(outval)
	}

	
# Convert e.g.
#
#       A|A A|B A|C A|A,B A|B,C A|A,C A|A,B,C
# A       s   j   j     0     0     0       0
# B       0   j   0     0     0     0       0
# C       0   0   j     0     0     0       0
# A,B     0   v   0    b1     0     0       0
# B,C     0   0   0     0     j     0       0
# A,C     0   0   v     0     0    b1       0
# A,B,C   0   0   0     0     v     0      b1
#
# ...to...
# 
#    A         B       C     A,B     B,C     A,C   A,B,C 
# "s+j+j"     "j"     "j"  "v+b1"     "j"  "v+b1"  "v+b1" 
# 


#######################################################
# paste_rows_without_zeros
#######################################################
#' Concatenate cells in each row of a text-based transition matrix, excluding zeros
#' 
#' This is a utility function for \code{\link{make_relprob_txtmatrix_sp1}}.
#' 
#' Convert e.g.:\cr
#' \cr
#' \code{       A|A A|B A|C A|A,B A|B,C A|A,C A|A,B,C}\cr
#' \code{ A       s   j   j     0     0     0       0}\cr
#' \code{ B       0   j   0     0     0     0       0}\cr
#' \code{ C       0   0   j     0     0     0       0}\cr
#' \code{ A,B     0   v   0    b1     0     0       0}\cr
#' \code{ B,C     0   0   0     0     j     0       0}\cr
#' \code{ A,C     0   0   v     0     0    b1       0}\cr
#' \code{ A,B,C   0   0   0     0     v     0      b1}\cr
#' \cr
#' ...to...\cr
#' \cr
#' \code{  A         B       C     A,B     B,C     A,C   A,B,C }\cr
#' \code{"s+j+j"     "j"     "j"  "v+b1"     "j"  "v+b1"  "v+b1" }\cr
#' \cr
#' 
#' @param tmpmat A cladogenesis/speciation probability matrix (text-based) to collapse each row of.
#' @return \code{tmpcol} A list containing each row, concatenated
#' @export
#' @seealso \code{\link{make_relprob_txtmatrix_sp1}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @examples
#' testval=1
#' 
#' spmat = make_relprob_matrix_bi(states_list=list("_", c("A"), 
#' c("B"), c("C"), c("A","B"), c("B","C"), c("A","C"), c("A","B","C")), 
#' split_ABC=FALSE, splitval="", code_for_overlapping_subsets=NA, printwarn=1)
#' spmat
#' tmpcol = paste_rows_without_zeros(tmpmat=spmat)
#' tmpcol
#' 
paste_rows_without_zeros <- function(tmpmat)
	{
	tmpcol = apply(tmpmat, 1, paste, collapse="+")
	tmpcol = gsub(pattern="\\+0", replacement="", tmpcol)
	tmpcol = gsub(pattern="0\\+", replacement="", tmpcol)
	return(tmpcol)
	}



#######################################################
# remove_null_rowcols_from_mat
#######################################################
#' Remove rows or columns representing a null geographic range from a matrix
#' 
#' This function removes rows or columns representing a null geographic range from a matrix. 
#' 
#' LAGRANGE (\cite{ReeSmith2008}) and other models often assume that a null geographic range (the lineage inhabits no
#' areas, i.e. is extinct) is a possible state.  However, this is never a possible ancestral state (since an extinct lineage will
#' never have descendants) so sometimes we must remove it.
#'
#' @param tmpmat The matrix to check for null ranges.  Function will only work if rows and columns have names, and one of the names matches \code{null_sym}.
#' @param null_sym The character(s) denoting a null range.
#' @return \code{tmpmat3} The revised matrix.
#' @export
#' @seealso \code{\link{areas_list_to_states_list_new}}, \code{\link[cladoRcpp]{areas_list_to_states_list_old}},
#' \code{\link{make_relprob_matrix_de}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{https://code.google.com/p/lagrange/}
#' @examples
#' testval=1
#' states_list = list("_", c("A"), c("B"), c("C"), c("A","B"), 
#' c("B","C"), c("A","C"), c("A","B","C"))
#' 
#' states_list = areas_list_to_states_list_new(areas=c("A","B","C"), 
#' include_null_range=TRUE, split_ABC=TRUE)
#' states_list
#' 
#' dedf = make_relprob_matrix_de(states_list=states_list, 
#' split_ABC=FALSE, split="", remove_simultaneous_events=TRUE, 
#' add_multiple_Ds=TRUE, 
#' dispersal_multiplier_matrix=make_dispersal_multiplier_matrix(states_list=states_list))
#' 
#' spmat_noNulls = remove_null_rowcols_from_mat(tmpmat=dedf, null_sym="()")
#' spmat_noNulls
#' 
#' spmat_noNulls = remove_null_rowcols_from_mat(tmpmat=dedf, null_sym="_")
#' spmat_noNulls
#' 
remove_null_rowcols_from_mat <- function(tmpmat, null_sym="()")
	{
	# Remove column with null state
	tmpnames = names(tmpmat)
	TF = tmpnames != null_sym
	tmpmat2 = tmpmat[,TF]
	
	# Remove row with null state
	tmpnames = row.names(tmpmat)
	TF = tmpnames != null_sym
	tmpmat3 = tmpmat2[TF,]
	
	return(tmpmat3)
	}




# Convert symbolic matrix to relprob matrix
#######################################################
# symbolic_to_P_matrix
#######################################################
#' Convert symbolic matrix to relprob matrix
#' 
#' This function takes a transition probability matrix (in text form) and converts to numeric form, given
#' values for \emph{d}, \emph{e}, or other parameters in the text formulas.
#' 
#' This is not particularly fast, but good for illustrative purposes.
#'
#' @param dedf The transition matrix or dispersal-extinction data.frame (dedf), contains the 
#' actual text of the formulas by which the transition probability matrix would be calculated.
#' @param cellsplit The symbol to split the formulas on. Default "\\+" (plus symbol, with escape code).
#' @param mergesym The symbol to merge the formulas with. Default "+".
#' @param diags_sum_to_1 Calculate the diagonals such that, when added to the sum of the off-diagonals in a row, the entire row sums to 1. This creates a transition
#' probability matrix where each row sums to 1, i.e. each cell represents the conditional probability of the column state, given the ancestral
#' row state.  The diagonal values represent the probability of staying the same.
#' @param d The dispersal/range expansion rate. Default \code{d=0.1}.
#' @param e The extinction/range contraction rate. Default \code{e=0.01}.
#' @param ... Additional arguments to pass to \code{\link{symbolic_cell_to_relprob_cell}} via \code{\link[base]{sapply}}, and thence to cell\code{\link[base]{strsplit}}.
#' @return \code{dedf_vals} The output \code{\link[base]{data.frame}}, contains the 
#' numeric results of the formulas calculating the transition probability matrix.
#' @export
#' @seealso \code{\link{areas_list_to_states_list_new}}, \code{\link[cladoRcpp]{areas_list_to_states_list_old}},
#' \code{\link{make_relprob_matrix_de}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @examples
#' testval=1
#' 
#' states_list = list("_", c("A"), c("B"), c("C"), c("A","B"), 
#' c("B","C"), c("A","C"), c("A","B","C"))
#' 
#' states_list = areas_list_to_states_list_new(areas=c("A","B","C"), 
#' include_null_range=TRUE, split_ABC=TRUE)
#' states_list
#' 
#' dedf = make_relprob_matrix_de(states_list=states_list,
#'  split_ABC=FALSE, split="", remove_simultaneous_events=TRUE, 
#' add_multiple_Ds=TRUE, 
#' dispersal_multiplier_matrix=make_dispersal_multiplier_matrix(states_list=states_list))
#' dedf
#' 
#' # Defaults
#' Pmat = symbolic_to_P_matrix(dedf, cellsplit="\\\\+", mergesym="+", 
#' diags_sum_to_1=FALSE, d=0.1, e=0.01)
#' Pmat
#' 
#' # Calculate diagonal
#' Pmat = symbolic_to_P_matrix(dedf, cellsplit="\\\\+", mergesym="+", 
#' diags_sum_to_1=TRUE, d=0.1, e=0.01)
#' Pmat
#' 
#' # You don't have to split, if the formulas are directly parsable
#' Pmat = symbolic_to_P_matrix(dedf, cellsplit="yadda", mergesym="", 
#' diags_sum_to_1=FALSE, d=0.1, e=0.01)
#' Pmat
#' 
symbolic_to_P_matrix <- function(dedf, cellsplit="\\+", mergesym="+", diags_sum_to_1=FALSE, d=0.1, e=0.01, ...)
	{
	defaults='
	cellsplit=""
	mergesym="*"
	diags_sum_to_1=TRUE
	d=0.1
	e=0.01
	'
	
	dedf[dedf == "-"] = NA
	
	dedf_vals = matrix(data=0, nrow=nrow(dedf), ncol=ncol(dedf))
	
	dedf_vals[!is.na(dedf_vals)] = sapply(X=dedf[!is.na(dedf_vals)], FUN=symbolic_cell_to_relprob_cell, cellsplit, mergesym, d, e, ...)
	
	# DON'T Normalize matrix into an appropriate Q matrix
	#diag_TF = diag(x=1, nrow=nrow(dedf), ncol(dedf)) == 1
	#dedf_vals[diag_TF] = -1 * rowSums(dedf_vals, na.rm=TRUE)
	
	# But, if desired, DO sum to 1:
	diag_vals = rep(NA, nrow(dedf_vals))
	if (diags_sum_to_1 == TRUE)
		{
		tmp_rowsums = rowSums(dedf_vals, na.rm=TRUE)
		diag(dedf_vals) = 1-tmp_rowsums
		}
	
	return(dedf_vals)
	}


#######################################################
# symbolic_cell_to_relprob_cell
#######################################################
#' Convert symbolic cell (a text equation) to relprob matrix (a numeric value).
#' 
#' This is a utility function for \code{\link{symbolic_to_P_matrix}} and \code{\link{symbolic_to_Q_matrix}}.
#'
#' This function can be used in \code{\link[base]{sapply}}.  It still will not be very fast compared to the calculations in 
#' \code{\link[cladoRcpp]{cladoRcpp}}, but can be useful for demonstrative purposes.
#' 
#' @param charcell The text formula.
#' @param cellsplit The symbol to split the formulas on. Default "\\+" (plus symbol, with escape code).
#' @param mergesym The symbol to merge the formulas with. Default "+".
#' @param d The dispersal/range expansion rate. Default \code{d=0.1}.
#' @param e The extinction/range contraction rate. Default \code{e=0.01}.
#' @param ... Additional arguments to pass to \code{\link[base]{strsplit}}.
#' @return \code{cellval} The output cell value.
#' @export
#' @seealso \code{\link{symbolic_to_P_matrix}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @examples
#' testval=1
#' 
#' charcell = "1*d+1*d"
#' 
#' # Right
#' cellval = symbolic_cell_to_relprob_cell(charcell, cellsplit="yadda", 
#' mergesym="", d=0.1, e=0.01)
#' cellval
#' 
#' # Wrong
#' cellval = symbolic_cell_to_relprob_cell(charcell, cellsplit="\\+", 
#' mergesym="*", d=0.1, e=0.01)
#' cellval
#' 
#' # Right
#' cellval = symbolic_cell_to_relprob_cell(charcell, cellsplit="\\+", 
#' mergesym="+", d=0.1, e=0.01)
#' cellval
#' 
symbolic_cell_to_relprob_cell <- function(charcell, cellsplit="", mergesym="*", d=0.1, e=0.01, ...)
	{
	# split the cell
	words = strsplit(charcell, split=cellsplit, ...)[[1]]
	
	# initialize the cell value
	cellval = NA
	equation = paste(words, sep="", collapse=mergesym)
	equation2 = paste("cellval = ", equation, sep="")
	
	eval(parse(text=equation2))
	
	return(cellval)
	}



#######################################################
# symbolic_to_Q_matrix
#######################################################
#' Convert symbolic matrix to an instantaneous rate matrix (Q matrix)
#' 
#' This function takes a transition probability matrix (in text form) and converts it to an 
#' instantaneous rate matrix (Q matrix), given values for \emph{d}, \emph{e}, or other parameters
#' in the text formulas.
#' 
#' This is not particularly fast, but good for illustrative purposes.
#'
#' @param dedf The transition matrix or dispersal-extinction data.frame (dedf), contains the 
#' actual text of the formulas by which the transition probability matrix would be calculated.
#' @param cellsplit The symbol to split the formulas on. Default "\\+" (plus symbol, with escape code).
#' @param mergesym The symbol to merge the formulas with. Default "+".
#' @param d The dispersal/range expansion rate. Default \code{d=0.1}.
#' @param e The extinction/range contraction rate. Default \code{e=0.01}.
#' @param ... Additional arguments to pass to \code{\link{symbolic_cell_to_relprob_cell}} via \code{\link[base]{sapply}}, and thence to cell\code{\link[base]{strsplit}}.
#' @return \code{dedf_vals} The output \code{\link[base]{data.frame}}, contains the Q matrix 
#' @export
#' @seealso \code{\link{areas_list_to_states_list_new}}, \code{\link[cladoRcpp]{areas_list_to_states_list_old}},
#' \code{\link{make_relprob_matrix_de}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#'	 @cite FosterIdiots
#' @examples
#' testval=1
#' 
#' states_list = list("_", c("A"), c("B"), c("C"), c("A","B"), 
#' c("B","C"), c("A","C"), c("A","B","C"))
#' 
#' states_list = areas_list_to_states_list_new(areas=c("A","B","C"), 
#' include_null_range=TRUE, split_ABC=TRUE)
#' states_list
#' 
#' dedf = make_relprob_matrix_de(states_list=states_list, split_ABC=FALSE, 
#' split="", remove_simultaneous_events=TRUE, add_multiple_Ds=TRUE, 
#' dispersal_multiplier_matrix=make_dispersal_multiplier_matrix(states_list=states_list))
#' dedf
#' 
#' # Right
#' Qmat = symbolic_to_Q_matrix(dedf, cellsplit="\\\\+", mergesym="+", d=0.1, e=0.01)
#' Qmat
#' 
#' # Wrong
#' Qmat = symbolic_to_Q_matrix(dedf, cellsplit="\\\\+", mergesym="*", d=0.1, e=0.01)
#' Qmat
#' 
#' # You don't have to split, if the formulas are directly parsable
#' Qmat = symbolic_to_Q_matrix(dedf, cellsplit="yadda", mergesym="", d=0.1, e=0.01)
#' Qmat
#' 
symbolic_to_Q_matrix <- function(dedf, cellsplit="\\+", mergesym="*", d=0.1, e=0.01, ...)
	{
	defaults='
	cellsplit="\\+"
	mergesym="*"
	'

	defaults2='
	d=10.9212
	e=1.73083e-07
	dedf = dedf_M2
	cellsplit="\\+"
	mergesym="+"
	'
	
	dedf[dedf == "-"] = NA
	
	dedf_vals = matrix(data=0, nrow=nrow(dedf), ncol=ncol(dedf))
	
	dedf_vals[!is.na(dedf_vals)] = sapply(X=dedf[!is.na(dedf_vals)], FUN=symbolic_cell_to_relprob_cell, cellsplit, mergesym, d, e, ...)
	dedf_vals
	
	# Normalize matrix into an appropriate Q matrix
	diag_TF = diag(x=1, nrow=nrow(dedf), ncol(dedf)) == 1
	dedf_vals[diag_TF] = -1 * rowSums(dedf_vals, na.rm=TRUE)
	
	return(dedf_vals)
	}


#######################################################
# symbolic_to_Q_matrix_exper
#######################################################
#' Experimental version of \code{symbolic_to_Q_matrix_exper}, including base frequencies
#' 
#' Still experimental.
#'
#' This function takes a transition probability matrix (in text form) and converts it to an 
#' instantaneous rate matrix (Q matrix), given values for \emph{d}, \emph{e}, or other parameters
#' in the text formulas.
#' 
#' This is not particularly fast, but good for illustrative purposes.
#'
#' @param dedf The transition matrix or dispersal-extinction data.frame (dedf), contains the 
#' actual text of the formulas by which the transition probability matrix would be calculated.
#' @param cellsplit The symbol to split the formulas on. Default "\\+" (plus symbol, with escape code).
#' @param mergesym The symbol to merge the formulas with. Default "+".
#' @param d The dispersal/range expansion rate. Default \code{d=0.1}.
#' @param e The extinction/range contraction rate. Default \code{e=0.01}.
#' @param basefreqs Base frequencies, i.e. the equilibrium probabilities of the different states; the meaning of such an idea is debatable
#' in the context of a LAGRANGE-like model where the null range (extinct everywhere) is included in the matrix and is 
#' a nonreversible absorbing state.  Default is \code{rep(1,nrow(dedf))/nrow(dedf)}.
#' @param ... Additional arguments to pass to \code{\link{symbolic_cell_to_relprob_cell}} via \code{\link[base]{sapply}}, and thence to cell\code{\link[base]{strsplit}}.
#' @return \code{dedf_vals} The output \code{\link[base]{data.frame}}, contains the Q matrix 
#' @export
#' @seealso \code{\link[cladoRcpp]{areas_list_to_states_list_old}}, \code{\link{make_relprob_matrix_de}}
#' \code{\link{areas_list_to_states_list_new}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#'	 @cite FosterIdiots
#' @examples
#' testval=1
#' 
#' states_list = list("_", c("A"), c("B"), c("C"), c("A","B"), 
#' c("B","C"), c("A","C"), c("A","B","C"))
#' 
#' states_list = areas_list_to_states_list_new(areas=c("A","B","C"), 
#' include_null_range=TRUE, split_ABC=TRUE)
#' states_list
#' 
#' dedf = make_relprob_matrix_de(states_list=states_list, split_ABC=FALSE, 
#' split="", remove_simultaneous_events=TRUE, add_multiple_Ds=TRUE, 
#' dispersal_multiplier_matrix=make_dispersal_multiplier_matrix(states_list=states_list))
#' dedf
#' 
#' # Right
#' Qmat = symbolic_to_Q_matrix_exper(dedf, cellsplit="\\\\+", mergesym="+", d=0.1, e=0.01)
#' Qmat
#' 
#' # Wrong
#' Qmat = symbolic_to_Q_matrix_exper(dedf, cellsplit="\\\\+", mergesym="*", d=0.1, e=0.01)
#' Qmat
#' 
#' # You don't have to split, if the formulas are directly parsable
#' Qmat = symbolic_to_Q_matrix_exper(dedf, cellsplit="yadda", mergesym="", d=0.1, e=0.01)
#' Qmat
#' 
#' # Compare to symbolic_to_Q_matrix
#' Qmat = symbolic_to_Q_matrix(dedf, cellsplit="yadda", mergesym="", d=0.1, e=0.01)
#' Qmat
#' 
symbolic_to_Q_matrix_exper <- function(dedf, cellsplit="\\+", mergesym="*", d=0.1, e=0.01, basefreqs=rep(1,nrow(dedf))/nrow(dedf), ...)
	{
	defaults='
	cellsplit=""
	mergesym="*"
	basefreqs=(rep(1,nrow(dedf))/nrow(dedf))

	cellsplit="*"
	mergesym="+"
	basefreqs=(rep(1,nrow(dedf))/nrow(dedf))
	'
	
	dedf[dedf == "-"] = NA
	
	dedf_vals = matrix(data=0, nrow=nrow(dedf), ncol=ncol(dedf))
	
	dedf_vals[!is.na(dedf_vals)] = sapply(X=dedf[!is.na(dedf_vals)], FUN=symbolic_cell_to_relprob_cell, cellsplit, mergesym, d, e, ...)
	
	
	# Now you have a relative rate matrix; you multiply the columns by the composition, then scale so they sum to 1
	Rmat = dedf_vals
	#Rmat2 = basefreqs * Rmat
	
	
	
	# Normalize matrix into an appropriate Q matrix
	Qmat = Rmat
	diag_TF = diag(x=1, nrow=nrow(dedf), ncol(dedf)) == 1
	Qmat[diag_TF] = -1 * rowSums(Qmat, na.rm=TRUE)
	
	Qmat = diag(basefreqs) %*% Qmat
	rowSums(Qmat)
	
	return(Qmat)
	}



# These are 1-event probability matrices, not instantaneous rate matrices
# Speciation matrix version
#######################################################
# symbolic_cell_to_relprob_cell_sp
#######################################################
#' Convert symbolic cell (a text equation) to relprob cell (a numeric value) -- speciation matrix version
#' 
#' This does the equivalent of \code{\link{symbolic_to_P_matrix}}, but for a speciation/cladogenesis matrix.
#' 
#' These are 1-event probability matrices, not instantaneous rate matrices.
#'
#' This function can be used in \code{\link[base]{sapply}}.  It still will not be very fast compared to the calculations in 
#' \code{\link[cladoRcpp]{cladoRcpp}}, but can be useful for demonstrative purposes.
#' 
#' @param charcell The text formula.
#' @param cellsplit The symbol to split the formulas on. Default "\\\\+" (plus symbol, with escape code).
#' @param mergesym The symbol to merge the formulas with. Default "+".
#' @param ys Relative weight of fully sympatric speciation (range-copying) and sympatric "subset" speciation. Default \code{s=1} mimics LAGRANGE model.
#' @param v Relative weight of vicariant speciation. Default \code{v=1} mimics LAGRANGE model.
#' @param j Relative weight of "founder event speciation"/jump speciation. Default \code{j=0} mimics LAGRANGE model.
#' @param relprob_subsets_matrix A numeric matrix describing the relative probability of each smaller daughter range, conditional on the ancestral rangesize.
#' @param relprob_vicar_matrix A numeric matrix describing the relative probability of each smaller daughter range, conditional on the ancestral rangesize.
#' @param ... Additional arguments to pass to \code{\link{relative_probabilities_of_subsets}} and \code{\link{relative_probabilities_of_vicariants}}, and
#' thence to \code{\link[base]{strsplit}}.
#' @return \code{cellval} The output cell value.
#' @export
#' @seealso \code{\link{symbolic_to_relprob_matrix_sp}}, \code{\link{make_relprob_matrix_de}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @examples
#' testval=1
#' 
#' charcell = "y1"
#' symbolic_cell_to_relprob_cell_sp(charcell, cellsplit="\\\\+", mergesym="*", ys=1, 
#' j=0, v=1, relprob_subsets_matrix=relative_probabilities_of_subsets(max_numareas=3, 
#' maxent_constraint_01=0.0001), 
#' relprob_vicar_matrix=relative_probabilities_of_vicariants(max_numareas=3, 
#' maxent_constraint_01v=0.0001))
#' 
#' charcell = "y1"
#' symbolic_cell_to_relprob_cell_sp(charcell, cellsplit="\\\\+", mergesym="*", ys=1, 
#' j=1, v=1, relprob_subsets_matrix=relative_probabilities_of_subsets(max_numareas=3, 
#' maxent_constraint_01=0.0001), 
#' relprob_vicar_matrix=relative_probabilities_of_vicariants(max_numareas=3, 
#' maxent_constraint_01v=0.0001))
#' 
#' charcell = "j"
#' symbolic_cell_to_relprob_cell_sp(charcell, cellsplit="\\\\+", mergesym="*", ys=1, 
#' j=0, v=1, relprob_subsets_matrix=relative_probabilities_of_subsets(max_numareas=3, 
#' maxent_constraint_01=0.0001), 
#' relprob_vicar_matrix=relative_probabilities_of_vicariants(max_numareas=3, 
#' maxent_constraint_01v=0.0001))
#' 
#' charcell = "j"
#' symbolic_cell_to_relprob_cell_sp(charcell, cellsplit="\\\\+", mergesym="*", ys=1, 
#' j=1, v=1, relprob_subsets_matrix=relative_probabilities_of_subsets(max_numareas=3, 
#' maxent_constraint_01=0.0001), 
#' relprob_vicar_matrix=relative_probabilities_of_vicariants(max_numareas=3, 
#' maxent_constraint_01v=0.0001))
#' 
#' charcell = "v1_2"
#' symbolic_cell_to_relprob_cell_sp(charcell, cellsplit="\\\\+", mergesym="*", ys=1, 
#' j=0, v=1, relprob_subsets_matrix=relative_probabilities_of_subsets(max_numareas=3, 
#' maxent_constraint_01=0.0001), 
#' relprob_vicar_matrix=relative_probabilities_of_vicariants(max_numareas=3, 
#' maxent_constraint_01v=0.0001))
#' 
#' charcell = "v1_2"
#' symbolic_cell_to_relprob_cell_sp(charcell, cellsplit="\\\\+", mergesym="*", ys=1,
#'  j=1, v=1, relprob_subsets_matrix=relative_probabilities_of_subsets(max_numareas=3, 
#' maxent_constraint_01=0.0001), 
#' relprob_vicar_matrix=relative_probabilities_of_vicariants(max_numareas=3, 
#' maxent_constraint_01v=0.0001))
#' 
#' charcell = "s1_2"
#' symbolic_cell_to_relprob_cell_sp(charcell, cellsplit="\\\\+", mergesym="*", ys=1, 
#' j=0, v=1, relprob_subsets_matrix=relative_probabilities_of_subsets(max_numareas=3, 
#' maxent_constraint_01=0.0001), 
#' relprob_vicar_matrix=relative_probabilities_of_vicariants(
#' max_numareas=3, 
#' maxent_constraint_01v=0.0001))
#' 
#' charcell = "s1_2"
#' symbolic_cell_to_relprob_cell_sp(charcell, cellsplit="\\\\+", mergesym="*", 
#' ys=1, j=1, v=1, relprob_subsets_matrix=relative_probabilities_of_subsets(
#' max_numareas=3, maxent_constraint_01=0.0001), 
#' relprob_vicar_matrix=relative_probabilities_of_vicariants(
#' max_numareas=3, 
#' maxent_constraint_01v=0.0001))
#' 
symbolic_cell_to_relprob_cell_sp <- function(charcell, cellsplit="\\+", mergesym="*", ys=1, j=0, v=1, relprob_subsets_matrix=relative_probabilities_of_subsets(6,0.0001), relprob_vicar_matrix=relative_probabilities_of_vicariants(6,0.0001), ...)
	{
	# 
	word = strsplit(charcell, split=cellsplit)[[1]]
	
	# If word DOES NOT have "s" or "y" or "v", just evaluate straight to a number
	if ( all( (grepl(pattern="s", x=word)==FALSE), (grepl(pattern="y", x=word)==FALSE), (grepl(pattern="v", x=word)==FALSE) ) )
		{
		# Evaluate straight to an answer (I guess, j only, here)
		equation = paste(word, sep="", collapse=mergesym)
		equation2 = paste("cellval = ", equation, sep="")
		eval(parse(text=equation2))
		# evaluation done
		} else {
		# If word includes "y" or "s", use sfunc or yfunc
		if (grepl(pattern="s", x=word)==TRUE)
			{
			cellval = ys * sfunc(charcell, relprob_subsets_matrix)
			}
		if (grepl(pattern="y", x=word)==TRUE)
			{
			cellval = ys * yfunc(charcell, relprob_subsets_matrix)
			}
# 		if (grepl(pattern="j", x=word)==TRUE)
# 			{
# 			cellval = j * jfunc(charcell, relprob_subsets_matrix)
# 			}
		if (grepl(pattern="v", x=word)==TRUE)
			{
			cellval = v * vfunc(charcell, relprob_vicar_matrix)
			}
		# evaluation done
		}

	return(cellval)
	}

# Convert symbolic matrix to relprob matrix
# These are 1-event probability matrices, not instantaneous rate matrices
# Speciation matrix version
#######################################################
# symbolic_to_relprob_matrix_sp
#######################################################
#' Convert symbolic matrix (with text equations) to relprob matrix (numeric values) -- speciation matrix version
#' 
#' This does the equivalent of \code{\link{symbolic_to_P_matrix}}, but for a speciation/cladogenesis matrix.
#' 
#' These are 1-event probability matrices, not instantaneous rate matrices.
#'
#' This function uses \code{\link{symbolic_cell_to_relprob_cell_sp}} in an \code{\link[base]{sapply}} call.  It still will not be very fast compared to the calculations in 
#' \code{\link[cladoRcpp]{cladoRcpp}}, but can be useful for demonstrative purposes.
#' 
#' @param spmat The speciation/cladogenesis matrix, with text formula.
#' @param cellsplit The symbol to split the formulas on. Default "\\\\+" (plus symbol, with escape code).
#' @param mergesym The symbol to merge the formulas with. Default "+".
#' @param ys Relative weight of fully sympatric speciation (range-copying) and sympatric "subset" speciation. Default \code{s=1} mimics LAGRANGE model.
#' @param v Relative weight of vicariant speciation. Default \code{v=1} mimics LAGRANGE model.
#' @param j Relative weight of "founder event speciation"/jump speciation. Default \code{j=0} mimics LAGRANGE model.
#' @param maxent_constraint_01 Parameter which assigns relative probabilities to different descendants range sizes, for the smaller descendant.  
#' Values can range from 0.0001 to 1. If \code{maxent_constraint_01=0.0001}, then the smaller descendant has a range size of 1 with probability 1
#' (i.e., the \code{LAGRANGE} default). If \code{maxent_constraint_01=0.5}, then all range sizes are equally weighted.  If \code{maxent_constraint_01=1},
#' then the largest possible smaller descendant gets probability 1.  The reference to "maxent" derives from the fact that the maxent probability
#' distribution on a multistate, ordered, discrete variable -- e.g. a die roll -- can be calculated given just the mean value.  Here, the
#' \code{maxent_constraint_01} parameter is multiplied by the (maximum rangesize + 1).  Thus, when \code{maxent_constraint_01=0.5}, if there are 6
#' possible states, then the parameter becomes 3.5, which sets equal probabilities of all possible descendant ranges sizes, when range size can range
#' from 1 to 6.
#' @param maxent_constraint_01v Works the same as \code{maxent_constraint_01}, but just for descendants of vicariant events.
#' @param max_numareas The maximum number of areas possible allowed for the smaller-ranged-daughter in either vicariant or sympatric types of cladogenesis/speciation.
#' @param ... Additional arguments to pass to \code{\link{relative_probabilities_of_subsets}} and \code{\link{relative_probabilities_of_vicariants}}, and
#' thence to \code{\link[base]{strsplit}}.
#' @return \code{cellval} The output cell value.
#' @export
#' @seealso \code{\link{symbolic_cell_to_relprob_cell_sp}}, \code{\link{make_relprob_matrix_de}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{https://code.google.com/p/lagrange/}
#' @examples
#' testval=1
#' # Generate the text version of the speciation/cladogenesis probability matrix 
#' # (actually a relative weights matrix
#' # until the rows are normalized so that each sums to 1).
#' spmat = make_relprob_matrix_bi(states_list=list("_", c("A"), c("B"), c("C"), 
#' c("A","B"), c("B","C"), c("A","C"), c("A","B","C")), split_ABC=FALSE, splitval="", 
#' code_for_overlapping_subsets=NA, printwarn=1)
#' spmat
#' 
#' # Look at the conditional probabilities generated by a variety of models
#' spPmat = symbolic_to_relprob_matrix_sp(spmat=spmat, cellsplit="\\\\+", mergesym="*", 
#' ys=1, j=0, v=1, maxent_constraint_01=0.0001, maxent_constraint_01v=0.0001, 
#' max_numareas=3)
#' spPmat = adf(spPmat); names(spPmat) = names(spmat); rownames(spPmat) = rownames(spmat)
#' spPmat
#' 
#' spPmat = symbolic_to_relprob_matrix_sp(spmat=spmat, cellsplit="\\\\+", 
#' mergesym="*", ys=0.5, j=0, v=0.5, maxent_constraint_01=0.0001, 
#' maxent_constraint_01v=0.0001, max_numareas=3)
#' spPmat = adf(spPmat); names(spPmat) = names(spmat); rownames(spPmat) = rownames(spmat)
#' spPmat
#' 
#' spPmat = symbolic_to_relprob_matrix_sp(spmat=spmat, cellsplit="\\\\+", 
#' mergesym="*", ys=1, j=1, v=1, maxent_constraint_01=0.0001, 
#' maxent_constraint_01v=0.0001, max_numareas=3)
#' spPmat = adf(spPmat); names(spPmat) = names(spmat); rownames(spPmat) = rownames(spmat)
#' spPmat
#' 
#' spPmat = symbolic_to_relprob_matrix_sp(spmat=spmat, cellsplit="\\\\+", 
#' mergesym="*", ys=0.25, j=0.25, v=0.25, maxent_constraint_01=0.0001, 
#' maxent_constraint_01v=0.0001, max_numareas=3)
#' spPmat = adf(spPmat); names(spPmat) = names(spmat); rownames(spPmat) = rownames(spmat)
#' spPmat
#' 
#' spPmat = symbolic_to_relprob_matrix_sp(spmat=spmat, cellsplit="\\\\+", 
#' mergesym="*", ys=1, j=1, v=0, maxent_constraint_01=0.0001, 
#' maxent_constraint_01v=0.0001, max_numareas=3)
#' spPmat = adf(spPmat); names(spPmat) = names(spmat); rownames(spPmat) = rownames(spmat)
#' spPmat
#' 
#' spPmat = symbolic_to_relprob_matrix_sp(spmat=spmat, cellsplit="\\\\+", 
#' mergesym="*", ys=1, j=1, v=0, maxent_constraint_01=0.5, 
#' maxent_constraint_01v=0.0001, max_numareas=3)
#' spPmat = adf(spPmat); names(spPmat) = names(spmat); rownames(spPmat) = rownames(spmat)
#' spPmat
#' 
#' spPmat = symbolic_to_relprob_matrix_sp(spmat=spmat, cellsplit="\\\\+", 
#' mergesym="*", ys=1, j=0, v=0, maxent_constraint_01=0.5, 
#' maxent_constraint_01v=0.0001, max_numareas=3)
#' spPmat = adf(spPmat); names(spPmat) = names(spmat); rownames(spPmat) = rownames(spmat)
#' spPmat
#' 
#' spPmat = symbolic_to_relprob_matrix_sp(spmat=spmat, cellsplit="\\\\+", 
#' mergesym="*", ys=1, j=0, v=1, maxent_constraint_01=0.0001, 
#' maxent_constraint_01v=0.5, max_numareas=3)
#' spPmat = adf(spPmat); names(spPmat) = names(spmat); rownames(spPmat) = rownames(spmat)
#' spPmat
#' 
symbolic_to_relprob_matrix_sp <- function(spmat, cellsplit="\\+", mergesym="*", ys=1, j=0, v=1, maxent_constraint_01=0.0001, maxent_constraint_01v=0.0001, max_numareas=6, ...)
	{
	defaults='
	cellsplit="\\+"
	mergesym="*"
	ys=0.2
	j=0.1
	v=0.03
	maxent_constraint_01=0.5
	max_numareas
	'
	
	# Calculate the relative probability of each rangesize
	relprob_subsets_matrix = relative_probabilities_of_subsets(max_numareas=max_numareas, maxent_constraint_01=maxent_constraint_01, ...)
	
	if (max_numareas > 1)
		{
		relprob_vicar_matrix = relative_probabilities_of_vicariants(max_numareas=max_numareas, maxent_constraint_01v=maxent_constraint_01v, ...)
		} else {
		relprob_vicar_matrix = matrix(NA, nrow=1, ncol=1)
		}
	
	spmat[spmat == "-"] = NA
	
	spmat_vals = matrix(data=0, nrow=nrow(spmat), ncol=ncol(spmat))
	
	spmat_vals[!is.na(spmat_vals)] = sapply(X=spmat[!is.na(spmat_vals)], FUN=symbolic_cell_to_relprob_cell_sp, cellsplit=cellsplit, mergesym=mergesym, ys=ys, j=j, v=v, relprob_subsets_matrix=relprob_subsets_matrix, relprob_vicar_matrix=relprob_vicar_matrix)
	
	# DO NOT normalize, this is a single-event probability matrix,
	# not an instantaneous rate matrix
	# diag_TF = diag(x=1, nrow=nrow(spmat), ncol(spmat)) == 1
	# spmat_vals[diag_TF] = -1 * rowSums(spmat_vals, na.rm=TRUE)
	
	# But, it SHOULD be normalized so each row adds to 1
	# NOTE: THIS WILL TEND TO MEAN THAT EVENTS THAT OCCUPY MORE 
	# CELLS WILL GET MORE WEIGHT FOR THE SAME OVERALL SB RATE
	
	# The rows with "_" will get 0
# 	rowsums = rowSums(spmat_vals)
# 	rowsums[rowsums == 0] = 1
# 	spmat_vals = spmat_vals/rowsums
	
	spmat_vals = spmat_vals/rowSums(spmat_vals)
	rowSums(spmat_vals)
	
	return(spmat_vals)
	}



# Fun with Maximum Entropy probability distribution for discrete variable with given mean
# (and discrete uniform flat prior)
# http://en.wikipedia.org/wiki/Maximum_entropy_probability_distribution
# 
# Equation 6.4 of Harte 2011

#######################################################
# calcZ_part
#######################################################
#' Calculate Z (equation 6.3 of Harte 2011)
#' 
#' This function is a used by \code{\link{calcP_n}} via \code{\link[base]{apply}}, all within \code{\link{get_probvals}}. \code{\link{get_probvals}}
#' calculates the Maximum Entropy (\cite{Harte2011}) discrete probability distribution of a number of ordered states (e.g., faces of a 6-sided 
#' die) given the mean of many rolls.  Here, this is merely used so that a single parameter can control the probability distribution of small
#' versus large descendant areas during cladogenesis.
#' 
#' See also: Maximum Entropy probability distribution for discrete variable with given mean
#' (and discrete uniform flat prior)
#' \url{http://en.wikipedia.org/wiki/Maximum_entropy_probability_distribution}
#' 
#' @param n Value of the state (e.g., which of a number of faces on a die, or number of different size classes of geographic range)
#' @param lambda1 Lambda parameter (\cite{Harte2011}).
#' @return \code{Z}, numeric value
#' @export
#' @seealso \code{\link{calcP_n}}, \code{\link[FD]{maxent}}, \code{\link{symbolic_to_relprob_matrix_sp}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{http://en.wikipedia.org/wiki/Maximum_entropy_probability_distribution}
#' \cite{Harte2011}
#' @examples
#' testval=1
#' n=6
#' lambda1 = 0.5
#' calcZ_part(n, lambda1)
#'
calcZ_part <- function(n, lambda1)
	{
	return(exp(-1*lambda1*n))
	}



# Harte 2011, equation 6.3
# Calc prob(n)
#######################################################
# calcP_n
#######################################################
#' Calculate Z (part of equation 6.4 of Harte 2011)
#' 
#' This function is a used by \code{\link{get_probvals}}, which 
#' calculates the Maximum Entropy (\cite{Harte2011}) discrete probability distribution of a number of ordered states (e.g., faces of a 6-sided 
#' die) given the mean of many rolls.  Here, this is merely used so that a single parameter can control the probability distribution of small
#' versus large descendant areas during cladogenesis.
#' 
#' See also: Maximum Entropy probability distribution for discrete variable with given mean
#' (and discrete uniform flat prior)
#' \url{http://en.wikipedia.org/wiki/Maximum_entropy_probability_distribution}
#' 
#' @param n Value of the state (e.g., which of a number of faces on a die, or number of different size classes of geographic range).
#' @param lambda1 Lambda parameter (\cite{Harte2011}).
#' @param Z numeric values from \code{\link{calcZ_part}}.
#' @return \code{Prob_n}, numeric value of the probability of state \code{n}.
#' @export
#' @seealso \code{\link{calcZ_part}}, \code{\link[FD]{maxent}}, \code{\link{symbolic_to_relprob_matrix_sp}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{http://en.wikipedia.org/wiki/Maximum_entropy_probability_distribution}
#' \cite{Harte2011}
#' @examples
#' testval=1
#' n = 6
#' lambda1 = 0.5
#' Z = 1
#' calcP_n(n, lambda1, Z)
#'
calcP_n = function(n, lambda1, Z)
	{
	Prob_n = (1/Z) * exp(-1*lambda1*n)
	return(Prob_n)
	}

	
	
	

# Set up the function based on
# Harte (2011), Maximum Entropy Theory of Ecology
#

#######################################################
# get_probvals
#######################################################
#' Calculate probability of ordered discrete states using a maxent distribution (equations 6.3-6.4 of Harte 2011)
#' 
#' This function is calculates the Maximum Entropy (\cite{Harte2011}) discrete probability distribution of a number of ordered states (e.g., faces
#' of a 6-sided die) given the mean of many rolls.  Here, this is merely used so that a single parameter can control the probability distribution
#' of small versus large descendant areas during cladogenesis.  This function could then used by \code{\link{relative_probabilities_of_subsets}} in BioGeoBEARS
#' to weight different descendant range sizes (although, currently, the function \code{\link[FD]{maxent}} from the \code{\link[FD]{FD}} package is used).
#'
#' This calculation is based on Equations 6.3-6.4 of \cite{Harte2011}.
#' 
#' See also: Maximum Entropy probability distribution for discrete variable with given mean
#' (and discrete uniform flat prior)
#' \url{http://en.wikipedia.org/wiki/Maximum_entropy_probability_distribution}
#' 
#' @param die_vals Values of the ordered discrete variable state (e.g., \code{seq(1,6)} for a six-sided die) 
#' @param meanval Mean value (the knowledge supplied to the MaxEnt function).
#' @return \code{Prob_nvals}, numeric values of the probability of each state from \code{die_vals}.
#' @export
#' @seealso \code{\link{calcZ_part}}, \code{\link{calcP_n}}, \code{\link[FD]{maxent}}, \code{\link{symbolic_to_relprob_matrix_sp}}, \code{\link{relative_probabilities_of_subsets}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{http://en.wikipedia.org/wiki/Maximum_entropy_probability_distribution}
#'	 @cite Harte2011
#' @examples
#' testval=1
#' # Examples
#' # Set up subplots
#' par(mfrow=c(3,2))
#' 
#' # Flat distribution (equal prob of any descendent size)
#' N = 6
#' # n = die_vals
#' die_vals = seq(1,N)
#' meanval = 3.5
#' probvals = get_probvals(die_vals, meanval)
#' probvals
#' barplot(height=probvals, width=1, names.arg=die_vals, ylim=c(0,1))
#' title(paste("Probabilities of each state, mean val=", meanval, sep=""))
#' 
#' # Descendents tend to have large ranges
#' N = 6
#' # n = die_vals
#' die_vals = seq(1,N)
#' meanval = 5.999
#' probvals = get_probvals(die_vals, meanval)
#' probvals
#' barplot(height=probvals, width=1, names.arg=die_vals, ylim=c(0,1))
#' title(paste("Probabilities of each state, mean val=", meanval, sep=""))
#' 
#' # Flat distribution (equal prob of any descendent size)
#' N = 6
#' # n = die_vals
#' die_vals = seq(1,N)
#' meanval = 5
#' probvals = get_probvals(die_vals, meanval)
#' probvals
#' barplot(height=probvals, width=1, names.arg=die_vals, ylim=c(0,1))
#' title(paste("Probabilities of each state, mean val=", meanval, sep=""))
#' 
#' # Flat distribution (equal prob of any descendent size)
#' N = 6
#' # n = die_vals
#' die_vals = seq(1,N)
#' meanval = 4
#' probvals = get_probvals(die_vals, meanval)
#' probvals
#' barplot(height=probvals, width=1, names.arg=die_vals, ylim=c(0,1))
#' title(paste("Probabilities of each state, mean val=", meanval, sep=""))
#' 
#' # Flat distribution (equal prob of any descendent size)
#' N = 6
#' # n = die_vals
#' die_vals = seq(1,N)
#' meanval = 2
#' probvals = get_probvals(die_vals, meanval)
#' probvals
#' barplot(height=probvals, width=1, names.arg=die_vals, ylim=c(0,1))
#' title(paste("Probabilities of each state, mean val=", meanval, sep=""))
#' 
#' # This produces the LAGRANGE default
#' # (all smaller descendents are of size 1)
#' N = 6
#' # n = die_vals
#' die_vals = seq(1,N)
#' meanval = 1.0001
#' probvals = get_probvals(die_vals, meanval)
#' probvals
#' barplot(height=probvals, width=1, names.arg=die_vals, ylim=c(0,1))
#' title(paste("LAGRANGE 'default', mean val=", meanval, sep=""))
#' 
#' # This is stopped by the error check
#' # (all smaller descendents are of size 1)
#' # N = 6
#' # # n = die_vals
#' # die_vals = seq(1,N)
#' # meanval = 0.5
#' # probvals = get_probvals(die_vals, meanval)
#' # probvals
#' # barplot(height=probvals, width=1, names.arg=die_vals, ylim=c(0,1))
#' # title(paste("Probabilities of each state, mean val=", meanval, sep=""))
#' 
get_probvals <- function(die_vals, meanval)
	{
	# Take Harte (2011), equation 6.4; set exp(-lambda) to == x
	
	defaults='
	# n = number of integers
	N = 6
	# n = die_vals
	die_vals = seq(1,N)
	meanval = 5.999
	'
	
	# Error check
	if (meanval <= min(die_vals) || meanval >= max(die_vals))
		{
		error_msg = paste("ERROR: meanval (", meanval, ") cannot be below min (", min(die_vals), ") or above max (", max(die_vals), ") die_vals!", sep="")
		stop(error_msg)
		}
	
	
	
	Harte_2011_eqn_6_9_left_coefficients = die_vals
	Harte_2011_eqn_6_9_right_coefficients = rep(meanval, length(die_vals))
	polynomial_coefs_that_EQ_0 = Harte_2011_eqn_6_9_left_coefficients - Harte_2011_eqn_6_9_right_coefficients
	
	solutions_for_x = polyroot(z=polynomial_coefs_that_EQ_0)
	solutions_for_x
	
	# Exclude those with nonzero imaginary values
	keepTF = abs(Im(solutions_for_x)) < 1e-14	
	solutions_for_x = solutions_for_x[keepTF]
	solutions_for_x
	
	# Exclude negative and 0 solutions
	keepTF = Re(solutions_for_x) > 1e-14	
	solutions_for_x = Re(solutions_for_x[keepTF])
	solutions_for_x
	
	# Lagrange multiplier Lambda1
	# lambda = -1 * ln(x)
	lambda1 = -1 * log(solutions_for_x)
	lambda1
	
	# Equation 6.4
	# 		calcZ_part <- function(n, lambda1)
	# 			{
	# 			return(exp(-1*lambda1*n))
	# 			}
	
	Z = sum(sapply(X=die_vals, FUN=calcZ_part, lambda1=lambda1))
	Z
	
	
	# Harte 2011, equation 6.3
	# Calc prob(n)
	# 		calcP_n = function(n, lambda1, Z)
	# 			{
	# 			Prob_n = (1/Z) * exp(-1*lambda1*n)
	# 			return(Prob_n)
	# 			}
	
	Prob_nvals = sapply(X=die_vals, FUN=calcP_n, lambda1=lambda1, Z=Z)
	Prob_nvals
	sum(Prob_nvals)
	
	
	
	return(Prob_nvals)
	}



#######################################################
# relative_probabilities_of_subsets
#######################################################
#' Calculate probability of different descendant rangesizes, for the smaller descendant, in subset speciation
#'
#' "Rangesize" here means "number of areas in a geographic range".  The \code{LAGRANGE} cladogenesis model requires that, during cladogenesis events, one daughter
#' lineage will ALWAYS have a geographic range of size 1.  This is argued for in \cite{ReeSmith2008} on the grounds that new species usually get isolated and start
#' in a new area.  This is a reasonable proposition, but still, it would be nice to test the assumption.  In addition, it could be that 
#' some speciation modes, especially vicariance, obey different rules.  E.g., \code{DIVA} (\cite{Ronquist1996_DIVA}, \cite{Ronquist_1997_DIVA})
#' allows vicariant speciation to divide up the ancestral range in every possible way (e.g., ABCD-->AB|CD, or AC|BD, or A|BCD, or D|ABC, etc.),
#' but \code{LAGRANGE} would only allow vicariance to split off areas of size 1: (ABCD-->A|BCD, B|ACD, etc.) (Ronquist_Sanmartin_2011).
#' 
#' To test different models, the user has to have control of the relative probability of different descendant rangesizes.  The probability of each
#' descendant rangesize could be parameterized individually, but we have a limited amount of observational data (essentially one character), so
#' efficient parameterizations should be sought.  
#'
#' One way to do this is with the Maximum Entropy (\cite{Harte2011}) discrete probability distribution
#' of a number of ordered states.  Normally this is applied (in examples) to the problem of estimation of the relative probability of the different
#' faces of a 6-sided die.  The input "knowledge" is the true mean of the dice rolls.  If the mean value is 3.5, then each face of the die will have 
#' probability 1/6.  If the mean value is close to 1, then the die is severely skewed such that the probability of rolling 1 is 99%+ and the probability
#' of other die rolls is very small.  If the mean value is close to 6, then the probability distribution is skewed towards higher numbers.
#' 
#' Here in \code{BioGeoBEARS}, we use the same Maximum Entropy function to specify the relative probability of geographic ranges of a number of different
#' rangesizes.  This is merely used so that a single parameter can control the probability distribution -- there is no MaxEnt estimation going
#' on here.  The user specifies a value for the parameter \code{maxent_constraint_01} between 0.0001 and 0.9999.  This can then be applied to 
#' all of the different ancestor-descendant range combinations in the cladogenesis/speciation matrix.  
#' 
#' Example values of \code{maxent_constraint_01} would give the following results:
#'
#' \code{maxent_constraint_01 = 0.0001} -- The smaller descendant has rangesize 1 with 100% probability (as in \code{LAGRANGE})\cr
#' \code{maxent_constraint_01 = 0.5} -- The smaller descendant can be any rangesize equal probability. This is effectively what happens in \code{DIVA}'s version of vicariance speciation\cr
#' \code{maxent_constraint_01 = 0.9999} -- The smaller descendant will take the largest possible rangesize for a given type of speciation, and a given ancestral
#' rangesize.  E.g., for sympatric/range-copying speciation (the ancestor is simply copied to both descendants, as in a continuous-time model with no
#' cladogenesis effect), an ancestor of size 3 would product two descendant lineages of size 3. Such a model is implemented in the program \code{BayArea} (\cite{Landis_Matzke_etal_2013_BayArea}). 
#' \code{LAGRANGE}, on the other hand, would only allow range-copying for ancestral ranges of size 1.\cr
#'
#' \bold{Note:} In \code{LAGRANGE}-type models, at speciation/cladogenesis events,
#' one descendant daughter branch ALWAYS has size 1, whereas the other descendant daughter branch either (a) is the same (in sympatric/range-copying
#' speciation), (b) inherits the complete ancestral range (in sympatric/subset speciation) or (c) inherits the remainder of the range (in 
#' vicariant/range-division speciation).  LAGRANGE-type behavior (the smaller descendant has rangesize 1 with 100% probability, and 0% probability for
#' any other rangesize) can be achieved by setting the \code{maxent_constraint_01} parameter to 0.0001.
#' 
#' \code{See also:} Maximum Entropy probability distribution for discrete variable with given mean
#' (and discrete uniform flat prior)
#' \url{http://en.wikipedia.org/wiki/Maximum_entropy_probability_distribution}
#' 
#' Currently, the function \code{\link[FD]{maxent}} from the \code{\link[FD]{FD}} package is used to get the discrete probability distribution, given the number of 
#' states and the \code{maxent_constraint_01} parameter.  This could also be done with \code{\link{get_probvals}}, which uses \code{\link{calcZ_part}},
#' \code{\link{calcP_n}}, following equations 6.3-6.4 of \cite{Harte2011}, although this is not yet implemented.
#' 
#' @param max_numareas The maximum number of areas possible allowed for the smaller-ranged-daughter in this type of cladogenesis/speciation.
#' @param maxent_constraint_01 The parameter describing the probability distribution on descendant rangesizes for the smaller descendant. See above.
#' @param NA_val The output matrix consists of ancestral rangesizes and rangesizes of the smaller descendant.  Some values are disallowed -- e.g. descendant ranges
#' larger than the ancestor; or, in subset speciation, descendant ranges the same size as the ancestor are disallowed.  All disallowed descendant rangesizes get
#' \code{NA_val}.
#' @return \code{\link{relative_probabilities_of_vicariants}}, \code{relprob_subsets_matrix}, a numeric matrix giving the relative probability of each rangesize for the smaller descendant of an ancestral range, 
#' conditional on the ancestral rangesize.
#' @export
#' @seealso \code{\link{symbolic_to_relprob_matrix_sp}}, \code{\link{get_probvals}}, \code{\link[FD]{maxent}}, \code{\link{calcZ_part}}, \code{\link{calcP_n}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{http://en.wikipedia.org/wiki/Maximum_entropy_probability_distribution}
#' \cite{Harte2011}
#' \cite{Ronquist1996_DIVA}
#' \cite{Ronquist_1997_DIVA}
#' \cite{Ronquist_Sanmartin_2011}
#' \cite{Landis_Matzke_etal_2013_BayArea}
#' @examples
#' testval=1
#' # Examples
#' 
#' # Probabilities of different descendant rangesizes, for the smaller 
#' # descendant, under sympatric/subset speciation
#' # (plus sympatric/range-copying, which is folded in):
#' relative_probabilities_of_subsets(max_numareas=6, maxent_constraint_01=0.0001, 
#' NA_val=NA)
#' relative_probabilities_of_subsets(max_numareas=6, maxent_constraint_01=0.5, 
#' NA_val=NA)
#' relative_probabilities_of_subsets(max_numareas=6, maxent_constraint_01=0.9999, 
#' NA_val=NA)
#' 
#' # Probabilities of different descendant rangesizes, for the smaller descendant, 
#' # under vicariant speciation
#' relative_probabilities_of_vicariants(max_numareas=6, maxent_constraint_01v=0.0001, 
#' NA_val=NA)
#' relative_probabilities_of_vicariants(max_numareas=6, maxent_constraint_01v=0.5, 
#' NA_val=NA)
#' relative_probabilities_of_vicariants(max_numareas=6, maxent_constraint_01v=0.9999, 
#' NA_val=NA)
#' 
relative_probabilities_of_subsets <- function(max_numareas=6, maxent_constraint_01=0.5, NA_val=NA)
	{
	defaults='
	
	# DEFAULTS	
	max_numareas=6
	maxent_constraint=0.01
	
	# The user provides a relative weight between 0 and 1.
	# This goes into quantile(), on a series of values:
	# 0, 1...max_numareas, max_numareas+1
	#
	# This means that 0.5 gives the median as a constraint,
	# and maxent() will produce a flat categorical distribution
	
	# This default puts all the weight on the "subset" species being
	# sympatric
	maxent_constraint_01 = 1
	maxent_constraint = quantile(x=seq(0,max_numareas+1,1), probs=maxent_constraint_01)
	maxent_constraint

	# This default puts all the weight on the 1-area subset species
	maxent_constraint_01 = 0.0001
	maxent_constraint= quantile(x=seq(0,max_numareas+1,1), probs=maxent_constraint_01)
	maxent_constraint

	# This default produces even weights (picks the median)
	maxent_constraint_01 = 0.5
	maxent_constraint = quantile(x=seq(0,max_numareas+1,1), probs=maxent_constraint_01)
	maxent_constraint
	'
	
	# Require FD for maxent function
	# FD::maxent
	#require(FD)

	# rows = number of areas in ancestor
	# cols = number of areas in subset daughter
	
	relprob_subsets_matrix = matrix(NA_val, nrow=max_numareas, ncol=max_numareas)
	
	# rows
	rownum=max_numareas
	for (rownum in 1:max_numareas)
		{
		numcols = rownum
		tmpstates = seq(1, numcols, by=1)

		# Convert the user-specified 0-1 value into a MaxEnt constraint
		# between 0 and max_numareas+1
		# 0.5 = median
		maxent_constraint = quantile(x=seq(0,length(tmpstates)+1,1), probs=maxent_constraint_01)

		# Apply Maxent constraint to weight the different numbers of areas
		maxent_result = FD::maxent(constr=maxent_constraint, states=tmpstates)
		probs_of_subset_ranges = maxent_result$prob
		probs_of_subset_ranges
		
		# Add NAs to get the correct rowlength
		numzeros_to_add = max_numareas-length(probs_of_subset_ranges)
		probs_of_subset_ranges = c(probs_of_subset_ranges, rep.int(NA_val, times=numzeros_to_add))

		relprob_subsets_matrix[rownum, ] = probs_of_subset_ranges
		}
		
	# Round to 3 significant digits
	relprob_subsets_matrix = round(relprob_subsets_matrix, 3)
	relprob_subsets_matrix
	
	return(relprob_subsets_matrix)
	}



#######################################################
# relative_probabilities_of_vicariants
#######################################################
#' Calculate probability of different descendant rangesizes, for the smaller descendant, in vicariant speciation
#'
#' "Rangesize" here means "number of areas in a geographic range".  The \code{LAGRANGE} cladogenesis model requires that, during cladogenesis events, one daughter
#' lineage will ALWAYS have a geographic range of size 1.  This is argued for in \cite{ReeSmith2008} on the grounds that new species usually get isolated and start
#' in a new area.  This is a reasonable proposition, but still, it would be nice to test the assumption.  In addition, it could be that 
#' some speciation modes, especially vicariance, obey different rules.  E.g., \code{DIVA} (\cite{Ronquist1996_DIVA}, \cite{Ronquist_1997_DIVA})
#' allows vicariant speciation to divide up the ancestral range in every possible way (e.g., ABCD-->AB|CD, or AC|BD, or A|BCD, or D|ABC, etc.),
#' but \code{LAGRANGE} would only allow vicariance to split off areas of size 1: (ABCD-->A|BCD, B|ACD, etc.) (Ronquist_Sanmartin_2011).
#' 
#' To test different models, the user has to have control of the relative probability of different descendant rangesizes.  The probability of each
#' descendant rangesize could be parameterized individually, but we have a limited amount of observational data (essentially one character), so
#' efficient parameterizations should be sought.  
#'
#' One way to do this is with the Maximum Entropy (\cite{Harte2011}) discrete probability distribution
#' of a number of ordered states.  Normally this is applied (in examples) to the problem of estimation of the relative probability of the different
#' faces of a 6-sided die.  The input "knowledge" is the true mean of the dice rolls.  If the mean value is 3.5, then each face of the die will have 
#' probability 1/6.  If the mean value is close to 1, then the die is severely skewed such that the probability of rolling 1 is 99%+ and the probability
#' of other die rolls is very small.  If the mean value is close to 6, then the probability distribution is skewed towards higher numbers.
#' 
#' Here in \code{BioGeoBEARS}, we use the same Maximum Entropy function to specify the relative probability of geographic ranges of a number of different
#' rangesizes.  This is merely used so that a single parameter can control the probability distribution -- there is no MaxEnt estimation going
#' on here.  The user specifies a value for the parameter \code{maxent_constraint_01} between 0.0001 and 0.9999.  This can then be applied to 
#' all of the different ancestor-descendant range combinations in the cladogenesis/speciation matrix.  
#' 
#' Example values of \code{maxent_constraint_01} would give the following results:
#'
#' \code{maxent_constraint_01 = 0.0001} -- The smaller descendant has rangesize 1 with 100% probability (as in \code{LAGRANGE})\cr
#' \code{maxent_constraint_01 = 0.5} -- The smaller descendant can be any rangesize equal probability. This is effectively what happens in \code{DIVA}'s version of vicariance speciation\cr
#' \code{maxent_constraint_01 = 0.9999} -- The smaller descendant will take the largest possible rangesize for a given type of speciation, and a given ancestral
#' rangesize.  E.g., for sympatric/range-copying speciation (the ancestor is simply copied to both descendants, as in a continuous-time model with no
#' cladogenesis effect), an ancestor of size 3 would product two descendant lineages of size 3. Such a model is implemented in the program \code{BayArea} (\cite{Landis_Matzke_etal_2013_BayArea}). 
#' \code{LAGRANGE}, on the other hand, would only allow range-copying for ancestral ranges of size 1.\cr
#'
#' \bold{Note:} In \code{LAGRANGE}-type models, at speciation/cladogenesis events,
#' one descendant daughter branch ALWAYS has size 1, whereas the other descendant daughter branch either (a) is the same (in sympatric/range-copying
#' speciation), (b) inherits the complete ancestral range (in sympatric/subset speciation) or (c) inherits the remainder of the range (in 
#' vicariant/range-division speciation).  LAGRANGE-type behavior (the smaller descendant has rangesize 1 with 100% probability, and 0% probability for
#' any other rangesize) can be achieved by setting the \code{maxent_constraint_01} parameter to 0.0001.
#' 
#' \code{See also:} Maximum Entropy probability distribution for discrete variable with given mean
#' (and discrete uniform flat prior)
#' \url{http://en.wikipedia.org/wiki/Maximum_entropy_probability_distribution}
#' 
#' Currently, the function \code{\link[FD]{maxent}} from the \code{\link[FD]{FD}} package is used to get the discrete probability distribution, given the number of 
#' states and the \code{maxent_constraint_01} parameter.  This could also be done with \code{\link{get_probvals}}, which uses \code{\link{calcZ_part}},
#' \code{\link{calcP_n}}, following equations 6.3-6.4 of \cite{Harte2011}, although this is not yet implemented.
#' 
#' @param max_numareas The maximum number of areas possible allowed for the smaller-ranged-daughter in this type of cladogenesis/speciation.
#' @param maxent_constraint_01v The parameter describing the probability distribution on descendant rangesizes for the smaller descendant, in a vicariance event
#' (where the maximum size of the smaller range is numareas/2, rounded down). See above.
#' @param NA_val The output matrix consists of ancestral rangesizes and rangesizes of the smaller descendant.  Some values are disallowed -- e.g. descendant ranges
#' larger than the ancestor; or, in subset speciation, descendant ranges the same size as the ancestor are disallowed.  All disallowed descendant rangesizes get
#' \code{NA_val}.
#' @return \code{relprob_subsets_matrix}, a numeric matrix giving the relative probability of each rangesize for the smaller descendant of an ancestral range, 
#' conditional on the ancestral rangesize.
#' @export
#' @seealso \code{\link{relative_probabilities_of_subsets}}, \code{\link{symbolic_to_relprob_matrix_sp}}, \code{\link{get_probvals}}, \code{\link[FD]{maxent}}, \code{\link{calcZ_part}}, \code{\link{calcP_n}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{http://en.wikipedia.org/wiki/Maximum_entropy_probability_distribution}
#' \cite{Harte2011}
#' \cite{Ronquist1996_DIVA}
#' \cite{Ronquist_1997_DIVA}
#' \cite{Ronquist_Sanmartin_2011}
#' \cite{Landis_Matzke_etal_2013_BayArea}
#' @examples
#' testval=1
#' # Examples
#' 
#' # Probabilities of different descendant rangesizes, for the smaller descendant, 
#' # under sympatric/subset speciation
#' # (plus sympatric/range-copying, which is folded in):
#' relative_probabilities_of_subsets(max_numareas=6, maxent_constraint_01=0.0001, 
#' NA_val=NA)
#' relative_probabilities_of_subsets(max_numareas=6, maxent_constraint_01=0.5, 
#' NA_val=NA)
#' relative_probabilities_of_subsets(max_numareas=6, maxent_constraint_01=0.9999, 
#' NA_val=NA)
#' 
#' # Probabilities of different descendant rangesizes, for the smaller descendant, 
#' # under vicariant speciation
#' relative_probabilities_of_vicariants(max_numareas=6, maxent_constraint_01v=0.0001, 
#' NA_val=NA)
#' relative_probabilities_of_vicariants(max_numareas=6, maxent_constraint_01v=0.5, 
#' NA_val=NA)
#' relative_probabilities_of_vicariants(max_numareas=6, maxent_constraint_01v=0.9999, 
#' NA_val=NA)
#' 
relative_probabilities_of_vicariants <- function(max_numareas=6, maxent_constraint_01v=0.0001, NA_val=NA)
	{
	defaults='
	max_numareas=6
	maxent_constraint=0.01
	
	# The user provides a relative weight between 0 and 1.
	# This goes into quantile(), on a series of values:
	# 0, 1...max_numareas, max_numareas+1
	#
	# This means that 0.5 gives the median as a constraint,
	# and maxent() will produce a flat categorical distribution
	
	# This default means that the only vicariance event allowed is
	# splitting off a single range
	maxent_constraint_01v = 0.0001
	maxent_constraint_v = quantile(x=seq(0,max_numareas+1,1), probs=maxent_constraint_01v)
	maxent_constraint_v
	' # END defaults
	
	# Require FD for maxent function
	# FD::maxent
	#require(FD)

	# rows = number of areas in ancestor
	# cols = number of areas in subset daughter
	
	relprob_vicar_matrix = matrix(NA_val, nrow=max_numareas, ncol=max_numareas)
	
	# rows
	rownum=max_numareas

	# Row 1 (ancestor size 1) has to be NA for any vicariance event
	# (redundant with NA declaration of relprob_vicar_matrix;
	#  basically an error check)
	relprob_vicar_matrix[1,] = NA_val
	
	# Error check (2015-03-29)
	# If the state space is reduced so that the maximum number of areas is 1, 
	# just return:
	# [,1]
    # [1,]    1
    if (max_numareas < 2)
    	{
    	relprob_vicar_matrix[1,] = 1
    	return(relprob_vicar_matrix)
    	} # END if (max_numareas < 2)
    
    
	
	for (rownum in 2:max_numareas)
		{
		numcols = rownum
		tmpstates = seq(1, numcols, by=1)

		# The maximum smallest rangesize is
		# whatever ranges are SMALLER than the median
		# i.e., for 1-6, median is 3.5, ranges 1-3 are allowed for the smaller range
		# i.e., for 1-5, median is 3, ranges 1-2 are allowed for the smaller range
		max_smaller_rangesize = median(tmpstates)

		possible_vicariance_smaller_rangesizes = tmpstates[tmpstates < max_smaller_rangesize]

		# Convert the user-specified 0-1 value into a MaxEnt constraint
		# between 0 and max_numareas+1
		# 0.5 = median
		# Just do +0, since our state space of possible vicariants is only -- NO!
		# Do +1, so that we get even probabilities at maxent_constraint_01v=0.5
		maxent_constraint = quantile(x=seq(0,length(possible_vicariance_smaller_rangesizes)+1,1), probs=maxent_constraint_01v)
		
		# Apply Maxent constraint to weight the different numbers of areas
		maxent_result = FD::maxent(constr=maxent_constraint, states=possible_vicariance_smaller_rangesizes)
		probs_of_subset_ranges = maxent_result$prob
		probs_of_subset_ranges
		
		# Add NAs to get the correct rowlength
		numzeros_to_add = max_numareas-length(probs_of_subset_ranges)
		probs_of_subset_ranges = c(probs_of_subset_ranges, rep.int(NA_val, times=numzeros_to_add))

		relprob_vicar_matrix[rownum, ] = probs_of_subset_ranges
		}
		
	# Round to 3 significant digits
	relprob_vicar_matrix = round(relprob_vicar_matrix, 3)
	relprob_vicar_matrix
	
	
	return(relprob_vicar_matrix)
	}





# Function giving the relative probability of s (subset speciation)
# given different range sizes of the subset descendent
#######################################################
# sfunc
#######################################################
#' Extract the appropriate probability for a subset speciation event, given text code for rangesize of smaller descendant, and ancestor
#'
#' @param charcell The text in the cell, indicating the type of speciation/cladogenesis range inheritance event.
#' @param relprob_subsets_matrix A numeric matrix describing the relative probability of each smaller daughter range, conditional on the ancestral rangesize.
#' @return \code{prob_of_this_b}, a numeric value giving the relative probability of that descendent-ancestor rangesize pair.
#' @export
#' @seealso \code{\link{yfunc}}, \code{\link{vfunc}}, \code{\link{relative_probabilities_of_subsets}}, \code{\link{symbolic_to_relprob_matrix_sp}}, \code{\link{get_probvals}}, \code{\link[FD]{maxent}}, \code{\link{calcZ_part}}, \code{\link{calcP_n}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{http://en.wikipedia.org/wiki/Maximum_entropy_probability_distribution}
#' \cite{Harte2011}
#' \cite{Ronquist1996_DIVA}
#' \cite{Ronquist_1997_DIVA}
#' \cite{Ronquist_Sanmartin_2011}
#' \cite{Landis_Matzke_etal_2013_BayArea}
#' @examples
#' testval=1
#' # Examples
#' 
#' # Probabilities of different descendant rangesizes, for the smaller descendant, 
#' # under sympatric/subset speciation
#' # (plus sympatric/range-copying, which is folded in):
#' relprob_subsets_matrix = relative_probabilities_of_subsets(max_numareas=6, 
#' maxent_constraint_01=0.0001, NA_val=NA)
#' relprob_subsets_matrix
#' sfunc(charcell="s1_1", relprob_subsets_matrix)
#' sfunc(charcell="s1_2", relprob_subsets_matrix)
#' sfunc(charcell="s1_3", relprob_subsets_matrix)
#' sfunc(charcell="s2_3", relprob_subsets_matrix)
#' 
#' relprob_subsets_matrix = relative_probabilities_of_subsets(max_numareas=6, 
#' maxent_constraint_01=0.5, NA_val=NA)
#' relprob_subsets_matrix
#' sfunc(charcell="s1_1", relprob_subsets_matrix)
#' sfunc(charcell="s1_2", relprob_subsets_matrix)
#' sfunc(charcell="s1_3", relprob_subsets_matrix)
#' sfunc(charcell="s2_3", relprob_subsets_matrix)
#' 
#' relprob_subsets_matrix = relative_probabilities_of_subsets(max_numareas=6, 
#' maxent_constraint_01=0.9999, NA_val=NA)
#' relprob_subsets_matrix
#' sfunc(charcell="s1_1", relprob_subsets_matrix)
#' sfunc(charcell="s1_2", relprob_subsets_matrix)
#' sfunc(charcell="s1_3", relprob_subsets_matrix)
#' sfunc(charcell="s2_3", relprob_subsets_matrix)
#' 
#' relprob_subsets_matrix = relative_probabilities_of_subsets(max_numareas=6, 
#' maxent_constraint_01=0.0001, NA_val=NA)
#' relprob_subsets_matrix
#' yfunc(charcell="y1", relprob_subsets_matrix)
#' yfunc(charcell="y2", relprob_subsets_matrix)
#' yfunc(charcell="y3", relprob_subsets_matrix)
#' yfunc(charcell="y4", relprob_subsets_matrix)
#' 
#' relprob_subsets_matrix = relative_probabilities_of_subsets(max_numareas=6, 
#' maxent_constraint_01=0.5, NA_val=NA)
#' relprob_subsets_matrix
#' yfunc(charcell="y1", relprob_subsets_matrix)
#' yfunc(charcell="y2", relprob_subsets_matrix)
#' yfunc(charcell="y3", relprob_subsets_matrix)
#' yfunc(charcell="y4", relprob_subsets_matrix)
#' 
#' relprob_subsets_matrix = relative_probabilities_of_subsets(max_numareas=6, 
#' maxent_constraint_01=0.9999, NA_val=NA)
#' relprob_subsets_matrix
#' yfunc(charcell="y1", relprob_subsets_matrix)
#' yfunc(charcell="y2", relprob_subsets_matrix)
#' yfunc(charcell="y3", relprob_subsets_matrix)
#' yfunc(charcell="y4", relprob_subsets_matrix)
#' 
#' 
#' # Probabilities of different descendant rangesizes, for the smaller descendant, 
#' # under vicariant speciation
#' relprob_subsets_matrix = relative_probabilities_of_vicariants(max_numareas=6, 
#' maxent_constraint_01v=0.0001, NA_val=NA)
#' relprob_subsets_matrix
#' vfunc(charcell="v1_1", relprob_subsets_matrix)
#' vfunc(charcell="v1_2", relprob_subsets_matrix)
#' vfunc(charcell="v1_3", relprob_subsets_matrix)
#' vfunc(charcell="v1_4", relprob_subsets_matrix)
#' vfunc(charcell="v2_4", relprob_subsets_matrix)
#' vfunc(charcell="v2_2", relprob_subsets_matrix)
#' vfunc(charcell="v1_6", relprob_subsets_matrix)
#' vfunc(charcell="v2_6", relprob_subsets_matrix)
#' vfunc(charcell="v3_6", relprob_subsets_matrix)
#' 
#' relprob_subsets_matrix = relative_probabilities_of_vicariants(max_numareas=6, 
#' maxent_constraint_01v=0.5, NA_val=NA)
#' relprob_subsets_matrix
#' vfunc(charcell="v1_1", relprob_subsets_matrix)
#' vfunc(charcell="v1_2", relprob_subsets_matrix)
#' vfunc(charcell="v1_3", relprob_subsets_matrix)
#' vfunc(charcell="v1_4", relprob_subsets_matrix)
#' vfunc(charcell="v2_4", relprob_subsets_matrix)
#' vfunc(charcell="v2_2", relprob_subsets_matrix)
#' vfunc(charcell="v1_6", relprob_subsets_matrix)
#' vfunc(charcell="v2_6", relprob_subsets_matrix)
#' vfunc(charcell="v3_6", relprob_subsets_matrix)
#' 
#' relprob_subsets_matrix = relative_probabilities_of_vicariants(max_numareas=6, 
#' maxent_constraint_01v=0.9999, NA_val=NA)
#' relprob_subsets_matrix
#' vfunc(charcell="v1_1", relprob_subsets_matrix)
#' vfunc(charcell="v1_2", relprob_subsets_matrix)
#' vfunc(charcell="v1_3", relprob_subsets_matrix)
#' vfunc(charcell="v1_4", relprob_subsets_matrix)
#' vfunc(charcell="v2_4", relprob_subsets_matrix)
#' vfunc(charcell="v2_2", relprob_subsets_matrix)
#' vfunc(charcell="v1_6", relprob_subsets_matrix)
#' vfunc(charcell="v2_6", relprob_subsets_matrix)
#' vfunc(charcell="v3_6", relprob_subsets_matrix)
#' 
sfunc <- function(charcell, relprob_subsets_matrix)
	{
	defaults='
	charcell="s1_6"
	maxent_constraint_01=1
	relprob_subsets_matrix = relative_probabilities_of_subsets(max_numareas=6, maxent_constraint_01)
	'
	# Remove the "s"
	#"s1_3"
	charcell2 = gsub(pattern="s", replacement="", x=charcell)
	
	words = strsplit(charcell2, split="_")[[1]]
	#"1" "3"
	
	subset_daughter_range = as.numeric(words[1])
	widespread_daughter_range = as.numeric(words[2])
	
	
	# The relative probability of the different options
	# subset daughter areas 1, 2, 3, 4...
	#rangesizes = seq(1, widespread_daughter_range, by=1)
	
	prob_of_this_b = relprob_subsets_matrix[widespread_daughter_range, subset_daughter_range]
	prob_of_this_b
	
	return(prob_of_this_b)
	}



# Function giving the relative probability of y (sympatric speciation)
# given different range sizes of the subset descendent
#######################################################
# yfunc
#######################################################
#' Extract the appropriate probability for a sympatric/range-copying speciation event, given text code for rangesize of smaller descendant, and ancestor
#'
#' @param charcell The text in the cell, indicating the type of speciation/cladogenesis range inheritance event.
#' @param relprob_subsets_matrix A numeric matrix describing the relative probability of each smaller daughter range, conditional on the ancestral rangesize.
#' @return \code{prob_of_this_s}, a numeric value giving the relative probability of that descendent-ancestor rangesize pair.
#' @export
#' @seealso \code{\link{sfunc}}, \code{\link{vfunc}}, \code{\link{relative_probabilities_of_subsets}}, \code{\link{symbolic_to_relprob_matrix_sp}}, \code{\link{get_probvals}}, \code{\link[FD]{maxent}}, \code{\link{calcZ_part}}, \code{\link{calcP_n}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{http://en.wikipedia.org/wiki/Maximum_entropy_probability_distribution}
#' \cite{Harte2011}
#' \cite{Ronquist1996_DIVA}
#' \cite{Ronquist_1997_DIVA}
#' \cite{Ronquist_Sanmartin_2011}
#' \cite{Landis_Matzke_etal_2013_BayArea}
#' @examples
#' testval=1
#' # Examples
#' 
#' # Probabilities of different descendant rangesizes, for the smaller 
#' # descendant, under sympatric/subset speciation
#' # (plus sympatric/range-copying, which is folded in):
#' relprob_subsets_matrix = relative_probabilities_of_subsets(max_numareas=6, 
#' maxent_constraint_01=0.0001, NA_val=NA)
#' relprob_subsets_matrix
#' sfunc(charcell="s1_1", relprob_subsets_matrix)
#' sfunc(charcell="s1_2", relprob_subsets_matrix)
#' sfunc(charcell="s1_3", relprob_subsets_matrix)
#' sfunc(charcell="s2_3", relprob_subsets_matrix)
#' 
#' relprob_subsets_matrix = relative_probabilities_of_subsets(max_numareas=6, 
#' maxent_constraint_01=0.5, NA_val=NA)
#' relprob_subsets_matrix
#' sfunc(charcell="s1_1", relprob_subsets_matrix)
#' sfunc(charcell="s1_2", relprob_subsets_matrix)
#' sfunc(charcell="s1_3", relprob_subsets_matrix)
#' sfunc(charcell="s2_3", relprob_subsets_matrix)
#' 
#' relprob_subsets_matrix = relative_probabilities_of_subsets(max_numareas=6, 
#' maxent_constraint_01=0.9999, NA_val=NA)
#' relprob_subsets_matrix
#' sfunc(charcell="s1_1", relprob_subsets_matrix)
#' sfunc(charcell="s1_2", relprob_subsets_matrix)
#' sfunc(charcell="s1_3", relprob_subsets_matrix)
#' sfunc(charcell="s2_3", relprob_subsets_matrix)
#' 
#' relprob_subsets_matrix = relative_probabilities_of_subsets(max_numareas=6, 
#' maxent_constraint_01=0.0001, NA_val=NA)
#' relprob_subsets_matrix
#' yfunc(charcell="y1", relprob_subsets_matrix)
#' yfunc(charcell="y2", relprob_subsets_matrix)
#' yfunc(charcell="y3", relprob_subsets_matrix)
#' yfunc(charcell="y4", relprob_subsets_matrix)
#' 
#' relprob_subsets_matrix = relative_probabilities_of_subsets(max_numareas=6, 
#' maxent_constraint_01=0.5, NA_val=NA)
#' relprob_subsets_matrix
#' yfunc(charcell="y1", relprob_subsets_matrix)
#' yfunc(charcell="y2", relprob_subsets_matrix)
#' yfunc(charcell="y3", relprob_subsets_matrix)
#' yfunc(charcell="y4", relprob_subsets_matrix)
#' 
#' relprob_subsets_matrix = relative_probabilities_of_subsets(max_numareas=6, 
#' maxent_constraint_01=0.9999, NA_val=NA)
#' relprob_subsets_matrix
#' yfunc(charcell="y1", relprob_subsets_matrix)
#' yfunc(charcell="y2", relprob_subsets_matrix)
#' yfunc(charcell="y3", relprob_subsets_matrix)
#' yfunc(charcell="y4", relprob_subsets_matrix)
#' 
#' 
#' # Probabilities of different descendant rangesizes, for the smaller descendant, 
#' # under vicariant speciation
#' relprob_subsets_matrix = relative_probabilities_of_vicariants(max_numareas=6, 
#' maxent_constraint_01v=0.0001, NA_val=NA)
#' relprob_subsets_matrix
#' vfunc(charcell="v1_1", relprob_subsets_matrix)
#' vfunc(charcell="v1_2", relprob_subsets_matrix)
#' vfunc(charcell="v1_3", relprob_subsets_matrix)
#' vfunc(charcell="v1_4", relprob_subsets_matrix)
#' vfunc(charcell="v2_4", relprob_subsets_matrix)
#' vfunc(charcell="v2_2", relprob_subsets_matrix)
#' vfunc(charcell="v1_6", relprob_subsets_matrix)
#' vfunc(charcell="v2_6", relprob_subsets_matrix)
#' vfunc(charcell="v3_6", relprob_subsets_matrix)
#' 
#' relprob_subsets_matrix = relative_probabilities_of_vicariants(max_numareas=6, 
#' maxent_constraint_01v=0.5, NA_val=NA)
#' relprob_subsets_matrix
#' vfunc(charcell="v1_1", relprob_subsets_matrix)
#' vfunc(charcell="v1_2", relprob_subsets_matrix)
#' vfunc(charcell="v1_3", relprob_subsets_matrix)
#' vfunc(charcell="v1_4", relprob_subsets_matrix)
#' vfunc(charcell="v2_4", relprob_subsets_matrix)
#' vfunc(charcell="v2_2", relprob_subsets_matrix)
#' vfunc(charcell="v1_6", relprob_subsets_matrix)
#' vfunc(charcell="v2_6", relprob_subsets_matrix)
#' vfunc(charcell="v3_6", relprob_subsets_matrix)
#' 
#' relprob_subsets_matrix = relative_probabilities_of_vicariants(max_numareas=6, 
#' maxent_constraint_01v=0.9999, NA_val=NA)
#' relprob_subsets_matrix
#' vfunc(charcell="v1_1", relprob_subsets_matrix)
#' vfunc(charcell="v1_2", relprob_subsets_matrix)
#' vfunc(charcell="v1_3", relprob_subsets_matrix)
#' vfunc(charcell="v1_4", relprob_subsets_matrix)
#' vfunc(charcell="v2_4", relprob_subsets_matrix)
#' vfunc(charcell="v2_2", relprob_subsets_matrix)
#' vfunc(charcell="v1_6", relprob_subsets_matrix)
#' vfunc(charcell="v2_6", relprob_subsets_matrix)
#' vfunc(charcell="v3_6", relprob_subsets_matrix)
#' 
yfunc <- function(charcell, relprob_subsets_matrix)
	{
	defaults='
	charcell="y3"
	maxent_constraint_01=1
	relprob_subsets_matrix = relative_probabilities_of_subsets(max_numareas=6, maxent_constraint_01)
	
	'
	# Remove the "y"
	#"y3"
	charcell2 = as.numeric(gsub(pattern="y", replacement="", x=charcell))
	
	
	subset_daughter_range = charcell2
	widespread_daughter_range = charcell2
	
	
	# The relative probability of the different options
	# subset daughter areas 1, 2, 3, 4...
	#rangesizes = seq(1, widespread_daughter_range, by=1)
	
	prob_of_this_s = relprob_subsets_matrix[widespread_daughter_range, subset_daughter_range]
	prob_of_this_s
	
	return(prob_of_this_s)
	}


# Function giving the relative probability of j (jump dispersal/founder event speciation)
# given different range sizes of the subset descendent
# NOT DONE
# jfunc <- function(charcell, relprob_subsets_matrix)
# 	{
# 	defaults='
# 	charcell="j3"
# 	maxent_constraint_01=1
# 	relprob_subsets_matrix = relative_probabilities_of_subsets(max_numareas=6, maxent_constraint_01)
# 	
# 	'
# 	# Remove the "j"
# 	#"j3"
# 	charcell2 = as.numeric(gsub(pattern="j", replacement="", x=charcell))
# 	
# 	
# 	subset_daughter_range = charcell2
# 	widespread_daughter_range = charcell2
# 	
# 	
# 	# The relative probability of the different options
# 	# subset daughter areas 1, 2, 3, 4...
# 	#rangesizes = seq(1, widespread_daughter_range, by=1)
# 	
# 	prob_of_this_s = relprob_subsets_matrix[widespread_daughter_range, subset_daughter_range]
# 	prob_of_this_s
# 	
# 	return(prob_of_this_s)
# 	}

# Function giving the relative probability of v (vicariant speciation)
# given different smaller range size and ancestral range size
#######################################################
# vfunc
#######################################################
#' Extract the appropriate probability for a vicariant speciation event, given text code for rangesize of smaller descendant, and ancestor
#'
#' @param charcell The text in the cell, indicating the type of speciation/cladogenesis range inheritance event.
#' @param relprob_vicar_matrix A numeric matrix describing the relative probability of each smaller daughter range, conditional on the ancestral rangesize.
#' @return \code{prob_of_this_v}, a numeric value giving the relative probability of that descendent-ancestor rangesize pair.
#' @export
#' @seealso \code{\link{sfunc}}, \code{\link{vfunc}}, \code{\link{relative_probabilities_of_subsets}}, \code{\link{symbolic_to_relprob_matrix_sp}}, \code{\link{get_probvals}}, \code{\link[FD]{maxent}}, \code{\link{calcZ_part}}, \code{\link{calcP_n}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' \url{http://en.wikipedia.org/wiki/Maximum_entropy_probability_distribution}
#' \cite{Harte2011}
#' \cite{Ronquist1996_DIVA}
#' \cite{Ronquist_1997_DIVA}
#' \cite{Ronquist_Sanmartin_2011}
#' \cite{Landis_Matzke_etal_2013_BayArea}
#' @examples
#' testval=1
#' # Examples
#' 
#' # Probabilities of different descendant rangesizes, for the smaller descendant, 
#' # under sympatric/subset speciation
#' # (plus sympatric/range-copying, which is folded in):
#' relprob_subsets_matrix = relative_probabilities_of_subsets(max_numareas=6, 
#' maxent_constraint_01=0.0001, NA_val=NA)
#' relprob_subsets_matrix
#' sfunc(charcell="s1_1", relprob_subsets_matrix)
#' sfunc(charcell="s1_2", relprob_subsets_matrix)
#' sfunc(charcell="s1_3", relprob_subsets_matrix)
#' sfunc(charcell="s2_3", relprob_subsets_matrix)
#' 
#' relprob_subsets_matrix = relative_probabilities_of_subsets(max_numareas=6, 
#' maxent_constraint_01=0.5, NA_val=NA)
#' relprob_subsets_matrix
#' sfunc(charcell="s1_1", relprob_subsets_matrix)
#' sfunc(charcell="s1_2", relprob_subsets_matrix)
#' sfunc(charcell="s1_3", relprob_subsets_matrix)
#' sfunc(charcell="s2_3", relprob_subsets_matrix)
#' 
#' relprob_subsets_matrix = relative_probabilities_of_subsets(max_numareas=6, 
#' maxent_constraint_01=0.9999, NA_val=NA)
#' relprob_subsets_matrix
#' sfunc(charcell="s1_1", relprob_subsets_matrix)
#' sfunc(charcell="s1_2", relprob_subsets_matrix)
#' sfunc(charcell="s1_3", relprob_subsets_matrix)
#' sfunc(charcell="s2_3", relprob_subsets_matrix)
#' 
#' relprob_subsets_matrix = relative_probabilities_of_subsets(max_numareas=6, 
#' maxent_constraint_01=0.0001, NA_val=NA)
#' relprob_subsets_matrix
#' yfunc(charcell="y1", relprob_subsets_matrix)
#' yfunc(charcell="y2", relprob_subsets_matrix)
#' yfunc(charcell="y3", relprob_subsets_matrix)
#' yfunc(charcell="y4", relprob_subsets_matrix)
#' 
#' relprob_subsets_matrix = relative_probabilities_of_subsets(max_numareas=6, 
#' maxent_constraint_01=0.5, NA_val=NA)
#' relprob_subsets_matrix
#' yfunc(charcell="y1", relprob_subsets_matrix)
#' yfunc(charcell="y2", relprob_subsets_matrix)
#' yfunc(charcell="y3", relprob_subsets_matrix)
#' yfunc(charcell="y4", relprob_subsets_matrix)
#' 
#' relprob_subsets_matrix = relative_probabilities_of_subsets(max_numareas=6, 
#' maxent_constraint_01=0.9999, NA_val=NA)
#' relprob_subsets_matrix
#' yfunc(charcell="y1", relprob_subsets_matrix)
#' yfunc(charcell="y2", relprob_subsets_matrix)
#' yfunc(charcell="y3", relprob_subsets_matrix)
#' yfunc(charcell="y4", relprob_subsets_matrix)
#' 
#' 
#' # Probabilities of different descendant rangesizes, for the smaller descendant, 
#' # under vicariant speciation
#' relprob_subsets_matrix = relative_probabilities_of_vicariants(max_numareas=6, 
#' maxent_constraint_01v=0.0001, NA_val=NA)
#' relprob_subsets_matrix
#' vfunc(charcell="v1_1", relprob_subsets_matrix)
#' vfunc(charcell="v1_2", relprob_subsets_matrix)
#' vfunc(charcell="v1_3", relprob_subsets_matrix)
#' vfunc(charcell="v1_4", relprob_subsets_matrix)
#' vfunc(charcell="v2_4", relprob_subsets_matrix)
#' vfunc(charcell="v2_2", relprob_subsets_matrix)
#' vfunc(charcell="v1_6", relprob_subsets_matrix)
#' vfunc(charcell="v2_6", relprob_subsets_matrix)
#' vfunc(charcell="v3_6", relprob_subsets_matrix)
#' 
#' relprob_subsets_matrix = relative_probabilities_of_vicariants(max_numareas=6, 
#' maxent_constraint_01v=0.5, NA_val=NA)
#' relprob_subsets_matrix
#' vfunc(charcell="v1_1", relprob_subsets_matrix)
#' vfunc(charcell="v1_2", relprob_subsets_matrix)
#' vfunc(charcell="v1_3", relprob_subsets_matrix)
#' vfunc(charcell="v1_4", relprob_subsets_matrix)
#' vfunc(charcell="v2_4", relprob_subsets_matrix)
#' vfunc(charcell="v2_2", relprob_subsets_matrix)
#' vfunc(charcell="v1_6", relprob_subsets_matrix)
#' vfunc(charcell="v2_6", relprob_subsets_matrix)
#' vfunc(charcell="v3_6", relprob_subsets_matrix)
#' 
#' relprob_subsets_matrix = relative_probabilities_of_vicariants(max_numareas=6, 
#' maxent_constraint_01v=0.9999, NA_val=NA)
#' relprob_subsets_matrix
#' vfunc(charcell="v1_1", relprob_subsets_matrix)
#' vfunc(charcell="v1_2", relprob_subsets_matrix)
#' vfunc(charcell="v1_3", relprob_subsets_matrix)
#' vfunc(charcell="v1_4", relprob_subsets_matrix)
#' vfunc(charcell="v2_4", relprob_subsets_matrix)
#' vfunc(charcell="v2_2", relprob_subsets_matrix)
#' vfunc(charcell="v1_6", relprob_subsets_matrix)
#' vfunc(charcell="v2_6", relprob_subsets_matrix)
#' vfunc(charcell="v3_6", relprob_subsets_matrix)
#' 
vfunc <- function(charcell, relprob_vicar_matrix)
	{
	defaults='
	charcell="v1_6"
	maxent_constraint_01v=0.00001
	relprob_vicar_matrix = relative_probabilities_of_subsets(max_numareas=6, maxent_constraint_01v)
	'
	# Remove the "v"
	#"v1_3"
	charcell2 = gsub(pattern="v", replacement="", x=charcell)
	
	words = strsplit(charcell2, split="_")[[1]]
	#"1" "3"
	
	smaller_daughter_range = as.numeric(words[1])
	widespread_ancestor_range = as.numeric(words[2])
	
	
	# The relative probability of the different options
	# subset daughter areas 1, 2, 3, 4...
	#rangesizes = seq(1, widespread_ancestor_range, by=1)
	
	prob_of_this_v = relprob_vicar_matrix[widespread_ancestor_range, smaller_daughter_range]
	prob_of_this_v
	
	return(prob_of_this_v)
	}




#######################################################
# nullsym_to_NA
#######################################################
#' Convert a specified null range code to NA
#' 
#' Takes a matrix \code{mat}, converts any instances of the \code{nullsym} symbol to \code{NA}.
#' 
#' @param mat A matrix.
#' @param nullsym A character specifying the null symbol.
#' @return \code{mat} The revised matrix
#' @export
#' @seealso \code{\link{remove_null_rowcols_from_mat}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @examples
#' testval=1
#' mat = matrix(c("-",1,1,1,"-",1,1,1,"-"), nrow=3)
#' mat
#' mat2 = nullsym_to_NA(mat, nullsym="-")
#' mat2
#'
nullsym_to_NA <- function(mat, nullsym="-")
	{
	mat[mat==nullsym] = NA
	return(mat)
	}





#######################################################
# Process optim results
#######################################################

#######################################################
# process_optim
#######################################################
#' Extract \code{optim} results to a row
#' 
#' After running an ML (maximum likelihood) search with \code{\link[stats]{optim}}, \code{\link[stats]{optim}} returns
#' a list with a variety of objects.  It is often handy to have the parameter values, log-likelihood, etc., extracted
#' to a table for comparison with other optimization runs.  \code{process_optim} does this.
#' 
#' @param optim_results A results object from \code{\link[stats]{optim}}
#' @param max_num_params Specify the number of parameters, if known. If \code{NULL}, the code will try to guess.
#' @return \code{tmprow3} A row holding the \code{\link[stats]{optim}} results, which can then be added to a table with \code{\link[base]{rbind}}.
#' @export
#' @seealso \code{\link[stats]{optim}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' \url{http://phylo.wikidot.com/matzke-2013-international-biogeography-society-poster}
#' @examples
#' testval=1
#' # Any optim() for a biogeography scenario would take too long to run for R CMD check.
#' 
process_optim <- function(optim_results, max_num_params=NULL)
	{
	# Get the number of params
	num_params = length(optim_results$par)


	if (is.null(max_num_params) || (num_params == max_num_params))
		{
		# Re-arrange optim results
		tmprow = unlist(optim_results)
		tmprow2_nonparam_indices = (num_params+1):length(tmprow)
		tmprow2 = tmprow[tmprow2_nonparam_indices]
		tmprow3 = c(tmprow2, tmprow[1:num_params])
		} else {
		
		# Error check
		if (max_num_params < num_params)
			{
			stop("ERROR: num_params in optim_results is GREATER than max_num_params")
			}

		# Fill out extra params
		num_zeros_to_add = max_num_params - length(optim_results$par)
		zeros_to_add = rep(0, num_zeros_to_add)
		
		parnums_to_add = (length(optim_results$par)+1) : max_num_params
		parnums_to_add_names = paste("par", parnums_to_add, sep="")
		
		# Re-arrange optim results
		tmprow = unlist(optim_results)
		tmprow2_nonparam_indices = (num_params+1):length(tmprow)
		tmprow2 = tmprow[tmprow2_nonparam_indices]
		tmprow2 = c(tmprow2, tmprow[1:num_params])
		names_tmprow2 = names(tmprow2) 
		names_tmprow3 = c(names_tmprow2, parnums_to_add_names)
		
		tmprow3 = c(tmprow2, zeros_to_add)
		names(tmprow3) = names_tmprow3
		}
	
	return(tmprow3)
	}







#######################################################
# Scale BioGeoBEARS params table
#######################################################
#' Scale a BioGeoBEARS parameters table
#'
#' Rescales a BioGeoBEARS parameters table (in the example script,
#' found at \code{BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table})
#' so that the min and max of each free parameter are \code{add_smin} and \code{add_smax}, 
#' typically 0 and 1.
#'
#' This may be useful to avoid scaling warnings when certain optimization functions
#' (e.g. optimx) complain about different free parameters having different numeric ranges.
#' Theoretically, if parameters have radically different ranges, the optimizer may have 
#' more difficulty correctly finding the actual optimum. However, I have yet to find
#' a clear case of this. Nonetheless, the rescaling procedure (and un-rescaling for 
#' screen display during each iteration of the optimization, and for calculating 
#' ancestral range probabilities at the end) is simple & fast, so 
#' users are free to experiment with it.
#'
#' @param orig_params_table The original, user-specified parameters table, e.g. 
#'        from \code{BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table}
#' @param add_smin The "substitute minimum" to add.
#' @param add_smax The "substitute maximum" to add.
#' @return params_table A revised parameters table, with these columns replacing
#'         the default min/max columns: smin, smax, orig_min, orig_max
#' @export
#' @seealso \code{\link{unscale_BGB_params}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu}
#' @examples
#' test=1
#' orig_params_table = BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table
#' add_smin=0
#' add_smax=1
#' scaled_params_table = scale_BGB_params(
#' orig_params_table=BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table)
#' scaled_params_table
#' unscaled_params_table = unscale_BGB_params(scaled_params_table)
#' unscaled_params_table
#' 
#' unscaled_params_table$min == orig_params_table$min
#' unscaled_params_table$max == orig_params_table$max
#' unscaled_params_table$init == orig_params_table$init
#' unscaled_params_table$est == orig_params_table$est
#' 
#'
scale_BGB_params <- function(orig_params_table, add_smin=0, add_smax=1)
	{
	defaults='
	orig_params_table = BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table
	add_smin=0
	add_smax=1
	scaled_params_table = scale_BGB_params(
	orig_params_table=BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table, 
	add_smin=add_smin, add_smax=add_smax)
	scaled_params_table
	
	unscaled_params_table = unscale_BGB_params(scaled_params_table)
	unscaled_params_table
	
	unscaled_params_table$min == orig_params_table$min
	unscaled_params_table$max == orig_params_table$max
	unscaled_params_table$init == orig_params_table$init
	unscaled_params_table$est == orig_params_table$est
	'
	
	params_table = orig_params_table
	
	# Add columns if needed:
	# smin = scaled min
	# smax = scaled max
	if (is.null(params_table$smin))
		{
		params_table$smin = add_smin
		}
	if (is.null(params_table$smax))
		{
		params_table$smax = add_smax
		}
	min_scaled = params_table$smin
	max_scaled = params_table$smax
	newscale = max_scaled - min_scaled
	
	# Get the "real" original min and max (could be outside the stated min and max,
	# e.g. when a value is fixed to 0)
	orig_min = params_table$min
	for (i in 1:length(orig_min))
		{
		orig_min[i] = min(c(params_table$min[i], params_table$init[i], params_table$est[i]))
		}
	orig_max = params_table$max
	for (i in 1:length(orig_max))
		{
		orig_max[i] = max(c(params_table$max[i], params_table$init[i], params_table$est[i]))
		}
	params_table$orig_min = orig_min
	params_table$orig_max = orig_max
	
	# Calculate the original scale accordingly
	orig_scale = orig_max - orig_min
	
	# New minimums
	params_table$min = (((orig_params_table$min - orig_min) / orig_scale) * newscale) + min_scaled

	# New maximums
	params_table$max = (((orig_params_table$max - orig_min) / orig_scale) * newscale) + min_scaled

	# New initials
	params_table$init = (((orig_params_table$init - orig_min) / orig_scale) * newscale) + min_scaled

	# New estimates
	params_table$est = (((orig_params_table$est - orig_min) / orig_scale) * newscale) + min_scaled
	
	return(params_table)
	}




#######################################################
# Unscale a rescaled BioGeoBEARS params table
#######################################################
#' Unscale a rescaled BioGeoBEARS parameters table
#'
#' Un-rescales a BioGeoBEARS parameters table (in the example script,
#' found at \code{BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table})
#' so that the min and max of each free parameter are no longer 
#' \code{add_smin} and \code{add_smax} (typically 0 and 1), but instead the 
#' original min and max originally specified by the user.
#'
#' Rescaling may be useful to avoid scaling warnings when certain optimization functions
#' (e.g. optimx) complain about different free parameters having different numeric ranges.
#' Theoretically, if parameters have radically different ranges, the optimizer may have 
#' more difficulty correctly finding the actual optimum. However, I have yet to find
#' a clear case of this. Nonetheless, the rescaling procedure (and un-rescaling for 
#' screen display during each iteration of the optimization, and for calculating 
#' ancestral range probabilities at the end) is simple & fast, so 
#' users are free to experiment with it.
#'
#' @param scaled_params_table The rescaled version of the user-specified parameters 
#'        table, e.g. from 
#'        \code{BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table}
#' @return params_table A revised parameters table, the original, nonscaled parameters.
#' @export
#' @seealso \code{\link{scale_BGB_params}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu}
#' @examples
#' test=1
#' orig_params_table = BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table
#' add_smin=0
#' add_smax=1
#' scaled_params_table = scale_BGB_params(
#' orig_params_table=BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table)
#' scaled_params_table
#' unscaled_params_table = unscale_BGB_params(scaled_params_table)
#' unscaled_params_table
#' 
#' unscaled_params_table$min == orig_params_table$min
#' unscaled_params_table$max == orig_params_table$max
#' unscaled_params_table$init == orig_params_table$init
#' unscaled_params_table$est == orig_params_table$est
#' 
unscale_BGB_params <- function(scaled_params_table)
	{
	defaults='
	orig_params_table = BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table
	add_smin=0
	add_smax=1
	scaled_params_table = scale_BGB_params(
	orig_params_table=BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table, 
	add_smin=add_smin, add_smax=add_smax)
	scaled_params_table
	
	unscaled_params_table = unscale_BGB_params(scaled_params_table)
	unscaled_params_table
	
	unscaled_params_table$min == orig_params_table$min
	unscaled_params_table$max == orig_params_table$max
	unscaled_params_table$init == orig_params_table$init
	unscaled_params_table$est == orig_params_table$est
	'
	
	params_table = scaled_params_table
	
	# Error checks
	if (is.null(params_table$smin))
		{
		stop("ERROR: unscale_BGB_params() requires a BioGeoBEARS params_table, with columns smin, smax, orig_min, orig_max. You are missing at least one of these.")
		}
	if (is.null(params_table$smax))
		{
		stop("ERROR: unscale_BGB_params() requires a BioGeoBEARS params_table, with columns smin, smax, orig_min, orig_max. You are missing at least one of these.")
		}
	if (is.null(params_table$orig_min))
		{
		stop("ERROR: unscale_BGB_params() requires a BioGeoBEARS params_table, with columns smin, smax, orig_min, orig_max. You are missing at least one of these.")
		}
	if (is.null(params_table$orig_max))
		{
		stop("ERROR: unscale_BGB_params() requires a BioGeoBEARS params_table, with columns smin, smax, orig_min, orig_max. You are missing at least one of these.")
		}


	min_scaled = params_table$smin
	max_scaled = params_table$smax
	newscale = max_scaled - min_scaled
	
	# Get the "real" original min and max (could be outside the stated min and max,
	# e.g. when a value is fixed to 0)
	orig_min = params_table$orig_min
	orig_max = params_table$orig_max
	
	# Calculate the original scale accordingly
	orig_scale = orig_max - orig_min
	
	# New minimums
	params_table$min = (((scaled_params_table$min - min_scaled) / newscale) * orig_scale) + orig_min

	# New maximums
	params_table$max = (((scaled_params_table$max - min_scaled) / newscale) * orig_scale) + orig_min

	# New initials
	params_table$init = (((scaled_params_table$init - min_scaled) / newscale) * orig_scale) + orig_min

	# New estimates
	params_table$est = (((scaled_params_table$est - min_scaled) / newscale) * orig_scale) + orig_min
	
	return(params_table)
	} # END unscale_BGB_params <- function(scaled_params_table)





#######################################################
# order_tipranges_by_tr
#######################################################
#' Order the tipranges in a tipranges object so they match the order of tips in a tree
#' 
#' Utility function. Orders the tipranges in a tipranges object so they match the order 
#' of tips in a tree. Life can get very confusing if you don't do this before plotting.
#' 
#' @param tipranges A \code{tipranges} object.
#' @param tr An ape tree object.
#' @return \code{tipranges} The \code{tipranges} object with a reordered \code{data.frame}
#'         inside \code{tipranges@df}.
#' @export
#' @seealso \code{\link[base]{unlist}}
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @examples
#' test=1
#'
#' \dontrun{
#' require(BioGeoBEARS)
#' 
#' # trfn = "/GitHub/BioGeoBEARS/inst/extdata/examples/BSM_3taxa/M0/tree.newick"
#' # geogfn = "/GitHub/BioGeoBEARS/inst/extdata/examples/BSM_3taxa/M0/geog.data"
#' # 
#' # tr = read.tree(trfn)
#' # write.tree(tr, file="")
#' #
#' # ((human:1,chimp:1):1,gorilla:2);
#' # 
#' # tipranges = getranges_from_LagrangePHYLIP(lgdata_fn=geogfn)
#' # tipranges
#' # 
#' # dput_string = dput(tipranges)
#' # dput_string
#' # "new(\"tipranges\", .Data = numeric(0), df = structure(list(A = c(\"0\", 
#' # \n\"0\", \"1\"), B = c(\"0\", \"0\", \"1\"), C = c(\"1\", \"1\", \"0\")), 
#' # class = \"data.frame\", row.names = c(\"human\", \n\"chimp\", \"gorilla\")))"
#' 
#' # Load hard-coded tree and tipranges object:
#' tr = read.tree(file="", text="((human:1,chimp:1):1,gorilla:2);")
#' plot(tr)
#' 
#' tipranges = new("tipranges", .Data = numeric(0), df = structure(list(A = c("1", 
#' "0", "0"), B = c("1", "0", "0"), C = c("0", "1", "1")), class = "data.frame", row.names = c("gorilla", 
#' "chimp", "human")))
#' tipranges
#' 
#' # An object of class "tipranges" (from the global environment)
#' # numeric(0)
#' # Slot "df":
#' #         A B C
#' # gorilla 1 1 0
#' # chimp   0 0 1
#' # human   0 0 1
#' 
#' tipranges_reorderd = order_tipranges_by_tr(tipranges, tr)
#' tipranges_reorderd
#' 
#' # An object of class "tipranges" (from the global environment)
#' # numeric(0)
#' # Slot "df":
#' #         A B C
#' # human   0 0 1
#' # chimp   0 0 1
#' # gorilla 1 1 0
#' 
#' tr$tip.label
#' # [1] "human"   "chimp"   "gorilla"
#' 
#' }
order_tipranges_by_tr <- function(tipranges, tr)
	{
	ex='
	# Load hard-coded tree and tipranges object:
	tr = read.tree(file="", text="((human:1,chimp:1):1,gorilla:2);")
	plot(tr)

	tipranges = new("tipranges", .Data = numeric(0), df = structure(list(A = c("1", 
	"0", "0"), B = c("1", "0", "0"), C = c("0", "1", "1")), class = "data.frame", row.names = c("gorilla", 
	"chimp", "human")))
	tipranges

	# An object of class "tipranges" (from the global environment)
	# numeric(0)
	# Slot "df":
	#         A B C
	# gorilla 1 1 0
	# chimp   0 0 1
	# human   0 0 1

	tipranges_reorderd = order_tipranges_by_tr(tipranges, tr)
	tipranges_reorderd

	# An object of class "tipranges" (from the global environment)
	# numeric(0)
	# Slot "df":
	#         A B C
	# human   0 0 1
	# chimp   0 0 1
	# gorilla 1 1 0

	tr$tip.label
	'
	
	tipranges_names = rownames(tipranges@df)
	tr_names = tr$tip.label
	
	match_indices = get_indices_where_list1_occurs_in_list2(list1=tr_names, list2=tipranges_names)
	
	tmpdf = tipranges@df[match_indices, ]
	tipranges@df = tmpdf
	
	return(tipranges)
	}





#' Put the j column after the e column
#'
#' This function puts the column with "j" directly after the column with "e",
#' if both column names are found, and if column j is after column e+1 
#'
#' @param tmptable A \code{data.frame} or \code{matrix}.
#' @return tmptable The input table, with reordered columns
#' @export
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu}
#' @examples
#' test=1
#'
#' # Set up a data.frame
#' student_names = c("Nick", "Tom", "Bob")
#' grade1 = c(37, 100, 60)
#' grade2 = c(43, 80, 70)
#' grade3 = c(100, 90, 100)
#' 
#' # convert to data frame
#' tmptable = cbind(student_names, grade1, grade2, grade3)
#' tmptable = as.data.frame(tmptable, stringsAsFactors=FALSE)
#' col_headers = c("d", "e", "x", "j")
#' names(tmptable) = col_headers
#' put_jcol_after_ecol(tmptable)
#' 
put_jcol_after_ecol <- function(tmptable)
	{
	ex='
	tmptable = cbind(student_names, grade1, grade2, grade3)
	tmptable = as.data.frame(tmptable, stringsAsFactors=FALSE)
	col_headers = c("d", "e", "x", "j")
	names(tmptable) = col_headers
	put_jcol_after_ecol(tmptable)
	'
	
	e_col = which(names(tmptable)=="e")
	j_col = which(names(tmptable)=="j")
	if ((length(j_col) > 0) && (length(e_col) > 0))
		{
		if ( (j_col > e_col) && (j_col != (e_col+1)) )
			{
			colnums1 = 1:e_col
			tmp_colnums = (e_col+1) : ncol(tmptable)
			colnums2 = tmp_colnums[tmp_colnums != j_col]
			colnums_final = c(colnums1, j_col, colnums2)
			tmptable = tmptable[,colnums_final]
			}
		}
	return(tmptable)
	} # END put_jcol_after_ecol <- function(tmptable)









# state_indices_0based_all_timeperiods A list of lists of numbers (e.g. range 0, range 1, range 12),
# perhaps with NA representing null range

#' Sort a list of lists of numbers
#'
#' This function helps to get e.g. a single combined states_list from a list of states_lists
#' (e.g. in a time-stratified analysis). Useful, for example, after combining several
#' lists of 0-bases states_lists in a time-stratified \code{BioGeoBEARS} analysis. 
#'
#' @param \code{state_indices_0based_all_timeperiods} A list of states, where each state is 
#'        one or more numbers (representing areas) or NA (representing a null_range).
#' @return A sorted \code{state_indices_0based_all_timeperiods}, with the NA in front (if present).
#' @export
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu}
#' @examples
#' test=1
#' 	# This is the list of states/ranges, where each state/range
#' 	# is a list of areas, counting from 0
#' 	max_range_size = 4
#' 	areas = c("A", "B", "C", "D")
#' 	state_indices_0based_all_timeperiods = rev(cladoRcpp::rcpp_areas_list_to_states_list(areas=areas, maxareas=max_range_size, include_null_range=TRUE))
#' 	state_indices_0based_all_timeperiods
#' 	
#' 	# Sort it
#' 	sorted_state_indices_0based_all_timeperiods = sort_list_of_lists_of_numbers(state_indices_0based_all_timeperiods)
#' 	sorted_state_indices_0based_all_timeperiods
#' 
sort_list_of_lists_of_numbers <- function(state_indices_0based_all_timeperiods)
	{
	ex='
	# This is the list of states/ranges, where each state/range
	# is a list of areas, counting from 0
	max_range_size = 4
	areas = c("A", "B", "C", "D")
	state_indices_0based_all_timeperiods = rev(cladoRcpp::rcpp_areas_list_to_states_list(areas=areas, maxareas=max_range_size, include_null_range=TRUE))
	state_indices_0based_all_timeperiods
	
	# Sort it
	sorted_state_indices_0based_all_timeperiods = sort_list_of_lists_of_numbers(state_indices_0based_all_timeperiods)
	sorted_state_indices_0based_all_timeperiods
	
	# Another tricky one
	state_indices_0based_all_timeperiods = list(NA, 0L, 1L, 2L, 3L, 0:1, c(0L, 2L), c(0L, 3L), 1:2, c(1L, 3L), 2:3, 0:2, c(0L, 1L, 3L), c(0L, 2L, 3L), 1:3, 0:3)
	sorted_state_indices_0based_all_timeperiods = sort_list_of_lists_of_numbers(state_indices_0based_all_timeperiods)
	sorted_state_indices_0based_all_timeperiods
	'
	# Get the numbers as collapsed characters, to be sure sorting into correct order
	state_indices_charcodes = rep("_", times=length(state_indices_0based_all_timeperiods))
	lengthvals = rep(0, times=length(state_indices_0based_all_timeperiods))
	for (ss in 1:length(state_indices_0based_all_timeperiods))
		{
		if (length(state_indices_0based_all_timeperiods[[ss]]) == 1)
			{
			# Yes, the list item has length 1
			if (is.na(state_indices_0based_all_timeperiods[[ss]]))
				{
				state_indices_charcodes[ss] = "_"
				lengthvals[ss] = 0
				next() # Go to the next ss
				}
			} # END if (length(state_indices_0based_all_timeperiods[[ss]]) == 1)
		# Otherwise, convert:
		# Convert e.g. 0, 1, 2 to 0000, 0001, 0002 (for sorting later)
		charvals = sprintf("%04.0f", state_indices_0based_all_timeperiods[[ss]])
		state_indices_charcodes[ss] = paste0(charvals, collapse=",", sep="")
		lengthvals[ss] = length(state_indices_0based_all_timeperiods[[ss]])
		} # END for (ss in 1:length(state_indices_0based_all_timeperiods))
	lengthvals = sprintf("%04.0f", lengthvals)
	state_indices_charcodes
	state_indices_charcodes = paste0(lengthvals, "-", state_indices_charcodes, sep="")
	ordernums = order(state_indices_charcodes)
	
	oldlist = state_indices_0based_all_timeperiods
	for (ss in 1:length(state_indices_0based_all_timeperiods))
		{
		state_indices_0based_all_timeperiods[[ss]] = oldlist[[ordernums[ss]]]
		}
	state_indices_0based_all_timeperiods
	
	return(state_indices_0based_all_timeperiods)
	}
nmatzke/BioGeoBEARS documentation built on April 11, 2024, 2:16 a.m.