R/summarize_BSM_tables_v1.R

Defines functions linear_regression_plot minmax_pretty check_ML_vs_BSM table2 hist_event_counts calc_BSM_mean_node_states print_counts_lists_to_file count_ana_clado_events count_clado_events_nonjump count_events_huge_tables get_huge_events_tables_from_clado_ana_events_tables get_huge_events_tables_from_BSM_Rdata uniquify_clado_events count_ana_dispersal_events count_clado_dispersal_events simulate_source_area_clado simulate_source_area_ana simulate_source_areas_ana_clado

#######################################################
# Intrinsically, the transition matrix used in BioGeoBEARS, Lagrange, etc.,
# does not specify the exact source area when a transition occurs
# from a widespread ancestor. However, this can be simulated given
# knowledge of dmat (dmat = all dispersal multipliers multiplied)
#######################################################

simulate_source_areas_ana_clado <- function(res, clado_events_tables, ana_events_tables, areanames)
	{
	# Get the dmat and times
	dmat_times = get_dmat_times_from_res(res=res, numstates=NULL)
		
	dmat = dmat_times$dmat
	times = dmat_times$times
	
	# 2019-07-11 update to fix Mathias issue with dmat length 1
# 	print("dmat_times$dmat")
# 	print(dmat_times$dmat)
# 	print("dmat")
# 	print(dmat)
	
	# Error check
	if (length(clado_events_tables) != length(ana_events_tables))
		{
		txt = paste0("STOP ERROR in simulate_source_areas_ana_clado(): ana_events_tables and clado_events_tables should have the same length. Instead, length(ana_events_tables)=", length(ana_events_tables), ", length(clado_events_tables)=", (clado_events_tables), ".")
		cat("\n\n")
		cat(txt)
		cat("\n\n")
		stop(txt)
		} # END if (length(clado_events_tables) != length(clado_events_tables))
	
	# Simulate 
	# - the anagenetic dispersal event source areas
	# - the cladogenetic dispersal event (jump dispersal) source areas
	numBSMs = length(clado_events_tables)
	txt = paste0("Simulating the ancestral areas for cases where the ancestral area occupied 2 or more areas (number of stochastic maps=", numBSMs, "):\n")
	cat("\n\n")
	cat(txt)
	for (i in 1:numBSMs)
		{
		cat(i, " ", sep="")
		
		# Simulate the source areas for the jump-dispersal events;
		# modify clado_event_tables
		clado_events_tables[[i]] = simulate_source_area_clado(clado_events_tables[[i]], areanames, dmat=dmat, times=times)

		# Simulate the source areas for the anagenetic-dispersal events;
		# modify ana_event_tables
		ana_events_tables[[i]] = simulate_source_area_ana(ana_events_tables[[i]], areanames, dmat=dmat, times=times)
		} # END for (i in 1:numBSMs)
	cat("\n\n")	
		
	BSMs_w_sourceAreas = NULL
	BSMs_w_sourceAreas$clado_events_tables = clado_events_tables
	BSMs_w_sourceAreas$ana_events_tables = ana_events_tables
	
	extract='
	clado_events_tables = BSMs_w_sourceAreas$clado_events_tables
	ana_events_tables = BSMs_w_sourceAreas$ana_events_tables
	'
	
	return(BSMs_w_sourceAreas)
	}



# areanames = names(tipranges@df)
# actual_names = areanames
# actual_names
# 
# # Get the dmat and times (if any)
# dmat_times = get_dmat_times_from_res(res=res, numstates=NULL)
# dmat_times
# 
# # Simulate the source areas for the jump-dispersal events; add to table
# clado_events_table = clado_events_tables[[1]]
# names(clado_events_table)
# clado_events_table = simulate_source_area_clado(clado_events_table, areanames, dmat=dmat_times$dmat)
# names(clado_events_table)
# 
# # Simulate the source areas for the jump-dispersal events; add to table
# ana_events_table = ana_events_tables[[1]]
# names(ana_events_table)
# ana_events_table = simulate_source_area_ana(ana_events_table, areanames, dmat=NULL)
# names(ana_events_table)


simulate_source_area_ana <- function(ana_events_table, areanames, dmat=NULL, times=NULL)
	{
	# NA if no events; return NULL
	if ( (length(ana_events_table)==1) && (is.na(ana_events_table) == TRUE) )
		{
		return(ana_events_table)
		} # END if (is.na(ana_events_table) == TRUE)
	
	ana_events_table_OLD = ana_events_table
	
	# Check for times (different dmats through time)
	strat_TF = FALSE
	if (is_list_not_dataframe(dmat) && is.null(times))
		{
		txt = "STOP ERROR in simulate_source_area_ana(): if 'dmat' is a list of dmats, 'times' cannot be NULL."
		cat("\n\n")
		cat(txt)
		cat("\n\n")
		stop(txt)
		}
	if (is_list_not_dataframe(dmat) && !is.null(times))
		{
		strat_TF = TRUE
		
		# Get the top/bottom of the time bins
		tops = c(0, times[-length(times)])
		bots = times
		
		if (length(dmat) != length(times))
			{
			txt = "STOP ERROR in simulate_source_area_ana(): the length of the 'dmat' list must equal the length of 'times' in a time-stratified analysis."
			cat("\n\n")
			cat(txt)
			cat("\n\n")
			stop(txt)
			} # END if (length(dmat) != length(times))
		
		if ( all(times == sort(times))==FALSE || (0 %in% times) == TRUE )
			{
			txt = "STOP ERROR in simulate_source_area_ana(): 'times' in a time-stratified analysis must be sorted from youngest to oldest. Also, time '0' should *not* be included."
			cat("\n\n")
			cat(txt)
			cat("\n\n")
			stop(txt)
			} # END if ( all( times == sort(times) ) )
		} # END if (is_list_not_dataframe(dmat) && is.null(times))
	
	# If dmat is NULL, set to all 1s
	# This is the dispersal_multipliers_matrix, which includes the influence 
	# of manual dispersal multipliers, distance, environmental distance,
	# etc. on the dispersal (d) and jump dispersal (j) processes.
	# (and anagenetic range-switching, a)
	if (is.null(dmat) == TRUE)
		{
		dmat = matrix(1, nrow=length(areanames), ncol=length(areanames))
		} # END if (is.null(dmat) == TRUE)

	# Actual source area (single area)
	ana_dispersal_from = rep("", nrow(ana_events_table))
	for (ii in 1:length(areanames))
		{
		areaname = areanames[ii]
		TF = ana_events_table$dispersal_to == areaname
		TF[is.na(TF)] = FALSE
	
		if ((length(TF) > 0) && (sum(TF) > 0))
			{
			# Split events on "->"
			tmp_table = ana_events_table[TF,]
			events_txt = tmp_table$event_txt

			events_df = as.data.frame(matrix(unlist(sapply(X=events_txt, FUN=strsplit, split="->")), ncol=2, byrow=TRUE), stringsAsFactors=FALSE)
			names(events_df) = c("ancrange", "decrange")
			orig_rownum = 1:nrow(events_df)

			# OLD: nchars is labeled as the weight
			#nchars = nchar(events_df$ancrange)

			# NEW: weight by prob of being the source area,
			# then pick a source area
			tmp_ana_dispersal_from = rep(NA, nrow(tmp_table))
			for (j in 1:nrow(tmp_table))
				{
				# Destination area was set by i through areanames
				# (assumes they are in the same order); filter
				# dmat accordingly
				if (strat_TF == FALSE)
					{
					source_area_relprob1 = dmat[,ii]
					} else {
					timebp_of_event = tmp_table$abs_event_time[j]
					TF1 = timebp_of_event >= tops
					TF2 = timebp_of_event < bots
					inbin_TF = (TF1 + TF2) == 2
					binnum = (1:length(times))[inbin_TF]
					
					source_area_relprob1 = dmat[[binnum]][,ii]
					} # END if (strat_TF == FALSE)

				# source range
				source_range = events_df$ancrange[j]
				source_areas = strsplit(x=source_range, split="")[[1]]
				source_indices = match(x=source_areas, table=areanames)
				source_area_relprob2 = source_area_relprob1[source_indices]
				
				# Check for zeros (might happen in "manual" histories)
				if (sum(source_area_relprob2) <= 0)
					{
					stoptxt = paste0("simulate_source_area_ana() says: WARNING: the sum of source_area_relprob2 is 0. This means that all possible source areas have probability zero. In stochastic mapping, this could happen if, e.g. in a time-stratified stochastic map, the simulated state at the top of the previous time-bin is an impossible starting range for the manual history in the current time bin. In this situation, the 'manual history' option probably forced a history, just to stop you from running millions of tries pointlessly. This is part of the overall problem with very-improbable histories in stochastic mapping with huge biogeographical state spaces and complex modifications and lots of constratints.  If you get this warning, you should (a) be grateful that this code works at all, programming all of this is ridiculously complex, especially in R, and (b) you should probably delete these histories from your collection, if you can, although the overall effect should be minor.  Here, we are re-setting source_area_relprob2 to source_area_relprob2=rep(1, times=length(source_area_relprob2)). The events_txt being tried here was: '", events_txt, "'.")
					cat("\n\n")
					cat(stoptxt)
					cat("\n\n")
					warning(stoptxt)
					
					source_area_relprob2=rep(1, times=length(source_area_relprob2))
					}
				
				source_area_relprob = source_area_relprob2 / sum(source_area_relprob2)
				
				# Randomly sample a source area
				source_area = sample(x=source_areas, size=1, replace=FALSE, prob=source_area_relprob)
				tmp_ana_dispersal_from[j] = source_area
				} # END for (j in 1:nrow(tmp_table))
			# Save in the overall ana_dispersal_from
			ana_dispersal_from[TF] = tmp_ana_dispersal_from
			} # END if ((length(TF) > 0) && (sum(TF) > 0))
		} # END for (ii in 1:length(areanames))

	# Edit the input ana_events_table
	dispersal_to_colnum_TF = names(ana_events_table_OLD) == "dispersal_to"
	dispersal_to_colnum = (1:ncol(ana_events_table_OLD))[dispersal_to_colnum_TF]
	
	end1 = (dispersal_to_colnum-1)
	start2 = (dispersal_to_colnum)
	end2 = ncol(ana_events_table_OLD)
	if ( (dispersal_to_colnum+1) <= ncol(ana_events_table_OLD))
		{
		ana_events_table = cbind(ana_events_table_OLD[1:end1], ana_dispersal_from, ana_events_table_OLD[start2:end2])
		} else {
		ana_events_table = cbind(ana_events_table_OLD[1:end1], ana_dispersal_from)
		} # END if ( (dispersal_to_colnum+1) <= ncol(ana_events_table_OLD))
	
	return(ana_events_table)
	} # END simulate_source_area_ana <- function(ana_events_tables, ana_events_tables, areanames, dmat=NULL)



simulate_source_area_clado <- function(clado_events_table, areanames, dmat=NULL, times=NULL)
	{
	clado_events_table_OLD = clado_events_table

	# Check for times (different dmats through time)
	strat_TF = FALSE
	if (is_list_not_dataframe(dmat) && is.null(times))
		{
		txt = "STOP ERROR in simulate_source_area_clado(): if 'dmat' is a list of dmats, 'times' cannot be NULL."
		cat("\n\n")
		cat(txt)
		cat("\n\n")
		stop(txt)
		}
	if (is_list_not_dataframe(dmat) && !is.null(times))
		{
		strat_TF = TRUE
		
		# Get the top/bottom of the time bins
		tops = c(0, times[-length(times)])
		bots = times
		
		if (length(dmat) != length(times))
			{
			txt = "STOP ERROR in simulate_source_area_clado(): the length of the 'dmat' list must equal the length of 'times' in a time-stratified analysis."
			cat("\n\n")
			cat(txt)
			cat("\n\n")
			stop(txt)
			} # END if (length(dmat) != length(times))
		
		if ( all(times == sort(times))==FALSE || (0 %in% times) == TRUE )
			{
			txt = "STOP ERROR in simulate_source_area_clado(): 'times' in a time-stratified analysis must be sorted from youngest to oldest. Also, time '0' should *not* be included."
			cat("\n\n")
			cat(txt)
			cat("\n\n")
			stop(txt)
			} # END if ( all( times == sort(times) ) )
		} # END if (is_list_not_dataframe(dmat) && is.null(times))
	
	
	# If dmat is NULL, set to all 1s
	# This is the dispersal_multipliers_matrix, which includes the influence 
	# of manual dispersal multipliers, distance, environmental distance,
	# etc. on the dispersal (d) and jump dispersal (j) processes.
	# (and anagenetic range-switching, a)
	if (is.null(dmat) == TRUE)
		{
		dmat = matrix(1, nrow=length(areanames), ncol=length(areanames))
		} # END if (is.null(dmat) == TRUE)

	# Actual source area (single area)
	clado_dispersal_from = rep("", nrow(clado_events_table))
	for (i in 1:length(areanames))
		{
		areaname = areanames[i]
		TF = clado_events_table$clado_dispersal_to == areaname
		TF[is.na(TF)] = FALSE
		
		if ((length(TF) > 0) && (sum(TF) > 0))
			{
			# Split events on "->"
			tmp_table = clado_events_table[TF,]
			events_txt = tmp_table$clado_event_txt
	
			events_df = as.data.frame(matrix(unlist(sapply(X=events_txt, FUN=strsplit, split="->")), ncol=2, byrow=TRUE), stringsAsFactors=FALSE)
			names(events_df) = c("ancrange", "decrange")
			orig_rownum = 1:nrow(events_df)

			# OLD: nchars is labeled as the weight
			#nchars = nchar(events_df$ancrange)

			# NEW: weight by prob of being the source area,
			# then pick a source area
			tmp_clado_dispersal_from = rep(NA, nrow(tmp_table))
			for (j in 1:nrow(tmp_table))
				{

				# Destination area was set by i through areanames
				# (assumes they are in the same order); filter
				# dmat accordingly
				if (strat_TF == FALSE)
					{
					source_area_relprob1 = dmat[,i]
					} else {
					timebp_of_event = tmp_table$time_bp[j]
					TF1 = timebp_of_event >= tops
					TF2 = timebp_of_event < bots
					inbin_TF = (TF1 + TF2) == 2
					binnum = (1:length(times))[inbin_TF]
					source_area_relprob1 = dmat[[binnum]][,i]
					} # END if (strat_TF == FALSE)

				# source range
				source_range = events_df$ancrange[j]
				source_areas = strsplit(x=source_range, split="")[[1]]
				source_indices = match(x=source_areas, table=areanames)
				source_area_relprob2 = source_area_relprob1[source_indices]
				source_area_relprob = source_area_relprob2 / sum(source_area_relprob2)

				# Randomly sample a source area
				source_area = sample(x=source_areas, size=1, replace=FALSE, prob=source_area_relprob)
				tmp_clado_dispersal_from[j] = source_area
				} # END for (j in 1:nrow(tmp_table))
			# Save in the overall clado_dispersal_from
			clado_dispersal_from[TF] = tmp_clado_dispersal_from
			} # END if ((length(TF) > 0) && (sum(TF) > 0))
		} # END for (i in 1:length(areanames))
	
	# Edit the input clado_events_table
	clado_to_colnum_TF = names(clado_events_table_OLD) == "clado_dispersal_to"
	clado_to_colnum = (1:ncol(clado_events_table_OLD))[clado_to_colnum_TF]
	
	end1 = (clado_to_colnum-1)
	start2 = (clado_to_colnum)
	end2 = ncol(clado_events_table_OLD)
	if ( (clado_to_colnum+1) <= ncol(clado_events_table_OLD))
		{
		clado_events_table = cbind(clado_events_table_OLD[1:end1], clado_dispersal_from, clado_events_table_OLD[start2:end2])
		} else {
		clado_events_table = cbind(clado_events_table_OLD[1:end1], clado_dispersal_from)
		} # END if ( (clado_to_colnum+1) <= ncol(clado_events_table_OLD))
	
	return(clado_events_table)
	} # END simulate_source_area_clado <- function(clado_events_tables, ana_events_tables, areanames, dmat=NULL)






# Assumes equal weights of ancestral areas, when ancestor
# occupies more than one area
count_clado_dispersal_events <- function(clado_events_table, areanames, actual_names=areanames)
	{
	defaults='
	areanames = c("A", "B", "C", "D", "E", "F", "G")
	actual_names = c("SAm", "CAm", "Car", "NAm", "AF", "EU", "OZ")
	clado_events_table = DECJ_clado_events_tables[[1]]
	'

	# Error check
	if ("clado_dispersal_from" %in% names(clado_events_table) == FALSE)
		{
		txt = paste0("STOP ERROR in count_clado_dispersal_events(): input 'clado_events_table' must have a column 'clado_dispersal_from'. To add this column, run simulate_source_area_clado() or simulate_source_areas_ana_clado().")
		cat("\n\n")
		cat(txt)
		cat("\n\n")
		stop(txt)
		} # END error check

	
	TF = nchar(areanames) > 1
	if (any(TF))
		{
		txt = paste0("STOP ERROR: all 'areanames' must be only 1 character long for the counting of events in stochastic mapping to work. You have violations for these 'areanames': ", paste(areanames[TF], sep=", "), ".")
		cat("\n\n")
		cat(txt)
		cat("\n\n")
		stop(txt)
		} # END if (any(TF))


	
	# Built up the transition matrix
	events_df2 = NULL
	for (i in 1:length(areanames))
		{
		areaname = areanames[i]
		TF = clado_events_table$clado_dispersal_to == areaname
		TF[is.na(TF)] = FALSE
		tmp_table = clado_events_table[TF, ]
		
		if ((length(TF) > 0) && (sum(TF) > 0))
			{
			# Split them on "->"
			events_txt = tmp_table$clado_event_txt
	
			events_df = as.data.frame(matrix(unlist(sapply(X=events_txt, FUN=strsplit, split="->")), ncol=2, byrow=TRUE), stringsAsFactors=FALSE)
			names(events_df) = c("ancrange", "decrange")
			orig_rownum = 1:nrow(events_df)
			
			time_bp = tmp_table$time_bp
			ancrange = events_df$ancrange
			decrange = events_df$decrange
			clado_dispersal_from = tmp_table$clado_dispersal_from
			clado_dispersal_to = tmp_table$clado_dispersal_to
			
			events_df = cbind(clado_events_table$time_bp[TF], events_df, clado_dispersal_from, clado_events_table$clado_dispersal_to[TF])
			names(events_df) = c("time_bp", "ancrange", "decrange", "clado_dispersal_from", "clado_dispersal_to")
			
			events_df2 = rbind(events_df2, events_df)
			names(events_df2) = c("time_bp", "ancrange", "decrange", "clado_dispersal_from", "clado_dispersal_to")
			} # END if (sum(TF) > 0)
		} # # END for (i in 1:areanames)

	

	
	if (is.null(events_df2) == TRUE)
		{
		counts_df = NULL
		return(counts_df)
		} # END if (is.null(events_df2) == TRUE)

	counts_matrix = matrix(0, nrow=length(areanames), ncol=length(areanames))
	for (i in 1:length(areanames))
		{
		for (j in 1:length(areanames))
			{
			TF1 = events_df2$clado_dispersal_from == areanames[i]
			TF2 = events_df2$clado_dispersal_to == areanames[j]
			TF = (TF1 + TF2) == 2
			if (sum(TF) > 0)
				{
				counts_matrix[i,j] = sum(TF)
				} else {
				counts_matrix[i,j] = 0
				} # END if (sum(TF) > 0)
			} # END for (j in 1:length(areanames))
		} # END for (i in 1:length(areanames))

	counts_df = adf2(counts_matrix)
	names(counts_df) = actual_names
	row.names(counts_df) = actual_names
	
	return(counts_df)
	} # END count_clado_dispersal_events <- function(clado_events_table, areanames, actual_names=areanames)








# No longer assumes equal weights of ancestral areas, when ancestor
# occupies more than one area; assumes you have used dmat and 
# simulate_source_areas_ana_clado to add source area.
count_ana_dispersal_events <- function(ana_events_table, areanames, actual_names=areanames)
	{
	defaults='
	areanames = c("A", "B", "C", "D", "E", "F", "G")
	actual_names = c("SAm", "CAm", "Car", "NAm", "AF", "EU", "OZ")
	ana_events_table = DECJ_ana_events_tables[[1]]
	'

	if ("ana_dispersal_from" %in% names(ana_events_table) == FALSE)
		{
		txt = paste0("STOP ERROR in count_ana_dispersal_events(): input 'ana_events_table' must have a column 'ana_dispersal_from'. To add this column, run simulate_source_area_ana() or simulate_source_areas_ana_clado().")
		cat("\n\n")
		cat(txt)
		cat("\n\n")
		stop(txt)
		} # END error check
	
	TF = nchar(areanames) > 1
	if (any(TF))
		{
		txt = paste0("STOP ERROR in count_ana_dispersal_events(): all 'areanames' must be only 1 character long for the counting of events in stochastic mapping to work. You have violations for these 'areanames': ", paste(areanames[TF], sep=", "), ".")
		cat("\n\n")
		cat(txt)
		cat("\n\n")
		stop(txt)
		} # END if (any(TF))

	# Built up the transition matrix
	events_df2 = NULL
	extirp_df2 = NULL
	for (i in 1:length(areanames))
		{
		areaname = areanames[i]
		
		# Count the dispersal events
		TF = ana_events_table$dispersal_to == areaname
		TF[is.na(TF)] = FALSE
		tmp_table = ana_events_table[TF, ]
	
		# Get the ancestor range
		#nchars = nchar(tmp_table$current_rangetxt)
		time_bp = tmp_table$abs_event_time
		event_type = tmp_table$event_type
		ancrange = tmp_table$current_rangetxt
		decrange = tmp_table$new_rangetxt
		ana_dispersal_from = tmp_table$ana_dispersal_from
		ana_dispersal_to = tmp_table$dispersal_to
		events_df = adf2(cbind(time_bp, event_type, ancrange, decrange, ana_dispersal_from, ana_dispersal_to))
		events_df
		names(events_df) = c("time_bp", "event_type", "ancrange", "decrange", "ana_dispersal_from", "ana_dispersal_to")
		
		events_df2 = rbind(events_df2, events_df)
		
		# Count the extirpation events also!
		TF = ana_events_table$extirpation_from == areaname
		TF[is.na(TF)] = FALSE
		tmp_table = ana_events_table[TF, ]

		time_bp = tmp_table$abs_event_time
		event_type = tmp_table$event_type
		ancrange = tmp_table$current_rangetxt
		decrange = tmp_table$new_rangetxt
		extirpation_from = tmp_table$extirpation_from
		extirp_df = adf2(cbind(time_bp, event_type, ancrange, decrange, extirpation_from))
		names(extirp_df) = c("time_bp", "event_type", "ancrange", "decrange", "extirpation_from")
		extirp_df2 = rbind(extirp_df2, extirp_df)
		} # for (i in 1:areanames)
	
	if ( (is.null(events_df2) == TRUE) && (is.null(extirp_df2) == TRUE) )
		{
		ana_disp_counts = NULL
		ana_disp_counts$e_counts_df = NULL
		ana_disp_counts$a_counts_df = NULL
		ana_disp_counts$d_counts_df = NULL
		ana_disp_counts$counts_df = NULL
		return(ana_disp_counts)
		} # END if ( (is.null(events_df2) == TRUE) && (is.null(extirp_df2) == TRUE) )
	
	names(events_df2) = c("time_bp", "event_type", "ancrange", "decrange", "ana_dispersal_from", "ana_dispersal_to")
	events_df2
	names(extirp_df2) = c("time_bp", "event_type", "ancrange", "decrange", "extirpation_from")
	extirp_df2

	a_counts_matrix = matrix(0, nrow=length(areanames), ncol=length(areanames))
	d_counts_matrix = matrix(0, nrow=length(areanames), ncol=length(areanames))
	counts_matrix = matrix(0, nrow=length(areanames), ncol=length(areanames))
	e_counts_list = matrix(0, nrow=1, ncol=length(areanames))
	for (i in 1:length(areanames))
		{
		# Dispersal matrices
		for (j in 1:length(areanames))
			{
			TF1 = events_df2$ana_dispersal_from == areanames[i]
			TF2 = events_df2$ana_dispersal_to == areanames[j]
			TF = (TF1 + TF2) == 2
			if (sum(TF) > 0)
				{
				aTF = events_df2$event_type[TF] == "a"
				dTF = events_df2$event_type[TF] == "d"
				
				a_counts_matrix[i,j] = sum(aTF)
				d_counts_matrix[i,j] = sum(dTF)
				counts_matrix[i,j] = sum(TF)
				} else {
				a_counts_matrix[i,j] = 0
				d_counts_matrix[i,j] = 0
				counts_matrix[i,j] = 0
				} # END if (sum(TF) > 0)
			} # END for (j in 1:length(areanames))

		# Extinction/extirpation vector
		TF = events_df2$extirpation_from == areanames[i]
		if (sum(TF) > 0)
			{
			e_counts_list[,i] = sum(TF)
			} else {
			e_counts_list[,i] = 0
			}# END if (sum(TF) > 0)
		} # END for (i in 1:length(areanames))

	counts_df = adf2(counts_matrix)
	names(counts_df) = actual_names
	row.names(counts_df) = actual_names

	a_counts_df = adf2(a_counts_matrix)
	names(a_counts_df) = actual_names
	row.names(a_counts_df) = actual_names

	d_counts_df = adf2(d_counts_matrix)
	names(d_counts_df) = actual_names
	row.names(d_counts_df) = actual_names

	e_counts_df = adf2(e_counts_list)
	names(e_counts_list) = actual_names

	
	ana_disp_counts = NULL
	ana_disp_counts$e_counts_df = e_counts_df
	ana_disp_counts$a_counts_df = a_counts_df
	ana_disp_counts$d_counts_df = d_counts_df
	ana_disp_counts$counts_df = counts_df

	extract = '
	e_counts_df = ana_disp_counts$e_counts_df
	a_counts_df = ana_disp_counts$a_counts_df
	d_counts_df = ana_disp_counts$d_counts_df
	counts_df = ana_disp_counts$counts_df
	'
	
	return(ana_disp_counts)
	} # END count_ana_dispersal_events <- function(ana_events_table, areanames, actual_names=areanames)


# Convert e.g. AB->B,A to AB->A,B, since these are identical
uniquify_clado_events <- function(clado_events_table)
	{
	# Fix AB -> B,A

	# Eliminate the blank internal nodes
	TF = clado_events_table$clado_event_type != ""
	TF[is.na(clado_events_table$clado_event_type)] = FALSE
	clado_events_table = clado_events_table[TF,]
	clado_event_txt = clado_events_table$clado_event_txt
	dim(clado_events_table)
	
	# If no events in here, just return the original
	if (dim(clado_events_table)[1] == 0)
		{
		return(clado_events_table)
		}
	
	
	# Lose something here?
	x = sapply(X=clado_event_txt, FUN=strsplit, split="->")
	# lx = unlist(lapply(X=x, FUN=length))
	# zero_TF = (lx == 0)
	# clado_events_table[zero_TF,]
	# Eliminate events with 0 hits first
	if (length(clado_event_txt) < 1)
		{
		return(clado_events_table)
		}

	events_df = as.data.frame(matrix(unlist(x), ncol=2, byrow=TRUE), stringsAsFactors=FALSE)
	names(events_df) = c("ancrange", "decrange")
	dim(events_df)

	LRdf = as.data.frame(matrix(unlist(sapply(X=events_df$decrange, FUN=strsplit, split=",")), ncol=2, byrow=TRUE), stringsAsFactors=FALSE)
	names(LRdf) = c("L", "R")
	dim(LRdf)

	# Switch if left is bigger
	# Deal with the case of 1 event, which produces 2,1 matrix instead of nrowsx2 matrix
	LRdf_temp = sapply(X=LRdf, FUN=nchar)
	if (is.null(dim(LRdf_temp)) == TRUE)
		{
		LRdf_temp = matrix(data=LRdf_temp, nrow=1, byrow=TRUE)
		}
	
	nchar_LRdf = as.data.frame(LRdf_temp, stringsAsFactors=FALSE)
	names(nchar_LRdf) = c("L", "R")
	class(nchar_LRdf$L) = "numeric"
	class(nchar_LRdf$R) = "numeric"
	L_bigger_TF = nchar_LRdf$L > nchar_LRdf$R
	new_L = LRdf$R[L_bigger_TF]
	new_R = LRdf$L[L_bigger_TF]
	LRdf$L[L_bigger_TF] = new_L
	LRdf$R[L_bigger_TF] = new_R
	LRdf

	# In cases where left and right are equal, sort
	LRdf_temp = sapply(X=LRdf, FUN=nchar)
	if (is.null(dim(LRdf_temp)) == TRUE)
		{
		LRdf_temp = matrix(data=LRdf_temp, nrow=1, byrow=TRUE)
		}
	
	nchar_LRdf = as.data.frame(LRdf_temp, stringsAsFactors=FALSE)
	names(nchar_LRdf) = c("L", "R")
	class(nchar_LRdf$L) = "numeric"
	class(nchar_LRdf$R) = "numeric"
	LR_equal_TF = nchar_LRdf$L == nchar_LRdf$R
	order_df = as.data.frame(matrix(apply(X=LRdf, MARGIN=1, FUN=order), ncol=2, byrow=TRUE))
	names(order_df) = c("L", "R")
	L_bigger_TF = order_df$L > order_df$R
	switch_TF = (L_bigger_TF + LR_equal_TF) == 2
	new_L = LRdf$R[switch_TF]
	new_R = LRdf$L[switch_TF]
	LRdf$L[switch_TF] = new_L
	LRdf$R[switch_TF] = new_R
	LRdf

	# Merge back
	LRtxt = apply(X=LRdf, MARGIN=1, paste, sep="", collapse=",")
	event_txt = apply(X=cbind(events_df$ancrange, LRtxt), MARGIN=1, paste, sep="", collapse="->")
	event_txt
	clado_events_table$clado_event_txt = event_txt
	
	return(clado_events_table)
	}





#######################################################
# Get huge events tables from saved stochastic maps
# Rdata files
#######################################################
get_huge_events_tables_from_BSM_Rdata <- function(BSM_tables_dir, BSM_fn_base, model_name="", suffix="")
	{
	defaults='
	# Working directory
	wd = "/drives/Dropbox/_njm/__packages/BioGeoBEARS_setup/_basic_example/"
	setwd(wd)

	# Biogeographical stochastic mappings (BSMs) saved
	BSM_tables_dir = "BSM_tables_M0_v1"
	BSM_fn_base = "BSM_M0_"
	model_name = "DEC"
	suffix = ".Rdata"
	' # END defaults

	# Necessary setting to avoid getting numbers etc. in the stochastic mapping output
	options(stringsAsFactors=FALSE)
	
	
	# Get the list of files
	searchstring = paste(BSM_fn_base, model_name, "_", sep="")#*.Rdata", sep="")
	tmpfns = list.files(path=BSM_tables_dir, pattern=searchstring)
	TF = grepl(pattern=suffix, x=tmpfns)
	BSM_fns = tmpfns[TF]
	BSM_fns
	num_BSM_fns = length(BSM_fns)
	
	# Huge tables, and indexes each BSM
	huge_cladogenetic_events_table = NULL
	huge_anagenetic_events_table = NULL
	ic = NULL
	ia = NULL


	# Get the output
	for (i in 1:num_BSM_fns)
		{
		BSM_fn = slashslash(paste(addslash(BSM_tables_dir), BSM_fns[i], sep=""))
	
		cat("\nExtracting file #", i, "/", num_BSM_fns, ": ", BSM_fn, "...", sep="")
	
		# Loads to BSM_results
		load(BSM_fn)
	
		# Is it stratified?
		if (class(BSM_results) == "data.frame")
			{
			# Non-stratified. Extract cladogenetic events table, 
			# calculate anagenetic events table
			BSM_strat_TF = FALSE
			stochastic_mapping_results = BSM_results
			master_table_cladogenetic_events = stochastic_mapping_results

			# Calculate anagenetic events table
			events_table = events_txt_list_into_events_table(events_txt_list=master_table_cladogenetic_events$anagenetic_events_txt_below_node, trtable=master_table_cladogenetic_events, recalc_abs_ages=TRUE)
			events_table
			table_w_anagenetic_events = events_table
		
			} else {
			# Stratified. Extract cladogenetic and anagenetic events tables
			BSM_strat_TF = TRUE
			stochastic_mapping_results = BSM_results
			master_table_cladogenetic_events = stochastic_mapping_results$master_table_cladogenetic_events
			table_w_anagenetic_events = stochastic_mapping_results$table_w_anagenetic_events
			} # END if (class(BSM_results) == "data.frame")
	
		# Add to huge tables
		huge_cladogenetic_events_table = rbind(huge_cladogenetic_events_table, master_table_cladogenetic_events)
		huge_anagenetic_events_table = rbind(huge_anagenetic_events_table, table_w_anagenetic_events)
		
		# Add to the ic, ia indexes
		if (is.null(master_table_cladogenetic_events) == FALSE)
			{
			tmp_ic = rep(i, nrow(master_table_cladogenetic_events))
			ic = c(ic, tmp_ic)
			} # END if (!is.null(master_table_cladogenetic_events))

		if (is.null(table_w_anagenetic_events) == FALSE)
			{
			tmp_ia = rep(i, nrow(table_w_anagenetic_events))
			ia = c(ia, tmp_ia)
			} # END if (!is.null(table_w_anagenetic_events))
		
		} # END
	
	# Add the indexes
	i = ic
	huge_cladogenetic_events_table = cbind(i, huge_cladogenetic_events_table)
	i = ia
	huge_anagenetic_events_table = cbind(i, huge_anagenetic_events_table)
	i = 0
	
	# Store as list of 2 tables
	huge_tables = NULL
	huge_tables$huge_cladogenetic_events_table = huge_cladogenetic_events_table
	huge_tables$huge_anagenetic_events_table = huge_anagenetic_events_table
	
	# To extract
	#huge_cladogenetic_events_table = huge_tables$huge_cladogenetic_events_table
	#huge_anagenetic_events_table = huge_tables$huge_anagenetic_events_table
	
	return(huge_tables)
	} # END get_huge_events_tables_from_BSM_Rdata











#######################################################
# Get huge events tables from saved stochastic maps
# Rdata files
#######################################################
get_huge_events_tables_from_clado_ana_events_tables <- function(clado_events_tables, ana_events_tables, model_name="")
	{
	defaults='
	# Working directory
	wd = "/drives/Dropbox/_njm/__packages/BioGeoBEARS_setup/_basic_example/"
	setwd(wd)

	# Biogeographical stochastic mappings (BSMs) saved
	BSM_tables_dir = "BSM_tables_M0_v1"
	BSM_fn_base = "BSM_M0_"
	model_name = "DEC"
	suffix = ".Rdata"
	' # END defaults

	# Necessary setting to avoid getting numbers etc. in the stochastic mapping output
	options(stringsAsFactors=FALSE)
	
	
	# Get the list of files
	num_BSMs = length(clado_events_tables)
	
	# Huge tables, and indexes each BSM
	huge_cladogenetic_events_table = NULL
	huge_anagenetic_events_table = NULL
	ic = NULL
	ia = NULL


	# Get the output
	for (i in 1:num_BSMs)
		{
		# Add to huge tables
		huge_cladogenetic_events_table = rbind(huge_cladogenetic_events_table, clado_events_tables[[i]])
		huge_anagenetic_events_table = rbind(huge_anagenetic_events_table, ana_events_tables[[i]])
		
		# Add to the ic, ia indexes
		if (is.null(clado_events_tables[[i]]) == FALSE)
			{
			tmp_ic = rep(i, nrow(clado_events_tables[[i]]))
			ic = c(ic, tmp_ic)
			} # END if (!is.null(master_table_cladogenetic_events))

		if (is.null(ana_events_tables[[i]]) == FALSE)
			{
			tmp_ia = rep(i, nrow(ana_events_tables[[i]]))
			ia = c(ia, tmp_ia)
			} # END if (!is.null(table_w_anagenetic_events))
		
		} # END
	
	# Add the indexes
	i = ic
	huge_cladogenetic_events_table = cbind(i, huge_cladogenetic_events_table)
	i = ia
	huge_anagenetic_events_table = cbind(i, huge_anagenetic_events_table)
	i = 0
	
	# Store as list of 2 tables
	huge_tables = NULL
	huge_tables$huge_cladogenetic_events_table = huge_cladogenetic_events_table
	huge_tables$huge_anagenetic_events_table = huge_anagenetic_events_table
	
	# To extract
	#huge_cladogenetic_events_table = huge_tables$huge_cladogenetic_events_table
	#huge_anagenetic_events_table = huge_tables$huge_anagenetic_events_table
	
	return(huge_tables)
	} # END get_huge_events_tables_from_BSM_Rdata







count_events_huge_tables <- function(huge_tables, timeperiod=NULL, area_abbr_to=NULL, area_abbr_from=NULL, BSM_i=NULL)
	{
	# Extract
	huge_cladogenetic_events_table = huge_tables$huge_cladogenetic_events_table
	huge_anagenetic_events_table = huge_tables$huge_anagenetic_events_table
	
	# If desired, count only within a timeperiod / stratum / time bin
	if (is.null(timeperiod) == FALSE)
		{
		# Error check
		if (length(timeperiod) != 2)
			{
			errortxt = paste("\n\nERROR in count_events_huge_tables(): timeperiod must either be NULL or be 2 numbers (min and max of time bin, in time before present).\n\n", sep="")
			cat(errortxt)
			stop(errortxt)
			}
		
		minT = min(timeperiod)
		maxT = max(timeperiod)
		
		# Subset cladogenetic events
		times_GT_min_TF = huge_cladogenetic_events_table$time_bp >= minT
		times_LT_max_TF = huge_cladogenetic_events_table$time_bp < maxT
		TF = (times_GT_min_TF + times_LT_max_TF) == 2
		huge_cladogenetic_events_table = huge_cladogenetic_events_table[TF,]
		
		# Subset anagenetic events
		times_GT_min_TF = huge_anagenetic_events_table$abs_event_time >= minT
		times_LT_max_TF = huge_anagenetic_events_table$abs_event_time < maxT
		TF = (times_GT_min_TF + times_LT_max_TF) == 2
		huge_anagenetic_events_table = huge_anagenetic_events_table[TF,]
		} # END if (is.null(timeperiod) == FALSE)


	# If desired, count events only dispersing to the area in area_abbr_to
	# (These are just d, j events)
	if (is.null(area_abbr_to) == FALSE)
		{
		# Subset cladogenetic events
		TF = huge_cladogenetic_events_table$clado_dispersal_to %in% area_abbr_to
		huge_cladogenetic_events_table = huge_cladogenetic_events_table[TF,]

		# Subset anagenetic events
		TF = huge_anagenetic_events_table$dispersal_to %in% area_abbr_to
		huge_anagenetic_events_table = huge_anagenetic_events_table[TF,]
		
		} # END if (is.null(area_abbr_to) == FALSE)

	# If desired, count events only dispersing to the area in area_abbr_from
	# (These are just d, j events)
	if (is.null(area_abbr_from) == FALSE)
		{
		# Subset cladogenetic events by the ancestor state before cladogenesis
		# (first, get clado_event_from events)
		tmpsplit1 <- function(x)
			{
			clado_event_from = strsplit(x=x, split="->")[[1]][1]
			return(clado_event_from)
			}
		
		clado_event_from = sapply(X=huge_cladogenetic_events_table$clado_event_txt, FUN=tmpsplit1) 
		names(clado_event_from)=NULL
		clado_event_from[is.na(clado_event_from)] = ""
		clado_event_from
		
		TF = clado_event_from %in% area_abbr_from
		huge_cladogenetic_events_table = huge_cladogenetic_events_table[TF,]

		# Subset anagenetic events
		ana_event_from = sapply(X=huge_anagenetic_events_table$event_txt, FUN=tmpsplit1) 
		names(ana_event_from)=NULL
		ana_event_from[is.na(ana_event_from)] = ""
		ana_event_from
		
		TF = ana_event_from %in% area_abbr_from
		huge_anagenetic_events_table = huge_anagenetic_events_table[TF,]
		
		} # END if (is.null(area_abbr_from) == FALSE)

	# If desired, count events only for a certain BSM realization index #i
	if (is.null(BSM_i) == FALSE)
		{
		# Subset cladogenetic events
		TF = huge_cladogenetic_events_table$i %in% BSM_i
		huge_cladogenetic_events_table = huge_cladogenetic_events_table[TF,]

		# Subset anagenetic events
		TF = huge_anagenetic_events_table$i %in% BSM_i
		huge_anagenetic_events_table = huge_anagenetic_events_table[TF,]
		}
	
	
	
	# Cladogenetic events
	# y events
	TF = grepl(pattern="\\(y\\)", x=huge_cladogenetic_events_table$clado_event_type)
	ycount = sum(TF, na.rm=TRUE)

	# s events
	TF = grepl(pattern="\\(s\\)", x=huge_cladogenetic_events_table$clado_event_type)
	scount = sum(TF, na.rm=TRUE)

	# v events
	TF = grepl(pattern="\\(v\\)", x=huge_cladogenetic_events_table$clado_event_type)
	vcount = sum(TF, na.rm=TRUE)

	# j events
	TF = grepl(pattern="\\(j\\)", x=huge_cladogenetic_events_table$clado_event_type)
	jcount = sum(TF, na.rm=TRUE)
	
	# Anagenetic events
	# d events
	TF = huge_anagenetic_events_table$event_type == "d"
	dcount = sum(TF, na.rm=TRUE)

	# e events
	TF = huge_anagenetic_events_table$event_type == "e"
	ecount = sum(TF, na.rm=TRUE)

	# a events
	TF = huge_anagenetic_events_table$event_type == "a"
	acount = sum(TF, na.rm=TRUE)
	
	events_count = matrix(data=c(dcount, ecount, acount, ycount, scount, vcount, jcount), nrow=1)
	events_count = as.data.frame(events_count)
	names(events_count) = c("d", "e", "a", "y", "s", "v", "j")
	events_count
		
	return(events_count)
	} # END count_events_huge_tables <- function(huge_tables, timeperiod=NULL, area_abbr_to=NULL, area_abbr_from=NULL, BSM_i=NULL)



# Copy of count_ana_clado_events (for back-compatibility; it DOES do jumps and anagenesis)
count_clado_events_nonjump <- function(clado_events_tables, ana_events_tables, areanames, actual_names)
	{
	count_ana_clado_events(clado_events_tables, ana_events_tables, areanames, actual_names)
	} # END count_clado_events_nonjump <- function(clado_events_tables, ana_events_tables, areanames, actual_names)






#######################################################
# Count anagenetic and cladogenetic events across
# a collection of BSMs
#######################################################

count_ana_clado_events <- function(clado_events_tables, ana_events_tables, areanames, actual_names, timeperiod=NULL)
	{
	defaults='
	areanames = c("A", "B", "C", "D", "E", "F", "G")
	actual_names = c("SAm", "CAm", "Car", "NAm", "AF", "EU", "OZ")
	clado_events_table = clado_events_tables[[1]]
	'
	
	TF = nchar(areanames) > 1
	if (any(TF))
		{
		txt = paste0("STOP ERROR: all 'areanames' must be only 1 character long for the counting of events in stochastic mapping to work. You have violations for these 'areanames': ", paste(areanames[TF], sep=", "), ".")
		cat("\n\n")
		cat(txt)
		cat("\n\n")
		stop(txt)
		} # END if (any(TF))


	# Error check
	if (length(areanames) != length(actual_names))
		{
		txt = paste0("STOP ERROR in count_ana_clado_events(): areanames and actual_names must have the same length. Instead, length(areanames)=", length(areanames), ", length(actual_names)=", length(actual_names), ".")
		cat("\n\n")
		cat(txt)
		cat("\n\n")
		stop(txt)
		} # END if (length(areanames) != length(actual_names))


	# If desired, count only within a timeperiod / stratum / time bin
	if (is.null(timeperiod) == FALSE)
		{
		# Error check
		if (length(timeperiod) != 2)
			{
			errortxt = paste("\n\nERROR in count_events_huge_tables(): timeperiod must either be NULL or be 2 numbers (min and max of time bin, in time before present).\n\n", sep="")
			cat(errortxt)
			stop(errortxt)
			}
		
		minT = min(timeperiod)
		maxT = max(timeperiod)
		
		original_clado_events_tables = clado_events_tables
		original_ana_events_tables = ana_events_tables
		
		for (i in 1:length(original_clado_events_tables))
			{
			clado_events_table = clado_events_tables[[i]]
			ana_events_table = ana_events_tables[[i]]
			
			TF1 = clado_events_table$time_bp < maxT
			TF2 = clado_events_table$time_bp >= minT
			TF = (TF1 + TF2) == 2
			clado_events_table = clado_events_table[TF,]

			TF1 = ana_events_table$abs_event_time < maxT
			TF2 = ana_events_table$abs_event_time >= minT
			TF = (TF1 + TF2) == 2
			ana_events_table = ana_events_table[TF,]
			
			clado_events_tables[[i]] = clado_events_table
			ana_events_tables[[i]] = ana_events_table
			}
		} else {
		minT = 0
		maxT = 1e50
		}

	
	# Initialize
	dimdata = c(length(areanames), length(areanames))
	dims = c(dimdata, length(clado_events_tables))
	founder_counts_cube = array(NA, dim=dims)
	founder_totals_list = rep(NA, length(clado_events_tables))

	dimdata = c(length(areanames), length(areanames))
	dims = c(dimdata, length(ana_events_tables))
	ana_counts_cube = array(NA, dim=dims)
	ana_totals_list = rep(NA, length(ana_events_tables))
	a_counts_cube = array(NA, dim=dims)
	a_totals_list = rep(NA, length(ana_events_tables))
	d_counts_cube = array(NA, dim=dims)
	d_totals_list = rep(NA, length(ana_events_tables))

	e_counts_rectangle = matrix(NA, nrow=length(ana_events_tables), ncol=length(areanames))
	e_totals_list = rep(NA, length(ana_events_tables))

	
	vicariance_clado_events_table_bigTable = NULL
	subsetSymp_clado_events_table_bigTable = NULL
	sympatry_clado_events_table_bigTable = NULL

	vicariance_totals_list = rep(NA, length(clado_events_tables))
	subsetSymp_totals_list = rep(NA, length(clado_events_tables))
	sympatry_totals_list = rep(NA, length(clado_events_tables))


	# Make a list of each non-jump cladogenesis event
	for (i in 1:length(clado_events_tables))
		{
		# Cladogenesis events for this i
		clado_events_table = clado_events_tables[[i]]

		# Convert e.g. AB->B,A to AB->A,B, since these are identical
		clado_events_table = uniquify_clado_events(clado_events_table)
		
		# Count vicariance events
		vicariance_clado_events_table = clado_events_table[clado_events_table$clado_event_type == "vicariance (v)",]
		vicariance_clado_events_table
		vic_table = table(vicariance_clado_events_table$clado_event_txt)
		names_vic_table = names(vic_table)
		vic_table
	
		if (length(vic_table) > 0)
			{
			vic_table2 = cbind(i, names(vic_table), vic_table)
			} else {
			vic_table2 = NULL
			} # END if (length(vic_table) > 0)
	
		vicariance_clado_events_table_bigTable = rbind(vicariance_clado_events_table_bigTable, vic_table2)
	
		# Count subset sympatry events
		subsetSymp_clado_events_table = clado_events_table[clado_events_table$clado_event_type == "subset (s)",]
		subsetSymp_clado_events_table
		sub_table = table(subsetSymp_clado_events_table$clado_event_txt)
		names_sub_table = names(sub_table)
		sub_table

		if (length(sub_table) > 0)
			{
			sub_table2 = cbind(i, names(sub_table), sub_table)
			} else {
			sub_table2 = NULL
			} # END if (length(sub_table) > 0)

		subsetSymp_clado_events_table_bigTable = rbind(subsetSymp_clado_events_table_bigTable, sub_table2)

		# Count sympatry events
		sympatry_clado_events_table = clado_events_table[clado_events_table$clado_event_type == "sympatry (y)",]
		sympatry_clado_events_table
		sym_table = table(sympatry_clado_events_table$clado_event_txt)
		names_sym_table = names(sym_table)
		sym_table
	
		if (length(sym_table) > 0)
			{
			sym_table2 = cbind(i, names(sym_table), sym_table)
			} else {
			sym_table2 = NULL
			} # END if (length(sym_table2) > 0)
	
		sympatry_clado_events_table_bigTable = rbind(sympatry_clado_events_table_bigTable, sym_table2)

		} # END for (i in 1:length(clado_events_tables))

	
	# Defaults
	unique_vic_events = NULL
	unique_sub_events = NULL
	unique_sym_events = NULL

	# Populate if non-null
	if (is.null(vicariance_clado_events_table_bigTable) == FALSE)
		{
		unique_vic_events = sort(unique(vicariance_clado_events_table_bigTable[,2]))
		unique_vic_events
		} # END if (is.null(vicariance_clado_events_table_bigTable) == FALSE)
	if (is.null(subsetSymp_clado_events_table_bigTable) == FALSE)
		{
		unique_sub_events = sort(unique(subsetSymp_clado_events_table_bigTable[,2]))
		unique_sub_events
		} # END if (is.null(vicariance_clado_events_table_bigTable) == FALSE)
	if (is.null(sympatry_clado_events_table_bigTable) == FALSE)
		{
		unique_sym_events = sort(unique(sympatry_clado_events_table_bigTable[,2]))
		unique_sym_events
		} # END if (is.null(vicariance_clado_events_table_bigTable) == FALSE)

	unique_vic_counts = matrix(NA, nrow=length(clado_events_tables), ncol=length(unique_vic_events))
	unique_sub_counts = matrix(NA, nrow=length(clado_events_tables), ncol=length(unique_sub_events))
	unique_sym_counts = matrix(NA, nrow=length(clado_events_tables), ncol=length(unique_sym_events))

	cat("\ncount_ana_clado_events() is counting events over Biogeographical Stochastic Map (BSM) #:\n")
	for (i in 1:length(clado_events_tables))
	#for (i in 1:26)
		{
		cat(i, " ", sep="")
		
		# Extract the cladogenetic and anagenetic tables for this BSM
		clado_events_table = clado_events_tables[[i]]
		ana_events_table = ana_events_tables[[i]]
	
		# Count founder-events
		founder_counts_df = count_clado_dispersal_events(clado_events_table, areanames=areanames, actual_names=actual_names)
		if (is.null(founder_counts_df) == TRUE)
			{
			founder_counts_df = matrix(data=0, nrow=length(areanames), ncol=length(areanames))
			} # END if (is.null(founder_counts_df) == TRUE)

		# Fill in the jump-dispersal counts
		founder_counts_cube[,,i] = as.matrix(founder_counts_df)
		founder_totals_list[i] = sum(founder_counts_df)
		
		# Count range-expansion events
#		if ( (length(ana_events_table) > 1) || (is.na(ana_events_table)==FALSE) )
		if ( length(ana_events_table) > 1 )
			{
			# Parse out the different sorts of anagenetic dispersal
			#ana_counts_df = 
			ana_disp_counts = count_ana_dispersal_events(ana_events_table, areanames=areanames, actual_names=actual_names)
			
			e_counts_df = ana_disp_counts$e_counts_df
			a_counts_df = ana_disp_counts$a_counts_df
			d_counts_df = ana_disp_counts$d_counts_df
			ana_counts_df = ana_disp_counts$counts_df
			
			#round(ana_counts_df, 1)
			} # END if ( (length(ana_events_table) > 1) || (is.na(ana_events_table)==FALSE) )
	
#		if ( (length(ana_events_table) > 1) || (is.na(ana_events_table)==FALSE) )
		if ( length(ana_events_table) > 1 )
			{
			if (!is.null(ana_counts_df))
				{
				ana_counts_cube[,,i] = as.matrix(ana_counts_df)
				ana_totals_list[i] = sum(ana_counts_df)
				} else {
				ana_counts_cube[,,i] = 0
				ana_totals_list[i] = 0
				} # END if (!is.null(ana_counts_df))

			if (!is.null(a_counts_df))
				{
				a_counts_cube[,,i] = as.matrix(a_counts_df)
				a_totals_list[i] = sum(a_counts_df)
				} else {
				a_counts_cube[,,i] = 0
				a_totals_list[i] = 0
				} # END if (!is.null(a_counts_df))

			if (!is.null(d_counts_df))
				{
				d_counts_cube[,,i] = as.matrix(d_counts_df)
				d_totals_list[i] = sum(d_counts_df)
				} else {
				d_counts_cube[,,i] = 0
				d_totals_list[i] = 0
				} # END if (!is.null(d_counts_df))

			if (!is.null(e_counts_df))
				{
				e_counts_rectangle[i,] = as.matrix(e_counts_df)
				e_totals_list[i] = sum(e_counts_df)
				} else {
				e_counts_rectangle[i,] = 0
				e_totals_list[i] = 0
				} # END if (!is.null(e_counts_df))

			} # END if ( (length(ana_events_table) > 1) || (is.na(ana_events_table)==FALSE) )
	
		################################################################
		# Convert e.g. AB->B,A to AB->A,B, since these are identical
		################################################################
		clado_events_table = uniquify_clado_events(clado_events_table)

		# Count vicariance events
		vicariance_clado_events_table = clado_events_table[clado_events_table$clado_event_type == "vicariance (v)",]
		vicariance_clado_events_table
		if (nrow(vicariance_clado_events_table) > 0)
			{
			vic_table = table(vicariance_clado_events_table$clado_event_txt)
			names_vic_table = names(vic_table)
			#vic_table

			#ordernums = match(x=names_vic_table, table=unique_vic_events)
			#ordernums = ordernums[is.na(ordernums) == FALSE]
			event_in_uniqs_TF = unique_vic_events %in% names_vic_table
			unique_vic_counts[i, event_in_uniqs_TF] = vic_table
			vicariance_totals_list[i] = sum(unique_vic_counts[i,event_in_uniqs_TF])
			} else {
			vic_table = NA
			vicariance_totals_list[i] = 0
			}
		
		# Count subset sympatry events
		subsetSymp_clado_events_table = clado_events_table[clado_events_table$clado_event_type == "subset (s)",]
		subsetSymp_clado_events_table
		if (nrow(subsetSymp_clado_events_table) > 0)
			{
			sub_table = table(subsetSymp_clado_events_table$clado_event_txt)
			names_sub_table = names(sub_table)
			#ordernums = match(x=names_sub_table, table=unique_sub_events)
			#ordernums = ordernums[is.na(ordernums) == FALSE]
			event_in_uniqs_TF = unique_sub_events %in% names_sub_table
			unique_sub_counts[i, event_in_uniqs_TF] = sub_table
			subsetSymp_totals_list[i] = sum(unique_sub_counts[i,event_in_uniqs_TF])
			} else {
			sub_table = NA
			subsetSymp_totals_list[i] = 0
			}
		sub_table
	
		# Count sympatry events
		sympatry_clado_events_table = clado_events_table[clado_events_table$clado_event_type == "sympatry (y)",]
		sympatry_clado_events_table
		if (nrow(sympatry_clado_events_table) > 0)
			{
			sym_table = table(sympatry_clado_events_table$clado_event_txt)
			names_sym_table = names(sym_table)
			#sym_table
	
			#ordernums = match(x=names_sym_table, table=unique_sym_events)
			#ordernums = ordernums[is.na(ordernums) == FALSE]
			event_in_uniqs_TF = unique_sym_events %in% names_sym_table
			unique_sym_counts[i, event_in_uniqs_TF] = sym_table
			sympatry_totals_list[i] = sum(unique_sym_counts[i,event_in_uniqs_TF])
			} else {
			sym_table = NA
			sympatry_totals_list[i] = 0
			}
		} # END for (i in 1:length(clado_events_tables))


	unique_vic_counts = adf2(unique_vic_counts)
	names(unique_vic_counts) = unique_vic_events
	if (ncol(unique_vic_counts) > 0)
		{
		unique_vic_counts[is.na(unique_vic_counts)] = 0
		} # END if (nrow(unique_vic_events) > 0)

	unique_sub_counts = adf2(unique_sub_counts)
	names(unique_sub_counts) = unique_sub_events
	

	if (ncol(unique_sub_counts) > 0)
		{
		unique_sub_counts[is.na(unique_sub_counts)] = 0
		} # END if (nrow(unique_sub_counts) > 0)

	unique_sym_counts = adf2(unique_sym_counts)
	names(unique_sym_counts) = unique_sym_events
	if (ncol(unique_sym_counts) > 0)
		{
		unique_sym_counts[is.na(unique_sym_counts)] = 0
		} # END if (nrow(unique_sym_counts) > 0)

	head(unique_vic_counts)
	head(unique_sub_counts)
	head(unique_sym_counts)

	round(apply(X=unique_vic_counts, MARGIN=2, FUN=mean), 1)
	round(apply(X=unique_vic_counts, MARGIN=2, FUN=sd), 1)

	round(apply(X=unique_sub_counts, MARGIN=2, FUN=mean), 1)
	round(apply(X=unique_sub_counts, MARGIN=2, FUN=sd), 1)

	round(apply(X=unique_sym_counts, MARGIN=2, FUN=mean), 1)
	round(apply(X=unique_sym_counts, MARGIN=2, FUN=sd), 1)

	# Save the cladogenesis events
	vic_counts_means = apply(X=unique_vic_counts, MARGIN=2, FUN=mean)
	vic_counts_sds = apply(X=unique_vic_counts, MARGIN=2, FUN=sd)
	vic_counts_sums = apply(X=unique_vic_counts, MARGIN=2, FUN=sum)

	sub_counts_means = apply(X=unique_sub_counts, MARGIN=2, FUN=mean)
	sub_counts_sds = apply(X=unique_sub_counts, MARGIN=2, FUN=sd)
	sub_counts_sums = apply(X=unique_sub_counts, MARGIN=2, FUN=sum)

	sym_counts_means = apply(X=unique_sym_counts, MARGIN=2, FUN=mean)
	sym_counts_sds = apply(X=unique_sym_counts, MARGIN=2, FUN=sd)
	sym_counts_sums = apply(X=unique_sym_counts, MARGIN=2, FUN=sum)
	
	
	sum(sym_counts_means)
	sum(sub_counts_means)
	sum(vic_counts_means)


	
	# Save results
	counts_list = NULL
	
	# Fix NAs
	founder_counts_cube[is.na(founder_counts_cube)] = 0
	ana_counts_cube[is.na(ana_counts_cube)] = 0
	a_counts_cube[is.na(a_counts_cube)] = 0
	d_counts_cube[is.na(d_counts_cube)] = 0
	e_counts_rectangle[is.na(e_counts_rectangle)] = 0
	unique_sub_counts[is.na(unique_sub_counts)] = 0
	unique_vic_counts[is.na(unique_vic_counts)] = 0
	unique_sym_counts[is.na(unique_sym_counts)] = 0
	
	founder_totals_list[is.na(founder_totals_list)] = 0
	ana_totals_list[is.na(ana_totals_list)] = 0
	a_totals_list[is.na(a_totals_list)] = 0
	d_totals_list[is.na(d_totals_list)] = 0
	e_totals_list[is.na(e_totals_list)] = 0
	subsetSymp_totals_list[is.na(subsetSymp_totals_list)] = 0
	vicariance_totals_list[is.na(vicariance_totals_list)] = 0
	sympatry_totals_list[is.na(sympatry_totals_list)] = 0

	# Make an ALL dispersal events cube
	all_dispersals_counts_cube = founder_counts_cube
	all_dispersals_totals_list = founder_totals_list
	
	# Make an ALL anagenetic dispersal events cube
	anagenetic_dispersals_counts_cube = d_counts_cube
	anagenetic_dispersals_totals_list = d_totals_list
	
	for (i in 1:length(all_dispersals_totals_list))
		{
		all_dispersals_counts_cube[,,i] = all_dispersals_counts_cube[,,i] + a_counts_cube[,,i] + d_counts_cube[,,i]
		all_dispersals_totals_list[i] = all_dispersals_totals_list[i] + a_totals_list[i] + d_totals_list[i]
		anagenetic_dispersals_counts_cube[,,i] = anagenetic_dispersals_counts_cube[,,i] + a_counts_cube[,,i]
		anagenetic_dispersals_totals_list[i] = anagenetic_dispersals_totals_list[i] + a_totals_list[i]
		}
	
	# Events by category per BSM
	counts_list$all_dispersals_counts_cube = all_dispersals_counts_cube
	counts_list$anagenetic_dispersals_counts_cube = anagenetic_dispersals_counts_cube
	counts_list$founder_counts_cube = founder_counts_cube
	counts_list$ana_counts_cube = ana_counts_cube
	counts_list$a_counts_cube = a_counts_cube
	counts_list$d_counts_cube = d_counts_cube
	counts_list$e_counts_rectangle = e_counts_rectangle
	counts_list$unique_sub_counts = unique_sub_counts
	counts_list$unique_vic_counts = unique_vic_counts
	counts_list$unique_sym_counts = unique_sym_counts
	
	# Totals by category, per BSM
	counts_list$all_dispersals_totals_list = all_dispersals_totals_list
	counts_list$anagenetic_dispersals_totals_list = anagenetic_dispersals_totals_list
	counts_list$founder_totals_list = founder_totals_list
	counts_list$ana_totals_list = ana_totals_list
	counts_list$a_totals_list = a_totals_list
	counts_list$d_totals_list = d_totals_list
	counts_list$e_totals_list = e_totals_list
	counts_list$subsetSymp_totals_list = subsetSymp_totals_list
	counts_list$vicariance_totals_list = vicariance_totals_list
	counts_list$sympatry_totals_list = sympatry_totals_list

	clado_totals_list = founder_totals_list + subsetSymp_totals_list + vicariance_totals_list + sympatry_totals_list
	
	all_totals_list = founder_totals_list + subsetSymp_totals_list + vicariance_totals_list + sympatry_totals_list + ana_totals_list
	counts_list$clado_totals_list = clado_totals_list
	counts_list$all_totals_list = all_totals_list
	clado_totals_list
	all_totals_list
	# Means, SDs, sums across all BSMs
	
	means = c(mean(founder_totals_list), mean(a_totals_list), mean(d_totals_list), mean(e_totals_list), mean(subsetSymp_totals_list), mean(vicariance_totals_list), mean(sympatry_totals_list), mean(all_dispersals_totals_list), mean(anagenetic_dispersals_totals_list), mean(ana_totals_list), mean(clado_totals_list), mean(all_totals_list))
	sds = c(sd(founder_totals_list), sd(a_totals_list), sd(d_totals_list), sd(e_totals_list), sd(subsetSymp_totals_list), sd(vicariance_totals_list), sd(sympatry_totals_list), sd(all_dispersals_totals_list), sd(anagenetic_dispersals_totals_list), sd(ana_totals_list), sd(clado_totals_list), sd(all_totals_list))
	sums = c(sum(founder_totals_list), sum(a_totals_list), sum(d_totals_list), sum(e_totals_list), sum(subsetSymp_totals_list), sum(vicariance_totals_list), sum(sympatry_totals_list), sum(all_dispersals_totals_list), sum(anagenetic_dispersals_totals_list), sum(ana_totals_list), sum(clado_totals_list), sum(all_totals_list))
	
	# Summarize directionality of dispersal
	all_dispersals_counts_fromto_means = adf2(apply(X=all_dispersals_counts_cube, MARGIN=c(1,2), FUN=mean))
	ana_dispersals_counts_fromto_means = adf2(apply(X=anagenetic_dispersals_counts_cube, MARGIN=c(1,2), FUN=mean))
	founder_counts_fromto_means = adf2(apply(X=founder_counts_cube, MARGIN=c(1,2), FUN=mean))
	ana_events_counts_fromto_means = adf2(apply(X=ana_counts_cube, MARGIN=c(1,2), FUN=mean))
	a_counts_fromto_means = adf2(apply(X=a_counts_cube, MARGIN=c(1,2), FUN=mean))
	d_counts_fromto_means = adf2(apply(X=d_counts_cube, MARGIN=c(1,2), FUN=mean))

	all_dispersals_counts_fromto_sds = adf2(apply(X=all_dispersals_counts_cube, MARGIN=c(1,2), FUN=sd))
	ana_dispersals_counts_fromto_sds = adf2(apply(X=anagenetic_dispersals_counts_cube, MARGIN=c(1,2), FUN=sd))
	founder_counts_fromto_sds = adf2(apply(X=founder_counts_cube, MARGIN=c(1,2), FUN=sd))
	ana_counts_fromto_sds = adf2(apply(X=ana_counts_cube, MARGIN=c(1,2), FUN=sd))
	a_counts_fromto_sds = adf2(apply(X=a_counts_cube, MARGIN=c(1,2), FUN=sd))
	d_counts_fromto_sds = adf2(apply(X=d_counts_cube, MARGIN=c(1,2), FUN=sd))
	
	names(all_dispersals_counts_fromto_means) = actual_names
	row.names(all_dispersals_counts_fromto_means) = actual_names
	names(ana_dispersals_counts_fromto_means) = actual_names
	row.names(ana_dispersals_counts_fromto_means) = actual_names
	names(founder_counts_fromto_means) = actual_names
	row.names(founder_counts_fromto_means) = actual_names
	names(a_counts_fromto_means) = actual_names
	row.names(a_counts_fromto_means) = actual_names
	names(d_counts_fromto_means) = actual_names
	row.names(d_counts_fromto_means) = actual_names

	names(all_dispersals_counts_fromto_sds) = actual_names
	row.names(all_dispersals_counts_fromto_sds) = actual_names
	names(ana_dispersals_counts_fromto_sds) = actual_names
	row.names(ana_dispersals_counts_fromto_sds) = actual_names
	names(founder_counts_fromto_sds) = actual_names
	row.names(founder_counts_fromto_sds) = actual_names
	names(a_counts_fromto_sds) = actual_names
	row.names(a_counts_fromto_sds) = actual_names
	names(d_counts_fromto_sds) = actual_names
	row.names(d_counts_fromto_sds) = actual_names
	
	counts_list$all_dispersals_counts_fromto_means = all_dispersals_counts_fromto_means
	counts_list$ana_dispersals_counts_fromto_means = ana_dispersals_counts_fromto_means
	counts_list$founder_counts_fromto_means = founder_counts_fromto_means
	counts_list$a_counts_fromto_means = a_counts_fromto_means
	counts_list$d_counts_fromto_means = d_counts_fromto_means

	counts_list$all_dispersals_counts_fromto_sds = all_dispersals_counts_fromto_sds
	counts_list$ana_dispersals_counts_fromto_sds = ana_dispersals_counts_fromto_sds
	counts_list$founder_counts_fromto_sds = founder_counts_fromto_sds
	counts_list$a_counts_fromto_sds = a_counts_fromto_sds
	counts_list$d_counts_fromto_sds = d_counts_fromto_sds
	
	cat("\n\nRange-switching dispersal (all observed 'a' dispersals):\n")
	cat("\nmeans:\n")
	print(conditional_format_table(counts_list$a_counts_fromto_means))
	cat("\nstandard deviations:\n")
	print(conditional_format_table(counts_list$a_counts_fromto_sds))

	cat("\n\nRange-expansion dispersal (all observed 'd' dispersals):\n")
	cat("\nmeans:\n")
	print(conditional_format_table(counts_list$d_counts_fromto_means))
	cat("\nstandard deviations:\n")
	print(conditional_format_table(counts_list$d_counts_fromto_sds))

	cat("\n\nAnagenetic dispersal (mean of all observed anagenetic 'a' or 'd' dispersals):\n")
	cat("\nmeans:\n")
	print(conditional_format_table(counts_list$ana_dispersals_counts_fromto_means))
	cat("\nstandard deviations:\n")
	print(conditional_format_table(counts_list$ana_dispersals_counts_fromto_sds))

	cat("\n\nCladogenetic dispersal (mean of all observed jump 'j' dispersals):\n")
	cat("\nmeans:\n")
	print(conditional_format_table(counts_list$founder_counts_fromto_means))
	cat("\nstandard deviations:\n")
	print(conditional_format_table(counts_list$founder_counts_fromto_sds))

	cat("\n\nALL dispersal (mean of all observed anagenetic 'a', 'd' dispersals, PLUS cladogenetic founder/jump dispersal):\n")
	cat("\nmeans:\n")
	print(conditional_format_table(counts_list$all_dispersals_counts_fromto_means))
	cat("\nstandard deviations:\n")
	print(conditional_format_table(counts_list$all_dispersals_counts_fromto_sds))

	
	
	summary_counts_BSMs = rbind(means, sds, sums)
	summary_counts_BSMs = adf(summary_counts_BSMs)
	names(summary_counts_BSMs) = c("founder", "a", "d", "e", "subset", "vicariance", "sympatry", "ALL_disp", "ana_disp", "all_ana", "all_clado", "total_events")
	row.names(summary_counts_BSMs) = c("means", "stdevs", "sums")
	
	counts_list$summary_counts_BSMs = summary_counts_BSMs
	
	cat("\n\nSummary of event counts from ", length(clado_events_tables), " BSMs:\n", sep="")
	print(conditional_format_table(summary_counts_BSMs))
	
	# Code to extract output tables
	extract='
	all_dispersals_counts_cube = counts_list$all_dispersals_counts_cube
	anagenetic_dispersals_counts_cube = counts_list$anagenetic_dispersals_counts_cube
	founder_counts_cube = counts_list$founder_counts_cube
	ana_counts_cube = counts_list$ana_counts_cube
	a_counts_cube = counts_list$a_counts_cube
	d_counts_cube = counts_list$d_counts_cube
	e_counts_rectangle = counts_list$e_counts_rectangle
	unique_sub_counts = counts_list$unique_sub_counts
	unique_vic_counts = counts_list$unique_vic_counts
	unique_sym_counts = counts_list$unique_sym_counts

	all_dispersals_totals_list = counts_list$all_dispersals_totals_list
	anagenetic_dispersals_totals_list = counts_list$anagenetic_dispersals_totals_list
	founder_totals_list = counts_list$founder_totals_list
	ana_totals_list = counts_list$ana_totals_list
	a_totals_list = counts_list$a_totals_list
	d_totals_list = counts_list$d_totals_list
	e_totals_list = counts_list$e_totals_list
	subsetSymp_totals_list = counts_list$subsetSymp_totals_list
	vicariance_totals_list = counts_list$vicariance_totals_list
	sympatry_totals_list = counts_list$sympatry_totals_list
	

	summary_counts_BSMs = counts_list$summary_counts_BSMs
	'
	
	return(counts_list)
	} # END count_ana_clado_events <- function(clado_events_tables, ana_events_tables, areanames, actual_names)





print_counts_lists_to_file <- function(list_of_counts_lists, model_name="default")
	{
	time_txt_TF = TRUE
	if (is.list(list_of_counts_lists[[1]]) == FALSE)
		{
		list_of_counts_lists = list(list_of_counts_lists)
		
		time_txt_TF = FALSE
		}
	
	for (i in 1:length(list_of_counts_lists))
		{
		counts_list = list_of_counts_lists[[i]]

		summary_counts_BSMs = counts_list$summary_counts_BSMs
		print(conditional_format_table(summary_counts_BSMs))

		# Histogram of event counts
		# Convert 1 to 01
		
		if (time_txt_TF == TRUE)
			{
			txtnum = sprintf("%02.0f", i)
			time_txt = paste0("_time", txtnum)
			} else {
			time_txt = ""
			}
			
		# Make the histograms of event counts
		pdffn = paste0(model_name, time_txt, "_histograms_of_event_counts.pdf")
		hist_event_counts(counts_list, pdffn=pdffn)

		#######################################################
		# Print counts to files
		#######################################################
		tmpnames = names(counts_list)
		
		if (time_txt_TF == TRUE)
			{
			txtnum = sprintf("%02.0f", i)
			time_txt = paste0("_time", txtnum)
			} else {
			time_txt = ""
			} 
		tmpfns = paste0(model_name, time_txt, "_", tmpnames)
		cat("\n\nWriting tables* of counts to tab-delimited text files:\n(* = Tables have dimension=2 (rows and columns). Cubes (dimension 3) and lists (dimension 1) will not be printed to text files.) \n\n")
		for (i in 1:length(tmpnames))
				{
				cmdtxt = paste0("item = counts_list$", tmpnames[i])
				eval(parse(text=cmdtxt))

				# Skip cubes
				if (length(dim(item)) != 2)
						{
						next()
						}

				outfn = paste0(tmpfns[i], ".txt")
				if (length(item) == 0)
						{
						cat(outfn, " -- NOT written, *NO* events recorded of this type", sep="")
						cat("\n")
						} else {
						cat(outfn)
						cat("\n")
						write.table(conditional_format_table(item), file=outfn, quote=FALSE, sep="\t", col.names=TRUE, row.names=TRUE)
						} # END if (length(item) == 0)
				} # END for (i in 1:length(tmpnames))
		cat("...done.\n")
		} # END for-loop
	}  # END print_counts_lists_to_file <- function(list_of_counts_lists, model_name="default")




calc_BSM_mean_node_states <- function(clado_events_tables, tr, numstates)
	{
	numBSMs = length(clado_events_tables)
	numtips = length(tr$tip.label)
	numnodes = numtips + tr$Nnode
	dims = c(numnodes, numstates, numBSMs)
	state_counts_1based = array(data=0, dim=dims)

	strat_TF = "SUBnode.type" %in% names(clado_events_tables[[1]])
	for (i in 1:numBSMs)
		{
		clado_events_table = clado_events_tables[[i]]
	
		if (strat_TF == TRUE)
			{
			# Stratified
			# Eliminate internal tips
			TF1 = clado_events_table$node.type == "internal"
			TF2 = clado_events_table$SUBnode.type == "tip"
			internal_tips_TF = (TF1 + TF2) == 2
			clado_events_table = clado_events_table[internal_tips_TF==FALSE,]
		
			# Order by node
			clado_events_table = clado_events_table[order(clado_events_table$node), ]

			nodestates = clado_events_table$sampled_states_AT_nodes
			# Store the states
			state_counts_temp = matrix(0, nrow=numnodes, ncol=numstates)
			for (j in 1:length(clado_events_table$node))
				{
				current_nodenum = clado_events_table$node[j]
				state_counts_temp[current_nodenum, nodestates[j]] = 1
				}
			} else {
			# NON-stratified
			nodestates = clado_events_table$sampled_states_AT_nodes
			# Store the states
			state_counts_temp = matrix(0, nrow=numnodes, ncol=numstates)
			for (j in 1:numnodes)
				{
				state_counts_temp[j, nodestates[j]] = 1
				}
			} # END if (strat_TF == TRUE)
		
		state_counts_1based[,,i] = state_counts_temp
		} # END for (i in 1:numBSMs)
	
	meanBSMprobs = apply(X=state_counts_1based, MARGIN=c(1,2), FUN=mean)
	sumBSMcounts = apply(X=state_counts_1based, MARGIN=c(1,2), FUN=sum)
	
	BSMstates_summary = NULL
	BSMstates_summary$state_counts_1based = state_counts_1based
	BSMstates_summary$meanBSMprobs = meanBSMprobs
	BSMstates_summary$sumBSMcounts = sumBSMcounts

	head(meanBSMprobs)
	dim(meanBSMprobs)
	colSums(meanBSMprobs)
	sum(colSums(meanBSMprobs))
	rowSums(meanBSMprobs)
	
	extract='
	state_counts_1based = BSMstates_summary$state_counts_1based
	meanBSMprobs = BSMstates_summary$meanBSMprobs
	sumBSMcounts = BSMstates_summary$sumBSMcounts
	'
	return(BSMstates_summary)
	} # END calc_BSM_mean_node_states <- function(clado_events_tables, tr, numstates)













#######################################################
# Histograms of event counts
#######################################################
hist_event_counts <- function(counts_list, pdffn="hist_event_counts.pdf", col="grey70")
	{
	title_cex = 1
	
	counts_tmp = c(counts_list$founder_totals_list, counts_list$ana_totals_list, counts_list$subsetSymp_totals_list, counts_list$vicariance_totals_list, counts_list$sympatry_totals_list)
	xmax = max(counts_tmp)
	xlims = c(0, max(pretty(counts_tmp)))
	count_names = seq(xlims[1], xlims[2])
	numBSMs = length(counts_list$all_totals_list)
	
	
	

	#pdffn = "hist_event_counts.pdf"
	pdffn = pdffn
	pdf(file=pdffn, width=4, height=8)

	par(mfrow=c(5,1))
	# c(bottom, left, top, right) 
	par(mar=c(2,4,0.5,1))
	
	ylabel = paste0("Freq. in ", numBSMs, " BSMs")
	#par(oma=c(0,0,0,0))
	#hist(counts_list$ana_totals_list, xlab=NULL, ylab=NULL, xlim=xlims, main="")
	mp = barplot(table2(counts_list$ana_totals_list, xlims=xlims), xlab=NULL, ylab=NULL, main="", col=col)
	axis(side=1, at=mp, labels=count_names, tick=FALSE, line=-0.75, cex.axis=0.8)
	#title("Anagenetic dispersal events", cex.main=title_cex, font.main=1)
	mtext(text="# anagenetic dispersal", side=2, cex=0.7, line=2.5)

	mp = barplot(table2(counts_list$sympatry_totals_list, xlims=xlims), xlab=NULL, ylab=NULL, main="", col=col)
	axis(side=1, at=mp, labels=count_names, tick=FALSE, line=-0.75, cex.axis=0.8)
	#title("Narrow sympatry", cex.main=title_cex, font.main=1)
	mtext(text="# narrow sympatry", side=2, cex=0.7, line=2.5)

	mp = barplot(table2(counts_list$subsetSymp_totals_list, xlims=xlims), xlab=NULL, ylab=NULL, main="", col=col)
	axis(side=1, at=mp, labels=count_names, tick=FALSE, line=-0.75, cex.axis=0.8)
	#title("Subset sympatry", cex.main=title_cex, font.main=1)
	mtext(text="# subset sympatry", side=2, cex=0.7, line=2.5)

	mp = barplot(table2(counts_list$vicariance_totals_list, xlims=xlims), xlab=NULL, ylab=NULL, main="", col=col)
	axis(side=1, at=mp, labels=count_names, tick=FALSE, line=-0.75, cex.axis=0.8)
	#title("Vicariance", cex.main=title_cex, font.main=1)
	mtext(text="# vicariance", side=2, cex=0.7, line=2.5)

	par(mar=c(3.5,4,0,1))
	mp = barplot(table2(counts_list$founder_totals_list, xlims=xlims), xlab=NULL, ylab=NULL, main="", col=col)
	axis(side=1, at=mp, labels=count_names, tick=FALSE, line=-0.75, cex.axis=0.8)
	#title("Founder-events", cex.main=title_cex, font.main=1)
	mtext(text="# founder events", side=2, cex=0.7, line=2.5)
	mtext(text=paste0("Event counts in each of ", numBSMs, " BSMs"), side=1, cex=0.7, line=2)
	
	dev.off()
	cmdstr = paste0("open ", pdffn)
	system(cmdstr)
	} # END hist_event_counts <- function(counts_list, pdffn="hist_event_counts.pdf")

# Max a table, with 0s for the integers that are unobserved
table2 <- function(event_counts, xlims)
	{
	count_names = seq(xlims[1], xlims[2])
	counts_for_barplot = matrix(0, nrow=1, ncol=length(count_names))
	names(counts_for_barplot) = count_names
	counts_for_barplot
	
	tmptable = table(as.integer(event_counts))
	indices = match(x=names(tmptable), table=count_names)
	counts_for_barplot[indices] = tmptable
	
	return(counts_for_barplot)
	}






#######################################################
# Check that ML ancestral state/range probabilities and
# the mean of the BSMs approximately line up
#######################################################
check_ML_vs_BSM <- function(res, clado_events_tables, model_name, tr=NULL, plot_each_node=FALSE, linreg_plot=TRUE, MultinomialCI=TRUE)
	{
	# Get tree if needed
	if (is.null(tr))
		{
		#tr = read.tree(res$inputs$trfn)
		tr = check_trfn(trfn=res$inputs$trfn)
		} # END if (is.null(tr))
	
	# Determine if a stratified analysis if needed
	if (is.null(res$inputs$stratified))
		{
		if (is.numeric(res$inputs$timeperiods) == TRUE)
			{
			res$inputs$stratified = TRUE
			} else {
			res$inputs$stratified = FALSE
			}# END if (is.numeric(BioGeoBEARS_run_object$timeperiods) == TRUE)
		} # END if (is.null(res$inputs$stratified))
	stratified = res$inputs$stratified
	
	
	pdffn = paste0(model_name, "_ML_vs_BSM.pdf")
	pdf(file=pdffn, height=6, width=6)

	x_all = NULL
	y_all = NULL
	numtips = length(tr$tip.label)
	intnodenums = (numtips+1):(numtips+tr$Nnode)

	MLstateprobs = res$ML_marginal_prob_each_state_at_branch_top_AT_node
	numstates = ncol(MLstateprobs)
	BSMstates_summary = calc_BSM_mean_node_states(clado_events_tables, tr, numstates)
	state_counts_1based = BSMstates_summary$state_counts_1based
	meanBSMprobs = BSMstates_summary$meanBSMprobs
	sumBSMcounts = BSMstates_summary$sumBSMcounts

	node_history_samples_allNodes = NULL
	cat("\nCalculating BSM means for node #:", sep="")
	
	for (nodenum in intnodenums)
		{
		cat(nodenum, " ", sep="")
		
		node_history_samples = NULL
		for (i in 1:length(clado_events_tables))
			{
			clado_events_table = clado_events_tables[[i]]
			if (stratified == TRUE)
				{
				TF1 = clado_events_table$SUBnode.type != "tip"
				TF2 = clado_events_table$node.type != "tip"
				TF = (TF1+TF2)==2
				} else {
				TF = clado_events_table$node.type != "tip"
				}
			clado_events_table = clado_events_table[TF,]
			TF = clado_events_table$node == nodenum
			node_history_sample = clado_events_table[TF,]
			node_history_samples = rbind(node_history_samples, node_history_sample)
			} # END for (i in 1:length(clado_events_tables))
		head(node_history_samples)
		class(node_history_samples)
		dim(node_history_samples)

		# Save
		node_history_samples_allNodes = rbind(node_history_samples_allNodes, node_history_samples)
	
		node_history_samples$clado_event_txt
		node_history_samples$sampled_states_AT_nodes


		cbind(node_history_samples$sampled_states_AT_nodes, node_history_samples$sampled_states_AT_brbots)
		table(node_history_samples$sampled_states_AT_nodes)
		table(node_history_samples$sampled_states_AT_brbots)

		x=round(res$ML_marginal_prob_each_state_at_branch_top_AT_node[nodenum,],3)

		num_root_state = table(node_history_samples$sampled_states_AT_nodes)
		fract_root_state = num_root_state / sum(num_root_state)
		obs_BSM_probs_at_root = rep(0, length(x))
		obs_BSM_probs_at_root[as.numeric(names(fract_root_state))] = fract_root_state
	
		x=round(res$ML_marginal_prob_each_state_at_branch_top_AT_node[nodenum,],3)
		y=obs_BSM_probs_at_root
		x_all = c(x_all, x)
		y_all = c(y_all, y)
	
		if (plot_each_node == TRUE)
			{
			plot(x,y, xlim=c(0,1), ylim=c(0,1), xlabel="ML marginal probs", ylabel="BSM probs")
			segments(0,0,1,1)
			title(nodenum)
			} # END if (plot_each_node == TRUE)
		} # END for (nodenum in 20:37)
	cat("...done.\n\n")

	# Plot allnodes
	if (linreg_plot == FALSE)
		{
		plot(x_all, y_all, xlim=c(0,1), ylim=c(0,1), xlabel="ML marginal probs", ylabel="BSM probs")
		segments(0,0,1,1)
		title("all nodes")
		} # END if (linreg_plot == FALSE)

	if (linreg_plot == TRUE)
		{
		linear_regression_plot(x=x_all, y=y_all, tmppch=1, xlabel="State probabilities under ML model", ylabel="State probabilities as mean of BSMs", xlim=c(0,1), ylim=c(0,1))
		# Multiple R-squared:  0.9655,	Adjusted R-squared:  0.9655 
		title(paste0(model_name, ":\nML state probs vs. mean of BSMs"))
		} # END if (linreg_plot == TRUE)

	if (MultinomialCI == TRUE)
		{
		# Try doing confidence intervals
		#require(MultinomialCI)
		sumBSMcounts2 = sumBSMcounts[-(1:numtips),]

		# Confident intervals for a particular node
		# CI95 = multinomialCI(x=sumBSMcounts2[1,], alpha=0.05, verbose=TRUE)
		# CI95

		# Add multinomial CI95s to plot
		cat("\nAdding CI95 segments for: ", sep="")
		for (i in 1:tr$Nnode)
			{
			cat(i+numtips, " ", sep="")
			tmpcounts = sumBSMcounts2[i,]
			CI95 = MultinomialCI::multinomialCI(x=tmpcounts, alpha=0.05, verbose=FALSE)
			#xvals = tmpcounts / sum(tmpcounts)
			xvals = MLstateprobs[-(1:numtips),][i,]
			segments(x0=xvals, x1=xvals, y0=CI95[,1], y1=CI95[,2])
			} # for (1 in 1:numnodes)
		} # END if (MultinomialCI == TRUE)
	cat("...done.\n\n")

	# Close PDF and open for viewer
	dev.off()
	cmdstr = paste("open ", pdffn)
	system(cmdstr)
	} # END check_ML_vs_BSM <- function(res, clado_events_tables, tr=NULL)


minmax_pretty <- function(x)
	{
	minmax = c( min(pretty(x)), max(pretty(x)) )
	return(minmax)
	}


#######################################################
# Linear regression plot, with stats
#######################################################
# If slope1=TRUE, subtract 1:1 slope from the line and test for differences

linear_regression_plot <- function(x, y, xlabel="x", ylabel="y", tmppch=".", pointscol="black", tmplinecol="black", tmplty=1, tmplwd=1, plottext=TRUE, legend_title="", textcol="black", legend_x="topleft", legend_y=NULL, xlim=minmax_pretty(x), ylim=minmax_pretty(y), increment_fraction=NULL, legend_cex=1, axis_cex=1, slope1=FALSE, intercept_in_legend=TRUE, add_to_plot=FALSE, printall=TRUE, ...)
	{
	# Make a linear regression plot
	model1 = lm(y~x)
	slope = model1$coefficients[2]
	intercept = model1$coefficients[1]
	
	if (printall)
		{
		print("Summary of model 1 (standard regression, y predicted by x)")
		print(summary(model1))
		}
	



	# Set up Legend
	if (is.null(increment_fraction))
		{
		increment_fraction = 0.05
		}

	# Legend positions
	if (legend_x == "topleft")
		{
		legend_x = min(xlim)
		legend_y = 1 * max(ylim)
		}
	
	increment = increment_fraction * (max(ylim) - min(ylim))
	
	
	# Plot the full plot, or just add points
	if (add_to_plot == FALSE)
		{
		plot(x, y, xlab=xlabel, ylab=ylabel, pch=tmppch, xlim=xlim, ylim=ylim, col=pointscol, cex.axis=axis_cex, ...)
		} else {
		points(x, y, pch=tmppch, xlim=xlim, ylim=ylim, col=pointscol, ...)
		}
	tmpx1 = min(x, na.rm=TRUE)
	tmpx2 = max(x, na.rm=TRUE)
	tmpy1 = slope*tmpx1 + intercept
	tmpy2 = slope*tmpx2 + intercept
	
	segments(x0=tmpx1, y0=tmpy1, x1=tmpx2, y1=tmpy2, col=tmplinecol, lty=tmplty, lwd=tmplwd)
	
	
	if (plottext)
		{
		# Subtract 1:1 slope if desired
		# (for Patrick Shih cyanobacteria analysis)
		if (slope1 == TRUE)
			{
			# Subtract 1:1 from the y values
			ynew = y-x
			model2 = lm(ynew~x)
			pval = summary(model2)$coefficients[2,4]

			print("Summary of model 2 (standard regression, (y-x) predicted by x; this means we've substracted out the 1:1 expected relationship.)")
			print(summary(model2))
			} else {
			pval = summary(model1)$coefficients[2,4]
			} # END if (slope1 == TRUE)
		
		# Start ypos for legend text at 0
		ypos = 0
		# Plot a legend title if desired
		if (legend_title != "")
			{
			text(x=legend_x, y=legend_y - ypos*increment, pos=4, labels=legend_title, cex=legend_cex, col=textcol)
			} # END if (legend_title != "")
	
		
		if (intercept_in_legend==TRUE)
			{		
			R2 = summary(model1)$r.squared
			slope = summary(model1)$coefficients[2,1]
			slopeSE = summary(model1)$coefficients[2,2]
			slope95 = 1.96*slopeSE

			intercept = summary(model1)$coefficients[1,1]
			interceptSE = summary(model1)$coefficients[1,2]
			intercept95 = 1.96*interceptSE
		
			R2txt = bquote(italic(R)^2 == .(format(R2, digits=3)))
			text(x=legend_x, y=legend_y - (ypos=ypos+1)*increment, pos=4, labels=R2txt, cex=legend_cex, col=textcol)
			slopetxt = bquote(italic(m) == .(format(slope, digits=3)) %+-% .(format(slope95, digits=3)))
			text(x=legend_x, y=legend_y - (ypos=ypos+1)*increment, pos=4, labels=slopetxt, cex=legend_cex, col=textcol)
			#intercepttxt = paste("intercept=", format(intercept, digits=3), " +/- ", format(intercept95, digits=3), sep="")
			intercepttxt = bquote(italic(b) == .(format(intercept, digits=3)) %+-% .(format(intercept95, digits=3)))
			text(x=legend_x, y=legend_y - (ypos=ypos+1)*increment, pos=4, labels=intercepttxt, cex=legend_cex, col=textcol)
		
			if (slope1 == TRUE)
				{
				#pvaltxt = paste("p = ", format(pval, digits=3), " (null: slope is 1:1)", sep="")
				pvaltxt = bquote(italic(p) == .(format(pval, digits=3))~" (null: slope is 1:1)")
				text(x=legend_x, y=legend_y - (ypos=ypos+1)*increment, pos=4, labels=pvaltxt, cex=legend_cex, col=textcol)
				} else {
				#pvaltxt = paste("p = ", format(pval, digits=3), sep="")
				pvaltxt = bquote(italic(p) == .(format(pval, digits=3)))
				text(x=legend_x, y=legend_y - (ypos=ypos+1)*increment, pos=4, labels=pvaltxt, cex=legend_cex, col=textcol)
				} # END if (slope1 == TRUE)

			} # END if (intercept_in_legend==TRUE)
		
# 		if (intercept_in_legend==TRUE)
# 			{
# 			txt_to_plot = paste(R2txt, slopetxt, intercepttxt, pvaltxt, sep="\n")
# 			} else {
# 			txt_to_plot = paste(R2txt, slopetxt, pvaltxt, sep="\n")
# 			}
		
		# http://stackoverflow.com/questions/3761410/how-can-i-plot-my-r-squared-value-on-my-scatterplot-using-r
		# bty suppresses box
		# print(legend_x)
		# print(legend_y)
		# Discounted
		#legend(x=legend_x, y=legend_y, bty="n", legend=txt_to_plot, cex=0.9)

		}
	
	if (printall == TRUE)	
		{
		print(model1)
		print(summary(model1))
		}
	
	
	
	return(model1)
	}




linear_regression_plot_OLD <- function(x, y, xlabel="x", ylabel="y", tmppch=".", printall=TRUE, tmplinecol="black", tmplty=1, tmplwd=1, plottext=TRUE, legend_x="topleft", legend_y=NULL, xlim=minmax_pretty(x), ylim=minmax_pretty(y), slope1=FALSE, ...)
	{
	# Make a linear regression plot
	model1 = lm(y~x)
	print("Summary of model 1 (standard regression, y predicted by x)")
	print(summary(model1))
	
	slope = model1$coefficients[2]
	intercept = model1$coefficients[1]
	
	plot(x, y, xlab=xlabel, ylab=ylabel, pch=tmppch, xlim=xlim, ylim=ylim, ...)
	tmpx1 = min(x, na.rm=TRUE)
	tmpx2 = max(x, na.rm=TRUE)
	tmpy1 = slope*tmpx1 + intercept
	tmpy2 = slope*tmpx2 + intercept
	
	segments(x0=tmpx1, y0=tmpy1, x1=tmpx2, y1=tmpy2, col=tmplinecol, lty=tmplty, lwd=tmplwd)
	
	
	if (plottext)
		{
		# Subtract 1:1 slope if desired
		# (for Patrick Shih cyanobacteria analysis)
		if (slope1 == TRUE)
			{
			# Subtract 1:1 from the y values
			ynew = y-x
			model2 = lm(ynew~x)
			pval = summary(model2)$coefficients[2,4]

			print("Summary of model 2 (standard regression, (y-x) predicted by x; this means we've substracted out the 1:1 expected relationship.)")
			print(summary(model2))
			} else {
			pval = summary(model1)$coefficients[2,4]
			} # END if (slope1 == TRUE)
		
		
		R2 = summary(model1)$r.squared
		slope = summary(model1)$coefficients[2,1]
		slopeSE = summary(model1)$coefficients[2,2]
		slope95 = 1.96*slopeSE
		
		R2txt = paste("R2 = ", format(R2, digits=3), sep="")
		slopetxt = paste("m=", format(slope, digits=3), " +/- ", format(slope95, digits=3), sep="")
		
		if (slope1 == TRUE)
			{
			pvaltxt = paste("p = ", format(pval, digits=3), " (null: slope is 1:1)", sep="")
			} else {
			pvaltxt = paste("p = ", format(pval, digits=3), sep="")
			} # END if (slope1 == TRUE)

		txt_to_plot = paste(R2txt, slopetxt, pvaltxt, sep="\n")
		
		# http://stackoverflow.com/questions/3761410/how-can-i-plot-my-r-squared-value-on-my-scatterplot-using-r
		# bty suppresses box
		# print(legend_x)
		# print(legend_y)
		legend(x=legend_x, y=legend_y, bty="n", legend=txt_to_plot, cex=0.9)

		}
	
	if (printall == TRUE)	
		{
		print(model1)
		print(summary(model1))
		}
	
	return(model1)
	} # END linear_regression_plot



#######################################################
# BSM_to_phytools_SM
#######################################################
#' Convert Biogeographic Stochastic Map (BSM) to phytools SIMMAP stochastic map (SM) format
#' 
#' This function converts a Biogeographic Stochastic Map (BSM) to a \code{phytools} "\code{simmap}" 
#' object. This is useful mostly to make use of the more diverse plotting functions for
#' simmaps available in \code{phytools}. 
#'
#' The BioGeoBEARS Biogeographic Stochastic Map (BSM) output is rather complex, as it
#' keeps track of numerous different types of anagenetic and cladogenetic events, the
#' source and destination areas of dispersal events (*), etc. However, BioGeoBEARS only 
#' has plotting functions for the standard, rectangular, right-facing phylogeny. 
#' \code{\link{BSM_to_phytools_SM}} converts a BioGeoBEARS BSM result into a phytools 
#' stochastic mapping object (which is a really a phylo3 tree object, with extra fields
#' to contain the stochastic map.
#'
#' The inputs to \code{BSM_to_phytools_SM} consist of (1) a "res" object (the result of 
#' an ML inference), a \code{clado_events_table} containing
#' the events sampled at each node of the phylogeny, and (optionally, but usually needed)
#' a \code{ana_events_table} containing the anagenetic events on each branch (or branch 
#' segment, in the case of a time-stratified analysis, where the tree has been broken
#' up into subpieces.
#'
#' The resulting object, \code{tr_wSimmap}, is a phytools simmap object, which really 
#' consists of a standard APE phylogeny object, plus the extra fields "map", "mapped.edge",
#' "Q", and "logL". The class of this object, i.e. the result of class(tr_wSimmap), is
#' \code{c("simmap", "phylo")}. 
#'
#' Notes on using the resulting simmap object in phytools:
#'
#' <b>1.</b> The phytools functions, like \code{countSimmap(tr_wSimmap)}, will only count the 
#'    \emph{anagenetic} events, as that is all that is formally recorded in the \code{phytools}
#'    \code{simmap} object. For example, imagine if your model parameters for a DEC+J model were
#'    \emph{d}=0.00224, \emph{e}=0.0, \emph{j}=0.0297. In this case, \code{countSimmap(tr_wSimmap)}
#'    would count transitions like \code{A->AB}, because d is positive. However, the 
#'    reverse transition of \code{AB->A} would require an "\emph{e}" event, but \emph{e} is 
#'    inferred to be 0.0, as is very common in DEC, DEC+J etc. analyses. 
#'
#'    Transitions to 
#'    single-area states in "\emph{j}" events would be common at cladogenesis events (e.g., 
#'    \code{AB->AB,C}), but \code{phytools} doesn't know anything about cladogenesis models, 
#'    as it was written assuming purely anagenetic models.  One could probably write a 
#'    script to infer "<i>j</i>" events off the \code{phytools} \code{simmap} object by 
#'    comparing the last-state-below-a-node with the descendent-pairs-above-a-node, but 
#'    it would probably be easier to just use the BioGeoBEARS BSM outputs 
#'    (\code{clado_events_table} and \code{ana_events_table}), because they record all 
#'    of this information already, and the text files output by the example BSM script
#'    summarize all of the different types of events and their direction.
#'
#' \bold{2.} For similar reasons, the \code{phytools} graphics, while they should branch
#' the branch histories correctly, don't always correctly 
#' draw the colors at the "corners" between a speciation event and the beginning of a 
#' descendant branch. This is for the same reasons as #1: \code{phytools} doesn't 
#' know about cladogenesis models and assumes a purely anagenetic model, where the state
#' just before and just after cladogenesis would always be identical. (Counting the 
#' states at nodes may also be somewhat inaccurate if counted off the \code{phytools} \code{simmap}
#' derived from a BioGeoBEARS BSM, I haven't tested this.)
#' 
#' \bold{(*) Please note carefully:} area-to-area dispersal events are not identical with the 
#' state transitions. For example, a state can be a geographic range with multiple 
#' areas, but under the logic of DEC-type models, a range-expansion event like 
#' ABC->ABCD actually means that a dispersal happened from some specific area (A, B, or C)
#' to the new area, D.  BSMs track this area-to-area sourcing, at least if 
#' \code{\link{simulate_source_areas_ana_clado}} has been run.
#' 
#' @param \code{res} A BioGeoBEARS results object, produced by ML inference via \code{\link{bears_optim_run}}.
#' @param \code{clado_events_table} A cladogenetic events table, from BioGeoBEARS BSM.
#' @param \code{ana_events_table} An anagenetic events table, from BioGeoBEARS BSM. Default is NA, 
#' in which case the function assumes that the BSM had 0 anagenetic range-changing events, and 
#' all range-changing events were cladogenetic. This will only process properly if actually is
#' true that the sampled states at branch bottoms and branch tops in the the \code{clado_events_table}
#' always match (an error check checks for this).
#' @return \code{tr_wSimmap} This is an object of type \code{c("simmap", "phylo"). It contains
#' the following: 
#' \code{tr_wSimmap$tr} is a \code{phylo3} APE tree object
#' \code{tr_wSimmap$maps} = A list for each branch (branch numbers when \code{phylo} object in "cladewise" order),
#' with the state history of each branch (the length of time in each state list of states inhabited, with the 
#' cell names giving the state).
#' \code{tr_wSimmap$mapped.edge} The total amount of time in each state, for each branch (same order as
#' in \code{$maps}).  The row names are the names of the nodes at the bottom and top of each branch/edge.
#' tr_wSimmap$Q The \code{Q} transition matrix (calculated from the ML parameter estimates contained in f;cc).
#' tr_wSimmap$logL The log-likelihood of the data under the ML model (this will be the same across all BSMs).
#' @export
#' @seealso \code{\link{BSMs_to_phytools_SMs}}, \code{\link{simulate_source_areas_ana_clado}}, 
#' \code{phytools::countSimmap},  \code{phytools::make.simmap}
#' @note Go (BioGeo)BEARS!
#' @author Nicholas J. Matzke \email{matzke@@berkeley.edu} 
#' @references
#' 
#' @examples
#' 
BSM_to_phytools_SM <- function(res, clado_events_table, ana_events_table=NA)
	{
	run_parent_brs_TF = TRUE
	
	# Error check
	if ((class(ana_events_table) == "data.frame") && (dim(ana_events_table)[1] > 0))
		{
		run_parent_brs_TF = TRUE
		} else {
		ana_events_table = NA
		run_parent_brs_TF = FALSE
		}

	if (is.null(ana_events_table) == TRUE)
		{
		ana_events_table = NA
		run_parent_brs_TF = FALSE
		}
	
	# Is it time-stratified?
	stratTF = (length(res$inputs$timeperiods) > 0)
	
	returned_mats = get_Qmat_COOmat_from_BioGeoBEARS_run_object(BioGeoBEARS_run_object=res$inputs, include_null_range=res$inputs$include_null_range)
	returned_mats
	areanames = returned_mats$areanames
	ranges_list = returned_mats$ranges_list

	# Get list of edges:
	trtable = prt(tr, printflag=FALSE)
	trtable

	# Convert pruningwise edge numbers to cladewise edgenums
	# The clado_events_table (non-stratified) has the edge numbers in
	# pruningwise order
	if (stratTF == FALSE)
		{
		pruningwise_edgenums = clado_events_table$parent_br
		cladewise_edgenums = trtable$parent_br
		translation_pruning_to_clade_edgenums = as.data.frame(cbind(pruningwise_edgenums, cladewise_edgenums), stringsAsFactors=FALSE)
		translation_pruning_to_clade_edgenums
		
		# Error trap for when there are no anagenetic events
		if (run_parent_brs_TF == TRUE)
			{
			ana_events_edgenums_indexes_in_clado_events_table = match(x=ana_events_table$parent_br, table=clado_events_table$parent_br)
			ana_events_table$parent_br = translation_pruning_to_clade_edgenums$cladewise_edgenums[ana_events_edgenums_indexes_in_clado_events_table]
			}
		clado_events_table$parent_br = trtable$parent_br
		}
	

	# Get the edgenums, exclude the "NA" for the root branch
	# Order edgenums from smallest to largest
	nonroot_TF = trtable$node.type != "root"
	edgenums = trtable$parent_br[nonroot_TF]
	edgenums_order = order(edgenums)
	edgenums = edgenums[edgenums_order]

	# Get the "ancestor node, descendant node"
	ancnodenums = trtable$ancestor[nonroot_TF][edgenums_order]
	decnodenums = trtable$node[nonroot_TF][edgenums_order]

	# rownames for mapped.edge
	rownames_for_mapped_edge = paste0(ancnodenums, ",", decnodenums)
	rownames_for_mapped_edge

	# instantiate "maps" for phytools (a list, with array of state residence times
	maps = list()
	
	if (stratTF == TRUE)
		{
		time_tops = sort(unique(clado_events_table$time_top))
		time_bots = sort(unique(clado_events_table$time_bot))
		}
	
	
	# Loop through the edges, record any anagenetic events on the branches
	# fill in "maps"
	for (i in 1:length(edgenums))
		{
		edgenum = edgenums[i]
	
		# Trap for if ana_events_table is NA (common, if there are no anagenetic events in
		# the tree at all)
		if ( (length(ana_events_table) == 1) && (is.na(ana_events_table)) )
			{
			edgefound_TF = FALSE
			} else {
			edgefound_TF = ana_events_table$parent_br == edgenum
			}
	
		# If no anagenetic events are found, the whole branchlength is in the 
		# starting state, as specified in "clado_events_table"
		if (sum(edgefound_TF) == 0)
			{
			# The states should be the same at the branch bottom and top:
			clado_row_TF = clado_events_table$parent_br == edgenum
			clado_row_TF[is.na(clado_row_TF)] = FALSE
		
# 			if (stratTF == TRUE)
# 				{
# 				match_time_tops_TF = as.numeric(tmptable_rows$abs_event_time[nnr]) >= as.numeric(trtable$time_top)
# 				match_time_bots_TF = as.numeric(tmptable_rows$abs_event_time[nnr]) < as.numeric(trtable$time_bot)
# 				}
			
			
			## NJM 2019-03-12_ fix: doubles can be found in time-strat, FIX
			clado_events_table[clado_row_TF,]
			
			
			# Error check
			if (sum(clado_row_TF) < 1)
				{
				txt = paste0("STOP ERROR #1 in BSM_to_phytools_SM(): 0 rows in clado_events_table match edge number/branch number (parent_br==", edgenum, ").")
				cat("\n\n")
				cat(txt)
				cat("\n\n")
				stop(txt)
				}
		
			if (sum(clado_row_TF) == 1)
				{
				bottom_state_num_1based = clado_events_table$sampled_states_AT_brbots[clado_row_TF]
				top_state_num_1based = clado_events_table$sampled_states_AT_nodes[clado_row_TF]
				}

			if (sum(clado_row_TF) > 1)
				{
				bottom_state_num_1based = unique(clado_events_table$sampled_states_AT_brbots[clado_row_TF])
				top_state_num_1based = unique(clado_events_table$sampled_states_AT_nodes[clado_row_TF])
				
				# Error check: there should be only 1 unique state corresponding to this
				# node (because we are in the section where no anagenetic histories were
				# found on the branch).
				if (length(top_state_num_1based) != 1)
					{
					txt = "STOP ERROR in BSM_to_phytools_SM(): more than one 'top_state_num_1based' corresponding to the node specified. Printing the matching rows of 'clado_events_table'."
					cat("\n\n")
					cat(txt)
					cat("\n\n")
					
					print("clado_events_table[clado_row_TF,]")
					print(clado_events_table[clado_row_TF,])
					
					print("top_state_num_1based")
					print(top_state_num_1based)
					
					stop(txt)
					} # END if (length(top_state_num_1based) != 1)

				if (length(bottom_state_num_1based) != 1)
					{
					txt = "STOP ERROR in BSM_to_phytools_SM(): more than one 'bottom_state_num_1based' corresponding to the node specified. Printing the matching rows of 'clado_events_table'."
					cat("\n\n")
					cat(txt)
					cat("\n\n")
					
					print("clado_events_table[clado_row_TF,]")
					print(clado_events_table[clado_row_TF,])
					
					print("bottom_state_num_1based")
					print(bottom_state_num_1based)
					
					stop(txt)
					} # END if (length(bottom_state_num_1based) != 1)
				} # END if (sum(clado_row_TF) > 1)
			
		
			# Error check
			if (bottom_state_num_1based != top_state_num_1based)
				{
				txt = paste0("STOP ERROR #2 in BSM_to_phytools_SM(): the top_state_num_1based (", top_state_num_1based, "), and bottom_state_num_1based (", bottom_state_num_1based, ") have to match at edge number/branch number (parent_br==", edgenum, "), because no anagenetic events were recorded on this branch.")
				cat("\n\n")
				cat(txt)
				cat("\n\n")
				stop(txt)
				}
		
			# No events detected, so put in the states_array just one state
			# But, since there are NO events on this branch, don't add to it if
			# there is already something there (e.g. don't add the same branch for
			# multiple branch segments)
			names_of_states_array = c(ranges_list[bottom_state_num_1based])
			times_in_each_state_array = unique(c(clado_events_table$edge.length[clado_row_TF]))

			if (length(times_in_each_state_array) != 1)
				{
				txt = "STOP ERROR in BSM_to_phytools_SM(): more than one 'times_in_each_state_array' corresponding to the node specified. There should only be one time, because no anagenetic events were detected on this branch. Printing the matching rows of 'clado_events_table'."
				cat("\n\n")
				cat(txt)
				cat("\n\n")
				
				print("clado_events_table[clado_row_TF,]")
				print(clado_events_table[clado_row_TF,])
				
				print("times_in_each_state_array")
				print(times_in_each_state_array)
				
				stop(txt)
				} # END if (length(times_in_each_state_array) != 1)
			
			
			names(times_in_each_state_array) = names_of_states_array
			times_in_each_state_array
			} # END if (sum(edgefound_TF) == 0)

		# If some anagenetic events are found, the whole branchlength is in the 
		# starting state, as specified in "clado_events_table"
		if (sum(edgefound_TF) > 0)
			{
			# The states will be listed in the ana_events_table
			rows_matching_edgenum_TF = ana_events_table$parent_br == edgenum
			tmp_ana_events_table = ana_events_table[rows_matching_edgenum_TF,]
		
			# Make sure the tmp_ana_events_table is sorted by REVERSE event_time (along branch)
			# (Do REVERSE, because older events have larger event ages)
			tmp_ana_events_table = tmp_ana_events_table[rev(order(tmp_ana_events_table$abs_event_time)),]
			tmp_ana_events_table
		
			numevents = sum(rows_matching_edgenum_TF)
		
			# 1 event, 2 states on branch
			if (numevents == 1)
				{
				first_state_name = c(tmp_ana_events_table$current_rangetxt[1])
				abs_time_bp_at_branch_top = tmp_ana_events_table$time_bp[1]
				abs_time_bp_at_branch_bot = abs_time_bp_at_branch_top + tmp_ana_events_table$edge.length[1]
				first_state_timelength = c(abs_time_bp_at_branch_bot - tmp_ana_events_table$abs_event_time[1])

				# The rest of the branch is the 2nd state
				further_state_name = c(tmp_ana_events_table$new_rangetxt[1])
				further_state_time = c(tmp_ana_events_table$edge.length[1] - first_state_timelength)

				times_in_each_state_array = c(first_state_timelength, further_state_time)
				names_of_states_array = c(first_state_name, further_state_name)
				} # END if (numevents == 1)
		
			# 2+ events, 3+ states on branch
			if (numevents >= 2)
				{
				first_state_name = c(tmp_ana_events_table$current_rangetxt[1])
				#first_state_time = c(tmp_ana_events_table$event_time[1])
				abs_time_bp_at_branch_top = tmp_ana_events_table$time_bp[1]
				abs_time_bp_at_branch_bot = abs_time_bp_at_branch_top + tmp_ana_events_table$edge.length[1]
				first_state_timelength = c(abs_time_bp_at_branch_bot - tmp_ana_events_table$abs_event_time[1])
			
				nonfirst_rows = 2:numevents
				nonlast_rows = 1:(numevents-1)

				further_state_names = c(tmp_ana_events_table$new_rangetxt)
				further_state_times = tmp_ana_events_table$abs_event_time[nonlast_rows] - tmp_ana_events_table$abs_event_time[nonfirst_rows]
				
				# How much of the branch is left
				last_time = c(tmp_ana_events_table$edge.length[numevents] - sum(c(first_state_timelength,further_state_times)))

				times_in_each_state_array = c(first_state_timelength, further_state_times, last_time)
				names_of_states_array = c(first_state_name, further_state_names)
				} # END if (numevents >= 2)

			names(times_in_each_state_array) = names_of_states_array
			} # END if (sum(edgefound_TF) > 0)
	
		# Store
		maps[[i]] = times_in_each_state_array
		} # END for (i in 1:length(edgenums))

	maps


	# Check that the sums of state residence times add up to the branch lengths
	lapply(X=maps, FUN=sum)
	tr$edge.length
	all(c(lapply(X=maps, FUN=sum)) == tr$edge.length)

	cbind(tr$edge.length, c(lapply(X=maps, FUN=sum)))


	# Make the mapped.edge output
	mapped.edge = matrix(data=0.0, nrow=length(edgenums), ncol=length(ranges_list))
	row.names(mapped.edge) = rownames_for_mapped_edge
	colnames(mapped.edge) = ranges_list
	mapped.edge


	# For each branch, 
	# 1. Get the list of observed states
	i = 3
	observed_states = sort(unique(names(maps[[i]])))
	observed_states


	# sapply to get the sum of each
	sapply(X=observed_states, FUN=get_sum_statetime_on_branch, branch_history_map=maps[[i]])

	# Fill in the table for each branch
	for (i in 1:nrow(mapped.edge))
		{
		observed_states = sort(unique(names(maps[[i]])))
		total_residence_times = sapply(X=observed_states, FUN=get_sum_statetime_on_branch, branch_history_map=maps[[i]])
		names_observed_states = names(total_residence_times)
		mapped.edge[i,names_observed_states] = unname(total_residence_times)
		}
	mapped.edge


	# Get the transition matrix and logL
	Q = returned_mats$Qmat
	row.names(Q) = ranges_list
	colnames(Q) = ranges_list
	logL = res$total_loglikelihood

	tr_wSimmap = tr
	tr_wSimmap$maps = maps
	tr_wSimmap$mapped.edge = mapped.edge
	tr_wSimmap$Q = Q
	tr_wSimmap$logL = logL
	class(tr_wSimmap) = c("simmap", "phylo")
	tr_wSimmap

	tr_wSimmap$maps
	tr_wSimmap$mapped.edge

	return(tr_wSimmap)
	} # END BSM_to_phytools_SM


	# Get the sum of one state
	get_sum_statetime_on_branch <- function(statename_to_sum, branch_history_map)
		{
		TF = names(branch_history_map) == statename_to_sum
		total_residence_time = sum(branch_history_map[TF])
		return(total_residence_time)
		}



BSMs_to_phytools_SMs <- function(res, clado_events_tables, ana_events_tables)
	{
	simmaps_list = list()
	
	if (class(clado_events_tables) != "list")
		{
		txt = "WARNING from BSMs_to_phytools_SMs(): Input 'clado_events_tables' was not a list, so we will assume it is instead a single Biogeographical Stochastic Map (BSM) table in data.frame format. Therefore, BSM_to_phytools_SM() is being run instead."
		warning(txt)
		tr_wSimmap = BSM_to_phytools_SM(res=res, clado_events_table=clado_events_tables, ana_events_table=ana_events_tables)
		return(tr_wSimmap)
		} # END if (class(clado_events_tables) != "list")
	
	for (i in 1:length(clado_events_tables))
		{
		simmaps_list[[i]] = BSM_to_phytools_SM(res=res, clado_events_table=clado_events_tables[[i]], ana_events_table=ana_events_tables[[i]])
		}
		
	class(simmaps_list) = c("multiSimmap", "multiPhylo")
	return(simmaps_list)
	} # END BSMs_to_phytools_SMs


# Converts a BioGeoBEARS BSM to phytools format, then 
# counts branchlength in each state FOR A TIMEPERIOD
# like timeperiod=c(0,1)
count_brlen_in_each_state <- function(timeperiods=c(0,1), res, trtable, clado_events_table, ana_events_table)
	{
	# Convert BioGeoBEARS BSMs to phytools stochastic maps (SMs)
	phytools_SM = BSM_to_phytools_SM(res, clado_events_table, ana_events_table)
	mapped_edge_abs_times = phytools_SM$mapped.edge
	
	# Construct a TIMED matrix with mapped edges in absolute time,
	# rather than time since beginning
	maps = phytools_SM$maps
	max_numchanges = max(sapply(X=phytools_SM$maps, FUN=length))
	mapped_edge_matrix = matrix(data=NA, nrow=nrow(mapped_edge_abs_times), ncol=max_numchanges+1)
	mapped_edge_matrix[,1] = 0.0
	statenames = colnames(mapped_edge_abs_times)
	timeperiods_bp = timeperiods
	colnames(mapped_edge_matrix) = unlist(rep(statenames, times=50))[1:ncol(mapped_edge_matrix)]
	head(mapped_edge_matrix)
	
	
	# Cumulative times of state changes along branches
	# Absolute times (times bp, before present)
	#cumtimes = matrix(data=0.0, nrow=nrow(mapped_edge_abs_times), ncol=ncol(mapped_edge_abs_times)+1)
	#abstimes = matrix(data=0.0, nrow=nrow(mapped_edge_abs_times), ncol=ncol(mapped_edge_abs_times)+1)
	cumtimes_list = list()
	abstimes_bp_list = list() # In branch order; i.e. the branch behind the node
	sums_table = NULL
	tree_age = max(trtable$time_bp)
	for (i in 1:length(maps))
		{
		map = maps[[i]]
		brnum = i
		TF = trtable$parent_br == brnum
		TF[is.na(TF)] = FALSE
		nodenum = trtable$node[TF]
		
		time_bp_node = trtable$time_bp[TF]
		time_brlen = trtable$edge.length[TF]
		
		time_above_root_brbot = tree_age - (time_bp_node + time_brlen)
		time_bp_root_brbot = time_bp_node + time_brlen
		
		cumtime_map = c(time_above_root_brbot, map)
		abstime_map = c(time_bp_root_brbot, map)
		for (j in 1:length(map))
			{
			if (j == 1)
				{
				cumtime_map[j+1] = cumtime_map[j] + map[j]
				abstime_map[j+1] = abstime_map[j] - map[j]
				} else {
				cumtime_map[j+1] = cumtime_map[j] + map[j]
				abstime_map[j+1] = abstime_map[j] - map[j]
				}
			}
		names(cumtime_map) = c(names(map), names(map)[length(names(map))])
		names(abstime_map) = c(names(map), names(map)[length(names(map))])
		
		cumtimes_list[[i]] = cumtime_map
		abstimes_bp_list[[i]] = abstime_map
		}
	sums = sapply(X=abstimes_bp_list, FUN=total_brlen_each_state, statenames=statenames, timeperiods_bp=timeperiods_bp, simplify=FALSE)
	sums
	sums_table = matrix(data=unlist(sums), ncol=2, byrow=TRUE)
	sums_by_state = colSums(sums_table)
	names(sums_by_state) = statenames
	timewidths_by_state = as.data.frame(sums_table, stringsAsFactors=FALSE)
	names(timewidths_by_state) = statenames
	#head(sums_table)
	#sum(sums_table)
	return(timewidths_by_state)
	}

# Total branchlengths in each state
# This ASSUMES the "map" is 
# * states in order along the branch, named by the starting state (except the last column, which is the end)
# * times are in times_bp
total_brlen_each_state <- function(abstime_map, statenames, timeperiods_bp)
	{
	timebin_bot = timeperiods_bp[2]
	timebin_top = timeperiods_bp[1]
	sums = rep(0.0, times=length(statenames))
	names(sums) = statenames
	
	
	for (j in 1:(length(abstime_map)-1))
		{
		bsm_time_bp_bot = abstime_map[j]
		bsm_time_bp_top = abstime_map[j+1]
		statename = names(abstime_map[j])
		timeperiod_too_old_TF = bsm_time_bp_top > timebin_bot
		timeperiod_too_young_TF = bsm_time_bp_bot < timebin_top
		if ((timeperiod_too_old_TF + timeperiod_too_young_TF) == 0)
			{
			timewidth = bsm_time_bp_bot - bsm_time_bp_top

			amount_to_subtract1 = timebin_top - bsm_time_bp_top
			amount_to_subtract1[amount_to_subtract1 < 0] = 0

			amount_to_subtract2 = bsm_time_bp_bot - timebin_bot
			amount_to_subtract2[amount_to_subtract2 < 0] = 0
	
			timewidth = timewidth - amount_to_subtract1 - amount_to_subtract2
			} else {
			timewidth = 0.0
			}
		TF = statenames == statename
		sums[TF] = sums[TF] + timewidth
		}
	return(sums)
	} # END total_brlen_each_state <- function(abstime_map, statenames, timeperiods_bp)
nmatzke/BioGeoBEARS documentation built on April 11, 2024, 2:16 a.m.