R/DivE.R
In DivE: Diversity Estimator

Documented in CombDM Curvature DiveMaster DivSampleNum DivSubsamples FitSingleMod plot.FitSingleMod print.DiveMaster print.DivSubsamples print.FitSingleMod print.ScoreSingleMod print.summary.DiveMaster print.summary.DivSubsamples print.summary.FitSingleMod print.summary.ScoreSingleMod ScoreSingleMod summary.DiveMaster summary.DivSubsamples summary.FitSingleMod summary.ScoreSingleMod

###############################################################################
# DivE - Diversity Estimator
# Authors: Daniel Laydon, Aaron Sim, Charles Bangham, Becca Asquith
# Date: 03 Feb 2020
# Version: 1.2
# 
# Contents:
#   A. SUBSAMPLE AND RAREFACTION DATA GENERATION
#     1. Other functions
#     2a-2b. Subsample sizes generation functions
#     3a-3h. Rarefaction and diversity data generation functions (incl. summary)
#     4a.    Curvature function
#   B. CURVE FITTING
#     1a-1n. Supporting fitting functions
#     2a-2b. Fitting functions
#     3a-3e. Fitting output functions (incl. summary)
#   C. SCORING AND MODEL COMPARISON 
#     1a-1f. Rounding functions
#     2a.    Scoring single model 
#     3a-3d. Scoring single model output
#     4a.    Comparison of models
#     5a-5d. Comparison of models output
#     6a.    Combining separate runs
#     7a.    Diversity estimate for different population size
###############################################################################

######################## A. SUBSAMPLE DATA GENERATION #########################
######################## A1. Function to format input #########################
# Two formats accepted:
# i. dataframe with two variables - species id, number of species
# ii. dataframe, vector or matrix with one dimension
FormatInput <- function(x, ...) 
{
	if (is.data.frame(x) && (dim(x)[2] == 2)) {
		as.character(rep(x[,1], x[,2]))
	} else if (is.vector(x)) {
		as.character(x)
	} else if ((is.data.frame(x) && dim(x)[2] == 1) || (is.matrix(x) && dim(x)[2] == 1)) {
		as.character(x[,1])
	} else {
		stop("Incorrect sample input format - reformat and try again")
	}
}


############## A2a. Function to generate vector of subsample lengths #############
# Inputs: Main sample, desired number of subsamples
# By default produces a vector of length six with equally spaced subsample sizes
GenSubsampLengths <- function(main.samp, N_Subsamp) 
{
	if ((length(N_Subsamp))!=1) {
		sort(N_Subsamp, decreasing=TRUE)
	} else {
		SampInt <- length(main.samp)/N_Subsamp
		sort(round(seq(from=SampInt, to=length(main.samp), by=SampInt)), decreasing=TRUE)
	}
}

############### A2b. Generate subsample lengths function #################
DivSampleNum <- function(ms, n=6) 
{ 
	if ((length(n)==1) && (n<2)) { 
		stop('Number of nested subsamples (subsizes) must be 2 or greater')
	}
	main.samp <- FormatInput(ms)
	GenSubsampLengths(main.samp=main.samp, N_Subsamp=n)
}


############ A3a. Function to generate vector of rarefaction datapoints ############
# Inputs: Any sample, desired interval between rarefaction points, maximum data size of subsample, minimum data size of subsample
GenRarefacLengths <- function(samp, N_Rarefac, Max_Rarefac=length(FormatInput(samp)), Min_Rarefac=1) 
{
	RarefacInt <- N_Rarefac
	out <- round(seq(from=Min_Rarefac, to=Max_Rarefac, by=RarefacInt))
	if (out[length(out)]==Max_Rarefac) 
	{
		return (out)
		
	} else 
	{
		out <- c(out, Max_Rarefac)
		return (out)
	}
}

############ A3b. Function to determine diversity of a (sub)sample  ###########
# Inputs: Any vector
DivCount <- function(samp) {length(unique(samp))}

##### A3c. Function to generate the sample diversity values at rarefaction datapoints for fitting ####
# Inputs: Sample, interval between rarefaction datapoints, minimum data size of subsample, Iterations, maximum data size of subsample 
GenRarefacDiv <- function(samp, N_Rarefac, Min_Rarefac=1, B, Max_Rarefac=length(FormatInput(samp))) 
{
	l <- length(samp)
	RarefacLengths <- GenRarefacLengths(samp, N_Rarefac, Max_Rarefac, Min_Rarefac)
	s <- length(RarefacLengths)
	OrderVec <- as.vector(apply(matrix(sample(1:(l*B)), nrow=B, ncol=l), 1, order))
	ShuffleMat <- matrix(samp[OrderVec], nrow=B, ncol=l, byrow=TRUE)
	RarefacDivMean <- rep(1, s)
	RarefacDivSD <- rep(0, s)
	
	for (i in (2:s)) 
	{
		temp <- apply(ShuffleMat[,1:RarefacLengths[i]], 1, DivCount)
		RarefacDivMean[i] <- mean(temp)
		RarefacDivSD[i] <- sd(temp)
	}
	list(RarefacXAxis=RarefacLengths, 
			RarefacYAxis=RarefacDivMean,
			div_sd=RarefacDivSD,
			NResamples=B)
}

############# A3d. Generate subsamples/diversity function #############
DivSubsamples <- function(mainsamp, nrf, minrarefac=1, maxrarefac=length(FormatInput(mainsamp)), NResamples=1000) 
{ 
	samp <- FormatInput(mainsamp)
	
	xyvals <- GenRarefacDiv(samp=samp, N_Rarefac=nrf, Min_Rarefac=minrarefac, B=NResamples, Max_Rarefac=maxrarefac)
	
	xyvals$call <- match.call()
	class(xyvals) <- "DivSubsamples"
	xyvals
}

################ A3e. Print method -  subsample/diversity data ################
print.DivSubsamples <- function(x, ...) 
{
	cat("RarefacXAxis:\n")
	print(x$RarefacXAxis)
	cat("\nRarefacYAxis (Mean diversity values):\n")
	print(x$RarefacYAxis)
}

################ A3f. Summary method: subsample/diversity data ################
summary.DivSubsamples <- function(object, ...) 
{
	TotDiv <- object$RarefacYAxis[length(object$RarefacYAxis)]
	TotRarefac <- length(object$RarefacXAxis)
	SampSize <- object$RarefacXAxis[TotRarefac]
	N_Iter <- object$NResamples
	ave.sdpc <- mean(object$div_sd/object$RarefacYAxis)
	TAB <- cbind(Subsample.size=SampSize,
			Subsample.diversity=TotDiv,
			No.of.rarefac.points=TotRarefac,
			Iterations=N_Iter,
			Ave.StdErr=ave.sdpc)
	dss.sum <- list(call=object$call, rsum=TAB)
	class(dss.sum) <- "summary.DivSubsamples"
	dss.sum
}

############ A3g. Print summary method -  subsample/diversity data ############
print.summary.DivSubsamples <- function(x, ...) 
{
	cat("Call:\n")
	print(x$call)
	cat("\nSubsample data summary:\n")
	print(x$rsum)
}

############ A3h. Method to extract the relevant nested subset of a DivSubsamples object ############
# Inputs: divsubsample object, size of nested subsample desired
DivSubsamples_Nested <- function(dss, maxsamp) 
{
	DSStemp <- dss
	EndIndex <- which.min(abs(maxsamp-dss$RarefacXAxis))
	DSStemp$RarefacXAxis <- dss$RarefacXAxis[1:EndIndex]
	DSStemp$RarefacYAxis <- dss$RarefacYAxis[1:EndIndex]
	DSStemp$div_sd <- dss$div_sd[1:EndIndex]
	return (DSStemp)
}


############ A4a. Method to calculate the curvature of a rarefaction curve ############
# Inputs: divsubsample object
Curvature <- function(dss) 
{
	if ((length(dss) > 1) && inherits(dss[[1]], "DivSubsamples")) 
	{
		SubSizes = c()
		for (Sub in 1:length(dss)) 
			SubSizes[Sub] = tail(dss[[Sub]]$RarefacXAxis,1)
		
		LargestSub = order(SubSizes)[1]
		dss = dss[[LargestSub]]
	}
	x_val <- dss$RarefacXAxis
	y_val <- dss$RarefacYAxis
	
	x_end <- tail(x_val, 1)
	y_end <- tail(y_val, 1)
	
	AnB <- 0.5*x_end*y_end
	
	x_vec <- c(x_val[1], diff(x_val))
	
	y_vec_left <- c(0,y_val)[1:(length(y_val))]
	y_vec_right <- y_val[1:length(y_val)]
	y_vec <- (y_vec_right + y_vec_left)/2
	
	A <- sum(x_vec*y_vec) - AnB
	
	output <- A/AnB
	return (output)
}

############################# B. CURVE FITTING ################################

####### B1a. Function to predict the diversity values from subsample x-values according to a given model #######
# Input: Model id, rarefaction x-axis values, model parameters
# Output: dataframe of x and y axis plotting values
PredDiv <- function(model, rarefac, params) 
{
	model.param <- as.list(params)
	div.temp <- model(rarefac, model.param)
	if(any(div.temp=="NaN")) div.temp <- rep(Inf, length(rarefac)) 	 # Ensures an infinite ssr
	data.frame(RarefacXAxis=rarefac, RarefacYAxis=div.temp)
}

####### B1b. Function to calculate the residuals (modCost) #######
# Input: Model id, model parameters, DivSubsamples object
ModelCost <- function(model, params, dss) 
{
	xypred <- PredDiv(model, dss$RarefacXAxis, params)
	xyactual <- data.frame(RarefacXAxis=dss$RarefacXAxis, RarefacYAxis=dss$RarefacYAxis)
	modCost(xypred, xyactual, x="RarefacXAxis")
}

####### B1c. Function to calculate the discrepancy as the mean of pointwise percentage error #######
# Input: Model id, model parameters, DivSubsamples object
ModelCostAbs <- function(model, params, dss) 
{
	ypred <- PredDiv(model, dss$RarefacXAxis, params)$RarefacYAxis
	yactual <- dss$RarefacYAxis
	mean(abs((ypred-yactual)/yactual))
}

####### B1d. Function to convert parameter ranges to vectors #######
# Input: model.set, Param.ranges (possibly truncated)
ConvertRanges <- function(model.set, param.ranges) 
{
	BoundsLower <- list()
	BoundsUpper <- list()
	for (mod in 1:length(model.set)) 
	{
		BoundsLower[[names(model.set)[mod]]] = param.ranges[[mod]]["Lower",]
		BoundsUpper[[names(model.set)[mod]]] = param.ranges[[mod]]["Upper",]
	}
	list(lower=BoundsLower, upper=BoundsUpper)
}

####### B1e. Function to perform a global Fit #######
# Input: Function, initial parameters, lower parameter bounds, upper parameter bounds, Max no. of iterations, minimum variance
GlobalFit <- function(func, init.list, lower.bd, upper.bd, numit, varleft) 
{
	cat("Choosing optimal initial parameters for global fit", "\n")
	init.param <- (lower.bd + upper.bd)/2
	cost.lowest <- func(init.param)$model
	
	for (i in 1:dim(init.list)[1]) 
		if (all((init.list[i,] >= lower.bd)) & all((init.list[i,] <= upper.bd))) 
		{
			cost.temp <- func(init.list[i,])$model
			if (cost.temp < cost.lowest) 
			{
				cost.lowest <- cost.temp
				init.param <- init.list[i,]
			}
		}
	cat("Performing global fit", "\n")
	modFit(f = func, p = init.param, lower=lower.bd, upper=upper.bd,       # "Pseudo" Global Fit
			method = "Pseudo", control = c(numiter = numit, varleft=varleft, verbose=TRUE))
}

####### B1f. Function to perform a local Fit #######
# Input: Function, initial parameters, lower parameter bounds, upper parameter bounds, Max no. of iterations, minimum variance
LocalFit <- function(func, init.param, lower.bd, upper.bd) 
{
	cat("Performing local fit", "\n")
	localfit <- modFit(f = func, p = init.param, lower=lower.bd, upper=upper.bd,       # Local Fit
			method = "Marq")
	return(localfit)
}

####### B1g. Function to perform a combined global-local Fit #######
# Input: Function, initial parameters, lower parameter bounds, upper parameter bounds, Max no. of iterations, minimum variance
CombinedFit <- function(func, init.param, lower.bd, upper.bd, numit, varleft) 
{
	g.fit <- GlobalFit(func, init.param, lower.bd, upper.bd, numit, varleft)
	LocalFit(func, g.fit$par, lower.bd, upper.bd)
}

####### B1h. Function to convert function "funk" to function of a single variable #######
# Input: function, parameters
SingleVar <- function(funk, params) 
{
	fn = function(x)
	{
		funk(x, params)
	}
	return(fn)
}

####### B1i. Function to find intersections between curves #######
# Input: function, parameters1, parameter2, lower x-limit, upper x-limit
Crossings <- function(model, param1, param2, xvals) 
{
#	yvals1 <- model(xvals, param1)
#	yvals2 <- model(xvals, param2)
#	s1 <- try(SpatialLines(list(Lines(list(Line(cbind(xvals , yvals1))) , ID=1))), silent = TRUE)
#	s2 <- try(SpatialLines(list(Lines(list(Line(cbind(xvals , yvals2))) , ID=1))), silent = TRUE)
#	
#	temp.int <- gIntersection(s1, s2)
#	int.len <- length(temp.int)
#	
#	if (int.len != 0) 
#		row.names(temp.int) <- seq(1,int.len)
#	
#	intersections <- try(as.data.frame(temp.int), silent=TRUE)
	intersections <- NULL
	intersections
}

####### B1j. Function to evaluate distance between curves #######
# Input: function, parameters1, parameter2, lower x-limit, upper x-limit
Simm <- function(model, param1, param2, lowerlimit, upperlimit, tot.length) 
{
	intersections <- Crossings(model, param1, param2, seq(lowerlimit, upperlimit, length.out=tot.length))
	
	# No intersections
	if (length(intersections) == 0  |  inherits(intersections, "try-error")) 
	{
		dist <- abs(as.numeric(as.character(integrate(SingleVar(model, param1), lower = lowerlimit, upper = upperlimit))[1])  -
						as.numeric(as.character(integrate(SingleVar(model, param2), lower = lowerlimit, upper = upperlimit))[1]))
		return(dist)
		
	} else if (length(intersections) != 0 & !inherits(intersections, "try-error")) 
	{
		# with intersections
		limits <- c(lowerlimit)
		for (i in (1:(dim(intersections)[1]))) limits <- c(limits, intersections[i,1])
		limits <- c(limits, upperlimit)
		
		dist <- 0
		for(interval in 1:(dim(intersections)[1]+1)) 
		{
			dist <- dist + abs(as.numeric(as.character(integrate(SingleVar(model, param1), lower=limits[interval],
													upper=limits[interval+1]))[1]) -
							as.numeric(as.character(integrate(SingleVar(model, param2), lower=limits[interval], 
													upper=limits[interval+1]))[1]))
		}
		return(dist)
	}
}

####### B1k. Function to evaluate the area under the reference curve #######
# Input: function, parameters1, lower x-limit, upper x-limit
RefArea <- function(model, param1, lowerlimit, upperlimit) 
{
	total.area <- abs(as.numeric(as.character(integrate(SingleVar(model, param1), lower = lowerlimit, upper = upperlimit))[1]))
	return(total.area)
}

####### B1l. Function to evaluate the mean squared distances between curves #######
# Input: function, parameters1, parameter2, lower x-limit, upper x-limit
MSRcurves <- function(dist) 
{
	return(sum(dist*dist)/(2*nrow(dist)))
}

####### B1m. Function to evaluate the monotoniticy of the fitted curve #######
# Input: Model, parameters, minimum datapoint, maximum datapoint, number of points to test
IsMonotonic <- function(model, params, min.samp, max.samp, tot.length) 
{
	xvals <- seq(min.samp, max.samp, length.out=tot.length)
	all(diff(model(xvals, params))>=0)
}

####### B1n. Function to evaluate the negativity of the 2nd derivative of the fitted curve #######
# Input: Model, parameters, minimum data size, maximum data size, number of points to test
IsSlowing <- function(model, params, min.samp, max.samp, tot.length) 
{
	xvals <- seq(min.samp, max.samp, length.out=tot.length)
	all(round(diff(diff(model(xvals, params))), 5)<=0)
}


####### B2a. Fit a single sample to a single model #######
FitSample <- function(model, init.param, lower.bd, upper.bd, dss, numit, varleft) 
{
	func <- function(p) 
	{
		ModelCost(model, p, dss)
	}
	CombinedFit(func, init.param, lower.bd, upper.bd, numit, varleft)
}

####### B2b. Fit all samples to a single model #######
# Input: SS=list of subsample sizes, model, initial parameters, parameter boundaries,
#       list of diversity subsample objects, numit, varleft, population total estimate, vector of original samples,
#       minimum sample size (for criteria 3 & 4)
FitAllSubs <- function(SS, model.list, init.param, param.range, dSS, numit, varleft, tot.pop, v1, minsampsize) 
{
	###### Extract model and name ######
	model <- model.list[[1]]
	model.name <- names(model.list)
	
	###### Create lower and upper boundaries ######
	lower.bd <- param.range[1,]
	upper.bd <- param.range[2,]
	
	###### Create storage matrices ######
	# Parameter names
	param.mat <- matrix(nrow=length(SS), ncol=ncol(init.param))
	rownames(param.mat) <- as.character(SS)
	colnames(param.mat) <- names(init.param[1,])
	
	# Criteria 1: Sum of squares
	ssr.mat <- matrix(nrow=length(SS), ncol=1)
	rownames(ssr.mat) <- as.character(SS)
	colnames(ssr.mat) <- c("Sum_of_squares")  
	
	# Criteria 1: Mean squared residual
	ms.mat <- matrix(nrow=length(SS), ncol=1)
	rownames(ms.mat) <- as.character(SS)
	colnames(ms.mat) <- c("Mean_squared_residual")
	
	# Criteria 1: Discrepancy - mean abs error
	discrep.mat <- matrix(nrow=length(SS), ncol=1)
	rownames(discrep.mat) <- as.character(SS)
	colnames(discrep.mat) <- c("Mean_absolute_error")
	
	# Criteria 2: Local diversity prediction
	local.mat <- matrix(nrow=length(SS), ncol=1)
	rownames(local.mat) <- as.character(SS)
	colnames(local.mat) <- c("Predicted_local_diversity")
	
	# Criteria 2: Global diversity prediction
	global.mat <- matrix(nrow=length(SS), ncol=1)
	rownames(global.mat) <- as.character(SS)
	colnames(global.mat) <- c("Predicted_global_diversity")  
	
	# Criteria 2: Accuracy to observed dataset diversity
	AccuracyToObserved.mat <- matrix(nrow=length(SS), ncol=1)
	rownames(AccuracyToObserved.mat) <- as.character(SS)
	colnames(AccuracyToObserved.mat) <- c("Accuracy_to_observed_dataset_diversity")
	
	# Criteria 3: Similarity of curves (local and global)
	sim.local.mat <- matrix(rep(0, length(SS)*length(SS)), nrow=length(SS), ncol=length(SS))
	rownames(sim.local.mat) <- as.character(SS)
	colnames(sim.local.mat) <- as.character(SS)
	
	sim.global.mat <- matrix(rep(0, length(SS)*length(SS)), nrow=length(SS), ncol=length(SS))
	rownames(sim.global.mat) <- as.character(SS)
	colnames(sim.global.mat) <- as.character(SS)
	
	# Criteria 3: Mean squared distance between curves (local and global)
	msr.curve.mat <- matrix(nrow=2, ncol=1)
	rownames(msr.curve.mat) <- c("local", "global")
	colnames(msr.curve.mat) <- c("MSR_between_subsample_curves")
	
	# Criteria 3: Distance from reference curve (local and global)
	sim.local.ref <- matrix(nrow=length(SS)-1, ncol=1)
	rownames(sim.local.ref) <- as.character(SS[2:length(SS)])
	colnames(sim.local.ref) <- c("Distance_from_local_reference_curve")
	
	sim.global.ref <- matrix(nrow=length(SS)-1, ncol=1)
	rownames(sim.global.ref) <- as.character(SS[2:length(SS)])
	colnames(sim.global.ref) <- c("Distance_from_global_reference_curve")
	
	# Criteria 4: Plausibility (Monotonicity and decreasing 2nd derivate)
	monotonic.local.mat <- matrix(nrow=length(SS), ncol=1)
	monotonic.global.mat <- matrix(nrow=length(SS), ncol=1)
	slowing.local.mat <- matrix(nrow=length(SS), ncol=1)
	slowing.global.mat <- matrix(nrow=length(SS), ncol=1)
	plausibility.mat <- matrix(nrow=length(SS), ncol=3)
	rownames(monotonic.local.mat) <- as.character(SS)
	rownames(monotonic.global.mat) <- as.character(SS)
	rownames(slowing.local.mat) <- as.character(SS)
	rownames(slowing.global.mat) <- as.character(SS)
	rownames(plausibility.mat) <- as.character(SS)
	colnames(monotonic.local.mat) <- c("Is monotonic (local)")
	colnames(monotonic.global.mat) <- c("Is monotonic (global)")
	colnames(slowing.local.mat) <- c("Has_decreasing_2nd_derivative (local)")
	colnames(slowing.global.mat) <- c("Has_decreasing_2nd_derivative (global)")
	colnames(plausibility.mat) <- c("Is plausible", "Monotonically increasing", "Slowing")
	
	###### Loop through different samples in SS #######
	for (b in 1:length(SS)) 
	{
		cat("Performing fitting routine for sample ", b, "\n")
		if (b>1) rbind(init.param, fit$par) # whether to use meta.list or previous fitting values
		
		fit <- try(FitSample(model, init.param, lower.bd, upper.bd, dSS[[b]], numit, varleft), silent=TRUE)
		
		if(inherits(fit, "try-error")) next 
		
		param.mat[b,] <- fit$par # Assign parameter values
		ssr.mat[b,] <- fit$ssr # Assign sum of squared residuals
		ms.mat[b,] <- fit$ms # Assign mean squared residuals
		discrep.mat[b,] <- ModelCostAbs(model, fit$par, dSS[[b]]) # Assign discrepancy mean absolute residuals (in percentages)
		local.mat[b,] <- model(length(v1), fit$par) # Assign local diversity prediction
		global.mat[b,] <- model(tot.pop, fit$par) # Assign global diversity prediction
		AccuracyToObserved.mat[b,] <- (model(length(v1), fit$par) - length(unique(v1)))/length(unique(v1)) # Assign % accuracy of local diversity predition
		
		# Assign plausibility assessments
		monotonic.local.mat[b,] <- IsMonotonic(model, fit$par, min.samp=minsampsize, max.samp=length(v1), tot.length=length(v1)-minsampsize+1) 
		monotonic.global.mat[b,] <- IsMonotonic(model, fit$par, min.samp=minsampsize, max.samp=tot.pop, tot.length=min(tot.pop-minsampsize+1,2000))
		slowing.local.mat[b,] <- IsSlowing(model, fit$par, min.samp=minsampsize, max.samp=length(v1), tot.length=length(v1)-minsampsize+1) 
		slowing.global.mat[b,] <- IsSlowing(model, fit$par, min.samp=minsampsize, max.samp=tot.pop, tot.length=min(tot.pop-minsampsize+1,2000))
		plausibility.mat[b,2] <- ((monotonic.local.mat[b,])*(monotonic.global.mat[b,]))==1
		plausibility.mat[b,3] <- (slowing.global.mat[b,])==1
		plausibility.mat[b,1] <- (plausibility.mat[b,2]*plausibility.mat[b,3])==1
	}
	
	# Assign similarity measures (local and global)
	norm.fac.local <- try(RefArea(model, param.mat[1,], minsampsize, length(v1)), silent=TRUE)
	norm.fac.global <- try(RefArea(model, param.mat[1,], minsampsize, tot.pop), silent=TRUE)
	
	for (b in 1:length(SS)) 
		for (c in (b:length(SS))) 
			if (b==c) 
			{
				sim.local.mat[b,c] <- 0
				sim.global.mat[b,c] <- 0
				
			} else 
			{
				local.tmp.mat <- try(Simm(model, param.mat[b,], param.mat[c,], lowerlimit=minsampsize,
								upperlimit=length(v1), tot.length=length(v1)-minsampsize+1), silent=TRUE)
				global.tmp.mat <- try(Simm(model, param.mat[b,], param.mat[c,], lowerlimit=minsampsize,
								upperlimit=tot.pop, tot.length=tot.pop-minsampsize+1), silent=TRUE)
				if(!inherits(local.tmp.mat, "try-error") & !inherits(global.tmp.mat, "try-error")) 
				{
					sim.local.mat[b,c] <- local.tmp.mat
					sim.global.mat[b,c] <- global.tmp.mat
				}
			}
	
	sim.local.mat <- sim.local.mat + t(sim.local.mat)
	sim.global.mat <- sim.global.mat + t(sim.global.mat)
	sim.local.mat.tmp <- try(sim.local.mat/norm.fac.local, silent=TRUE)
	sim.global.mat.tmp <- try(sim.global.mat/norm.fac.global, silent=TRUE)
	if (!inherits(sim.local.mat.tmp, "try-error") & !inherits(sim.global.mat.tmp, "try-error")) 
	{
		sim.local.mat <- sim.local.mat.tmp
		sim.global.mat <- sim.global.mat.tmp
	}
	
	for (b in 2:length(SS))
	{
		sim.local.ref[b-1,] <- sim.local.mat[b,1]
		sim.global.ref[b-1,] <- sim.global.mat[b,1]
	}
	
	# Assign msr between sample curves
	msr.curve.mat[1,1] <- MSRcurves(sim.local.mat)
	msr.curve.mat[2,1] <- MSRcurves(sim.global.mat)
	
	list(param=param.mat, ssr=ssr.mat, ms=ms.mat, discrep=discrep.mat, local=local.mat, global=global.mat, AccuracyToObserved=AccuracyToObserved.mat,
			subsamplesizes=SS, datapoints=dSS, modelname=names(model.list), numparam=ncol(param.mat), sampvar=msr.curve.mat,
			mono.local=monotonic.local.mat, mono.global=monotonic.global.mat, slowing.local=slowing.local.mat,
			slowing.global=slowing.global.mat, plausibility=plausibility.mat, dist.local=sim.local.mat, dist.global=sim.global.mat,
			local.ref.dist=sim.local.ref, global.ref.dist=sim.global.ref, popsize=tot.pop, themodel=model)
}


####### B3a. Fit single model function #######
FitSingleMod <- function(model.list, init.param, param.range, main.samp, tot.pop=(100*(DivSampleNum(main.samp,2)[1])),
		numit=10^5, varleft=1e-8, data.default=TRUE, subsizes=6, dssamps=list(),
		nrf=1, minrarefac=1, NResamples=1000, minplaus=10, fitloops=2) 
{ 
	v1 <- FormatInput(main.samp) # Convert samp to a vector of length=main sample length and with species ids
	if (data.default) 
	{
		cat("Creating subsamples and rarefaction data", "\n")
		SS <- DivSampleNum(main.samp, subsizes)
		
		# Create divsubsample object for each sample
		samp <- v1
		dSS <- list()
		
		main.dss <- DivSubsamples(mainsamp=samp, nrf=nrf, minrarefac=minrarefac, maxrarefac=SS[1])
		
		dSS[[1]] <- main.dss
		for (b in (2:length(SS))) 
		{
			max.ss <- SS[b]
			dss_temp <- DivSubsamples_Nested(main.dss, max.ss)
			dSS[[b]] <- dss_temp
		}
	} else 
	{
		SS <- subsizes # largest element of samp.vec must be < length(v1)
		# Create divsubsample object for each sample
		dSS <- dssamps
	}
	
	# Fit the samples
	cat("Fitting loop 1", "\n")
	fitsingle <- FitAllSubs(SS, model.list, init.param, param.range, dSS, numit, varleft, tot.pop, v1, minplaus)
	
	if (fitloops > 1) 
		for (c in (2:fitloops)) 
		{
			cat("Fitting loop", c, "\n")
			rbind(init.param, fitsingle$param)
			fitsingle <- FitAllSubs(SS, model.list, init.param, param.range, dSS, numit, varleft, tot.pop, v1, minplaus)
		}
	fitsingle$call <- match.call()
	class(fitsingle) <- "FitSingleMod"
	fitsingle
}

####### B3b. Print method -  fit of single model #######
print.FitSingleMod <- function(x, ...) 
{
	cat("Fitting results for model:", x$modelname, "\n")
	cat("Model parameters ($param):\n")
	print(x$param)
	
	cat("\nSubsample sizes ($subsamplesizes):\n")
	print(x$subsamplesizes)
	
	cat("\nMeasures for discrepancies between rarefaction data and model fits ($discrep, $ssr, $ms):\n")
	print(cbind(x$discrep, x$ssr, x$ms))
	
	cat("\nDiversity predictions and the local prediction accuracy ($AccuracyToObserved, $local, $global):\n")
	print(cbind(x$AccuracyToObserved, x$local, x$global))
	
	cat("\nSimilarity measure: normalised area between curve and largest-sample curve ($local.ref.dist):\n")
	print(x$local.ref.dist)
	
	cat("\nPlausibility measures: monotonicity and decreasing 2nd derivative ($plausibility):\n")
	print(x$plausibility)
}

####### B3c. Summary method: fit of single model #######
summary.FitSingleMod <- function(object, ...) 
{
	tot.subsamp <- length(object$datapoints)
	num.rf <- length((object$datapoints[[1]])$RarefacXAxis)
	fitted.model <- object$modelname
	n.param <- object$numparam
	
	TAB <- cbind(No.of.subsamples=tot.subsamp,
			No.of.rarefaction.points=num.rf,
			Fitted.model.name=fitted.model,
			No.of.model.parameters=n.param)
	fit.sum <- list(call=object$call,
			rsum=TAB)
	class(fit.sum) <- "summary.FitSingleMod"
	fit.sum
}

####### B3d. Print summary method -  fit of single model #######
print.summary.FitSingleMod <- function(x, ...) 
{
	cat("Call:\n")
	print(x$call)
	cat("\nSingle model fit setup summary:\n")
	print(x$rsum)
}

####### B3e. Plot method -  fit of single model #######
plot.FitSingleMod <- function(x, range="local", ...) 
{
	points.colours <- c("red", "green", "brown", "orange", "blue", "yellow")
	
	# Define plot limits
	ylim.list <- c()
	for (i in 1:length(x$subsamplesizes)) 
	{
		# Define fitted curve
		func1 <- function(z) {
			x$themodel(z, x$param[i,])
		}
		
		if (range=="local") xvalue <- x$datapoints[[i]]$RarefacXAxis[length(x$datapoints[[i]]$RarefacXAxis)]	else 	xvalue <- x$popsize
		
		yvalue <- x$datapoints[[i]]$RarefacYAxis[length(x$datapoints[[i]]$RarefacYAxis)]
		ypred <- func1(xvalue)
		max.y <- max(yvalue, ypred)
		ylim.list[i] <- max.y
	}
	ylim.choose <- max(ylim.list)
	
	# Plot main sample diversity
	Sample_number <- x$datapoints[[1]]$RarefacXAxis[length(x$datapoints[[1]]$RarefacXAxis)]
	Species_count <- x$datapoints[[1]]$RarefacYAxis[length(x$datapoints[[1]]$RarefacXAxis)]
	
	if (range=="local") 
	{
		xupper <- Sample_number*1.1
		yupper <- ylim.choose*1.1
	
	} else if (range=="global") 
	{
		xupper <- x$popsize*1.1
		yupper <- ylim.choose*1.1
	}
	plot(x=Sample_number, y=Species_count, pch=20, col="purple", cex=3, xlim=c(0,xupper),
			ylim=c(0, yupper), ...)
	
	# Plot curves and points
	for (j in 1:length(x$subsamplesizes)) 
	{
		# Plot the points
		points(x=x$datapoints[[j]]$RarefacXAxis,
				y=x$datapoints[[j]]$RarefacYAxis,
				pch=4, col=points.colours[j])
		
		# Define the fitted function
		func1 <- function(z) {
			x$themodel(z, x$param[j,])
		}
		
		# Plot the fitted function
		if (range=="local") 
		{
			xval <- seq(0, Sample_number, 0.1)
			yval <- func1(xval)
		
		} else if (range=="global") 
		{
			xval <- seq(0, x$popsize, 1)
			yval <- func1(xval)
		}
		lines(x=xval, y=yval, lty=1, col=points.colours[j], lwd=2)
	}
	
	# Purpledot
	points(x=Sample_number, y=Species_count, pch=20, col="purple", cex=3)
}


####################### C. SCORING AND MODEL COMPARISON #######################
####### C1a. Whole number check #######
# Input: Single number
# Output: TRUE/FALSE
IsWholeNumber <- function(x, tol = .Machine$double.eps^0.5) 
{
	return(abs(x - round(x)) < tol)  
}

####### C1b. Binning function - Single number, absolute divisor #######
# Input: Number, divisor
# Output: Number rounded UP to multiple of divisor
BinFunkSingleNumber <- function(x,d) 
{
	if (is.na(x))  {
		return(x)
	} else if ( x == 0 )	{	## need this line as otherwise 0 value gets 0 score, want minimum score to be 1, regardless of divisor.
		return(1)
	}	else {
		X = abs(x)
		res = X%%d
		if(IsWholeNumber(X/d))	{	
			return((	((X-res)/d) ) )				
		}	else {
			return( (	((X-res)/d) + 1	) 	)
		}
	}
}

####### C1c. Binning function - Vector of numbers, absolute divisor #######
# Input: Vector of numbers, divisor
# Output: Vector of number rounded UP to multiple of divisor
BinFunk <- function(xx,dd) # this rounds each entry of xx DOWN to the last multiple of dd. 
{  	
	return(sapply(xx , BinFunkSingleNumber, d = dd))
}

####### C1d. Binning function - Single number, Percentage precision #######
# Input: Number, precison
# Output: Number rounded to specified precision
Bfunprop = function(a, percent) 
{
	if (is.na(a)) {
		return(a)
	} else {
		if (a >= 0) {
			return( floor((1/percent) * a)/(1/percent)	)
		}
		if (a < 0) {
			return( ceiling((1/percent) * a)/(1/percent)	)
		}  
	}
}

####### C1e. Binning function - Vector of numbers, Percentage precision #######
# Input: Vector of numbers, precison
# Output: Vector of numbers rounded to specified precision
BBprop=function(aa,percentwecareabout) 
{  
	if(percentwecareabout == 0)	{
		return(aa)
	} else	{
		return(sapply(aa , Bfunprop, percent = percentwecareabout))
	}
}

####### C1f. Geometric mean function #######
# Input: Top estimates
# Output: Geometric mean
GeoMean <- function(x) 
{
	if (any(is.na(x))) {warning('global diversity values are unreliable due to one or more poor model fits amongst top models')}
	return(exp(mean(log(x), na.rm=TRUE)))
}


####### C2a. Scoring function - Single model #######
# Input: FitsingleMod, Sample aggregation method ("Equal", "Proportional", "InvProportional"), 
#         Criteria rounding values c(fit, accuracy, local similarity), plausibility penalty score
# Output: Single model scores - Five criteria - 1. Fit, 2. Accuracy, 3. Local similarity, 
#         4. Monotonicity (local-global combined), 5. Plausible-global
SingleModScore <- function(fsm, precision.lv = c(0.0001, 0.005, 0.005), plaus.pen=500) 
{
	nS <- length(fsm$subsamplesizes) # No. of samples
	samp.wts <- rep(1/nS, nS)
	
	
	## Un-aggregated scores
	fit.scores <- BinFunk(fsm$discrep, precision.lv[1])
	accuracy.scores <- BinFunk(fsm$AccuracyToObserved, precision.lv[2])
	similarity.scores <- BinFunk(fsm$local.ref.dist, precision.lv[3])
	plausible.scores <- rep(500, nS)
	
	# Monotonic and plausibility scores penalties
	for (i in 1:nS) {
		
		if (is.na(fsm$plausibility[i,1]) || fsm$plausibility[i,1]==FALSE) {
			plausible.scores <- plaus.pen
		}
	}
	
	## Aggregated scores
	fit.agg <- sum(fit.scores*samp.wts)
	accuracy.agg <- sum(accuracy.scores*samp.wts)
	similarity.agg <- sum(similarity.scores*samp.wts[2:length(samp.wts)])/sum(samp.wts[2:length(samp.wts)])
	plausible.agg <- sum(plausible.scores*samp.wts)
	list(fit=fit.agg, accuracy=accuracy.agg, similarity=similarity.agg, plausibility=plausible.agg,
			binsize=precision.lv, plausibility.penalty=plaus.pen,
			modname=fsm$modelname)
}


####### C3a. Score single model function #######
ScoreSingleMod <- function(fsm, precision.lv = c(0.0001, 0.005, 0.005), plaus.pen=500) 
{
	scoresingle <- SingleModScore(fsm, precision.lv, plaus.pen)
	scoresingle$call <- match.call()
	class(scoresingle) <- "ScoreSingleMod"
	scoresingle
}

####### C3b. Print method -  score of single model #######
print.ScoreSingleMod <- function(x, ...) 
{
	cat("Scoring results ($[colnames]):\n")
	score.output <- matrix(c(x$fit, x$accuracy, x$similarity, x$plausibility), nrow=1, ncol=4)
	rownames(score.output) <- as.character(x$modname)
	colnames(score.output) <- c("discrepancy", "accuracy", "similarity", "plausibility")
	print(score.output)
}

####### C3c. Summary method: score of single model #######
summary.ScoreSingleMod <- function(object, ...) 
{
	fitted.model <- object$modname
	binsize <- object$binsize
	plauspen <- object$plausibility.penalty
	
	TAB <- cbind(Model.name=fitted.model,
			precision.fit=binsize[1],
			precision.accuracy=binsize[2],
			precision.similarity=binsize[3],
			Plausibility.penalty=plauspen)
	score <- list(call=object$call,
			rsum=TAB)
	class(score) <- "summary.ScoreSingleMod"
	score
}

####### C3d. Print summary method -  score of single model #######
print.summary.ScoreSingleMod <- function(x, ...) 
{
	cat("Call:\n")
	print(x$call)
	cat("\nSingle model scoring setup summary:\n")
	print(x$rsum)
}


####### C4a. Combine attributes of a single model #######
# Input: ScoreSingleMod object, Criteria weights (zero=exclude from scoring)
# Output: Single model score
CombineCriteria <- function(ssm, crit.wts=c(1.0, 1.0, 1.0, 1.0)) 
{
	vect <- c(ssm$fit, ssm$accuracy, ssm$similarity, ssm$plausibility)
	vect <- sum((vect*crit.wts)/sum(crit.wts))
	return(vect)
}

####### C4b. Fit, score and compare multiple models #######
# Input: List of models, List of initial parameters, List of Lower bounds, List of upper bounds,
#       main.samp, tot.pop, numit, varleft, subsizes, dssamps, nrf, minrarefac, NResamples, minplaus,
#       precision.lv, plaus.pen, crit.wts, fitloops, numpred
# Output: List of i) Model scores ii) List of dss iii) List of ssm iv) predictions x3 vii) original model list
MultipleScoring <- function(models, init.params, param.ranges, main.samp, tot.pop, numit, varleft,
                             subsizes, dssamps, nrf, minrarefac, NResamples, minplaus, precision.lv, plaus.pen, crit.wts,
                             fitloops, numpred) 
{
	num.mod <- length(models)
	fmm <- list()
	ssm <- matrix(rep(NA, num.mod*4), nrow=num.mod, ncol=4)
	colnames(ssm) <- c("fit", "accuracy", "similarity", "plausibility")
	mod.score <- matrix(rep(NA, num.mod), nrow=num.mod, ncol=1)
	mod.rownames <- as.character(rep(NA, num.mod))
	
	# Create the samples and divsubsample object
	if (length(dssamps)==0) 
	{
		cat("Creating subsamples and rarefaction data", "\n")
		mas.dss <- list() # Create 3 master samples
		mas.SS <- DivSampleNum(main.samp, subsizes) # Create master vector of 3 samples sizes
		samp <- FormatInput(main.samp) # Format main sample
		
		dss.main <- DivSubsamples(mainsamp=samp, nrf=nrf, minrarefac=minrarefac, maxrarefac=mas.SS[1], NResamples=NResamples)
		mas.dss[[1]] <- dss.main
		for (b in (2:length(mas.SS))) # Create master list of DivSubsamples
		{ 
			max.ss <- mas.SS[b]
			dss <- DivSubsamples_Nested(dss.main, max.ss)
			mas.dss[[b]] <- dss  
		}
		
	} else 
	{
		cat("Loading predefined subsamples", "\n")
		mas.SS <- DivSampleNum(main.samp, subsizes)
		samp <- FormatInput(main.samp) # Format main sample
		
		mas.dss <- dssamps
	}
	
	timeremain = "tbd"
	ptm <- proc.time()[3]
	# Loop throught the different models
	for (i in (1:num.mod)) 
	{
		# Fit model
		cat("Fitting model", i, "of", length(models),"(Est. time remaining:", timeremain, "mins)", "\n")
		fsm.temp <- FitSingleMod(model.list=models[i], init.param=init.params[[i]], param.range=param.ranges[[i]],
				main.samp, data.default=FALSE, tot.pop= tot.pop, numit=numit, varleft=varleft,
				subsizes=mas.SS, dssamps=mas.dss, nrf=nrf, minrarefac=minrarefac, NResamples=NResamples,
				minplaus=minplaus, fitloops=fitloops)
		
		# Score model criteria
		cat("Scoring model", i, "\n")
		ssm.temp <- try(ScoreSingleMod(fsm=fsm.temp, precision.lv = precision.lv, plaus.pen=plaus.pen), silent=TRUE)
		if(inherits(ssm.temp, "try-error")) next  
		
		# Aggregate criteria score
		cat("Aggregating scoring for model", i, "\n")
		mod.score.temp <- CombineCriteria(ssm=ssm.temp, crit.wts=crit.wts)
		
		fmm[[fsm.temp$modelname]] <- fsm.temp
		ssm[i,1] <- ssm.temp$fit
		ssm[i,2] <- ssm.temp$accuracy
		ssm[i,3] <- ssm.temp$similarity
		ssm[i,4] <- ssm.temp$plausibility
		mod.score[i,] <- mod.score.temp
		mod.rownames[i] <- fsm.temp$modelname
		
		# Calculate time
		timeremain <- round(((proc.time()[3] - ptm)*(num.mod - i)/i)/60)
	}
	rownames(mod.score) <- mod.rownames
	colnames(mod.score) <- "Combined score"
	rownames(ssm) <- mod.rownames
	
	# calculate estimate
	num.models <- length(fmm)
	m <- min(numpred, num.models)
	topx_scores <- sort(unique(mod.score))[1:m]
	lenx <- length(topx_scores)
	topx_index <- which(mod.score %in% topx_scores)
	predx.vector <- c()
	for (i in 1:lenx) 
	{
		tmp <- ((fmm[[topx_index[i]]])$global)[1,]
		predx.vector <- c(predx.vector, tmp)
	}
	predx <- GeoMean(predx.vector)
	predx_high <- max(predx.vector)
	predx_low <- min(predx.vector)
	
	list(model.scores=mod.score, fmm=fmm, ssm=ssm, estimate=predx, upper_estimate=predx_high, lower_estimate=predx_low, models=models, m=m)
}


####### C5a. Fit/score multiple models function #######
DiveMaster <- function(models, init.params, param.ranges, main.samp, tot.pop=(100*(DivSampleNum(main.samp,2)[1])), numit=10^5, varleft=1e-8,
                              subsizes=6, dssamps=list(), nrf=1, minrarefac=1, NResamples=1000, minplaus=10, 
                              precision.lv=c(0.0001, 0.005, 0.005), plaus.pen=500,
                              crit.wts=c(1.0, 1.0, 1.0, 1.0), fitloops=2, numpred=5) 
{
  mainfit <- MultipleScoring(models, init.params, param.ranges, main.samp, tot.pop, numit, varleft,
                              subsizes, dssamps, nrf, minrarefac, NResamples, minplaus, precision.lv, plaus.pen, crit.wts, fitloops, numpred)
  mainfit$call <- match.call()
  class(mainfit) <- "DiveMaster"
  mainfit
}

####### C5b. Print method - score of multiple models #######
print.DiveMaster <- function(x, ...) 
{
	cat("Model score ($model.scores):\n")
	print(x$model.scores)
	cat("\nPopulation diversity estimate ($estimate):\n")
	print(x$estimate)
}

####### C5c. Summary method: fit/scores of multiple models #######
summary.DiveMaster <- function(object, ...) 
{
	num.models <- length(object$fmm)
	
	best.index <- which.min(object$model.scores)
	best.model <- (object$fmm[[best.index]])$modelname
	div.pred.from.top.model <- ((object$fmm[[best.index]])$global)[1,]
	
	TAB <- data.frame(Number_of_models_fitted=as.numeric(num.models),
			Best_scoring_model=best.model,
			Estimated_global_diversity=as.numeric(object$estimate),
			Diversity_upperbound=as.numeric(object$upper_estimate),
			Diversity_lowerbound=as.numeric(object$lower_estimate))
	
	res <- list(call=object$call,
			rsum=TAB)
	class(res) <- "summary.DiveMaster"
	res
}

####### C5d. Print summary method -  fits/scores of multiple models #######
print.summary.DiveMaster <- function(x, ...) 
{
	cat("Call:\n")
	print(x$call)
	cat("\nMultiple Model fitting and scoring summary:\n")
	print(x$rsum)
}


####### C6. Combining several DiveMaster objects into a single DiveMaster object #######
# Input: List of DiveMaster objects
# Output: Single combined DiveMaster object
CombDM <- function(dmlist) 
{
	l <- length(dmlist) # number of DiveMaster objects
	out <- dmlist[[1]]
	if (l>1) 
	{
		for (i in 2:l)
		{
			out$model.scores <- rbind(out$model.scores, dmlist[[i]]$model.scores)
			out$ssm <- rbind(out$ssm, dmlist[[i]]$ssm)
			out$fmm <- c(out$fmm, dmlist[[i]]$fmm)
			out$models <- c(out$models, dmlist[[i]]$models)
			out$m <- min(out$m, dmlist[[i]]$m)
		}
		
		# Recompute estimate
		num.models <- length(out$fmm)
		m <- min(out$m, num.models)
		topx_scores <- sort(unique(out$model.scores))[1:m]
		lenx <- length(topx_scores)
		topx_index <- which(out$model.scores %in% topx_scores)
		predx.vector <- c()
		for (j in 1:lenx) 
		{
			tmp <- ((out$fmm[[topx_index[j]]])$global)[1,]
			predx.vector <- c(predx.vector, tmp)
		}
		predx <- GeoMean(predx.vector)
		predx_high <- max(predx.vector)
		predx_low <- min(predx.vector)
		
		out$estimate <- predx
		out$upper_estimate <- predx_high
		out$lower_estimate <- predx_low
	}
	class(out) <- "DiveMaster"
	
	return(out)
}


####### C7. Give a diversity estimate for a different population size based on top-x score models #######
# Input: DiveMaster object, population size, Number of top scores models to include in estimate
# Output: Single population estimate
PopDiversity = function (dm, popsize, TopX = NULL) 
{
	if (is.null(TopX))	m <- dm$m	else 
	{
		if (TopX <= length(dm$fmm)) 	m <- TopX else 	
		{ 	
			m <- min(5, length(dm$fmm))		
			warning("Number of models specified is greater then number of models fitted")
		}	 
	}
	
	topx_scores <- sort(unique(dm$model.scores))[1:m]
	lenx <- length(topx_scores)
	topx_index <- which(dm$model.scores %in% topx_scores)
	predx.vector <- c()
	for (i in 1:lenx) 
	{
		tmp <- dm$models[[topx_index[i]]](popsize, dm$fmm[[topx_index[i]]]$param[1, ])
		predx.vector <- c(predx.vector, tmp)
	}
	predx <- GeoMean(predx.vector)
	predx_high <- max(predx.vector)
	predx_low <- min(predx.vector)
	return(list(estimate = predx, upper_estimate = predx_high, 
					lower_estimate = predx_low))
}