# R/Functions-DPM.R
#
# Defines functions: mbb, criteria, ovl.BB, ovl, trapezoidal, dmdpm_xd,
# var.mdpm_xd, mean.mdpm_xd, mdpm_xd, dbern, dmdpm_xc, var.mdpm_xc,
# mean.mdpm_xc, mdpm_xc, ddpm, var.dpm, mean.dpm, dpm.fit
#
# Documented in dpm.fit

#########################################
##     Dirichlet process mixture of    ##
##  multivariate normal distributions  ##
#########################################

######################
##  Main functions  ##
######################
dpm.fit<-function(y, xd = NULL, L, prior, nsim, nburn, nthin = 1, start = NULL, scale = TRUE){
	## Fit a Dirichlet process mixture of multivariate normals.
	## Dispatches on the presence of discrete (binary) covariates xd:
	## continuous-only data go to mdpm_xc, mixed data to mdpm_xd.
	if(is.null(xd)){
		return(mdpm_xc(y, L, prior, nsim, nburn, nthin, start, scale))
	}
	mdpm_xd(y, xd, L, prior, nsim, nburn, nthin, start, scale)
}
mean.dpm<-function(fit, xc, xd = NULL){
	## Posterior draws of the conditional mean E[y | x] from a dpm.fit
	## object. Dispatches on whether discrete covariates xd are present;
	## when only discrete covariates are used, xc may be left missing.
	if(is.null(xd)){
		return(mean.mdpm_xc(fit, xc))
	}
	## Forward NULL in place of a missing continuous-covariate argument
	xc_arg<-if(missing(xc)) NULL else xc
	mean.mdpm_xd(fit, xc_arg, xd)
}
var.dpm<-function(fit, meanfunc, xc, xd = NULL){
	## Posterior draws of the conditional variance Var[y | x] from a
	## dpm.fit object; meanfunc holds the matching conditional-mean draws
	## (from mean.dpm). Dispatch mirrors mean.dpm: xd present or not,
	## with xc allowed to be missing in the discrete-only case.
	if(is.null(xd)){
		return(var.mdpm_xc(fit, meanfunc, xc))
	}
	xc_arg<-if(missing(xc)) NULL else xc
	var.mdpm_xd(fit, meanfunc, xc_arg, xd)
}
ddpm<-function(fit, y, xd = NULL, ncores = 1){
	## Posterior draws of the conditional density f(y | x) from a dpm.fit
	## object; ncores is passed through to mvnfast::dmvn.
	if(is.null(xd)){
		return(dmdpm_xc(fit, y, ncores))
	}
	dmdpm_xd(fit, y, xd, ncores)
}


########################
##  DPM - Continuous  ##
########################
## DPM-MVN model
mdpm_xc<-function(y, L, prior, nsim, nburn, nthin = 1, start = NULL, scale = TRUE){
	## Blocked Gibbs sampler for a truncated (level L) Dirichlet process
	## mixture of multivariate normals.
	##
	## y:      data; observations in rows (a vector is treated as one column)
	## L:      truncation level of the stick-breaking representation
	## prior:  named list with either fixed values (mu_0, V_0, Psi_0, alpha)
	##         or the corresponding hyperprior parameters (m/S, r/Q,
	##         nu_1/Psi_1, a_alpha/b_alpha); nu_0 is always required
	## nsim:   number of MCMC iterations; nburn: burn-in; nthin: thinning
	## start:  optional list of initial values (omega, mu, sigma)
	## scale:  if TRUE, the sampler runs on whitened data and the stored
	##         draws are transformed back to the original scale
	##
	## Returns list(y = original data, params = per-iteration lists of
	## component parameters omega/mu/sigma).

	## Convert data into a matrix
	y<-cbind(y)

	## Auxiliar variables
	n<-nrow(y)
	p<-ncol(y)

	## Fail early on inconsistent MCMC settings; otherwise the error only
	## surfaces after the full simulation, in the burn-in subsetting
	if(nburn >= nsim){
		stop("nburn must be smaller than nsim.")
	}

	## Check prior information
	if("mu_0" %in% names(prior)){
		## Extract hyperparameters
		mu_0<-prior$mu_0
	}
	else{
		if(all(c("m", "S") %in% names(prior))){
			## Initial value
			mu_0<-rep(0, p)

			## Hyperparameters
			m<-prior$m
			S<-prior$S
			S_1<-solve(S)
		}
		else{
			stop("If mu_0 is not supplied, hyperparameters m and S must be provided.")
		}
	}
	if("V_0" %in% names(prior)){
		## Extract hyperparameters
		V_0<-prior$V_0
		V_0_1<-solve(V_0)
		v0<-FALSE
	}
	else{
		if(all(c("r", "Q") %in% names(prior))){
			## Initial value
			V_0<-diag(p)
			v0<-TRUE

			## Hyperparameters
			r<-prior$r
			Q<-prior$Q
		}
		else{
			stop("If V_0 is not supplied, hyperparameters r and Q must be provided.")
		}
	}
	if("nu_0" %in% names(prior)){
		## Extract hyperparameters
		nu_0<-prior$nu_0
	}
	else{
		stop("Hyperparameter nu_0 must be provided.")
	}
	if("Psi_0" %in% names(prior)){
		## Extract hyperparameters
		Psi_0<-prior$Psi_0
	}
	else{
		if(all(c("nu_1", "Psi_1") %in% names(prior))){
			## Initial value
			Psi_0<-diag(p)

			## Hyperparameters
			nu_1<-prior$nu_1
			Psi_1<-prior$Psi_1
			Psi_1_1<-solve(Psi_1)
		}
		else{
			stop("If Psi_0 is not supplied, hyperparameters nu_1 and Psi_1 must be provided.")
		}
	}
	if("alpha" %in% names(prior)){
		## Extract hyperparameters
		alpha<-prior$alpha
	}
	else{
		if(all(c("a_alpha", "b_alpha") %in% names(prior))){
			## Initial value
			alpha<-1

			## Hyperparameters
			a_alpha<-prior$a_alpha
			b_alpha<-prior$b_alpha
		}
		else{
			stop("If alpha is not supplied, hyperparameters a_alpha and b_alpha must be provided.")
		}
	}

	## Output objects
	params<-list()

	## Scale the data (whitening transform; B is the symmetric square root
	## of the sample covariance, so y <- B^{-1}(y - mean))
	y0<-y
	if(scale){
		m0<-colMeans(y0)
		ev<-eigen(cov(y))
		B<-ev$vectors%*%
			diag(ev$values, nrow = length(ev$values))^(0.5)%*%
			t(ev$vectors)
		B_1<-solve(B)
		y<-t(matrix(apply(y, 1, function(x) B_1%*%(x - m0)), ncol = n))
	}

	## Initial values
	v<-rep(1/L, L)
	v[L]<-1
	comp<-comp_aux<-list()
	if(is.null(start)){
		for(l in 1:L){
			comp[[l]]<-list(omega = 1/L, mu = colMeans(y),
						sigma = cov(y))
		}
	}
	else{
		if(all(c("omega", "mu", "sigma") %in% names(start))){
			## BUG FIX: the weights must be read from `start` BEFORE they
			## can be validated (the old code tested an undefined `omega`),
			## and the unit-sum check must be floating-point tolerant
			omega<-start$omega
			if(!isTRUE(all.equal(sum(omega), 1))){
				stop("Weights must sum up to one.")
			}
			## BUG FIX: as.matrix() ignores ncol; matrix() reshapes a
			## vector of initial means into the intended p x L layout
			mu<-matrix(start$mu, ncol = L)
			sigma<-start$sigma
			for(l in 1:L){
				comp[[l]]<-list(omega = omega[l], mu = mu[, l],
							sigma = sigma[[l]])
			}
		}
		else{
			stop("Initial values for omega, mu and sigma must be specified.")
		}
	}

	## Auxiliar variables
	pi_z<-matrix(, nrow = n, ncol = L)

	for(s in 1:nsim){
		## Compute the weights (stick-breaking construction from v)
		comp[[1]]$omega<-v[1]
		for(l in 2:L){
			comp[[l]]$omega<-v[l]*(1 - v[l - 1])*
						comp[[l - 1]]$omega/v[l - 1]
		}

		## Sample z (component memberships, one per observation)
		for(l in 1:L){
			pi_z[, l]<-comp[[l]]$omega*mvnfast::dmvn(y, comp[[l]]$mu,
						comp[[l]]$sigma)
		}
		z<-apply(pi_z, 1, function(x) sample(1:L, size = 1, prob = x))

		## Number of observations per cluster
		nl<-tabulate(z, L)

		## Sample v (stick-breaking proportions)
		for(l in 1:(L - 1)){
			v[l]<-rbeta(1, nl[l] + 1, alpha + sum(nl[(l + 1):L]))
		}

		## Sample mu and sigma
		if(v0) V_0_1<-solve(V_0)
		for(l in 1:L){
			yz<-matrix(y[z == l, ], ncol = p)

			## Sample mu (conjugate multivariate-normal update)
			syz<-colSums(yz)
			sigma_1<-solve(comp[[l]]$sigma)
			VAR<-solve(V_0_1 + nl[l]*sigma_1)
			MN<-V_0_1%*%mu_0 + sigma_1%*%syz
			comp[[l]]$mu<-MASS::mvrnorm(1, VAR%*%MN, VAR)

			## Sample sigma (conjugate inverse-Wishart update)
			y_star<-sweep(yz, 2, comp[[l]]$mu, "-")
			yty<-t(y_star)%*%y_star
			comp[[l]]$sigma<-CholWishart::rInvWishart(1, nu_0 + nl[l],
								Psi_0 + yty)[, , 1]
		}

		## Sample mu_0 (only when a hyperprior was supplied)
		if(!("mu_0" %in% names(prior))){
			VAR0<-solve(S_1 + L*V_0_1)
			MN0<-S_1%*%m + V_0_1%*%rowSums(matrix(sapply(comp,
				function(x) x$mu), nrow = p))
			mu_0<-MASS::mvrnorm(1, VAR0%*%MN0, VAR0)
		}

		## Sample V_0 (only when a hyperprior was supplied)
		if(!("V_0" %in% names(prior))){
			mu_mu0<-matrix(sapply(comp,
					function(x) x$mu - mu_0), nrow = p)
			V_0<-CholWishart::rInvWishart(1, r + L, Q + mu_mu0%*%t(mu_mu0))[, , 1]
		}

		## Sample Psi_0 (only when a hyperprior was supplied)
		if(!("Psi_0" %in% names(prior))){
			ss<-Reduce("+", lapply(comp, function(x) solve(x$sigma)))
			Psi_0<-rWishart(1, nu_1 + L*nu_0,
						solve(Psi_1_1 + ss))[, , 1]
		}

		## Sample alpha (only when a hyperprior was supplied)
		if(!("alpha" %in% names(prior))){
			alpha<-rgamma(1, a_alpha + L - 1,
						b_alpha - sum(log(1 - v[1:(L - 1)])))
		}

		## Re-scale parameters back to the original data scale
		if(scale){
			for(l in 1:L){
				comp_aux[[l]]<-list(omega = comp[[l]]$omega,
							mu = B%*%comp[[l]]$mu + m0,
							sigma = B%*%comp[[l]]$sigma%*%t(B))
			}
		}
		else{
				comp_aux<-comp
		}

		## Store results
		params[[s]]<-comp_aux
	}

	## Burn-in and thinning
	params<-lapply(seq(nburn + 1, nsim, nthin), function(x) params[[x]])

	return(list(y = y0, params = params))
}
## Conditional mean function of the DPM-MVN model
## Conditional mean function of the DPM-MVN model
mean.mdpm_xc<-function(fit, xnew){
	## Posterior draws of E[y | x] at the covariate points in xnew.
	## The first coordinate of each component is the response, the
	## remaining coordinates are the covariates.
	xnew<-as.matrix(xnew)

	## Stored posterior draws of the mixture parameters
	draws<-fit$params
	ndraws<-length(draws)

	## E[y | x] = sum_l q_l(x) E_l[y | x] / sum_l q_l(x) for one draw
	per_draw<-function(s){
		mix<-draws[[s]]
		## Local weights q_l(x) = omega_l * N(x; mu_x, Sigma_xx)
		q<-lapply(mix, function(cmp) cmp$omega*mvnfast::dmvn(xnew,
				cmp$mu[-1, ], cmp$sigma[-1, -1]))
		## Per-component conditional mean (multivariate-normal formula)
		E<-lapply(mix, function(cmp) cmp$mu[1, ] +
				cmp$sigma[-1, 1]%*%solve(cmp$sigma[-1, -1])%*%
				t(sweep(xnew, 2, cmp$mu[-1, ], "-")))
		Reduce("+", Map("*", q, E))/Reduce("+", q)
	}

	E_y<-sapply(seq(ndraws), per_draw)

	return(E_y)
}
## Conditional variance function of the DPM-MVN model
## Conditional variance function of the DPM-MVN model
var.mdpm_xc<-function(fit, meanfunc, xnew){
	## Posterior draws of Var[y | x] at the points in xnew, computed as
	## E[y^2 | x] - meanfunc^2, where meanfunc holds the matching draws
	## of the conditional mean (from mean.mdpm_xc).
	xnew<-as.matrix(xnew)

	## Stored posterior draws of the mixture parameters
	draws<-fit$params
	ndraws<-length(draws)

	## Second conditional moment for one draw
	per_draw<-function(s){
		mix<-draws[[s]]
		## Local weights q_l(x) = omega_l * N(x; mu_x, Sigma_xx)
		q<-lapply(mix, function(cmp) cmp$omega*mvnfast::dmvn(xnew,
				cmp$mu[-1, ], cmp$sigma[-1, -1]))
		## Per-component E[y^2 | x]: conditional variance (Schur
		## complement) plus the squared conditional mean
		M2<-lapply(mix, function(cmp) as.vector(cmp$sigma[1, 1] -
				cmp$sigma[1, -1]%*%solve(cmp$sigma[-1, -1])%*%
				cmp$sigma[-1, 1]) + (cmp$mu[1, ] +
				cmp$sigma[-1, 1]%*%solve(cmp$sigma[-1, -1])%*%
				t(sweep(xnew, 2, cmp$mu[-1, ], "-")))^2)
		Reduce("+", Map("*", q, M2))/Reduce("+", q)
	}

	V_y<-sapply(seq(ndraws), per_draw) - meanfunc^2

	return(V_y)
}
## Conditional density of the DPM-MVN model
## Conditional density of the DPM-MVN model
dmdpm_xc<-function(fit, newdata, ncores = 1){
	## Posterior draws of the conditional density f(y | x), one column per
	## stored MCMC draw. For multivariate fits the conditional density is
	## the joint mixture density divided by the covariate marginal; for
	## univariate fits it is the mixture density itself.
	newdata<-as.matrix(newdata)

	## Stored posterior draws of the mixture parameters
	draws<-fit$params
	ndraws<-length(draws)

	if(ncol(fit$y) != 1){
		per_draw<-function(s){
			## Covariate marginal f(x) over the mixture components
			marg<-Reduce("+", lapply(draws[[s]], function(cmp) cmp$omega*
				mvnfast::dmvn(cbind(newdata[, -1]), cmp$mu[-1, ], cmp$sigma[-1, -1],
					ncores = ncores)))
			## Joint density f(y, x), then f(y | x) = f(y, x) / f(x)
			joint<-Reduce("+", lapply(draws[[s]], function(cmp) cmp$omega*
				mvnfast::dmvn(newdata, cmp$mu, cmp$sigma, ncores = ncores)))
			joint/marg
		}
	}
	else{
		per_draw<-function(s){
			## Univariate case: plain normal mixture density
			Reduce("+", lapply(draws[[s]], function(cmp) cmp$omega*
				dnorm(newdata, cmp$mu, sqrt(cmp$sigma))))
		}
	}
	d<-sapply(seq(ndraws), per_draw)

	return(d)
}


###############################
##  DPM - Continuous/Binary  ##
###############################
## Density product of Bernoulli
## Density product of Bernoulli
dbern<-function(x, p){
	## Joint density of independent Bernoulli variables.
	## x: binary observations (rows = observations, columns = variables;
	##    a vector is treated as one column)
	## p: success probabilities, one per column of x
	## Returns one density value per row of x.
	x<-as.matrix(x)

	nobs<-nrow(x)
	nvar<-ncol(x)

	## Per-column Bernoulli densities, stacked as an nobs x nvar matrix
	dens<-matrix(vapply(seq_len(nvar),
			function(j) dbinom(x[, j], 1, p[j]),
			numeric(nobs)), nrow = nobs)

	## Product across variables for each observation
	return(apply(dens, 1, prod))
}
## DPM-MVN w/ binary
## DPM-MVN w/ binary
mdpm_xd<-function(y, xd, L, prior, nsim, nburn, nthin = 1, start = NULL, scale = TRUE){
	## Blocked Gibbs sampler for a truncated (level L) Dirichlet process
	## mixture of multivariate normals with additional independent binary
	## covariates modeled with component-specific Bernoulli probabilities.
	##
	## y:      continuous data; observations in rows
	## xd:     binary covariates; observations in rows
	## L:      truncation level of the stick-breaking representation
	## prior:  named list; as in mdpm_xc, plus a_pi/b_pi (Beta hyperpriors
	##         for the Bernoulli probabilities, one pair per column of xd)
	## nsim:   number of MCMC iterations; nburn: burn-in; nthin: thinning
	## start:  optional list of initial values (omega, mu, sigma, pi)
	## scale:  if TRUE, the sampler runs on whitened y and the stored
	##         draws are transformed back to the original scale
	##
	## Returns list(y, xd, params) with params the per-iteration lists of
	## component parameters omega/mu/sigma/pi.

	## Convert data into a matrix
	y<-cbind(y)
	xd<-cbind(xd)

	## Auxiliar variables
	n<-nrow(y)
	p<-ncol(y)
	q<-ncol(xd)

	## Fail early on inconsistent MCMC settings; otherwise the error only
	## surfaces after the full simulation, in the burn-in subsetting
	if(nburn >= nsim){
		stop("nburn must be smaller than nsim.")
	}

	## Check prior information
	if("mu_0" %in% names(prior)){
		## Extract hyperparameters
		mu_0<-prior$mu_0
	}
	else{
		if(all(c("m", "S") %in% names(prior))){
			## Initial value
			mu_0<-rep(0, p)

			## Hyperparameters
			m<-prior$m
			S<-prior$S
			S_1<-solve(S)
		}
		else{
			stop("If mu_0 is not supplied, hyperparameters m and S must be provided.")
		}
	}
	if("V_0" %in% names(prior)){
		## Extract hyperparameters
		V_0<-prior$V_0
		V_0_1<-solve(V_0)
		v0<-FALSE
	}
	else{
		if(all(c("r", "Q") %in% names(prior))){
			## Initial value
			V_0<-diag(p)
			v0<-TRUE

			## Hyperparameters
			r<-prior$r
			Q<-prior$Q
		}
		else{
			stop("If V_0 is not supplied, hyperparameters r and Q must be provided.")
		}
	}
	if("nu_0" %in% names(prior)){
		## Extract hyperparameters
		nu_0<-prior$nu_0
	}
	else{
		stop("Hyperparameter nu_0 must be provided.")
	}
	if("Psi_0" %in% names(prior)){
		## Extract hyperparameters
		Psi_0<-prior$Psi_0
	}
	else{
		if(all(c("nu_1", "Psi_1") %in% names(prior))){
			## Initial value
			Psi_0<-diag(p)

			## Hyperparameters
			nu_1<-prior$nu_1
			Psi_1<-prior$Psi_1
			Psi_1_1<-solve(Psi_1)
		}
		else{
			stop("If Psi_0 is not supplied, hyperparameters nu_1 and Psi_1 must be provided.")
		}
	}
	## Scalar conditions: use short-circuit && / || (was elementwise & / |)
	if(all(c("a_pi", "b_pi") %in% names(prior)) && !is.null(xd)){
		## Extract hyperparameters
		a_pi<-prior$a_pi
		b_pi<-prior$b_pi

		if(length(a_pi) != length(b_pi) || length(a_pi) != q){
			stop("Lengths of the hyperparameters a_pi and b_pi differ or
				they are not equal to the number of discrete covariates")
		}
	}
	else{
		stop("Hyperparameters a_pi and b_pi must be provided when discrete variables are considered.")
	}
	if("alpha" %in% names(prior)){
		## Extract hyperparameters
		alpha<-prior$alpha
	}
	else{
		if(all(c("a_alpha", "b_alpha") %in% names(prior))){
			## Initial value
			alpha<-1

			## Hyperparameters
			a_alpha<-prior$a_alpha
			b_alpha<-prior$b_alpha
		}
		else{
			stop("If alpha is not supplied, hyperparameters a_alpha and b_alpha must be provided.")
		}
	}

	## Output objects
	params<-list()

	## Scale the data (whitening transform on the continuous part only)
	y0<-y
	if(scale){
		m0<-colMeans(y0)
		ev<-eigen(cov(y))
		B<-ev$vectors%*%
			diag(ev$values, nrow = length(ev$values))^(0.5)%*%
			t(ev$vectors)
		B_1<-solve(B)
		y<-t(matrix(apply(y, 1, function(x) B_1%*%(x - m0)), ncol = n))
	}

	## Initial values
	v<-rep(1/L, L)
	v[L]<-1
	comp<-comp_aux<-list()
	if(is.null(start)){
		for(l in 1:L){
			comp[[l]]<-list(omega = 1/L, mu = colMeans(y),
						sigma = cov(y), pi = rep(0.5, q))
		}
	}
	else{
		if(all(c("omega", "mu", "sigma", "pi") %in% names(start))){
			## BUG FIX: the weights must be read from `start` BEFORE they
			## can be validated (the old code tested an undefined `omega`),
			## and the unit-sum check must be floating-point tolerant
			omega<-start$omega
			if(!isTRUE(all.equal(sum(omega), 1))){
				stop("Weights must sum up to one.")
			}
			## BUG FIX: as.matrix() ignores ncol; matrix() reshapes the
			## supplied initial values into the intended (p or q) x L layout
			mu<-matrix(start$mu, ncol = L)
			sigma<-start$sigma
			pi<-matrix(start$pi, ncol = L)
			for(l in 1:L){
				comp[[l]]<-list(omega = omega[l], mu = mu[, l],
							sigma = sigma[[l]], pi = pi[, l])
			}
		}
		else{
			stop("Initial values for omega, mu, sigma and pi must be specified.")
		}
	}

	## Auxiliar variables
	pi_z<-matrix(, nrow = n, ncol = L)

	for(s in 1:nsim){
		## Compute the weights (stick-breaking construction from v)
		comp[[1]]$omega<-v[1]
		for(l in 2:L){
			comp[[l]]$omega<-v[l]*(1 - v[l - 1])*
						comp[[l - 1]]$omega/v[l - 1]
		}

		## Sample z (memberships; likelihood = MVN for y times Bernoulli
		## products for xd)
		for(l in 1:L){
			pi_z[, l]<-comp[[l]]$omega*mvnfast::dmvn(y, comp[[l]]$mu,
						comp[[l]]$sigma)*
					dbern(xd, comp[[l]]$pi)
		}
		z<-apply(pi_z, 1, function(x) sample(1:L, size = 1, prob = x))

		## Number of observations per cluster
		nl<-tabulate(z, L)

		## Sample v (stick-breaking proportions)
		for(l in 1:(L - 1)){
			v[l]<-rbeta(1, nl[l] + 1, alpha + sum(nl[(l + 1):L]))
		}

		## Sample mu and sigma
		if(v0) V_0_1<-solve(V_0)
		for(l in 1:L){
			yz<-matrix(y[z == l, ], ncol = p)

			## Sample mu (conjugate multivariate-normal update)
			syz<-colSums(yz)
			sigma_1<-solve(comp[[l]]$sigma)
			VAR<-solve(V_0_1 + nl[l]*sigma_1)
			MN<-V_0_1%*%mu_0 + sigma_1%*%syz
			comp[[l]]$mu<-MASS::mvrnorm(1, VAR%*%MN, VAR)

			## Sample sigma (conjugate inverse-Wishart update)
			y_star<-sweep(yz, 2, comp[[l]]$mu, "-")
			yty<-t(y_star)%*%y_star
			comp[[l]]$sigma<-CholWishart::rInvWishart(1, nu_0 + nl[l],
								Psi_0 + yty)[, , 1]

			## Sample pi (conjugate Beta update, one per binary covariate)
			for(j in 1:q){
				comp[[l]]$pi[j]<-rbeta(1, a_pi[j] +
							sum(xd[z == l, j]),
							b_pi[j] + nl[l] -
								sum(xd[z == l, j]))
			}
		}

		## Sample mu_0 (only when a hyperprior was supplied)
		if(!("mu_0" %in% names(prior))){
			VAR0<-solve(S_1 + L*V_0_1)
			MN0<-S_1%*%m + V_0_1%*%rowSums(matrix(sapply(comp, function(x) x$mu), nrow = p))
			mu_0<-MASS::mvrnorm(1, VAR0%*%MN0, VAR0)
		}

		## Sample V_0 (only when a hyperprior was supplied)
		if(!("V_0" %in% names(prior))){
			mu_mu0<-matrix(sapply(comp,
					function(x) x$mu - mu_0), nrow = p)
			V_0<-CholWishart::rInvWishart(1, r + L, Q + mu_mu0%*%t(mu_mu0))[, , 1]
		}

		## Sample Psi_0 (only when a hyperprior was supplied)
		if(!("Psi_0" %in% names(prior))){
			ss<-Reduce("+", lapply(comp, function(x) solve(x$sigma)))
			Psi_0<-rWishart(1, nu_1 + L*nu_0,
						solve(Psi_1_1 + ss))[, , 1]
		}

		## Sample alpha (only when a hyperprior was supplied)
		if(!("alpha" %in% names(prior))){
			alpha<-rgamma(1, a_alpha + L - 1,
						b_alpha - sum(log(1 - v[1:(L - 1)])))
		}

		## Re-scale parameters back to the original data scale
		if(scale){
			for(l in 1:L){
				comp_aux[[l]]<-list(omega = comp[[l]]$omega,
							mu = B%*%comp[[l]]$mu + m0,
							sigma = B%*%comp[[l]]$sigma%*%t(B),
							pi = comp[[l]]$pi)
			}
		}
		else{
				comp_aux<-comp
		}

		## Store results
		params[[s]]<-comp_aux
	}

	## Burn-in and thinning
	params<-lapply(seq(nburn + 1, nsim, nthin), function(x) params[[x]])

	return(list(y = y0, xd = xd, params = params))
}
## Conditional mean function of the DPM-MVN w/ binary
## Conditional mean function of the DPM-MVN w/ binary
## Posterior draws of E[y | xc, xd] from a mdpm_xd fit, one evaluation per
## stored MCMC draw.
## fit:    output of mdpm_xd (list with y, xd, params)
## xc_new: continuous covariate values (ignored when fit$y has one column)
## xd_new: binary covariate values
mean.mdpm_xd<-function(fit, xc_new = NULL, xd_new){
	## Convert into a matrix
	xd_new<-as.matrix(xd_new)

	## Extract parameters
	params<-fit$params

	## Auxiliar variables
	nsim<-length(params)

	## Conditional mean function
	if(ncol(fit$y) != 1){
		## Convert into a matrix
		xc_new<-as.matrix(xc_new)

		f<-function(s){
			## Component weights: omega_l * N(xc; mu_x, Sigma_xx) * Bernoulli(xd).
			## NOTE(review): prod(dbinom(...)) collapses over ALL rows of
			## xd_new into one scalar, so this branch seems to assume a
			## single discrete design point; the univariate branch below
			## uses the row-wise dbern() instead. Confirm the intended
			## behavior when xd_new has several rows.
			q<-lapply(params[[s]], function(x) x$omega*mvnfast::dmvn(xc_new,
					x$mu[-1, ], x$sigma[-1, -1])*
					prod(dbinom(xd_new, 1, x$pi)))
			## Per-component conditional mean of y given xc (MVN formula)
			E<-lapply(params[[s]], function(x) x$mu[1, ] +
					x$sigma[-1, 1]%*%solve(x$sigma[-1, -1])%*%
					t(sweep(xc_new, 2, x$mu[-1, ], "-")))
			## Weighted mixture average of the component means
			E_y<-Reduce("+", Map("*", q, E))/Reduce("+", q)
		}
	}
	else{
		f<-function(s){
			## No continuous covariates: component weights depend on xd only
			q<-lapply(params[[s]], function(x) x$omega*
					dbern(xd_new, x$pi))
			## Component means of y (mu stored as a 1-row matrix per draw)
			E<-lapply(params[[s]], function(x) x$mu[1, ])
			E_y<-Reduce("+", Map("*", q, E))/Reduce("+", q)
		}
	}
	E_y<-sapply(seq(nsim), f)

	return(E_y)
}
## Conditional variance function of the DPM-MVN w/ binary
## Conditional variance function of the DPM-MVN w/ binary
## Posterior draws of Var[y | xc, xd] computed as E[y^2 | x] - meanfunc^2,
## where meanfunc holds the matching conditional-mean draws (mean.mdpm_xd).
## fit:    output of mdpm_xd; xc_new: continuous covariates (ignored when
## fit$y has one column); xd_new: binary covariates.
var.mdpm_xd<-function(fit, meanfunc, xc_new = NULL, xd_new){
	## Convert into a matrix
	xd_new<-as.matrix(xd_new)

	## Extract parameters
	params<-fit$params

	## Auxiliar variables
	nsim<-length(params)

	## Conditional variance function
	if(ncol(fit$y) != 1){
		## Convert into a matrix
		xc_new<-as.matrix(xc_new)

		f<-function(s){
			## Component weights: omega_l * N(xc; mu_x, Sigma_xx) * Bernoulli(xd).
			## NOTE(review): prod(dbinom(...)) collapses over ALL rows of
			## xd_new (single design point?) while the univariate branch
			## below uses row-wise dbern() — confirm intended behavior.
			q<-lapply(params[[s]], function(x) x$omega*mvnfast::dmvn(xc_new,
					x$mu[-1, ], x$sigma[-1, -1])*
					prod(dbinom(xd_new, 1, x$pi)))
			## Per-component E[y^2 | xc]: conditional variance (Schur
			## complement) plus squared conditional mean
			V<-lapply(params[[s]], function(x)
					as.vector(x$sigma[1, 1] -
					x$sigma[1, -1]%*%solve(x$sigma[-1, -1])%*%
					x$sigma[-1, 1]) +
					(x$mu[1, ] +
					x$sigma[-1, 1]%*%solve(x$sigma[-1, -1])%*%
					t(sweep(xc_new, 2, x$mu[-1, ], "-")))^2)
			## Weighted mixture average of the second moments
			V_y<-Reduce("+", Map("*", q, V))/Reduce("+", q)
		}
	}
	else{
		f<-function(s){
			## No continuous covariates: weights depend on xd only
			q<-lapply(params[[s]], function(x) x$omega*
					dbern(xd_new, x$pi))
			## Per-component E[y^2] = sigma + mu^2 (sigma is 1x1 here,
			## so x$sigma[1, ] extracts the scalar variance)
			V<-lapply(params[[s]], function(x)
					as.vector(x$sigma[1, ] +  x$mu[1, ]^2))
			V_y<-Reduce("+", Map("*", q, V))/Reduce("+", q)
		}

	}
	## Var[y | x] = E[y^2 | x] - (E[y | x])^2, draw by draw
	V_y<-sapply(seq(nsim), f) - meanfunc^2

	return(V_y)
}
## Conditional density of the DPM-MVN w/ binary
## Conditional density of the DPM-MVN w/ binary
## Posterior draws of the conditional density of y given the covariates,
## one column per stored MCMC draw: the joint mixture density of (y, xc)
## weighted by the Bernoulli terms for xd, divided by the covariate marginal.
## fit:     output of mdpm_xd
## newdata: evaluation points; first column is the response, remaining
##          columns the continuous covariates (if any)
## xd_new:  binary covariate values
## ncores:  passed to mvnfast::dmvn
dmdpm_xd<-function(fit, newdata, xd_new, ncores = 1){
	## Convert into a matrix
	newdata<-as.matrix(newdata)
	xd_new<-as.matrix(xd_new)

	## Extract parameters
	params<-fit$params

	## Auxiliar variables
	nsim<-length(params)

	## Conditional density
	if(ncol(fit$y) != 1){
		f<-function(s){
			## Covariate marginal f(xc, xd) over the mixture components.
			## NOTE(review): prod(dbinom(...)) collapses over ALL rows of
			## xd_new (single design point?) while the univariate branch
			## below uses row-wise dbern() — confirm intended behavior.
			fx<-Reduce("+", lapply(params[[s]], function(x) x$omega*
				mvnfast::dmvn(cbind(newdata[, -1]), x$mu[-1, ],
					x$sigma[-1, -1], ncores = ncores)*
					prod(dbinom(xd_new, 1, x$pi))))
			## Joint density, then conditional = joint / marginal
			d<-Reduce("+", lapply(params[[s]], function(x) x$omega*
				mvnfast::dmvn(newdata, x$mu, x$sigma, ncores = ncores)*
					prod(dbinom(xd_new, 1, x$pi))))/fx
		}
	}
	else{
		f<-function(s){
			## Marginal of the binary covariates only
			fx<-Reduce("+", lapply(params[[s]], function(x) x$omega*
				dbern(xd_new, x$pi)))
			## Joint density of (y, xd), then conditional = joint / marginal
			d<-Reduce("+", lapply(params[[s]], function(x) x$omega*
				mvnfast::dmvn(newdata, x$mu, x$sigma, ncores = ncores)*
					dbern(xd_new, x$pi)))/fx
		}
	}
	d<-sapply(seq(nsim), f)

	return(d)
}


###########################
##  Overlap coefficient  ##
###########################
## Trapezoidal rule
## Trapezoidal rule
trapezoidal<-function(f, grid){
	## Numerically integrate a function via the (possibly non-uniform)
	## trapezoidal rule.
	## f:    function values evaluated at the points in `grid`
	## grid: abscissae, same length as f
	if(length(f) != length(grid)){
		stop("Lengths differ.
			Function must be evaluated in the supplied grid")
	}

	## Auxiliar variable
	n<-length(grid)

	## Sum of interval widths times average panel heights
	widths<-grid[2:n] - grid[1:(n - 1)]
	heights<-f[2:n] + f[1:(n - 1)]

	return(sum(widths*heights)/2)
}
## Overlap coefficient - Plain vanilla
## Overlap coefficient - Plain vanilla
ovl<-function(d0, d1, grid, integral = trapezoidal, ...){
	## OVL = integral of min(f0, f1) over the grid, one value per column
	## (each column is one posterior density draw).
	## d0, d1:   matrices of density evaluations (rows = grid points,
	##           columns = draws); must share dimensions
	## integral: quadrature routine taking (f, grid, ...)
	if(!all(dim(d0) == dim(d1))){
		stop("Dimensions of the supplied densities differ.")
	}

	## Pointwise minimum of the two densities, integrated column-wise
	pointwise_min<-pmin(d0, d1)
	covl<-apply(pointwise_min, 2, integral, grid = grid, ...)

	return(covl)
}
## Bayesian-bootstrap based OVL estimator
## Bayesian-bootstrap based OVL estimator
ovl.BB<-function(y0, y1, d0, d1, grid){
	## Bayesian bootstrap estimator of the overlap coefficient between two
	## densities, combining posterior density draws (d0, d1; columns =
	## draws, evaluated on `grid`) with the observed samples y0 and y1.
	if(!all(dim(d0) == dim(d1))){
		stop("Dimensions of the supplied densities differ.")
	}

	## Auxiliar variables
	nsim<-ncol(d0)
	n0<-length(y0)
	n1<-length(y1)

	## Dirichlet(1, ..., 1) bootstrap weights via normalized exponentials
	## (RNG draw order kept: group 0 first, then group 1)
	e0<-matrix(rexp(n0*nsim, 1), ncol = nsim)
	e1<-matrix(rexp(n1*nsim, 1), ncol = nsim)
	w0<-apply(e0, 2, function(x) x/sum(x))
	w1<-apply(e1, 2, function(x) x/sum(x))

	## One OVL estimate per posterior draw
	covl<-rep(NA, nsim)
	for(j in seq_len(nsim)){
		## Weighted P0(f0 < f1) over sample 0 (densities interpolated
		## from the grid onto the data points)
		lt<-approx(grid, d0[, j], y0)$y < approx(grid, d1[, j], y0)$y
		## Weighted P1(f0 >= f1) over sample 1
		ge<-approx(grid, d0[, j], y1)$y >= approx(grid, d1[, j], y1)$y
		## Clip at one (the two probabilities can slightly overshoot)
		covl[j]<-min(sum(w0[, j]*lt) + sum(w1[, j]*ge), 1)
	}

	return(covl)
}


#################################
##  Model comparison criteria  ##
#################################
## Model comparison criteria function
## Model comparison criteria function
criteria<-function(pd){
	## Compute LPML, the truncated-weight LPML of Zhou and Hanson (2017),
	## WAIC and DIC_3 from a matrix of pointwise predictive densities.
	## pd: rows = observations, columns = MCMC draws.
	## Returns list(measures, cpo) with the per-observation CPOs.

	## CPO_i = harmonic mean of the predictive density draws
	inv_pd<-1/pd
	cpo<-1/rowMeans(inv_pd)
	lpml<-sum(log(cpo))

	## Alternative LPML with truncated importance weights
	## (Zhou and Hanson, 2017)
	w_cap<-sqrt(ncol(pd))*rowMeans(inv_pd)
	cpo_tilde<-vapply(seq_len(nrow(pd)), function(i){
		w_trunc<-pmin(inv_pd[i, ], w_cap[i])
		sum(pd[i, ]*w_trunc)/sum(w_trunc)
	}, numeric(1))
	lpml_a<-sum(log(cpo_tilde))

	## WAIC: -2 * (log pointwise predictive density - effective parameters)
	lpd<-sum(log(rowMeans(pd)))
	p_WAIC<-sum(apply(log(pd), 1, var))
	waic<--2*(lpd - p_WAIC)

	## DIC_3 (based on the posterior mean of the density)
	dic<--4*mean(colSums(log(pd))) + 2*sum(log(rowMeans(pd)))

	measures<-c(LPML = lpml, `Adjusted-LPML` = lpml_a, WAIC = waic,
			DIC_3 = dic)

	return(list(measures = measures, cpo = cpo))
}
## Bayesian bootstrap multiple model comparison
## Bayesian bootstrap multiple model comparison
## Compares models through Bayesian-bootstrap-weighted mean log-CPOs.
## cpo1, cpo2, ...: per-observation CPO vectors, one per model (equal lengths)
## B:               number of bootstrap replicates
mbb<-function(cpo1, cpo2, ..., B = 10000){
	## Extract the models cpos
	models<-list(cpo1, cpo2, ...)

	## Auxiliar variables: all CPO vectors must have a common length
	if(length(unique(sapply(models, length))) == 1){
		n<-length(cpo1)
	}
	else{
		stop("Lengths of the CPOs differ.")
	}
	m<-length(models)

	## Bootstrap weights: Dirichlet(1, ..., 1) rows via normalized
	## exponentials; each row b holds one replicate's observation weights
	w<-matrix(rexp(n*B, 1), nrow = B, ncol = n)
	w<-w/rowSums(w)
	pbb<-matrix(, nrow = B, ncol = m)
	for(b in 1:B){
		## pbb[b, ] is the order() permutation of the models by weighted
		## mean log-CPO (decreasing), i.e. pbb[b, 1] is the best model's
		## index in replicate b
		pbb[b, ]<-order(sapply(models, function(x)
					mean(w[b, ]*log(x))), decreasing = TRUE)
	}

	## Posterior rank probability.
	## NOTE(review): mean(x == 1) on column j gives the proportion of
	## replicates in which MODEL 1 occupies rank j — i.e. P describes the
	## rank distribution of the first model only, not P(each model is
	## best). If the latter was intended, this would need
	## mean(pbb[, 1] == j) per model j instead — confirm intent.
	P<-apply(pbb, 2, function(x) mean(x == 1))

	return(P)
}
# Source: javier-gg/OverlapCoefficient (documentation built on July 21, 2020, 12:14 a.m.)