R/fullMCMC_complexityPrior.R

Defines functions simMultiIndNormInvGamma proposeTheta logLikelihoodFullModel logPrior simulateFromPrior localProposal singleLocalProposal birthProbs truncatedPoisson complexityPrior updateNumberOfCutpoints mcmcSampler beast myUnicodeCharacters normalizeTime0 computePosteriorParameters computePosteriorParametersFree computeEmpiricalPriorParameters print.beast.object

Documented in beast birthProbs complexityPrior computeEmpiricalPriorParameters computePosteriorParameters computePosteriorParametersFree localProposal logLikelihoodFullModel logPrior mcmcSampler myUnicodeCharacters normalizeTime0 print.beast.object proposeTheta simMultiIndNormInvGamma simulateFromPrior singleLocalProposal truncatedPoisson updateNumberOfCutpoints

simMultiIndNormInvGamma <- function(mu, nu, alpha, beta){
	m <- length(mu)
#	sigma2 <- 1/rgamma(n = m, shape = alpha, rate = beta)
#	tha valw na epistrefei mono to meso tis diasporas wste na thewreitai stathero
	sigma2 <- beta/(alpha - 1)
	mySd <- sqrt( sigma2/nu )
	simMeans <- rnorm(n = m, mean = 0, sd = 1)
	simMeans <- mu + simMeans*mySd
	results <- vector("list", length = 2)
	results[[1]] <- simMeans
	results[[2]] <- sigma2
	names(results) <- c("mean", "variance")
	return(results)
}


proposeTheta <- function(thetaOld, tau, alpha, beta){
	# ta alpha beta xrisimeuoun apla gia na epistrefeis kai tin diaspora pou einai fixed
	m <- length(thetaOld)
#	sigma2 <- 1/rgamma(n = m, shape = alpha, rate = beta)
#	tha valw na epistrefei mono to meso tis diasporas wste na thewreitai stathero
	sigma2 <- beta/(alpha - 1)
	results <- vector("list", length = 2)
#	results[[1]] <- rnorm(m, mean = thetaOld, sd = tau)
	results[[1]] <- rnorm(m, mean = 0, sd = 1)
	results[[1]] <- thetaOld + tau*results[[1]]
	results[[2]] <- sigma2
	names(results) <- c("mean", "variance")
	return(results)
}



logLikelihoodFullModel <- function(myData, cutPoints, theta, startPoint){
	nTime <- dim(myData)[1]
	augmentedCutPoints <- c(1, cutPoints, nTime )
	sanityCheck <- diff(augmentedCutPoints)
	if(  min(sanityCheck) <= 0 ){
		logL <- log(0)
	}else{
		goldenBoy <- theta$mean[augmentedCutPoints]
		myMeans <- numeric(nTime)
		for (j in 2:length(augmentedCutPoints)){
			subIndex <- augmentedCutPoints[j-1]:augmentedCutPoints[j]
			myMeans[subIndex] <- (goldenBoy[j] - goldenBoy[j-1])/(augmentedCutPoints[j] - augmentedCutPoints[j-1])*(subIndex - augmentedCutPoints[j-1]) + goldenBoy[j-1]
		}
		myMeanMatrix <- matrix(myMeans, nrow = nTime, ncol = dim(myData)[2] )
		sdPerPoint <- sqrt( theta$var )		
		mySDMatrix <- matrix(sdPerPoint, nrow = nTime, ncol = dim(myData)[2] )
		zData <- (myData - myMeanMatrix)/mySDMatrix
		logL <- sum( apply(zData, 1, function(y){ sum(dnorm(y, mean = 0, sd = 1, log = TRUE)) })   )
	}
	return(logL)	
}



# startPoint >= 2 !

logPrior <- function(cutPoints, nTime, startPoint){
	L <- length(cutPoints)
	augmentedCutPoints <- c(startPoint - 1, cutPoints, nTime )
	sanityCheck <- diff(augmentedCutPoints)
	if(  min(sanityCheck) <= 0 ){
		logP <- log(0)
	}else{
		logP <- 0
		if(L > 0){
			logP <- -log(nTime - L - startPoint + 1)
			if(L > 1){
				for(l in 2:L){
					logP <- logP - log(nTime - L + l - 1 - cutPoints[l-1]) 
				}
			}
		}
	}
	return(logP)
}

simulateFromPrior <- function(nTime, startPoint, L = 3){
	cutPoints <- numeric(L)
	indexRange <- startPoint:(nTime-L)
	if(L > 0){
		if(length(indexRange) == 1){
			cutPoints[1] <- indexRange}else{
			cutPoints[1] <- sample(indexRange, 1)
		}
		if(L > 1){
			for(l in 2:L){
				indexRange <- (cutPoints[l-1] + 1):(nTime - L + l - 1)
				if(length(indexRange) == 1){
					cutPoints[l] <- indexRange}
				else{			
					cutPoints[l] <- sample(indexRange, 1)
				}
			}
		}
	}
	return(cutPoints)
}

localProposal <- function(cutPoints, nTime, mhPropRange, startPoint){
	L <- length(cutPoints)
	if(L < 1){stop("L = 0.")}
	if(missing(mhPropRange)){mhPropRange = 1}
	epsilon <- sample(c(-seq(1:mhPropRange), 0, seq(1:mhPropRange)),L, replace = TRUE)
	newState <- cutPoints + epsilon
	augmentedCutPoints <- c(startPoint - 1, newState, nTime )
	sanityCheck <- diff(augmentedCutPoints)
	if(  min(sanityCheck) <= 0 ){	# not valid state!
		propRatio <- log(0)
	}else{
		propRatio <- 0
	}
	
	results <- vector("list", length = 2)
	results[[1]] <- newState
	results[[2]] <- propRatio
	names(results) <- c("newState", "propRatio")
	return(results)
}


singleLocalProposal <- function(cutPoints, nTime, mhSinglePropRange, startPoint){
	L <- length(cutPoints)
	if(L < 1){stop("L = 0.")}
	if(missing(mhSinglePropRange)){mhSinglePropRange = 1}
	epsilon <- sample(c(-seq(1:mhSinglePropRange), seq(1:mhSinglePropRange)),1)
	justOneIndex <- sample(L,1)
	newState <- cutPoints
	newState[ justOneIndex ] <- cutPoints[ justOneIndex ] + epsilon
	augmentedCutPoints <- c(startPoint - 1, newState, nTime )
	sanityCheck <- diff(augmentedCutPoints)
	if(  min(sanityCheck) <= 0 ){	# not valid state!
		propRatio <- log(0)
	}else{
		propRatio <- 0
	}
	
	results <- vector("list", length = 2)
	results[[1]] <- newState
	results[[2]] <- propRatio
	names(results) <- c("newState", "propRatio")
	return(results)
}


birthProbs <- function(LRange){
	Lmin = 0
	Lmax <- max(LRange)
	probs <- numeric(Lmax + 1)
	probs[1] = 1
	probs[Lmax + 1] = 0
	if(Lmax - Lmin + 1 > 2) {
		probs[(Lmin+2):(Lmax)] = 0.5
	}
	names(probs) <- 0:Lmax
	return(probs)
}



truncatedPoisson <- function(Lmax = 20, gammaParameter = 1){
        logPrior <- log(numeric(Lmax))
        Lmin = 0
        logPrior1 <- dpois(0:Lmax, lambda=gammaParameter)
        logPrior1 <- logPrior1/sum(logPrior1)
        logPrior <- log(logPrior1)
	names(logPrior) <- 0:Lmax
	return(logPrior)
}


complexityPrior <- function(Lmax = 20, gammaParameter, nTime){
	Lmin = 0
        logPrior1 <- exp(-(0:Lmax)*gammaParameter*log(3*nTime/(0:Lmax)))
	logPrior1[1] <- 1
        logPrior1 <- logPrior1/sum(logPrior1)
        logPrior <- log(logPrior1)
	names(logPrior) <- 0:Lmax
	return(logPrior)

}



updateNumberOfCutpoints <- function(cutPoints, nTime, startPoint, LRange, birthProbs){
	results <- vector("list", length = 4)
	L <- length(cutPoints)
	augmentedCutPoints <- c(startPoint - 1, cutPoints, nTime )
	#choose birth or death according to birthProbs
	u = runif(1)
	if( u < birthProbs[as.character(L)]){
		#	birth move
		nIntervals <- L + 1
		intervalIndex <- sample(L+1, 1)	
		myIndex <- (augmentedCutPoints[intervalIndex]+1):(augmentedCutPoints[intervalIndex+1]-1)
		lengthInterval <- augmentedCutPoints[intervalIndex+1] - augmentedCutPoints[intervalIndex] - 1
		if( lengthInterval < 1 ){
			propRatio <- log(0)
			moveType = "notValidBirth"
			newState = cutPoints
		}else{
			moveType = "birth"	
			if(lengthInterval > 1){
				newCutPoint <- sample(myIndex, 1)
			}else{
				newCutPoint <- myIndex
			}
			results[[4]] <- newCutPoint
			newState <- numeric(L+1)
			newState[intervalIndex] <- newCutPoint
			if( intervalIndex > 1 ){
				newState[1:(intervalIndex-1)] <- cutPoints[1:(intervalIndex-1)]
			}
			if(intervalIndex < L+1){
				newState[(intervalIndex+1):(L+1)] <- cutPoints[intervalIndex:L]  
			}
			propRatio <- log(lengthInterval) + log(1 - birthProbs[as.character(L+1)]) - log(birthProbs[as.character(L)])
		}
	}else{
		moveType = "death"
		#	death move
		myIndex <- sample(L, 1)
		results[[4]] <- cutPoints[myIndex]
		newState <- cutPoints[-myIndex]
		lengthInterval <- augmentedCutPoints[myIndex+2] - augmentedCutPoints[myIndex] - 1
		propRatio <- - log(lengthInterval) - log(1 - birthProbs[as.character(L)]) + log(birthProbs[as.character(L-1)])
	}

	results[[1]] <- newState
	results[[2]] <- propRatio
	results[[3]] <- moveType
	names(results) <- c("newState", "propRatio", "moveType", "timePoint")
	return(results)
}


mcmcSampler <- function(myData, nIter, finalIterationPdf, modelVariance, mhPropRange, mhSinglePropRange, movesRange, startPoint, 
		postPar, dName, timeScale, burn,  iterPerPlotPrefix, priorParameters, L = 3, LRange, tau, 
		gammaParameter, saveTheta, Prior = "complexity"){
	# tau is the sd of the MH proposal
	if(missing(timeScale)){timeScale = 1}
	if(missing(mhPropRange)){mhPropRange = 1}
	if (missing(modelVariance)){modelVariance = FALSE}
	if(missing(startPoint)){ startPoint = 2 }
	if(startPoint < 2){ startPoint = 2 }
	if(missing(burn)){burn = 1}
	if(missing(movesRange)){movesRange = as.character(1:3)}
	if (missing(finalIterationPdf)){ finalIterationPdf = NULL }
	if (missing(LRange)){LRange = L}
	Lmax <- max(LRange)
	nTime <- dim(myData)[1]
	if(Prior == "complexity"){
		logPriorNPoints <- complexityPrior(Lmax = Lmax, gammaParameter = gammaParameter, nTime = nTime)
	}else{
		logPriorNPoints <- truncatedPoisson(Lmax = Lmax, gammaParameter = gammaParameter)
	}
	cutPoints <- array(data = NA, dim = c(nIter, Lmax))
	if(saveTheta == TRUE){
		thetaValues <- array(data = NA, dim = c(nIter, nTime))
	}
	Lvalues <- numeric(nIter)
	mu <- array(data = NA, dim = c(nIter, nTime))
	sigma2 <- array(data = NA, dim = c(nIter, nTime))
	iter <- 1
#	L <- Lvalues[iter] <- floor(Lmax/2)
	L <- Lvalues[iter] <- 1
	cutPoints[iter, 1:L] <- startPoint + (1:L)*floor((nTime - startPoint)/(L+1))

	myBirthProbs <- birthProbs(LRange)

	overkill <- simMultiIndNormInvGamma(mu = postPar[[1]], nu = postPar[[2]], alpha = postPar[[3]], beta = postPar[[4]])
	theta <- overkill
	mu[iter, ] <- overkill$mean
	sigma2[iter, ] <- overkill$var
	lValues <- lPosterior <- numeric(nIter)
	lValues[iter] <- logLikelihoodFullModel(myData = myData, cutPoints = cutPoints[iter, 0:L], theta = overkill, startPoint = startPoint)
	lPosterior[iter] <- lValues[iter] + logPrior(cutPoints = cutPoints[iter, 0:L], nTime = nTime, startPoint = startPoint) 
	mhRate0a <- 0
	mhRate0 <- 0
	mhRate0c <- 0
	mhRate <- 0
	mhRate2 <- 0
	mhRate3 <- 0
	mhRatePoints <- 0
	arEpsilon <- 0
	myPositions <- floor(seq(0, nTime, length = 10))  #c(0,50,100,150,200,250,300)
	myLabels <- round(myPositions*timeScale,1)
	uchar <- myUnicodeCharacters()
#	epsilonValues <- numeric(nIter)
#	epsilonValues[1] <- rbeta(n = 1, shape1 = beta_prior_parameters[1], shape2 = beta_prior_parameters[2])
	sampleSD <- sqrt(postPar[[4]]/(postPar[[3]] - 1))
	#plot(sampleSD)
	myFl <- floor((nIter/4))
	for(iter in 2:nIter){
		lValues[iter] <- lValues[iter - 1]
		cutPoints[iter, ] <- cutPoints[iter - 1, ]
#-------------------------------------------------------------------------------------------------------------------------------------
#		update number of cutpoints		
#		L <- sample(LRange, 1)
		lValues[iter] <- lValues[iter - 1]
		if(L > 0){
			cutPoints[iter, 1:L] = cutPoints[iter - 1, 1:L]
		}
		myProposal <- updateNumberOfCutpoints(cutPoints = cutPoints[iter - 1, 0:L], nTime = nTime, 
				startPoint = startPoint, LRange = LRange, birthProbs = myBirthProbs)
		overkillProposed <- theta
		propLogL <- logLikelihoodFullModel(myData = myData, cutPoints = myProposal$newState, theta = overkillProposed, startPoint = startPoint)
		if(myProposal$moveType != "notValidBirth"){
			if(myProposal$moveType == "birth"){Lprop = L + 1}else{Lprop = L - 1}
			mhRatio <- propLogL + logPriorNPoints[as.character(Lprop)] - lValues[iter] - logPriorNPoints[as.character(L)] + myProposal$propRatio
			if(L > 0){
				augCutPoints <- c(1, cutPoints[iter, 1:L], nTime )
			}else{augCutPoints <- c(1, nTime )}
			augCutPointsProp <- c(1, myProposal$newState, nTime )
			#zCurrent <- (theta$mean[augCutPoints] - priorParameters$mu0[augCutPoints])/sqrt(theta$variance[augCutPoints]/priorParameters$nu0)
			#zProp <- (overkillProposed$mean[augCutPointsProp] - priorParameters$mu0[augCutPointsProp])/sqrt(overkillProposed$variance[augCutPointsProp]/priorParameters$nu0)
			#priorRatio <- sum(dnorm( x = zProp, mean = 0, sd = 1, log = TRUE)) - sum(dnorm( x = zCurrent, mean = 0, sd = 1, log = TRUE))
			priorRatio <- 0
			mhRatio <- mhRatio + priorRatio
			if( log(runif(1)) < mhRatio ){
				lValues[iter] <- propLogL
				mhRatePoints <- mhRatePoints + 1
				theta$mean <- overkillProposed$mean
				L = Lprop
				cutPoints[iter, ] <- rep(0, Lmax)
				if(L > 0){
					cutPoints[iter, 1:L] <- myProposal$newState
				}
				lValues[iter] <- propLogL
			}
#			cutPoints[iter, 1:L] <- simulateFromPrior(nTime = nTime, startPoint = startPoint, L = L)
		}
		Lvalues[iter] <- L
#-------------------------------------------------------------------------------------------------------------------------------------

#		Move 0.c: update all theta parameters using a standard symmetric proposal based on the current values	
		overkill <- proposeTheta(thetaOld = theta$mean, tau = tau*sampleSD,  alpha = postPar[[3]], beta = postPar[[4]])
		propLogL <- logLikelihoodFullModel(myData = myData, cutPoints = cutPoints[iter, 0:L], theta = overkill, startPoint = startPoint)
		mhRatio <- propLogL - lValues[iter]
		augCutPoints <- c(1, cutPoints[iter, 0:L], nTime )
#		zCurrent <- (theta$mean[augCutPoints] - priorParameters$mu0[augCutPoints])/sqrt(theta$variance[augCutPoints]/priorParameters$nu0)
#		zProp <- (overkill$mean[augCutPoints] - priorParameters$mu0[augCutPoints])/sqrt(overkill$variance[augCutPoints]/priorParameters$nu0)
		zCurrent <- (theta$mean - priorParameters$mu0)/sqrt(theta$variance/priorParameters$nu0)
		zProp <- (overkill$mean - priorParameters$mu0)/sqrt(overkill$variance/priorParameters$nu0)
		priorRatio <- sum(dnorm( x = zProp, mean = 0, sd = 1, log = TRUE)) - sum(dnorm( x = zCurrent, mean = 0, sd = 1, log = TRUE))
		mhRatio <- mhRatio + priorRatio
		if( log(runif(1)) < mhRatio ){
			lValues[iter] <- propLogL
			mhRate0c <- mhRate0c + 1
			theta <- overkill
		}
###########################################################################################################
#		Gibss move: update all theta[-augmentedCutPoints] from the prior.
		if(Prior == "complexity"){
		if(iter > burn){
			overkill <-  simMultiIndNormInvGamma(mu = priorParameters$mu0, 
								nu = priorParameters$nu0, 
								alpha = postPar[[3]], 
								beta = postPar[[4]]
							)
			if(L < nTime - 1){
				theta$mean[-augCutPoints] <- overkill$mean[-augCutPoints]
			}
		}
		}else{
			overkill <-  simMultiIndNormInvGamma(mu = priorParameters$mu0, 
								nu = priorParameters$nu0, 
								alpha = postPar[[3]], 
								beta = postPar[[4]]
							)
			if(L < nTime - 1){
				theta$mean[-augCutPoints] <- overkill$mean[-augCutPoints]
			}
		}
###########################################################################################################
		chooseMove <- sample(movesRange, 1)
		if(chooseMove == "1"){
			if(L > 0){
#			Move a: proposal from prior
				newCutPoints <- simulateFromPrior(nTime = nTime, startPoint = startPoint, L = L)
				propLogL <- logLikelihoodFullModel(myData = myData, cutPoints = newCutPoints, theta = theta, startPoint = startPoint)
				mhRatio <- propLogL - lValues[iter - 1]
				u <- runif(1)
				if(log(u) < mhRatio){
					cutPoints[iter, 1:L] <- newCutPoints
					lValues[iter] <- propLogL
					mhRate <- mhRate + length(movesRange)
				}
			}
		}
		if(chooseMove == "2"){
#			Move b: local random walk
			if(L > 0){
				lpProp <- localProposal(cutPoints = cutPoints[iter, 1:L] , nTime = nTime, mhPropRange = mhPropRange, startPoint = startPoint)
				newCutPoints <- lpProp$newState
				propLogL <- logLikelihoodFullModel(myData = myData, cutPoints = newCutPoints, theta = theta, startPoint = startPoint)
				mhRatio <- propLogL - lValues[iter] + lpProp$propRatio + 
					logPrior(cutPoints = newCutPoints, nTime = nTime, startPoint = startPoint) - 
					logPrior(cutPoints = cutPoints[iter, 1:L], nTime = nTime, startPoint = startPoint)

				u <- runif(1)
				if(log(u) < mhRatio){
					cutPoints[iter, 1:L] <- newCutPoints
					lValues[iter] <- propLogL
					mhRate2 <- mhRate2 + length(movesRange)
				}
			}
		}
		if(chooseMove == "3"){
#			Move c: random walk to just one index
			if( L > 0 ){
				lpProp <- singleLocalProposal(cutPoints = cutPoints[iter, 1:L], nTime = nTime, 
						mhSinglePropRange = mhSinglePropRange, startPoint = startPoint)
				newCutPoints <- lpProp$newState
				propLogL <- logLikelihoodFullModel(myData = myData, cutPoints = newCutPoints, theta = theta, startPoint = startPoint)
				mhRatio <- propLogL - lValues[iter] + lpProp$propRatio + 
					logPrior(cutPoints = newCutPoints, nTime = nTime, startPoint = startPoint) - 
					logPrior(cutPoints = cutPoints[iter, 1:L], nTime = nTime, startPoint = startPoint)

				u <- runif(1)
				if(log(u) < mhRatio){
					cutPoints[iter, 1:L] <- newCutPoints
					lValues[iter] <- propLogL
					mhRate3 <- mhRate3 + length(movesRange)
				}
			}
		}
		if(chooseMove == "4"){
#			Move c: random walk to just one index
			if(L > 0){
				lpProp <- singleLocalProposal(cutPoints = cutPoints[iter, 1:L], nTime = nTime, 
						mhSinglePropRange = 2, startPoint = startPoint)
				newCutPoints <- lpProp$newState
				propLogL <- logLikelihoodFullModel(myData = myData, cutPoints = newCutPoints, theta = theta, startPoint = startPoint)
				mhRatio <- propLogL - lValues[iter] + lpProp$propRatio + 
					logPrior(cutPoints = newCutPoints, nTime = nTime, startPoint = startPoint) - 
					logPrior(cutPoints = cutPoints[iter, 1:L], nTime = nTime, startPoint = startPoint)

				u <- runif(1)
				if(log(u) < mhRatio){
					cutPoints[iter, 1:L] <- newCutPoints
					lValues[iter] <- propLogL
					mhRate3 <- mhRate3 + length(movesRange)
				}
			}
		}
		lPosterior[iter] <- lValues[iter] + logPrior(cutPoints = cutPoints[iter, 0:L], nTime = nTime, startPoint = startPoint)
		if(saveTheta == TRUE){
			thetaValues[iter, ] <- theta$mean
		}
		if(iter %% myFl == 0){cat(paste0(uchar, " "))}
	}
	acceptanceRates <- c( mhRatePoints*100/iter,  mhRate0c*100/iter, mhRate2*100/iter, mhRate3*100/iter )
	names(acceptanceRates) <- c("move_l_n", "move_theta_randomWalk", "move_tau_all", "move_tau_j")
	cat(paste0(" Accepted MH moves: [l_n] ", round(mhRatePoints*100/iter,2), "%, [","\U03B8","+","\U03B5",'] ',round(mhRate0c*100/iter,2) ,"%, [","\U03C4","] ", round(mhRate2*100/iter,2), "%, [","\U03C4","_j] ", round(mhRate3*100/iter,2), "%."),"\n")
	results <- vector("list", length = 7)
	results[[2]] <- lValues
	results[[3]] <- acceptanceRates
	results[[4]] <- Lvalues
	LVtab <- table(Lvalues[-(1:burn)])/(length(Lvalues[-(1:burn)]))
	results[[6]] <- LVtab
	names(results) <- c("cutPoints", "logLikelihood", "acceptanceRates", "nCutPoints", "nCutPointsMAP", "nCutPointsPosterior", "theta")
	#posterior mode of number of cutpoints
	L <- as.numeric(names(table(Lvalues[-(1:burn)]))[order(table(Lvalues[-(1:burn)]), decreasing=T)[1]])
	cat(paste0( "     Most probable number of cutpoints equals ", L, " with: P(l_n = ",L,"|x) = ",as.numeric(-sort(-LVtab)[1])),"\n")
	results[[5]] <- L
	Lindex <- which(Lvalues == L)
	LindexFull <- which(Lvalues == L)
	burnIndex <- which(Lindex < burn + 1) 
	if(length(burnIndex) > 0){
		Lindex <- Lindex[-burnIndex]
	}

	results[[1]] = cutPoints[Lindex,1:L]
	if(saveTheta == TRUE){
		thinnedIndex <- Lindex[seq(1,length(Lindex), by = 20)]
		results[[7]] <- thetaValues[thinnedIndex, ]
	}
	if(is.null(finalIterationPdf) == FALSE){
		pdf(file = paste0(finalIterationPdf,"/",dName,"_mcmcTrace.pdf"), width = 20, height = 10)
		split.screen( figs = c( 2, 1 ) )
		split.screen( figs = c( 1, 4 ), screen = 1 )
		split.screen( figs = c( 1, 1 ), screen = 2 )
		screen( 3 )
		plot(Lvalues, type = "l", xlab = "mcmc iteration", ylab = "number of cutpoints")
		#print('OK1')
		screen( 4 )
		barplot(table(Lvalues[-(1:burn)]), xlab = "number of cutpoints", ylab = "frequency")
		#print('OK2')
		screen( 5 )
		plot(lPosterior, ylim = range(lPosterior[-(1:burn)]), type= "l", xlab = "mcmc iteration", ylab = "log-posterior pdf")
		points(lPosterior[1:burn], type = "l", lty = 5, col = "white")
		legend("bottomright",lty = 3, col = 1, "burn-in period")
		#print('OK3')
		screen( 6 )
		if(L > 0){
			ylim = range(cutPoints[LindexFull,1:L]*timeScale)
			ylim[1] = 0
			matplot(cutPoints[LindexFull,1:L]*timeScale, ylim = ylim, type = "l", col = 2:(L+1), xlab = "mcmc iteration", lty = 1, ylab = "hours")
			matplot(cutPoints[Lindex,1:L]*timeScale, ylim = ylim, type = "l", col = "white", lty = 5, add = TRUE)
			legend("bottomright", paste0("cut-point of phase ", 1:L), lty = 1, col = 2:(L+1), bty = "n")
		}else{plot(c(1,1))}
		screen( 7 )
		matplot(myData, type = "l", col = 1, xaxt = "n", xlab = "hours", ylab = "growth")
		axis(1, at = myPositions, labels = myLabels)
		if(L > 0){
			if(L > 1){
				abline(v = apply(cutPoints[Lindex, 1:L], 2, median), col = 2:(L+1), lwd = 2)
			}else{
				abline(v = median(cutPoints[Lindex, 1:L]), col = 2:(L+1), lwd = 2)
			}
			legend("bottomright", paste0("replicate ",1:3), lty = 1:3, bty = "n")
			legend("topleft", paste0("posterior median ",1:L), lty = c(1,1,1), col = 2:(L+1), bty = "n")
			if(L > 1){mcmcVar <- apply(cutPoints[Lindex, 1:L],2,var)}else{
				mcmcVar <- var(cutPoints[Lindex, 1:L])
			}
			for(l in 1:L){
				if(mcmcVar[l] > 0){
					points(density(cutPoints[Lindex,l], adjust = 7), type = "l", col = l+1, lwd = 4)
					points(density(cutPoints[Lindex,l], adjust = 7), type = "l", col = "gray", lwd = 2)
				}
			}
			text(x = cutPoints[iter,1:L ] - 20, y = 0.8, paste0("phase ", 1:L), col = 2:(L+1))
		}
		if(missing(dName)  == FALSE){
			title( dName, outer = TRUE , line=-1)
		}
		close.screen( all.screens = TRUE )
		dev.off()
	}

	return(results)
}

beast <- function(myDataList, burn = 20000, nIter = 70000, mhPropRange = 1, mhSinglePropRange=40, startPoint = 2, 
		timeScale, savePlots, zeroNormalization = FALSE, LRange = 0:30, tau = 0.05,
		gammaParameter = 2, nu0 = 0.1, alpha0 = 1, beta0 = 1, subsetIndex, saveTheta = FALSE, sameVariance = TRUE, Prior = "complexity"){
#	burn = 2000, nIter = 5000,mhPropRange = 1, mhSinglePropRange = 50, getSDvalues = T, startPoint=54, timeScale = 1/6,
	cat("\n")
	myColNames <- colnames(myDataList[[1]])
	if(missing(timeScale)){timeScale = 1/1}
#	if(missing(zeroNormalization)){zeroNormalization = TRUE}
	Lmax <- max(LRange)
	LRange = 0:Lmax
	if(burn > nIter - 1){stop("`burn` should be smaller than `nIter`.")}
	if(missing(savePlots)){
		savePlots = NULL; finalIterationPdf = FALSE
	}else{ 
		if(dir.exists(savePlots)){
			myPrompt <- readline(paste0("*  [WARNING] Directory `",getwd(),"/", savePlots, "` exists. Overwrite? 1 = YES, 0 = ABORT "))
			if(myPrompt != 1){stop("Process killed by the user.")}
		}else{
			dir.create(savePlots)
			cat(paste0("*  Plots will be saved to directory: `",getwd(),"/",savePlots,"`"),"\n")
		} 
	}
	if(missing(startPoint)){ startPoint = 2 }
	if( startPoint < 2){ startPoint = 2 }
	if(missing(mhPropRange)){mhPropRange = 1}
	if(missing(mhSinglePropRange)){ mhSinglePropRange = dim(myDataList[[1]])[1]/10  }
	getSDvalues = TRUE
	if(Prior == "complexity"){
		cat(paste0("*  Assuming a complexity prior distribution on the number of change-points with `gammaParameter = ", gammaParameter,"`."), "\n")	
	}else{
		cat(paste0("*  Assuming a Poisson prior distribution on the number of change-points with rate `gammaParameter = ", gammaParameter,"`."), "\n")
#		cat(paste0("*      [WARNING]: The Poisson distribution is NOT suggested!"), "\n")		
	}
	if(zeroNormalization){
		cat(paste0("*  Normalizing at time zero... "))	
		myDataList <- normalizeTime0(myDataList = myDataList)
		cat(paste0(" done.","\n"))
	}
	if(getSDvalues == TRUE){
		cat(paste0("*  Computing posterior parameters... "))
		priorParameters <- computeEmpiricalPriorParameters(myDataList = myDataList, nu0 = nu0, alpha0 = alpha0, beta0 = beta0)
		if(sameVariance == TRUE){
			cat(paste0(" using the same variance per time series... "))
			posteriorParameters <- computePosteriorParameters(myDataList = myDataList, priorParameters = priorParameters)
		}else{
			cat(paste0(" using different variance per time series... "))
			posteriorParameters <- computePosteriorParametersFree(myDataList = myDataList, priorParameters = priorParameters)
		}
		cat(paste0(" done.","\n"))
	}
	L = 1	# not really used anywhere
#	zz <- file(paste0(savePlots,"/numberCutPointsMAP.txt"), open = "w")
	nReps <- length(myDataList)
	n <- dim(myDataList[[1]])[2]
	cutPoints <- vector("list", length = n)
	cutPointsVar <- vector("list", length = n)
	if(nReps < 2){stop("no replicates")}
	cat(paste0("*  MCMC sampler parameters: nIter = ", nIter, ", burn = ", burn, ", startPoint = ", startPoint ),"\n")
	cat(paste0("*  Running MCMC for ", n, " subjects..."), "\n")	
	NAindex <- c()
	results <- vector("list", length = 11)
	results[[3]] <- vector("list", length = n)
	results[[5]] <- vector("list", length = n)
	results[[4]] <- numeric(n)
	results[[6]] <- myColNames
	results[[7]] <- vector("list", length = n)
	results[[8]] <- vector("list", length = n)
	results[[9]] <- vector("list", length = n)
	checkCondition <- -99
	if( missing(subsetIndex) == FALSE ){
		checkCondition <- (1:n)[-subsetIndex]
		cat(paste0("*                [NOTE] skipping ", n - length(subsetIndex), " subjects"), "\n")	
	}else{
		subsetIndex <- 1:n
	}
	movesRange = c(2,3)
	for (i in 1:n){
		myData <- myDataList[[1]][ , i]
		if( i %in% checkCondition ){
			#cat(paste0("                [SKIPPED] subsetIndex enabled.","\n"))
			NAindex <- c(NAindex, i)
		}else{
			cat(paste0("*    i = ", i, ", name: ", myColNames[i], " " ))
			for (j in 2:nReps){
				myData <- cbind(myData, myDataList[[j]][ , i])
			}
			posteriorParametersIter <- posteriorParameters
			posteriorParametersIter[[1]] <- posteriorParameters[[1]][i,]
			if(sameVariance == FALSE){
				posteriorParametersIter[[4]] <- posteriorParameters[[4]][i,]
			}
			mhRunForSubject <- mcmcSampler(myData = myData, nIter = nIter, mhPropRange = mhPropRange, dName = myColNames[i], burn = burn,
							mhSinglePropRange = mhSinglePropRange, movesRange = movesRange, finalIterationPdf = savePlots,
							startPoint = startPoint, postPar = posteriorParametersIter, timeScale = timeScale, 
							iterPerPlotPrefix = paste0("plot-", i), priorParameters = priorParameters, 
							L = L, LRange = LRange, tau = tau, 
							gammaParameter = gammaParameter, saveTheta = saveTheta, Prior = Prior)
			results[[3]][[i]] <- mhRunForSubject$nCutPointsPosterior
			results[[4]][i] <- mhRunForSubject$nCutPointsMAP
			results[[5]][[i]] <- mhRunForSubject$acceptanceRates
			results[[7]][[i]] <- mhRunForSubject$cutPoints
			results[[8]][[i]] <- mhRunForSubject$theta
			results[[9]][[i]] <- mhRunForSubject$nCutPoints
			if(is.array(mhRunForSubject$cutPoints) == TRUE){
				cutPoints[[i]] <- apply(mhRunForSubject$cutPoints, 2, median)
				cutPointsVar[[i]] <- apply(mhRunForSubject$cutPoints, 2, var)
			}else{
				cutPoints[[i]] <- median(mhRunForSubject$cutPoints)
				cutPointsVar[[i]] <- var(mhRunForSubject$cutPoints)
			}
#			cat(paste0(myColNames[i], " ", mhRunForSubject$nCutPointsMAP), file = zz, "\n")
		}
	}
#	close(zz)
	results[[1]] <- lapply(cutPoints, function(x){x*timeScale})
	results[[2]] <- lapply(cutPointsVar, function(x){x*(timeScale^2)})
	names(results) <- c("Cutpoint_posterior_median", 
				"Cutpoint_posterior_variance", 
				"NumberOfCutPoints_posterior_distribution", 
				"NumberOfCutPoints_MAP", 
				"Metropolis-Hastings_acceptance_rate", 
				"subject_ID",
				"Cutpoint_mcmc_trace_map",
				"theta",
				"nCutPointsTrace",
				"subsetIndex",
				"data")
	results$subsetIndex = subsetIndex
	results$data = myDataList
	cat(paste0("*  ALL DONE."),"\n")
	if(is.null(savePlots) == FALSE){ 
		cat(paste0("*  See produced *.pdf files in: `",getwd(),"/",savePlots,"`"),"\n")
	}

        class(results) <- c('list', 'beast.object')
 	return(results)

}

myUnicodeCharacters <- function(){
	mySymbols <- c("\U0000A4", "\U0000A3", "\U0000A5", "\U000285","\U0009F8", "\U000E5B","\U001405","\U001518","\U001620","\U0018F2","\U00204D","\U0021AF","\U00220E","\U00261D","\U00262F",
"\U00266C","\U00267B","\U002687","\U002713","\U002730","\U00272A", "\U0027C6","\U002FC2","\U00265E","\U00269D","\U002A12", "\U002605", "\U0029EB", "\U002300", "\U002301", "\U002302", "\U002303", "\U002304", "\U002305", "\U002306", "\U002307", "\U002308", "\U002309", "\U0023F0", "\U0023ED", "\U0023E7", "\U0025F4", "\U0025CD", "\U0025B5", "\U002615", "\U002660", "\U0026C7", "\U002667", "\U002706", "\U00270D", "\U0026F7")
	return(mySymbols[floor(length(mySymbols)*runif(1)+1)])
}


normalizeTime0 <- function(myDataList){

	normalizedData <- myDataList
	nReps <- length(myDataList)
	n <- dim(myDataList[[1]])[2]
	for(i in 1:n){
		for(j in 1:nReps){
			valueAtZero <- myDataList[[j]][1,i]
			normalizedData[[j]][ , i] <- myDataList[[j]][ , i] - valueAtZero
		}
	}
	return(normalizedData)
}


computePosteriorParameters <- function(myDataList, priorParameters){
	n <- dim(myDataList[[1]])[2]
	nTime <- dim(myDataList[[1]])[1]
	nReps <- length(myDataList)
	mu0 <- priorParameters$mu0
	nu0 <- priorParameters$nu0
	alpha0 <- priorParameters$alpha0
	beta0 <- priorParameters$beta0
	j <- 0
	alive <- c()
	for(i in 1:n){
		myData <- myDataList[[1]][,i]
		for(k in 2:nReps){
			myData <- rbind(myData, myDataList[[k]][,i])
		}
		j <- j + 1
		alive <- c(alive, i)
	}
	nAlive <- j

	meanParameters1 <- array(data = 0, dim = c(n, nTime))
	meanParameters2 <- nReps + nu0
	varianceParameters1 <- alpha0 + nAlive*nReps/2	
	varianceParameters2 <- numeric(nTime)	
	sumOverN <- numeric(nTime)
	for(i in alive){
		myData <- myDataList[[1]][,i]
		for(k in 2:nReps){
			myData <- rbind(myData, myDataList[[k]][,i])
		}
		cSum <- colSums(myData)
		meanParameters1[i, ] <- (cSum + nu0*mu0)/meanParameters2
		sumOverN <- sumOverN + meanParameters2*colSums(myData^2) - cSum^2 - 2*nu0*mu0*cSum
	}
	varianceParameters2 <- beta0 + 0.5*(nAlive*nReps*nu0*mu0^2 + sumOverN)/meanParameters2
	results <- vector("list", length = 4)
	results[[1]] <- meanParameters1
	results[[2]] <- meanParameters2
	results[[3]] <- varianceParameters1
	results[[4]] <- varianceParameters2
	names(results) <- c("meanPar1", "meanPar2", "varPar1", "varPar2")
	return(results)
	
}


# different variance per time series
computePosteriorParametersFree <- function(myDataList, priorParameters){
	n <- dim(myDataList[[1]])[2]
	nTime <- dim(myDataList[[1]])[1]
	nReps <- length(myDataList)
	mu0 <- priorParameters$mu0
	nu0 <- priorParameters$nu0
	alpha0 <- priorParameters$alpha0
	beta0 <- priorParameters$beta0
	j <- 0
	alive <- c()
	for(i in 1:n){
		myData <- myDataList[[1]][,i]
		for(k in 2:nReps){
			myData <- rbind(myData, myDataList[[k]][,i])
		}
		j <- j + 1
		alive <- c(alive, i)
	}
	nAlive <- j

	meanParameters1 <- array(data = 0, dim = c(n, nTime))
	meanParameters2 <- nReps + nu0
	varianceParameters1 <- alpha0 + nReps/2	
	varianceParameters2 <- array(data = 0, dim = c(n, nTime))	
	for(i in alive){
		myData <- myDataList[[1]][,i]
		for(k in 2:nReps){
			myData <- rbind(myData, myDataList[[k]][,i])
		}
		cSum <- colSums(myData)
		meanParameters1[i, ] <- (cSum + nu0*mu0)/meanParameters2
		sumOverN <- meanParameters2*colSums(myData^2) - cSum^2 - 2*nu0*mu0*cSum
		varianceParameters2[i, ] <- beta0 + 0.5*(nReps*nu0*mu0^2 + sumOverN)/meanParameters2
	}

	results <- vector("list", length = 4)
	results[[1]] <- meanParameters1
	results[[2]] <- meanParameters2
	results[[3]] <- varianceParameters1
	results[[4]] <- varianceParameters2

	names(results) <- c("meanPar1", "meanPar2", "varPar1", "varPar2")
	return(results)
	
}

computeEmpiricalPriorParameters <- function(myDataList, nu0 = 1, alpha0 = 1, beta0 = 1){
	priorParameters <- vector("list",length = 4)
	n <- dim(myDataList[[1]])[2]
	nTime <- dim(myDataList[[1]])[1]
	nReps <- length(myDataList)
	j <- 0
	alive <- c()
	xMean <- numeric(nTime)
	for(i in 1:n){
		myData <- myDataList[[1]][,i]
		for(k in 2:nReps){
			myData <- rbind(myData, myDataList[[k]][,i])
		}
		j <- j + 1
		alive <- c(alive, i)
		xMean <- xMean + colMeans(myData)
	}
	nAlive <- j
	xMean <- xMean/nAlive
	
	names(priorParameters) <- c("mu0", "nu0", "alpha0", "beta0")
	priorParameters$mu0 <- xMean
	priorParameters$nu0 <- nu0
	priorParameters$alpha0 <- alpha0
	priorParameters$beta0 <- beta0
	return(priorParameters)

}

#' @export
print.beast.object <- function(x, ...){
        if( 'beast.object' %in% class(x) ){
                cat("\n")
		subsetIndex <- x$subsetIndex
		cat(paste0("Table of frequencies of the most probable number of change-points:"),'\n')
		print(table(names(unlist(lapply(x$NumberOfCutPoints_MAP, function(y){table(y)})))[subsetIndex]))
		cat("\n")
		cat(paste0("Summary per time-series:"),'\n')
		for(i in subsetIndex){
			cat(paste0(x$subject_ID[[i]],": "), 
				paste0("P(",x$NumberOfCutPoints_MAP[[i]], " change-points|data) ="), 
				paste0(round(as.numeric(x$NumberOfCutPoints_posterior_distribution[[i]][as.character(x$NumberOfCutPoints_MAP[[i]])]),2)), 
				paste0("located at"),paste0( x$Cutpoint_posterior_median[[i]]),
			 "\n")
		}
                cat('\n')
        }else{
                cat(paste("    The input is not in class `beast.object`"),'\n')
        }
}


#' @export
plot.beast.object <- function (x, fileName, width = 9, height = 6, pointsize = 12, 
    ylab = "x", xlab = "t", timeScale = 1, myPal, boxwex = 0.20, ...) 
{
    if ("beast.object" %in% class(x)) {
        cat("\n")
        if (timeScale < 0) {
            stop("timeScale should be positive.")
        }
        subsetIndex <- x$subsetIndex
        mutantNames <- colnames(x$data[[1]])[subsetIndex]
        nTime <- dim(x$data[[1]])[1]
        n <- dim(x$data[[1]])[2]
        nReps <- length(x$data)
        xMax <- max(unlist(x$data))
        xMin <- min(unlist(x$data))
        myPositions <- floor(seq(0, nTime, length = 10))
        myLabels <- round(myPositions * timeScale, 1)
        pdf(fileName, width = width, height = height, pointsize = pointsize)
        nColors <- length(table(names(unlist(lapply(x$NumberOfCutPoints_MAP, 
            function(y) {
                table(y)
            })))[subsetIndex]))
        if (nColors < 10) {
            myPal <- brewer.pal(n = nColors, name = "Set1")
        }
        else {
            stop("the range of possible numbers of change-points is larger than 9, please manually define the colors to `myPal` argument")
        }
        myMeanTimeSeries <- array(data = 0, dim = c(n, nTime))
        for (i in subsetIndex) {
            for (j in 1:nReps) {
                myMeanTimeSeries[i, ] <- myMeanTimeSeries[i, 
                  ] + x$data[[j]][, i]
            }
            myMeanTimeSeries[i, ] <- myMeanTimeSeries[i, ]/nReps
        }
        if (timeScale == 1) {
            matplot(t(myMeanTimeSeries[subsetIndex, ]), type = "l", 
                col = myPal[as.numeric(as.factor(unlist(x$NumberOfCutPoints_MAP)[subsetIndex]))], 
                lty = 1, xlab = "t", ylab = paste0("average ", 
                  ylab))
        }
        else {
            matplot(t(myMeanTimeSeries[subsetIndex, ]), xaxt = "n", 
                type = "l", col = myPal[as.numeric(as.factor(unlist(x$NumberOfCutPoints_MAP)[subsetIndex]))], 
                lty = 1, xlab = "t", ylab = paste0("average ", 
                  ylab))
            axis(1, at = myPositions, labels = myLabels)
        }
        legend("topleft", col = c(0, myPal), lty = 1, c("MAP number of change-points:", 
            paste0(names(table(unlist(x$NumberOfCutPoints_MAP)[subsetIndex])), 
                " (", as.numeric(table(unlist(x$NumberOfCutPoints_MAP)[subsetIndex])), 
                " time-series)")))
        iter <- 0
        for (i in subsetIndex) {
            iter <- iter + 1
            tmp <- x$data[[1]][, i]
            if (nReps > 1) {
                for (j in 2:nReps) {
                  tmp <- cbind(tmp, x$data[[j]][, i])
                }
            }
            if (timeScale == 1) {
                matplot(tmp, type = "l", lwd = 2, lty = 1, col = 2:(nReps + 
                  1), ylim = c(xMin, xMax), ylab = ylab, xlab = xlab, 
                  main = paste0("", mutantNames[iter], ""))
            }
            else {
                matplot(tmp, xaxt = "n", type = "l", lwd = 2, 
                  lty = 1, col = 2:(nReps + 1), ylim = c(xMin, xMax), 
                  ylab = ylab, xlab = xlab, main = paste0("", 
                    mutantNames[iter], ""))
                axis(1, at = myPositions, labels = myLabels)
            }
            l <- x$NumberOfCutPoints_MAP[[i]]
		if(l > 0){
            abline(v = x$Cutpoint_posterior_median[[i]], lty = 3, col = "gray")
		}
            yRange <- c(0, max(tmp))
            yPoints <- seq(from = 0, to = xMax, length = l + 
                2)[-c(1, l + 2)]
		lY <- length(yPoints)
		yPoints <- yPoints[lY:1]
            if(l > 0){
			if (l == 1) {
			boxplot(x$Cutpoint_mcmc_trace_map[[i]], horizontal = TRUE, 
			  add = TRUE, at = yPoints, boxwex = boxwex, col = "gray", 
			  xaxt = "n")
			}
			else {
			for (j in 1:l) {
			  boxplot(x$Cutpoint_mcmc_trace_map[[i]][, j], 
			    horizontal = TRUE, add = TRUE, at = yPoints[j], 
			    boxwex = boxwex, col = "gray", xaxt = "n")
			}
			}
		}
        }
        dev.off()
        cat(paste0("See produced file: ", fileName), "\n")
    }
    else {
        cat(paste("    The input is not in class `beast.object`"), 
            "\n")
    }
}

Try the beast package in your browser

Any scripts or data that you put into this service are public.

beast documentation built on May 2, 2019, 1:19 p.m.