R/postProcess.R
In PReMiuM: Dirichlet Process Bayesian Clustering, Profile Regression

Documented in calcAvgRiskAndProfile calcDissimilarityMatrix calcOptimalClustering calcPredictions computeRatioOfVariance globalParsTrace heatDissMat is.wholenumber margModelPosterior plotPredictions plotRiskProfile profRegr setHyperparams simBenchmark summariseVarSelectRho

# (C) Copyright David Hastie, Silvia Liverani and Aurore J. Lavigne, 2012-2014.

# PReMiuM++ is free software; you can redistribute it and/or modify it under the
# terms of the GNU Lesser General Public License as published by the Free Software
# Foundation; either version 3 of the License, or (at your option) any later
# version.

# PReMiuM++ is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.

# You should have received a copy of the GNU Lesser General Public License
# along with PReMiuM++ in the documentation directory. If not, see
# <http://www.gnu.org/licenses/>.

# The external linear algebra library Eigen, parts of which are included  in the
# lib directory is released under the LGPL3+ licence. See comments in file headers
# for details.

# The Boost C++ header library, parts of which are included in the  lib directory
# is released under the Boost Software Licence, Version 1.0, a copy  of which is
# included in the documentation directory.

is.wholenumber <- function(x, tol = .Machine$double.eps^0.5)  abs(x - round(x)) < tol

profRegr<-function(covNames, fixedEffectsNames, outcome="outcome", outcomeT=NA, data, output="output", hyper, predict, predictType="RaoBlackwell", nSweeps=1000, nBurn=1000, nProgress=500, nFilter=1, nClusInit, seed, yModel="Bernoulli", xModel="Discrete", sampler="SliceDependent", alpha=-2, dPitmanYor=0, excludeY=FALSE, extraYVar=FALSE, varSelectType="None", entropy,reportBurnIn=FALSE, run=TRUE, discreteCovs, continuousCovs, whichLabelSwitch="123", includeCAR=FALSE, neighboursFile="Neighbours.txt",uCARinit=FALSE,PoissonCARadaptive=FALSE,weibullFixedShape=TRUE, useNormInvWishPrior=FALSE, useHyperpriorR1=TRUE, useIndependentNormal=FALSE, useSeparationPrior=FALSE){
  
  # suppress scientific notation
  options(scipen=999)
  
  if (xModel=="Mixed"){
    covNames <- c(discreteCovs, continuousCovs)
    nDiscreteCovs <- length(discreteCovs)
    nContinuousCovs <- length(continuousCovs)
  }
  nCovariates<-length(covNames)
  
  if (!is.data.frame(data)) stop("Input data must be a data.frame with outcome, covariates and fixed effect names as column names.")
  
  if (extraYVar==TRUE&(yModel=="Categorical"||yModel=="Normal"||yModel=="Survival"||yModel=="Quantile")) stop("Option extraYVar is only available for Bernoulli, Binomial and Poisson response.")
  
  if (includeCAR==TRUE&(yModel=="Categorical"||yModel=="Survival"||yModel=="Bernoulli"||yModel=="Binomial"||yModel=="Quantile")) stop("Option includeCAR is only available for Poisson and Normal response.")
  
  if (predictType=="random"&(yModel=="Categorical"||yModel=="Poisson"||yModel=="Binomial"||yModel=="Bernoulli"||yModel=="Survival")) stop("The option of random predictions is only available for yModel=Normal and yModel=Quantile.")
  
  if (useNormInvWishPrior==TRUE && !varSelectType=="None") stop("Variable selection is not available for Normal-inverse-Wishart prior for Normal covariates.")
  
  if (useIndependentNormal==TRUE) useHyperpriorR1=FALSE # because it happens automatically
  
  if (useIndependentNormal==TRUE && useSeparationPrior==TRUE) stop("useSeparationPrior option cannot be used for independent normal likelihood.")
  
  if (useSeparationPrior==TRUE) useHyperpriorR1=FALSE # because it happens automatically
    
  if (xModel=="Normal") {
    sdContVars<-apply(data[covNames],2,sd)
    if(length(which(sdContVars==0))) stop("One of the continuous covariates is constant (with zero variance). Remove it and run again.") 
  }
  if (xModel=="Mixed") {
    sdContVars<-apply(data[continuousCovs],2,sd)
    if(length(which(sdContVars==0))) stop("One of the continuous covariates is constant (with zero variance). Remove it and run again.") 
  }
  
  # open file to write output
  fileName<-paste(output,"_input.txt",sep="")
  # make big data matrix with outcome, covariates and fixed effects	
  # outcome
  # create outcome if excludeY=TRUE and outcome not provided
  if (length(which(colnames(data)==outcome))<1&&excludeY==TRUE) {
    dataMatrix<-rep(0,dim(data)[1])
    yModel="Bernoulli" 
  } else {
    dataMatrix<-data[,which(colnames(data)==outcome)]
  }
  
  if (sum(is.na(dataMatrix))>0) stop("ERROR: the outcome cannot have missing values. Use the profiles with missing outcome for predictions.")
  
  if (length(dataMatrix)==0) stop("ERROR: the outcome provided is not one of the names of the variables in the dataframe.")
  
  # recode outcome covariates
  if (yModel=="Categorical"||yModel=="Bernoulli"){
    outcomeY<-dataMatrix
    outcomeFactor<-as.factor(outcomeY)
    yLevels<-length(levels(outcomeFactor))
    if (yModel=="Bernoulli"&yLevels>2) stop("The number of levels of the outcome is greater than 2, which is not allowed for Bernoulli outcome. You might want to set yModel to be Categorical.") 
    if (yModel=="Categorical") write(yLevels,fileName,append=T,ncolumns=length(yLevels))
    if (is.numeric(outcomeY)){
      if (!(min(outcomeY)==0&&max(outcomeY)==(yLevels-1)&&sum(!is.wholenumber(outcomeY))==0)) {
        print("Recoding of the outcome as follows")
        print(paste("Replacing level ",levels(outcomeFactor)," with ",c(0:(yLevels-1)),sep=""))
        levels(outcomeFactor)<-c(0:(yLevels-1))
        dataMatrix<-outcomeFactor
        if (!missing(predict)) stop("If you are including data for prediction, it is advised that the recoding of the categorical variables as advised above is done manually ahead of running this function, as errors might arise.")
      }
    } else {
      print("Recoding of the outcome as follows")
      tmpLevels<-levels(outcomeFactor)
      print(paste("Replacing level ",levels(outcomeFactor)," with ",c(0:(yLevels-1)),sep=""))
      levels(outcomeFactor)<-c(0:(yLevels-1))
      dataMatrix<-outcomeFactor
      if (!missing(predict)) stop("If you are including data for prediction, it is advised that the recoding of the categorical variables as advised above is done manually ahead of running this function, as errors might arise.")
    }
    if (yModel=="Bernoulli") yLevels <- 1
  } else {
    yLevels <- 1
  }
  
  # covariates
  covIndeces<-vector()
  for (i in 1:nCovariates){
    tmpIndex<-which(colnames(data)==covNames[i])
    if (length(tmpIndex)==0) stop("ERROR: covariate names in data.frame provided do not correspond to list of covariates for profile regression")
    covIndeces<-append(covIndeces,tmpIndex)
  }
  
  covariates<-data[,covIndeces]
  dataMatrix<-cbind(dataMatrix,covariates)
  
  # recode covariate levels
  if (xModel=="Discrete"){
    xLevels<-vector()
    for (k in 1:nCovariates){
      tmpCov<-dataMatrix[,(1+k)]
      xLevels[k]<-length(levels(as.factor(tmpCov)))	
      if(is.factor(tmpCov)){
        print(paste("Recoding of covariate ",colnames(dataMatrix)[k+1]," as follows",sep=""))
        tmpCovFactor<-as.factor(tmpCov)
        tmpLevels<-levels(tmpCovFactor)
        print(paste("Replacing level ",levels(tmpCovFactor)," with ",c(0:(xLevels[k]-1)),sep=""))
        levels(tmpCovFactor)<-c(0:(xLevels[k]-1))	
        dataMatrix[,(1+k)]<-tmpCovFactor
        dataMatrix[,(1+k)]<-as.numeric(levels(dataMatrix[,(1+k)]))[as.integer(dataMatrix[,(1+k)])]
	if (!missing(predict)) stop("If you are including data for prediction, it is advised that the recoding of the categorical variables as advised above is done manually ahead of running this function, as errors might arise.")

      } else {
        if (!(min(tmpCov,na.rm=TRUE)==0&&max(tmpCov,na.rm=TRUE)==(xLevels[k]-1)&&sum(!is.wholenumber(tmpCov[!is.na(tmpCov)]))==0)) {
          print(paste("Recoding of covariate ",colnames(dataMatrix)[k+1]," as follows",sep=""))
          tmpCovFactor<-as.factor(tmpCov)
          tmpLevels<-levels(tmpCovFactor)
          print(paste("Replacing level ",levels(tmpCovFactor)," with ",c(0:(xLevels[k]-1)),sep=""))
          levels(tmpCovFactor)<-c(0:(xLevels[k]-1))	
          dataMatrix[,(1+k)]<-tmpCovFactor
          dataMatrix[,(1+k)]<-as.numeric(levels(dataMatrix[,(1+k)]))[as.integer(dataMatrix[,(1+k)])]
	if (!missing(predict)) stop("If you are including data for prediction, it is advised that the recoding of the categorical variables as advised above is done manually ahead of running this function, as errors might arise.")

        }
      }
    }
  } else 	if (xModel=="Mixed"){
    xLevels<-vector()
    for (k in 1:nDiscreteCovs){
      tmpCov<-dataMatrix[,(1+k)]
      xLevels[k]<-length(levels(as.factor(tmpCov)))	
      if(is.factor(tmpCov)){
        print(paste("Recoding of covariate number ",colnames(dataMatrix)[k+1]," as follows",sep=""))
        tmpCovFactor<-as.factor(tmpCov)
        tmpLevels<-levels(tmpCovFactor)
        print(paste("Replacing level ",levels(tmpCovFactor)," with ",c(0:(xLevels[k]-1)),sep=""))
        levels(tmpCovFactor)<-c(0:(xLevels[k]-1))	
        dataMatrix[,(1+k)]<-tmpCovFactor
        dataMatrix[,(1+k)]<-as.numeric(levels(dataMatrix[,(1+k)]))[as.integer(dataMatrix[,(1+k)])]
      if (!missing(predict)) stop("If you are including data for prediction, it is advised that the recoding of the categorical variables as advised above is done manually ahead of running this function, as errors might arise.")
        
      } else {
        if (!(min(tmpCov,na.rm=TRUE)==0&&max(tmpCov,na.rm=TRUE)==(xLevels[k]-1)&&sum(!is.wholenumber(tmpCov[!is.na(tmpCov)]))==0)) {
          print(paste("Recoding of covariate number ",colnames(dataMatrix)[k+1]," as follows",sep=""))
          tmpCovFactor<-as.factor(tmpCov)
          tmpLevels<-levels(tmpCovFactor)
          print(paste("Replacing level ",levels(tmpCovFactor)," with ",c(0:(xLevels[k]-1)),sep=""))
          levels(tmpCovFactor)<-c(0:(xLevels[k]-1))	
          dataMatrix[,(1+k)]<-tmpCovFactor
          dataMatrix[,(1+k)]<-as.numeric(levels(dataMatrix[,(1+k)]))[as.integer(dataMatrix[,(1+k)])]
      if (!missing(predict)) stop("If you are including data for prediction, it is advised that the recoding of the categorical variables as advised above is done manually ahead of running this function, as errors might arise.")

        }
      }
    }
  } else {
    xLevels <- 1
  }
  
  for (k in 1:nCovariates){
    missingX<-is.na(dataMatrix[,(k+1)])
    nMissingX<-sum(missingX)
    if (nMissingX>0) {
      dataMatrix[missingX,(k+1)]<- rep(-999,nMissingX)
    }
  }
  
  # fixed effects
  if (!missing(fixedEffectsNames)) {
    nFixedEffects<-length(fixedEffectsNames)
    FEIndeces<-vector()
    for (i in 1:nFixedEffects){
      tmpIndex<-which(colnames(data)==fixedEffectsNames[i])
      if (length(tmpIndex)==0) stop("ERROR: fixed effects names in data.frame provided do not correspond to list of fixed effects for profile regression")
      FEIndeces<-append(FEIndeces,tmpIndex)
    }
    fixedEffects<-data[,FEIndeces]
    if (sum(is.na(fixedEffects))>0) stop("ERROR: fixed effects cannot have missing values. Use an imputation method before using profRegr().")
    dataMatrix<-cbind(dataMatrix,fixedEffects)
    for (i in dim(fixedEffects)[2]){
      if (isa(fixedEffects[,i],"character")) stop("ERROR: fixed effects must be of class numeric. See help pages.") 
    }
  } else {
    nFixedEffects<-0
    #if (yModel=="Survival") stop("ERROR: For the current implementation of Survival outcome the fixed effects must be provided. ")
  }
  
  #  extra outcome data
  if (yModel=="Poisson"||yModel=="Binomial"||yModel=="Survival") {
    if(is.na(outcomeT)){
      stop ("It is required to set outcomeT for Poisson (offset), Binomial (number of trials) or Survival (censoring) outcome.")
    } else {
      indexOutcomeT <- which(colnames(data)==outcomeT)
      if (yModel=="Survival") {
        if (!all(is.element(names(table(data[indexOutcomeT])),c("0","1")))) stop("If yModel is Survival, variable outcomeT corresponds to the censoring of the outcome. Therefore, it must be coded 0 and 1.")
        
      }
      dataMatrix <- cbind(dataMatrix,data[indexOutcomeT])
    }
  } else {
    if(!is.na(outcomeT)) stop ("It is only required to set outcomeT for Poisson, Binomial and Survival outcome.")
  }		
  
  # print number of subjects
  nSubjects <- dim(dataMatrix)[1]
  write(as.character(nSubjects), fileName,ncolumns=1)
  # print number of covariates and their names
  write(as.character(nCovariates),fileName,append=T,ncolumns=1)
  if (xModel=="Mixed"){
    write(as.character(nDiscreteCovs),fileName,append=T,ncolumns=1)
    write(as.character(nContinuousCovs),fileName,append=T,ncolumns=1)
  }
  write(t(covNames), fileName,append=T,ncolumns=1)
  # print number of fixed effects and their names
  write(nFixedEffects, fileName,append=T,ncolumns=1)
  if (nFixedEffects>0){
    write(t(fixedEffectsNames), fileName,append=T,ncolumns=1)
  }
  if (yModel=="Categorical") write(yLevels,fileName,append=T,ncolumns=1) 
  if (xModel=="Discrete"||xModel=="Mixed"){
    write(xLevels,fileName,append=T,ncolumns=length(xLevels))
  }
  
  # write prediction file
  #	if (!missing(predict)&(includeCAR)) stop ("Predictions are not available with spatial effect.")
  if (!missing(predict)) {
    nPreds<-dim(predict)[1]
    write(nPreds, paste(output,"_predict.txt",sep=""),ncolumns=1)
    for (k in 1:nPreds){
      missingX<-is.na(predict[k,])
      nMissingX<-sum(missingX)
      if (nMissingX>0) {
        predict[k,missingX]<- rep(-999,nMissingX)
      }
    }		
    write(t(as.matrix(predict[,c(covNames)])), paste(output,"_predict.txt",sep=""),append=T,ncolumns=length(covNames))
    if (length(intersect(outcome,names(predict)))>0) {
      if (nFixedEffects>0 ) {
        if (length(intersect(fixedEffectsNames,names(predict)))==length(fixedEffectsNames)) {
          write(nPreds, paste(output,"_predictFull.txt",sep=""),ncolumns=1)
          if (yModel=="Poisson" || yModel=="Binomial"||yModel=="Survival"){
            write(t(as.matrix(predict[,c(outcome,fixedEffectsNames,outcomeT)])), paste(output,"_predictFull.txt",sep=""),append=T,ncolumns=(2+length(fixedEffectsNames)))
          } else {
            write(t(as.matrix(predict[,c(outcome,fixedEffectsNames)])), paste(output,"_predictFull.txt",sep=""),append=T,ncolumns=(1+length(fixedEffectsNames)))				
          }
          fullPredictFile<-TRUE
        } else {
          write(nPreds, paste(output,"_predictFull.txt",sep=""),ncolumns=1)
          predictFixedEffectsNA<-cbind(as.matrix(predict[,c(outcome)]),matrix(-999,ncol=nFixedEffects,nrow=nPreds))
          write(t(predictFixedEffectsNA), paste(output,"_predictFull.txt",sep=""),append=T,ncolumns=(1+length(fixedEffectsNames)))
          fullPredictFile<-TRUE
        }
      } else {
        
        write(nPreds, paste(output,"_predictFull.txt",sep=""),ncolumns=1)
        if (yModel=="Poisson" || yModel=="Binomial" ||yModel == "Survival"){
          predictFixedEffectsNA<-as.matrix(predict[,c(outcome,outcomeT)])
          write(t(predictFixedEffectsNA), paste(output,"_predictFull.txt",sep=""),append=T,ncolumns=2)
        } else {
          predictFixedEffectsNA<-as.matrix(predict[,c(outcome)])
          write(t(predictFixedEffectsNA), paste(output,"_predictFull.txt",sep=""),append=T,ncolumns=1)
        }
        fullPredictFile<-TRUE
      }
      
    } else {
      fullPredictFile<-FALSE
    }
  } else {
    nPreds<-0
    fullPredictFile<-FALSE
  }
  
  write(t(dataMatrix), fileName,append=T,ncolumns=dim(dataMatrix)[2])
  
  # other checks to ensure that there are no errors when calling the program
  if (xModel!="Discrete"&xModel!="Normal"&xModel!="Mixed") stop("This xModel is not defined.")
  if (yModel!="Poisson"&yModel!="Binomial"&yModel!="Bernoulli"&yModel!="Normal"&yModel!="Categorical"&yModel!="Survival"&yModel!="Quantile") stop("This yModel is not defined.")
  
  # conditions for alpha and dPitmanYor parameters	
  # checks that dPitmanYor is in the correct interval
  if (dPitmanYor<0&dPitmanYor>=1) stop("dPitmanYor must belongto the interval [0,1).")
  # if alpha < 0 then we have a Dirichlet prior, so cannot use dPitmanYor
  # if the user asks for alpha<0 and dPitmanYor>0 we stop and give an error message to ensure the user is aware
  if (alpha<=-1&&dPitmanYor>0) stop("Setting alpha < 0 (the default is -2) and dPitmanYor > 0 is not allowed.")
  # the third label switching move is only for Dirichlet process prior, not for Pitman-Yor process prior 
  if (dPitmanYor>0) whichLabelSwitch<-"12"
  if ((alpha>=-1)&(alpha < -dPitmanYor)) stop("It is a condition of the Pitman-Yor process prior that alpha > - dPitmanYor.")
  if (dPitmanYor>0&sampler!="Truncated") stop("The Pitman-Yor process prior has been implemented to use with the Truncated sampler only.")
  #if (dPitmanYor>0&sampler=="Truncated") print("Note that for the Pitman-Yor process prior there might be en error when using the Truncated sampler. This is due to the way that the bound on the number of clusters is computed.")
  
  #check entries for spatial CAR term
  includeuCARinit<-FALSE
  if (includeCAR){
    if(file.exists(neighboursFile)==FALSE) stop("A valid file for neighbourhood structure must be included for the spatial model.") 
    islands<-readLines(neighboursFile)
    get_neigh_number<-function(dat){
      strsplit(dat," ")[[1]][2]
    }
    n_neighbours<-lapply(islands,get_neigh_number)
    if(0 %in% n_neighbours) stop("There cannot be areas without neighbours in the neighboursFile.")
    if (uCARinit!=FALSE){
      if(file.exists(uCARinit)==FALSE) stop("Invald file for uCARinit.") 
      #uCARvalues<-read.table("uCARinit.txt")[,1]
      includeuCARinit<-TRUE
    }
  }

  inputString<-paste("PReMiuM --input=",fileName," --output=",output," --xModel=",xModel," --yModel=",yModel," --varSelect=",varSelectType," --whichLabelSwitch=",whichLabelSwitch," --predType=",predictType,sep="")

  # create hyperparameters file
  if (!missing(hyper)) {
    hyperFile <-paste(output,"_hyper.txt",sep="")
    if (file.exists(hyperFile)) file.remove(hyperFile)
    if (!is.null(hyper$shapeAlpha)){
      write(paste("shapeAlpha=",hyper$shapeAlpha,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$rateAlpha)){
      write(paste("rateAlpha=",hyper$rateAlpha,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$aPhi)){
      write(paste("aPhi=",paste(hyper$aPhi,collapse=" ")," ",sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$mu0)){
      write(paste("mu0=",paste(hyper$mu0,collapse=" ")," ",sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$Tau0)){
      write(paste("Tau0=",paste(t(hyper$Tau0),collapse=" ")," ",sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$TauIndep0)){
      write(paste("Tau_Indep_0=",paste(t(hyper$TauIndep0),collapse=" ")," ",sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$R0)){
      write(paste("R0=",paste(t(hyper$R0),collapse=" ")," ",sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$RIndep0)){
      write(paste("R_Indep_0=",paste(t(hyper$RIndep0),collapse=" ")," ",sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$kappa0)){
      write(paste("kappa0=",hyper$kappa0,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$kappa1)){
      write(paste("kappa1=",hyper$kappa1,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$nu0)){
      write(paste("nu0=",hyper$nu0,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$muTheta)){
      write(paste("muTheta=",hyper$muTheta,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$sigmaTheta)){
      write(paste("sigmaTheta=",hyper$sigmaTheta,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$dofTheta)){
      write(paste("dofTheta=",hyper$dofTheta,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$muBeta)){
      write(paste("muBeta=",hyper$muBeta,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$sigmaBeta)){
      write(paste("sigmaBeta=",hyper$sigmaBeta,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$dofBeta)){
      write(paste("dofBeta=",hyper$dofBeta,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$shapeTauEpsilon)){
      write(paste("shapeTauEpsilon=",hyper$shapeTauEpsilon,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$rateTauEpsilon)){
      write(paste("rateTauEpsilon=",hyper$rateTauEpsilon,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$aRho)){
      write(paste("aRho=",hyper$aRho,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$bRho)){
      write(paste("bRho=",hyper$bRho,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$atomRho)){
      if (hyper$atomRho<=0 || hyper$atomRho >1) stop("Hyperparameter atomRho must be in (0,1]. See ?setHyperparams for help.")
      write(paste("atomRho=",hyper$atomRho,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$shapeSigmaSqY)){
      write(paste("shapeSigmaSqY=",hyper$shapeSigmaSqY,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$scaleSigmaSqY)){
      write(paste("scaleSigmaSqY=",hyper$scaleSigmaSqY,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$pQuantile)){
      write(paste("pQuantile=",hyper$pQuantile,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$rSlice)){
      write(paste("rSlice=",hyper$rSlice,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$truncationEps)){
      write(paste("truncationEps=",hyper$truncationEps,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$shapeTauCAR)){
      write(paste("shapeTauCAR=",hyper$shapeTauCAR,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$rateTauCAR)){
      write(paste("rateTauCAR=",hyper$rateTauCAR,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$shapeNu)){
      write(paste("shapeNu=",hyper$shapeNu,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$scaleNu)){
      write(paste("scaleNu=",hyper$scaleNu,sep=""),hyperFile,append=T)
    }
    if (!is.null(hyper$initAlloc)){
      write(paste("initAlloc=",paste(hyper$initAlloc,collapse=" ")," ",sep=""),hyperFile,append=T)
      if(sum(is.wholenumber(hyper$initAlloc))!=nSubjects) stop("The vector initAlloc has not been initialised properly: the vector must contain integers and have as many elements as there are observations in the dataset (nSubjects).")
      nClusInit<-max(hyper$initAlloc)
      
      
    }
  }
  
  if (weibullFixedShape) inputString<-paste(inputString," --weibullFixedShape",sep="")
  if (PoissonCARadaptive) inputString<-paste(inputString," --PoissonCARadaptive",sep="")
  if (reportBurnIn) inputString<-paste(inputString," --reportBurnIn",sep="")
  if (!missing(alpha)) inputString<-paste(inputString," --alpha=",alpha,sep="")
  if (!missing(dPitmanYor)) inputString<-paste(inputString," --dPitmanYor=",dPitmanYor,sep="")
  if (!missing(sampler)) inputString<-paste(inputString," --sampler=",sampler,sep="")
  if (!missing(hyper)) inputString<-paste(inputString," --hyper=",hyperFile,sep="")
  if (!missing(predict)) inputString<-paste(inputString," --predict=",paste(output,"_predict.txt",sep=""),sep="")
  if (!missing(nSweeps)) inputString<-paste(inputString," --nSweeps=",nSweeps,sep="")
  if (!missing(nBurn)) inputString<-paste(inputString," --nBurn=",nBurn,sep="")
  if (!missing(nProgress)) inputString<-paste(inputString," --nProgress=",nProgress,sep="")
  if (!missing(nFilter)) inputString<-paste(inputString," --nFilter=",nFilter,sep="")
  if (!missing(nClusInit)) inputString<-paste(inputString," --nClusInit=",nClusInit,sep="")
  if (!missing(seed)) inputString<-paste(inputString," --seed=",seed,sep="")
  if (excludeY) inputString<-paste(inputString," --excludeY",sep="")
  if (extraYVar) inputString<-paste(inputString," --extraYVar",sep="")
  if (!missing(entropy)) inputString<-paste(inputString," --entropy",sep="")
  if (includeCAR) inputString<-paste(inputString," --includeCAR", " --neighbours=", neighboursFile ,sep="")
  if (includeuCARinit) inputString<-paste(inputString, " --uCARinit=", uCARinit ,sep="")
  if (useNormInvWishPrior) inputString<-paste(inputString," --useNormInvWishPrior", sep="")
  if (useHyperpriorR1) inputString<-paste(inputString," --useHyperpriorR1", sep="")
  if (useSeparationPrior) inputString<-paste(inputString," --useSeparationPrior", sep="")
  if (useIndependentNormal) inputString<-paste(inputString," --useIndependentNormal", sep="")

  if (run) .Call("profRegr", inputString, PACKAGE = 'PReMiuM')
  
  
  # define directory path and fileStem
  outputSplit <- strsplit(output,split="/")
  fileStem <- tail(outputSplit[[1]],1)
  if (length(outputSplit[[1]])>1) {
    directoryPath <- paste(head(outputSplit[[1]],-1),collapse="/")
  } else {
    directoryPath <-"."
  }
  
  # other re-writes for function return
  # var select and var select type
  if (varSelectType=="None") {
    varSelect <- FALSE
    varSelType <- NULL
  } else {
    varSelect <- TRUE
    varSelType <- varSelectType
  }
  # covariate matrix
  xMat <- dataMatrix[,2:(nCovariates+1)]
  # outcome and fixed effect matrix
  yMat <- NULL
  wMat <- NULL
  # the code requires these matrices whether excludeY is TRUE or FALSE
  #if(includeResponse){
  yMat<-matrix(dataMatrix[,1],ncol=1)
  if(yModel=='Poisson'){
    offset<-dataMatrix[,ncol(dataMatrix)]
    yMat<-cbind(yMat,offset)
  }else if(yModel=='Binomial'){
    nTrials<-dataMatrix[,ncol(dataMatrix)]
    yMat<-cbind(yMat,nTrials)
  }else if(yModel=='Survival'){
    censoring<-dataMatrix[,ncol(dataMatrix)]
    yMat<-cbind(yMat,censoring)
  }
  if(nFixedEffects>0){
    wMat<-dataMatrix[,(2+nCovariates):(1+nCovariates+nFixedEffects)]
    wMat<-as.matrix(wMat,ncol=nFixedEffects)
  }
  #}
  # include response
  if (excludeY) {
    includeResponse <- FALSE
    yModel <- NULL
  } else {
    includeResponse <- TRUE
  }
  
  if (xModel=="Mixed") {
    discreteTmp<-discreteCovs
    contTmp<-continuousCovs
  } else {
    discreteTmp<-NA
    contTmp<-NA
  }
  
  if (xModel=="Discrete"){
    useNIWP<-NA
  }else{
    useNIWP<-useNormInvWishPrior
  }
  
  
  return(list("directoryPath"=directoryPath,
              "fileStem"=fileStem,
              "inputFileName"=fileName,
              "nSweeps"=nSweeps,
              "nBurn"=nBurn,
              "reportBurnIn"=reportBurnIn,
              "nFilter"=nFilter,
              "nProgress"=nProgress,
              "nSubjects"=nSubjects,
              "nPredictSubjects"=nPreds,
              "fullPredictFile"=fullPredictFile,
              "predictType"=predictType,
              "alpha"=alpha,
              "dPitmanYor"=dPitmanYor,
              "covNames"=covNames,
              "discreteCovs"=discreteTmp,
              "continuousCovs"=contTmp,
              "xModel"=xModel,
              "includeResponse"=includeResponse,
              "whichLabelSwitch"=whichLabelSwitch,
              "yModel"=yModel,
              "varSelect"=varSelect,
              "varSelectType"=varSelType,
              "nCovariates"=nCovariates,
              "nDiscreteCovs"=ifelse(xModel=="Mixed",nDiscreteCovs,NA),
              "nContinuousCovs"=ifelse(xModel=="Mixed",nContinuousCovs,NA),
              "nFixedEffects"=nFixedEffects,
              "nCategoriesY"=yLevels,
              "nCategories"=xLevels,
              "extraYVar"=extraYVar,
              "includeCAR"=includeCAR,
              "weibullFixedShape"=weibullFixedShape,
              "useNormInvWishPrior"=useNIWP,
              "useHyperpriorR1"=useHyperpriorR1,
              "useSeparationPrior"=useSeparationPrior,
              "useIndependentNormal"=useIndependentNormal,
              "xMat"=xMat,"yMat"=yMat,"wMat"=wMat))
}



# Function to take the output from the C++ run and return an average dissimilarity
# matrix
calcDissimilarityMatrix<-function(runInfoObj,onlyLS=FALSE){
  
  directoryPath=NULL
  fileStem=NULL
  reportBurnIn=NULL
  nSweeps=NULL
  nFilter=NULL
  nSubjects=NULL
  nPredictSubjects=NULL
  nBurn=NULL
  
  for (i in 1:length(runInfoObj)) assign(names(runInfoObj)[i],runInfoObj[[i]])
  
  fileName <- file.path(directoryPath,paste(fileStem,'_z.txt',sep=''))
  
  if (reportBurnIn) {
    recordedNBurn<-nBurn
  } else {
    recordedNBurn<-0
  }
  
  # Call the C++ to compute the dissimilarity matrix
  disSimList<-.Call("calcDisSimMat",fileName,nSweeps,recordedNBurn,nFilter,nSubjects,
                    nPredictSubjects, onlyLS, PACKAGE = 'PReMiuM')
  
  if (onlyLS){
    lsOptSweep<-disSimList$lsOptSweep
    disSimMatPred<-NULL              
    disSimObj<-list('disSimRunInfoObj'=runInfoObj,'disSimMat'=NA,
                    'disSimMatPred'=NA,'lsOptSweep'=lsOptSweep,'onlyLS'=onlyLS)              
  } else {
    disSimMat<-disSimList$disSimMat
    lsOptSweep<-disSimList$lsOptSweep
    disSimMatPred<-NULL              
    if(nPredictSubjects>0){
      disSimMatPred<-disSimMat[(1+(nSubjects*(nSubjects-1)/2)):length(disSimMat)]
      disSimMat<-disSimMat[1:(nSubjects*(nSubjects-1)/2)]
    }   
    disSimObj<-list('disSimRunInfoObj'=runInfoObj,'disSimMat'=disSimMat,
                    'disSimMatPred'=disSimMatPred,'lsOptSweep'=lsOptSweep,'onlyLS'=onlyLS)              
  }
  
  return(disSimObj)
}

# Given a dissimilarity matrix (or list of dissimilarity matrices)
# run partitioning around medoids clustering
calcOptimalClustering<-function(disSimObj,maxNClusters=NULL,useLS=F){
  
  disSimRunInfoObj=NULL
  directoryPath=NULL
  fileStem=NULL
  lsOptSweep=NULL
  nSubjects=NULL
  nPredictSubjects=NULL
  reportBurnIn=NULL
  nBurn=NULL
  nFilter=NULL
  nSweeps=NULL
  onlyLS=NULL
  
  for (i in 1:length(disSimObj)) assign(names(disSimObj)[i],disSimObj[[i]])
  for (i in 1:length(disSimRunInfoObj)) assign(names(disSimRunInfoObj)[i],disSimRunInfoObj[[i]])
  
  if (onlyLS==TRUE) useLS <- TRUE
  
  if(useLS){
    # maniupulation for least squares method, but computation has been done in previous function
    zFileName <- file.path(directoryPath,paste(fileStem,'_z.txt',sep=''))
    zFile<-file(zFileName,open="r")
    
    optZ<-scan(zFile,what=integer(),skip=lsOptSweep-1,n=nSubjects+nPredictSubjects,quiet=T)
    optZFit<-optZ[1:nSubjects]
    if(nPredictSubjects>0){
      optZPredict<-optZ[(nSubjects+1):(nSubjects+nPredictSubjects)]
    }
    uZFit<-unique(optZFit)
    chosenNClusters<-length(unique(uZFit))
    clustVec<-match(optZFit,uZFit)
    clustSizes<-rep(0,chosenNClusters)
    for(c in 1:chosenNClusters){
      clustSizes[c]<-length(which(clustVec==c))
    }
    
    clusteringPred<-NULL
    if(nPredictSubjects>0){
      clusteringPred<-match(optZPredict,uZFit)
    }
    avgSilhouetteWidth<-NULL
    close(zFile)
    
  }else{
    
    if(is.null(maxNClusters)){
      # Determine the maximum number of clusters
      nMembersFileName<-file.path(directoryPath,paste(fileStem,'_nMembers.txt',sep=''))
      nMembersFile<-file(nMembersFileName,open="r")
      nClustersFileName<- file.path(directoryPath,paste(fileStem,'_nClusters.txt',sep=''))
      nClustersFile<-file(nClustersFileName,open="r")
      
      # Restrict to sweeps after burn in
      firstLine<-ifelse(reportBurnIn,nBurn/nFilter+2,1)
      lastLine<-(nSweeps+ifelse(reportBurnIn,nBurn+1,0))/nFilter
      maxNClusters<-0
      
      for(sweep in firstLine:lastLine){
        if(sweep==firstLine){
          skipVal<-firstLine-1
        }else{
          skipVal<-0
        }
        
        # Get the current number of members for each cluster
        nClusters<-scan(nClustersFile,what=integer(),skip=skipVal,n=1,quiet=T)	
        currNMembers<-scan(nMembersFile,what=integer(),skip=skipVal,n=nClusters+1,quiet=T)
        currNMembers<-currNMembers[1:nClusters]
        # Find the number of non-empty clusters
        nNotEmpty<-sum(currNMembers>0)
        if(nNotEmpty>maxNClusters){
          maxNClusters<-nNotEmpty
        }
      }   
      # Add on another 5 just to make sure bound is safe (but don't let it exceed no. of subjects -1)
      maxNClusters<-min(maxNClusters+5,nSubjects-1)
      
      close(nMembersFile)
      close(nClustersFile)
    }   
    
    # If the input was a list of dissimilarity matrices then take the average
    if(is.list(disSimMat)){
      for(i in 1:length(disSimMat)){
        if(i==1){
          tmpMat<-disSimMat[[i]]      
        }else{
          tmpMat<-tmpMat+disSimMat[[i]]
        }
      }
      tmpMat<-tmpMat/length(disSimMat)
      disSimMat<-tmpMat
    }
    
    # Loop over the possible number of clusters
    avgSilhouetteWidth<--1.0;
    cat(paste("Max no of possible clusters:",maxNClusters,"\n"))
    for(c in 2:maxNClusters){
      cat(paste("Trying",c,"clusters\n"))
      tmpObj<-pam(disSimMat,k=c,diss=T)
      # Check whether the silhouette width from this clustering improves previous best
      if(avgSilhouetteWidth<tmpObj$silinfo$avg.width){
        avgSilhouetteWidth<-tmpObj$silinfo$avg.width
        chosenNClusters<-c
        clustVec<-tmpObj$clustering
        clustSizes<-tmpObj$clusinfo[,1]
        # The id of the objects chosen as the medoids
        clustMedoids<-tmpObj$id.med
      }
    }
    
    # Work out the clustering of the prediction objects
    clusteringPred<-NULL
    if(nPredictSubjects>0){
      disSimMatPred<-matrix(disSimMatPred,nrow=nPredictSubjects,byrow=T)
      clusteringPred<-rep(0,nPredictSubjects)
      for(i in 1:nPredictSubjects){
        tmpVec<-disSimMatPred[i,clustMedoids]
        whichMin <- which(tmpVec==min(tmpVec))
        if (length(whichMin)>1) {
          clusteringPred[i]<-sample(whichMin,1)
        } else {
          clusteringPred[i]<-whichMin
        }
      }
    }
  }
  
  return(list("clusObjRunInfoObj"=disSimObj$disSimRunInfoObj,
              "clusObjDisSimMat"=disSimObj$disSimMat,
              "nClusters"=chosenNClusters,
              "clusterSizes"=clustSizes,
              "clustering"=clustVec,
              "avgSilhouetteWidth"=avgSilhouetteWidth,
              "clusteringPred"=clusteringPred))
}

# Function to take the optimal clustering and computing the risk and probability
# profile
calcAvgRiskAndProfile<-function(clusObj,includeFixedEffects=F,proportionalHazards=F){
  
  clusObjRunInfoObj=NULL
  directoryPath=NULL
  fileStem=NULL
  xModel=NULL
  nCategories=NULL
  varSelect=NULL
  varSelectType=NULL
  includeResponse=NULL
  nFixedEffects=NULL
  reportBurnIn=NULL
  nBurn=NULL
  nFilter=NULL
  nSweeps=NULL
  nClusters=NULL
  clustering=NULL
  nCategoriesY=NULL
  nCovariates=NULL
  nDiscreteCovs=NULL
  nContinuousCovs=NULL
  nSubjects=NULL
  nPredictSubjects=NULL
  yModel=NULL
  wMat=NULL
  yMat=NULL
  weibullFixedShape=NULL
  useIndependentNormal=NULL
  
  for (i in 1:length(clusObj)) assign(names(clusObj)[i],clusObj[[i]])
  for (i in 1:length(clusObjRunInfoObj)) assign(names(clusObjRunInfoObj)[i],clusObjRunInfoObj[[i]])
  
  # Construct the number of clusters file name
  nClustersFileName<-file.path(directoryPath,paste(fileStem,'_nClusters.txt',sep=''))
  nClustersFile<-file(nClustersFileName,open="r")
  
  # Construct the allocation file name
  zFileName <- file.path(directoryPath,paste(fileStem,'_z.txt',sep=''))
  zFile<-file(zFileName,open="r")
  
  if(xModel=="Discrete"){
    # Construct the allocation file name
    phiFileName <- file.path(directoryPath,paste(fileStem,'_phi.txt',sep=''))
    phiFile<-file(phiFileName,open="r")
    # Get the maximum number of categories
    maxNCategories<-max(nCategories)
    if(varSelect){
      nullPhiFileName <- file.path(directoryPath,paste(fileStem,'_nullPhi.txt',sep=''))
      if(varSelectType=="Continuous"){
        gammaFileName <- file.path(directoryPath,paste(fileStem,'_rho.txt',sep=''))
        gammaFile<-file(gammaFileName,open="r")
      }else{
        gammaFileName <- file.path(directoryPath,paste(fileStem,'_gamma.txt',sep=''))
        gammaFile<-file(gammaFileName,open="r")
      }
    }
    
  }else if(xModel=="Normal"){
    muFileName <- file.path(directoryPath,paste(fileStem,'_mu.txt',sep=''))
    muFile<-file(muFileName,open="r")
    SigmaFileName <- file.path(directoryPath,paste(fileStem,'_Sigma.txt',sep=''))
    SigmaFile<-file(SigmaFileName,open="r")
    if(varSelect){
      nullMuFileName <- file.path(directoryPath,paste(fileStem,'_nullMu.txt',sep=''))
      if(varSelectType=="Continuous"){
        gammaFileName <- file.path(directoryPath,paste(fileStem,'_rho.txt',sep=''))
        gammaFile<-file(gammaFileName,open="r")
      }else{
        gammaFileName <- file.path(directoryPath,paste(fileStem,'_gamma.txt',sep=''))
        gammaFile<-file(gammaFileName,open="r")
      }
    }
  }else if(xModel=="Mixed"){
    # Construct the allocation file name
    phiFileName <- file.path(directoryPath,paste(fileStem,'_phi.txt',sep=''))
    phiFile<-file(phiFileName,open="r")
    # Get the maximum number of categories
    maxNCategories<-max(nCategories)
    muFileName <- file.path(directoryPath,paste(fileStem,'_mu.txt',sep=''))
    muFile<-file(muFileName,open="r")
    SigmaFileName <- file.path(directoryPath,paste(fileStem,'_Sigma.txt',sep=''))
    SigmaFile<-file(SigmaFileName,open="r")
    if(varSelect){
      nullPhiFileName <- file.path(directoryPath,paste(fileStem,'_nullPhi.txt',sep=''))
      nullMuFileName <- file.path(directoryPath,paste(fileStem,'_nullMu.txt',sep=''))
      if(varSelectType=="Continuous"){
        gammaFileName <- file.path(directoryPath,paste(fileStem,'_rho.txt',sep=''))
        gammaFile<-file(gammaFileName,open="r")
      }else{
        gammaFileName <- file.path(directoryPath,paste(fileStem,'_gamma.txt',sep=''))
        gammaFile<-file(gammaFileName,open="r")
      }
    }
  }
  
  if(includeResponse){
    # Construct the theta file name
    thetaFileName <- file.path(directoryPath,paste(fileStem,'_theta.txt',sep=''))
    thetaFile<-file(thetaFileName,open="r")
    if (yModel=="Survival"&&!weibullFixedShape){
      nuFileName <- file.path(directoryPath,paste(fileStem,'_nu.txt',sep=''))
      nuFile<-file(nuFileName,open="r")
    }
    if(nFixedEffects>0){
      # Construct the fixed effect coefficient file name
      betaFileName <-file.path(directoryPath,paste(fileStem,'_beta.txt',sep=''))
      betaFile<-file(betaFileName,open="r")
    }
    if (yModel=="Survival"&&weibullFixedShape){
      nuFileName<-file.path(directoryPath,paste(fileStem,'_nu.txt',sep=''))
      nuFile<-file(nuFileName,open="r")
      nu<-(read.table(nuFile)[,1])
      close(nuFile)
    }
  } 
  
  # Restrict to sweeps after burn in
  firstLine<-ifelse(reportBurnIn,nBurn/nFilter+2,1)
  lastLine<-(nSweeps+ifelse(reportBurnIn,nBurn+1,0))/nFilter
  nSamples<-lastLine-firstLine+1
  
  # Make a list of the subjects in each of the optimal clusters
  optAlloc<-vector("list",nClusters)
  for(c in 1:nClusters){
    optAlloc[[c]]<-which(clustering==c)
  }
  
  if(includeResponse){
    # Initialise the object for storing the risks
    riskArray<-array(0,dim=c(nSamples,nClusters,nCategoriesY))
    thetaArray<-array(0,dim=c(nSamples,nClusters,nCategoriesY))
    if (yModel=="Survival"&&!weibullFixedShape) nuArray<-array(0,dim=c(nSamples,nClusters))
    if(nFixedEffects>0){
      betaArray<-array(0,dim=c(nSamples,nFixedEffects,nCategoriesY))
    }
  }else{
    riskArray<-NULL
  }
  
  # Initialise the object for storing the profiles
  if(xModel=='Discrete'){
    phiArray<-array(dim=c(nSamples,nClusters,nCovariates,maxNCategories))
    if(varSelect){
      phiStarArray<-array(dim=c(nSamples,nClusters,nCovariates,maxNCategories))
      tmpCurrNullPhi<-scan(nullPhiFileName,what=double(),quiet=T)
      tmpCurrNullPhi<-array(tmpCurrNullPhi,dim=c(maxNCategories,nCovariates))
      currNullPhi<-array(dim=c(1,maxNCategories,nCovariates))
      currNullPhi[1,,]<-tmpCurrNullPhi
    }else{
      phiStarArray<-NULL
    }
  }else if(xModel=='Normal'){
    muArray<-array(dim=c(nSamples,nClusters,nCovariates))
    if(varSelect){
      muStarArray<-array(dim=c(nSamples,nClusters,nCovariates))
      currNullMu<-scan(nullMuFileName,what=double(),quiet=T)
      currNullMu<-array(currNullMu,dim=c(nCovariates,1))
      currNullMu<-t(currNullMu)
    }else{
      muStarArray<-NULL
    }
    if(useIndependentNormal){
      sigmaArray<-array(dim=c(nSamples,nClusters,nCovariates))
    }else{
      sigmaArray<-array(dim=c(nSamples,nClusters,nCovariates,nCovariates))
    }
  }else if(xModel=='Mixed'){
    phiArray<-array(dim=c(nSamples,nClusters,nDiscreteCovs,maxNCategories))
    if(varSelect){
      phiStarArray<-array(dim=c(nSamples,nClusters,nDiscreteCovs,maxNCategories))
      tmpCurrNullPhi<-scan(nullPhiFileName,what=double(),quiet=T)
      tmpCurrNullPhi<-array(tmpCurrNullPhi,dim=c(maxNCategories,nDiscreteCovs))
      currNullPhi<-array(dim=c(1,maxNCategories,nDiscreteCovs))
      currNullPhi[1,,]<-tmpCurrNullPhi
    }else{
      phiStarArray<-NULL
    }
    muArray<-array(dim=c(nSamples,nClusters,nContinuousCovs))
    if(varSelect){
      muStarArray<-array(dim=c(nSamples,nClusters,nContinuousCovs))
      currNullMu<-scan(nullMuFileName,what=double(),quiet=T)
      currNullMu<-array(currNullMu,dim=c(nContinuousCovs,1))
      currNullMu<-t(currNullMu)
    }else{
      muStarArray<-NULL
    }
    if(useIndependentNormal){
      sigmaArray<-array(dim=c(nSamples,nClusters,nContinuousCovs))
    }else{
      sigmaArray<-array(dim=c(nSamples,nClusters,nContinuousCovs,nContinuousCovs))
    }
  }
  
  
  for(sweep in firstLine:lastLine){
    if(sweep==firstLine){
      skipVal<-firstLine-1
    }else{
      skipVal<-0
    }
    
    if(sweep-firstLine==0||(sweep-firstLine+1)%%1000==0){
      cat(paste("Processing sweep",sweep-firstLine+1,"of ",lastLine-firstLine+1,"\n"))
    }
    currMaxNClusters<-scan(nClustersFile,what=integer(),skip=skipVal,n=1,quiet=T)
    
    # Get the current allocation data for this sweep
    currZ<-scan(zFile,what=integer(),skip=skipVal,n=nSubjects+nPredictSubjects,quiet=T)
    currZ<-1+currZ
    
    if(includeResponse){
      # Get the risk data corresponding to this sweep
      if (yModel=="Categorical") {
        currThetaVector<-scan(thetaFile,what=double(),skip=skipVal,n=currMaxNClusters*(nCategoriesY-1),quiet=T)
        currTheta<-matrix(currThetaVector,ncol=(nCategoriesY-1),byrow=T)
        currTheta<-cbind(rep(0,dim(currTheta)[1]),currTheta)
      } else {
        currThetaVector<-scan(thetaFile,what=double(),skip=skipVal,n=currMaxNClusters*nCategoriesY,quiet=T)
        currTheta<-matrix(currThetaVector,ncol=nCategoriesY,byrow=T)
        if (yModel=="Survival"&&!weibullFixedShape){
          currNuVector<-scan(nuFile,what=double(),skip=skipVal,n=currMaxNClusters,quiet=T)
          currNu<-c(currNuVector)
        }
      }
      if(nFixedEffects>0){
        if (yModel=="Categorical") {
          currBetaVector<-scan(betaFile,what=double(),skip=skipVal,n=nFixedEffects*(nCategoriesY-1),quiet=T)
          currBeta<-matrix(currBetaVector,ncol=(nCategoriesY-1),byrow=T)
          currBeta<-cbind(rep(0,dim(currBeta)[1]),currBeta)
        } else {
          currBetaVector<-scan(betaFile,what=double(),skip=skipVal,n=nFixedEffects*nCategoriesY,quiet=T)
          currBeta<-matrix(currBetaVector,ncol=nCategoriesY,byrow=T)
        }
        betaArray[sweep-firstLine+1,,]<-currBeta
      }
      # Calculate the average risk (over subjects) for each cluster
      
      for(c in 1:nClusters){
        currLambdaVector<-currTheta[currZ[optAlloc[[c]]],]
        currLambda<-matrix(currLambdaVector,ncol=nCategoriesY)
        if(includeFixedEffects&&nFixedEffects>0){
          if (yModel=="Categorical"){
            for (i in 1:length(optAlloc[[c]])){
              for (k in 1:nCategoriesY) currLambda[i,k]<-currLambda[i,k]+
                  t(as.matrix(wMat[optAlloc[[c]][i],]))%*%
                  as.matrix(currBeta[,yMat[optAlloc[[c]][i]]+1])
            }	
          } else {
            currLambda<-currLambda+as.matrix(wMat[optAlloc[[c]],])%*%currBeta
          }
        }
        if(yModel=="Poisson"){
          currRisk<-exp(currLambda )
        }else if(yModel=="Bernoulli"||yModel=="Binomial"){
          currRisk<-1.0/(1.0+exp(-currLambda))
        }else if(yModel=="Normal"||yModel=="Quantile"){
          currRisk<-currLambda
        }else if(yModel=="Categorical"){
          currRisk<-matrix(0,ncol=length(optAlloc[[c]]),nrow=nCategoriesY)
          currRisk<-exp(currLambda)/rowSums(exp(currLambda))
        }else if(yModel=="Survival"){
          if (!weibullFixedShape){
            currNuVector<-currNu[currZ[optAlloc[[c]]]]
          } else {
            currNuVector<-nu[sweep]
          }
          if (proportionalHazards){
            currRisk<-exp(currLambda)
          }else{
            currRisk<-1/((exp(currLambda))^(1/currNuVector))*gamma(1+1/currNuVector)
          }
        }
        riskArray[sweep-firstLine+1,c,]<-apply(currRisk,2,mean)
        thetaArray[sweep-firstLine+1,c,]<-apply(as.matrix(currTheta[currZ[optAlloc[[c]]],],ncol=nCategoriesY),2,mean)
        if(yModel=="Survival"&&!weibullFixedShape){
          nuArray[sweep-firstLine+1,c]<-mean(currNuVector)
        }
      }
    }
    
    # Calculate the average profile (over subjects) for each cluster
    if(xModel=='Discrete'){
      currPhi<-scan(phiFile,what=double(),
                    skip=skipVal,n=currMaxNClusters*maxNCategories*nCovariates,quiet=T)
      # This is slightly convoluted, because of the way that R reads in by column
      # I switched the order of categories and covariates in column below, and then
      # take the transpose to correct in the loop
      currPhi<-array(currPhi,dim=c(currMaxNClusters,maxNCategories,nCovariates))
      if(varSelect){
        # We increase dimensions of currGamma using duplicates, to
        # enable easier calculation of phiStar
        if(varSelectType=='BinaryCluster'){
          tmpCurrGamma<-scan(gammaFile,what=double(),
                             skip=skipVal,n=nCovariates*currMaxNClusters,quiet=T)
          tmpCurrGamma<-array(tmpCurrGamma,dim=c(currMaxNClusters,nCovariates))
        }else{
          tmpCurrGamma<-scan(gammaFile,what=double(),skip=skipVal,n=nCovariates,quiet=T)
          tmpCurrGamma<-array(tmpCurrGamma,dim=c(nCovariates,currMaxNClusters))
          tmpCurrGamma<-t(tmpCurrGamma)
        }
        currGamma<-array(dim=c(currMaxNClusters,maxNCategories,nCovariates))
        for(p in 1:maxNCategories){
          currGamma[,p,]<-tmpCurrGamma
        }
      }	
      for(c in 1:nClusters){
        phiArray[sweep-firstLine+1,c,,]<-t(apply(array(currPhi[currZ[optAlloc[[c]]],,],
                                                       dim=c(length(optAlloc[[c]]),
                                                             dim(currPhi)[2],dim(currPhi)[3])),2:3,mean))
        if(varSelect){
          phiStarArray[sweep-firstLine+1,c,,]<-t(apply(array(currGamma[currZ[optAlloc[[c]]],,],
                                                             dim=c(length(optAlloc[[c]]),dim(currGamma)[2],
                                                                   dim(currGamma)[3]))*array(currPhi[currZ[optAlloc[[c]]],,],
                                                                                             dim=c(length(optAlloc[[c]]),dim(currPhi)[2],dim(currPhi)[3]))+
                                                         (1-array(currGamma[currZ[optAlloc[[c]]],,],
                                                                  dim=c(length(optAlloc[[c]]),dim(currGamma)[2],dim(currGamma)[3])))*
                                                         array(currNullPhi[rep(1,length(optAlloc[[c]])),,],
                                                               dim=c(length(optAlloc[[c]]),dim(currNullPhi)[2],
                                                                     dim(currNullPhi)[3])),2:3,mean))
          
        }
      }
    }else if(xModel=='Normal'){
      # mu stored like phi
      currMu<-scan(muFile,what=double(),skip=skipVal,n=currMaxNClusters*nCovariates,quiet=T)
      currMu<-array(currMu,dim=c(currMaxNClusters,nCovariates))
      if(varSelect){
        # We increase dimensions of nullPhi and currGamma using duplicates, to
        # enable easier calculation of phiStar
        # We increase dimensions of currGamma using duplicates, to
        # enable easier calculation of phiStar
        if(varSelectType=='BinaryCluster'){
          tmpCurrGamma<-scan(gammaFile,what=double(),skip=skipVal,n=currMaxNClusters*nCovariates,quiet=T)
          currGamma<-array(tmpCurrGamma,dim=c(currMaxNClusters,nCovariates))
        }else{
          tmpCurrGamma<-scan(gammaFile,what=double(),skip=skipVal,n=nCovariates,quiet=T)
          tmpCurrGamma<-array(tmpCurrGamma,dim=c(nCovariates,currMaxNClusters))
          currGamma<-t(tmpCurrGamma)
        }
        
      }
      for(c in 1:nClusters){
        muArray[sweep-firstLine+1,c,]<-apply(matrix(currMu[currZ[optAlloc[[c]]],],ncol=nCovariates),2,mean)
        if(varSelect){
          muStarArray[sweep-firstLine+1,c,]<-apply(matrix(currGamma[currZ[optAlloc[[c]]],],
                                                          ncol=nCovariates)*matrix(currMu[currZ[optAlloc[[c]]],],ncol=nCovariates)+
                                                     matrix(1-currGamma[currZ[optAlloc[[c]]],],ncol=nCovariates)*
                                                     matrix(currNullMu[rep(1,length(optAlloc[[c]])),],ncol=nCovariates),2,mean)
        }
      }
      
      if(useIndependentNormal){
        currSigma<-scan(SigmaFile,what=double(),skip=skipVal,n=currMaxNClusters*nCovariates,quiet=T)
        currSigma<-array(currSigma,dim=c(currMaxNClusters,nCovariates))
      }else{
        currSigma<-scan(SigmaFile,what=double(),skip=skipVal,n=currMaxNClusters*nCovariates*nCovariates,quiet=T)
        currSigma<-array(currSigma,dim=c(currMaxNClusters,nCovariates,nCovariates))
      }
      for(c in 1:nClusters){
        if(useIndependentNormal){
          sigmaArray[sweep-firstLine+1,c,]<-apply(matrix(currSigma[currZ[optAlloc[[c]]],],ncol=nCovariates),2,mean)
        }else{
          sigmaArray[sweep-firstLine+1,c,,]<-apply(array(currSigma[currZ[optAlloc[[c]]],,],
                                                         dim=c(length(optAlloc[[c]]),dim(currSigma)[2],dim(currSigma)[3])),2:3,mean)
        }
      }
    }else if(xModel=='Mixed'){
      currPhi<-scan(phiFile,what=double(),
                    skip=skipVal,n=currMaxNClusters*maxNCategories*nDiscreteCovs,quiet=T)
      # This is slightly convoluted, because of the way that R reads in by column
      # I switched the order of categories and covariates in column below, and then
      # take the transpose to correct in the loop
      currPhi<-array(currPhi,dim=c(currMaxNClusters,maxNCategories,nDiscreteCovs))
      # mu stored like phi
      currMu<-scan(muFile,what=double(),skip=skipVal,n=currMaxNClusters*nContinuousCovs,quiet=T)
      currMu<-array(currMu,dim=c(currMaxNClusters,nContinuousCovs))
      if(varSelect){
        # We increase dimensions of currGamma using duplicates, to
        # enable easier calculation of phiStar
        if(varSelectType=='BinaryCluster'){
          tmpCurrGamma<-scan(gammaFile,what=double(),
                             skip=skipVal,n=nCovariates*currMaxNClusters,quiet=T)
          tmpCurrGamma<-array(tmpCurrGamma,dim=c(currMaxNClusters,nCovariates))
        }else{
          tmpCurrGamma<-scan(gammaFile,what=double(),skip=skipVal,n=nCovariates,quiet=T)
          tmpCurrGamma<-array(tmpCurrGamma,dim=c(nCovariates,currMaxNClusters))
          tmpCurrGamma<-t(tmpCurrGamma)
        }
        currGamma<-array(dim=c(currMaxNClusters,maxNCategories,nCovariates))
        for(p in 1:maxNCategories){
          currGamma[,p,]<-tmpCurrGamma
        }
      }	
      for(c in 1:nClusters){
        phiArray[sweep-firstLine+1,c,,]<-t(apply(array(currPhi[currZ[optAlloc[[c]]],,],
                                                       dim=c(length(optAlloc[[c]]),
                                                             dim(currPhi)[2],dim(currPhi)[3])),2:3,mean))
        if(varSelect){
          currGammaD<-array(currGamma[,,1:nDiscreteCovs],dim=c(currMaxNClusters,maxNCategories,nDiscreteCovs))
          phiStarArray[sweep-firstLine+1,c,,]<-t(apply(array(currGammaD[currZ[optAlloc[[c]]],,],
                                                             dim=c(length(optAlloc[[c]]),dim(currGammaD)[2],
                                                                   dim(currGammaD)[3]))*array(currPhi[currZ[optAlloc[[c]]],,],
                                                                                              dim=c(length(optAlloc[[c]]),dim(currPhi)[2],dim(currPhi)[3]))+
                                                         (1-array(currGammaD[currZ[optAlloc[[c]]],,],
                                                                  dim=c(length(optAlloc[[c]]),dim(currGammaD)[2],dim(currGammaD)[3])))*
                                                         array(currNullPhi[rep(1,length(optAlloc[[c]])),,],
                                                               dim=c(length(optAlloc[[c]]),dim(currNullPhi)[2],
                                                                     dim(currNullPhi)[3])),2:3,mean))
        }
        muArray[sweep-firstLine+1,c,]<-apply(matrix(currMu[currZ[optAlloc[[c]]],],ncol=nContinuousCovs),2,mean)
        if(varSelect){
          currGammaC<-array(currGamma[,1,(nDiscreteCovs+1):nCovariates],
                            dim=c(currMaxNClusters,nContinuousCovs))
          muStarArray[sweep-firstLine+1,c,]<-apply(matrix(currGammaC[currZ[optAlloc[[c]]],],
                                                          ncol=nContinuousCovs)*matrix(currMu[currZ[optAlloc[[c]]],],ncol=nContinuousCovs)+
                                                     matrix(1-currGammaC[currZ[optAlloc[[c]]],],ncol=nContinuousCovs)*
                                                     matrix(currNullMu[rep(1,length(optAlloc[[c]])),],ncol=nContinuousCovs),2,mean)
        }
      }
      
      if(useIndependentNormal){
        currSigma<-scan(SigmaFile,what=double(),skip=skipVal,n=currMaxNClusters*nContinuousCovs,quiet=T)
        currSigma<-array(currSigma,dim=c(currMaxNClusters,nContinuousCovs))
      
      }else{
        currSigma<-scan(SigmaFile,what=double(),skip=skipVal,n=currMaxNClusters*nContinuousCovs*nContinuousCovs,quiet=T)
        currSigma<-array(currSigma,dim=c(currMaxNClusters,nContinuousCovs,nContinuousCovs))
      }
      for(c in 1:nClusters){
        if(useIndependentNormal){
          sigmaArray[sweep-firstLine+1,c,]<-apply(matrix(currSigma[currZ[optAlloc[[c]]],],ncol=nContinuousCovs),2,mean)
        }else{
          sigmaArray[sweep-firstLine+1,c,,]<-apply(array(currSigma[currZ[optAlloc[[c]]],,],
                                                         dim=c(length(optAlloc[[c]]),dim(currSigma)[2],dim(currSigma)[3])),2:3,mean)
          
        }
      }
    }
  }
  
  # Calculate the empiricals
  empiricals<-rep(0,nClusters)
  if(!is.null(yModel)){
    for(c in 1:nClusters){
      if(yModel=='Bernoulli'||yModel=='Normal'||yModel=='Survival'||yModel=='Quantile'){
        empiricals[c]<-mean(yMat[optAlloc[[c]],1])
      }else if(yModel=='Binomial'){
        empiricals[c]<-mean(yMat[optAlloc[[c]],1]/yMat[optAlloc[[c]],2])
      }else if(yModel=='Poisson'){
        empiricals[c]<-mean(yMat[optAlloc[[c]],1]/yMat[optAlloc[[c]],2])
        #}else if(yModel=='Categorical'){
        # no empiricals for categorical outcome				
      }
    }
  }
  
  if(xModel=='Discrete'){
    out<-list('riskProfClusObj'=clusObj,'risk'=riskArray,'profile'=phiArray,'profileStar'=phiStarArray,'empiricals'=empiricals)
  }else if(xModel=='Normal'){
    out<-list('riskProfClusObj'=clusObj,'risk'=riskArray,
              'profile'=muArray,'profileStar'=muStarArray,
              'profileStdDev'=sigmaArray,'empiricals'=empiricals)
  }else if(xModel=='Mixed'){
    out<-list('riskProfClusObj'=clusObj,'risk'=riskArray,
              'profilePhi'=phiArray,'profileStarPhi'=phiStarArray,
              'profileMu'=muArray,'profileStarMu'=muStarArray,
              'profileStdDev'=sigmaArray,'empiricals'=empiricals)
  }
  
  close(zFile)
  close(nClustersFile)
  if(xModel=="Discrete"){
    close(phiFile)
    if(varSelect){
      close(gammaFile)
    }
  }else if(xModel=="Normal"){
    close(muFile)
    close(SigmaFile)
    if(varSelect){
      close(gammaFile)
    }
  }else if(xModel=="Mixed"){
    close(phiFile)
    close(muFile)
    close(SigmaFile)
    if(varSelect){
      close(gammaFile)
    }
  }
  
  
  if(includeResponse){
    close(thetaFile)
    if(nFixedEffects>0){
      close(betaFile)
    }
    if (yModel=="Survival"){
      if (!weibullFixedShape) {
        out$nuArray<-nuArray
        close(nuFile)
      } else {
        out$nuArray<-nu
      }
    }
  }
  
  return(out)
}



# Plot output values
plotRiskProfile<-function(riskProfObj,outFile,showRelativeRisk=F,orderBy=NULL,whichClusters=NULL,whichCovariates=NULL,useProfileStar=F,riskLim=NULL){
  
  riskProfClusObj=NULL
  clusObjRunInfoObj=NULL
  includeResponse=NULL
  yModel=NULL
  profileStar=NULL
  xModel=NULL
  whicCov=NULL
  nCategoriesY=NULL
  cluster=NULL
  prob=NULL
  meanProb=NULL
  fillColor=NULL
  lowerProb=NULL
  upperProb=NULL
  meanRisk=NULL
  lowerRisk=NULL
  upperRisk=NULL
  clusterSize=NULL
  mu=NULL
  meanMu=NULL
  lowerMu=NULL
  upperMu=NULL
  sigma=NULL
  meanSigma=NULL
  lowerSigma=NULL
  upperSigma=NULL
  weibullFixedShape=NULL
  nu=NULL
  meanNu=NULL
  lowerNu=NULL
  upperNu=NULL	
  useIndependentNormal=NULL
  
  for (i in 1:length(riskProfObj)) assign(names(riskProfObj)[i],riskProfObj[[i]])
  for (i in 1:length(riskProfClusObj)) assign(names(riskProfClusObj)[i],riskProfClusObj[[i]])
  for (i in 1:length(clusObjRunInfoObj)) assign(names(clusObjRunInfoObj)[i],clusObjRunInfoObj[[i]])
  
  if (nClusters==1) stop("Cannot produce plots because only one cluster has been found.")
  
  if(includeResponse){
    if(yModel=="Normal"||yModel=="Quantile"){
      showRelativeRisk<-F
    }
  }
  
  if(useProfileStar){
    profile<-profileStar
  }
  if(!is.null(whichCovariates)){
    if (!is.numeric(whichCovariates)){
      whichCovariatesTmp<-vector()
      for (k in 1:length(whichCovariates)){
        whichCovariatesTmp[k]<-which(riskProfClusObj$clusObjRunInfoObj$covNames==whichCovariates[k])
      }
      whichCovariates<-whichCovariatesTmp
    }
    if(xModel=='Discrete'){
      profile<-profile[,,whichCovariates,]
      nCategories<-nCategories[whichCovariates]
      covNames<-covNames[whichCovariates]
      nCovariates<-length(whichCovariates)
    }else if(xModel=='Normal'){
      profile<-profile[,,whichCovariates]
      if(useIndependentNormal){
        profileStdDev<-profileStdDev[,,whichCovariates]
      }else{
        profileStdDev<-profileStdDev[,,whichCovariates,whichCovariates]
      }
      covNames<-covNames[whichCovariates]
      nCovariates<-length(whichCovariates)
    }else if(xModel=='Mixed'){
      nDiscreteCovsAll <- nDiscreteCovs
      nContinuousCovsAll <- nContinuousCovs
      whichDiscreteCovs <- whichCovariates[which(whichCovariates<=nDiscreteCovs)]
      whichContinuousCovs <- whichCovariates[which(whichCovariates>nDiscreteCovs)]
      discreteCovs <- discreteCovs[whichDiscreteCovs]
      nDiscreteCovs <- length(discreteCovs)
      continuousCovs <- continuousCovs[whichContinuousCovs-nDiscreteCovsAll]
      nContinuousCovs <- length(continuousCovs)
      profilePhi<-profilePhi[,,whichDiscreteCovs,]
      nCategories<-nCategories[whichDiscreteCovs]
      profileMu<-profileMu[,,whichContinuousCovs-nDiscreteCovsAll]
      if(useIndependentNormal){
        profileStdDev<-profileStdDev[,,whichContinuousCovs-nDiscreteCovsAll]
      }else{
        profileStdDev<-profileStdDev[,,whichContinuousCovs-nDiscreteCovsAll,whichContinuousCovs-nDiscreteCovsAll]
      }
      covNames<-c(discreteCovs,continuousCovs)
      nCovariates<-length(covNames)
      
    }
  }
  
  if (nCovariates>15) stop("ERROR: this plotting function is suitable only for up to 15 covariates. If your dataset includes more than 15 variables, the option 'whichCovariates' allows plotting of subsets of covariates.")
  
  png(outFile,width=1200,height=800)
  orderProvided<-F
  
  if(!is.null(orderBy)){
    if(!includeResponse){
      if(orderBy!='Empirical'&&orderBy!='ClusterSize'&&!orderBy%in%covNames){
        if(is.numeric(orderBy)){
          if(length(orderBy)==nClusters){
            orderProvided<-T
            meanSortIndex<-orderBy
          }else{
            cat("Order vector provided not of same length as number of clusters. Reverting to default ordering.\n")	
            orderBy<-NULL
          }
          orderBy<-NULL
        }
        #orderBy<-NULL
      }
    }else{
      if(orderBy!='Risk'&&orderBy!='Empirical'&&orderBy!='ClusterSize'&&!orderBy%in%covNames){
        if(is.numeric(orderBy)){
          if(length(orderBy)==nClusters){
            orderProvided<-T
            meanSortIndex<-orderBy
          }else{
            cat("Order vector provided not of same length as number of clusters. Reverting to default ordering.\n")	
            orderBy<-NULL
          }
          orderBy<-NULL
        }
        #orderBy<-NULL
      }
    }
    
  }
  
  # Set up the layout for the plot
  plotLayout<-grid.layout(ncol = nCovariates+2, nrow = 6)
  grid.newpage()
  pushViewport(viewport(layout = plotLayout))
  
  if(!orderProvided){
    if(!is.null(risk)){
      if(is.null(orderBy)){
        # Default is to order by posterior theta risk
        # Compute the means
        orderStat<-apply(risk,2,median)
      }else{
        if(orderBy=='Risk'){
          orderStat<-apply(risk,2,median)
        }else if(orderBy=='Empirical'){
          orderStat<-empiricals
        }else if(orderBy=='ClusterSize'){
          orderStat<-clusterSizes
        }else{
          whichCov<-match(orderBy,covNames)
          if(xModel=='Normal'){
            orderStat<-apply(profile[,,whichCov],2,median)
          }else{
            # This assumes that there is some order to the categories
            # and then uses an expected value
            tmpMat<-profile[,,whichCov,1]
            if(nCategories[whichCov]>1){
              for(k in 2:nCategories[whichCov]){
                tmpMat<-tmpMat+k*profile[,,whichCov,k]
              }
            }
            orderStat<-apply(tmpMat,2,median)
          }
        }
      }
    }else{
      if(is.null(orderBy)){
        # Default is to order by empirical risk
        orderStat<-empiricals
      }else{
        if(orderBy=='Empirical'){
          orderStat<-empiricals
        }else if(orderBy=='ClusterSize'){
          orderStat<-clusterSizes
        }else{
          whichCov<-match(orderBy,covNames)
          if(xModel=='Normal'){
            orderStat<-apply(profile[,,whichCov],2,median)
          }else{
            # This assumes that there is some order to the categories
            # and then uses an expected value
            tmpMat<-profile[,,whichCov,1]
            if(nCategories[whichCov]>1){
              for(k in 2:(nCategories[whichCov])){
                tmpMat<-tmpMat+k*profile[,,whichCov,k]
              }
            }
            orderStat<-apply(tmpMat,2,median)
          }
        }
      }	
    }
    # Sort into ascending mean size
    meanSortIndex<-order(orderStat,decreasing=F)
  }
  if(includeResponse){
    # Reorder the risk matrix
    riskDim<-dim(risk)
    risk<-array(risk[,meanSortIndex,],dim=riskDim)
    if(showRelativeRisk){
      for(c in nClusters:1){
        risk[,c,]<-risk[,c,]/risk[,1,]
      }
    }
    # reorder the nu matrix 
    if (yModel=="Survival"&&!weibullFixedShape){
      nuDim<-dim(nuArray)
      nuArray<-array(nuArray[,meanSortIndex],dim=nuDim)
    }
  }
  
  # Reorder the cluster sizes
  clusterSizes<-clusterSizes[meanSortIndex]
  # Reorder the empiricals
  empiricals<-empiricals[meanSortIndex]
  meanEmpirical<-sum(empiricals*clusterSizes)/sum(clusterSizes)
  
  ##############################################################
  if(is.null(whichClusters)){
    whichClusters<-1:nClusters
  }
  nClusters<-length(whichClusters)
  ##############################################################
  
  if(includeResponse){
    # Recompute the means and now also credible intervals
    riskMeans<-apply(risk,2,mean,trim=0.005)
    riskMean<-sum(riskMeans*clusterSizes)/sum(clusterSizes)
    riskLower<-apply(risk,2,quantile,0.05)
    riskUpper<-apply(risk,2,quantile,0.95)
    # The next line is to avoid outliers spoiling plot scales
    plotMax<-max(riskUpper)
    
    # Get the plot colors
    riskColor<-ifelse(riskLower>rep(riskMean,nClusters),"high",
                      ifelse(riskUpper<rep(riskMean,nClusters),"low","avg"))
    if (yModel=="Categorical"){		
      # riskDF<-data.frame("risk"=c(),"category"=c(),"cluster"=c(),"meanRisk"=c(),
      # 	"lowerRisk"=c(),"upperRisk"=c(),"fillColor"=c())
      ##############################################################
      my.list <- vector('list', length(whichClusters)*nCategoriesY)
      ##############################################################
    } else {
      # riskDF<-data.frame("risk"=c(),"cluster"=c(),"meanRisk"=c(),
      # 	"lowerRisk"=c(),"upperRisk"=c(),"fillColor"=c())
      ##############################################################
      my.list <- vector('list', length(whichClusters))
      ##############################################################
    }
    if (yModel=="Survival"&&!weibullFixedShape){
      # Recompute the means and now also credible intervals
      nuMeans<-apply(nuArray,2,mean,trim=0.005)
      nuMean<-sum(nuMeans*clusterSizes)/sum(clusterSizes)
      nuLower<-apply(nuArray,2,quantile,0.05)
      nuUpper<-apply(nuArray,2,quantile,0.95)
      
      # nuDF<-data.frame("nu"=c(),"cluster"=c(),"meanNu"=c(),
      # 	"lowerNu"=c(),"upperNu"=c(),"fillColor"=c())
      ##############################################################
      # my.list.nu <- vector('list', length(whichClusters))
      ##############################################################
    }
    
  }else{
    riskColor<-ifelse(empiricals>rep(meanEmpirical,length(empiricals)),"high",
                      ifelse(empiricals<rep(meanEmpirical,nClusters),"low","avg"))
  }
  
  
  # if(is.null(whichClusters)){
  # 	whichClusters<-1:nClusters
  # }
  # nClusters<-length(whichClusters)
  
  # empiricalDF<-data.frame("empiricals"=c(),"meanEmpirical"=c(),"cluster"=c(),"fillColor"=c())
  # sizeDF<-data.frame("clusterSize"=c(),"cluster"=c(),"fillColor"=c())
  
  ##############################################################
  my.list.nu <- vector('list', length(whichClusters))
  
  my.list.empirical <- vector('list', length(whichClusters))
  my.list.size <- vector('list', length(whichClusters))
  
  
  z=1; z.nu=1; z.other=1		
  #################################################################
  
  # Restructure the data for plotting
  for(c in whichClusters){
    if(includeResponse){
      if (yModel=="Categorical"){
        plotRisk<-risk[,c,]
        nPoints<-dim(plotRisk)[1]
        for (k in 1:nCategoriesY){				
          # riskDF<-rbind(riskDF,data.frame("risk"=plotRisk[,k],
          # 	"category"=rep(k,nPoints),
          # 	"cluster"=rep(c,nPoints),
          # 	"meanRisk"=rep(riskMean,nPoints),
          # 	"lowerRisk"=rep(riskLower[c],nPoints),
          # 	"upperRisk"=rep(riskUpper[c],nPoints),
          # 	"fillColor"=rep(riskColor[c],nPoints)))
          
          #################################################################
          my.list[[z]] <- data.frame("risk"=plotRisk[,k],
                                     "category"=rep(k,nPoints),
                                     "cluster"=rep(c,nPoints),
                                     "meanRisk"=rep(riskMean,nPoints),
                                     "lowerRisk"=rep(riskLower[c],nPoints),
                                     "upperRisk"=rep(riskUpper[c],nPoints),
                                     "fillColor"=rep(riskColor[c],nPoints))
          z=z+1
          #################################################################	
          
        }
      } else {
        plotRisk<-risk[,c,]
        plotRisk<-plotRisk[plotRisk<plotMax]
        nPoints<-length(plotRisk)
        # riskDF<-rbind(riskDF,data.frame("risk"=plotRisk,"cluster"=rep(c,nPoints),
        # 	"meanRisk"=rep(riskMean,nPoints),
        # 	"lowerRisk"=rep(riskLower[c],nPoints),
        # 	"upperRisk"=rep(riskUpper[c],nPoints),
        # 	"fillColor"=rep(riskColor[c],nPoints)))
        
        #################################################################
        my.list[[z]] <- data.frame("risk"=plotRisk,"cluster"=rep(c,nPoints),
                                   "meanRisk"=rep(riskMean,nPoints),
                                   "lowerRisk"=rep(riskLower[c],nPoints),
                                   "upperRisk"=rep(riskUpper[c],nPoints),
                                   "fillColor"=rep(riskColor[c],nPoints))
        z=z+1
        #################################################################
      }
      if (yModel=="Survival"&&!weibullFixedShape){
        plotNu<-nuArray[,c]
        nPoints<-length(plotNu)
        # nuDF<-rbind(nuDF,data.frame("nu"=plotNu,"cluster"=rep(c,nPoints),
        # 	"meanNu"=rep(nuMean,nPoints),
        # 	"lowerNu"=rep(nuLower[c],nPoints),
        # 	"upperNu"=rep(nuUpper[c],nPoints),
        # 	"fillColor"=rep(riskColor[c],nPoints)))
        
        #################################################################
        my.list.nu[[z.nu]] <- data.frame("nu"=plotNu,"cluster"=rep(c,nPoints),
                                         "meanNu"=rep(nuMean,nPoints),
                                         "lowerNu"=rep(nuLower[c],nPoints),
                                         "upperNu"=rep(nuUpper[c],nPoints),
                                         "fillColor"=rep(riskColor[c],nPoints))
        z.nu=z.nu+1
        #################################################################	
        
      }
    }
    # empiricalDF<-rbind(empiricalDF,
    # 	data.frame("empiricals"=empiricals[c],
    # 	"meanEmpirical"=meanEmpirical,"cluster"=c,"fillColor"=riskColor[c]))
    # sizeDF<-rbind(sizeDF,
    # 	data.frame("clusterSize"=clusterSizes[c],"cluster"=c,"fillColor"=riskColor[c]))
    
    #################################################################
    my.list.empirical[[z.other]] <- data.frame("empiricals"=empiricals[c],
                                               "meanEmpirical"=meanEmpirical,"cluster"=c,"fillColor"=riskColor[c])
    my.list.size[[z.other]] <- data.frame("clusterSize"=clusterSizes[c],"cluster"=c,"fillColor"=riskColor[c])
    z.other=z.other+1
    #################################################################	
    
  }
  
  #################################################################
  if(includeResponse){
    
    riskDF <- rbindlist(my.list)
    
    nuDF <- rbindlist(my.list.nu)
    
    empiricalDF <- rbindlist(my.list.empirical)
    
  }
  
  sizeDF <- rbindlist(my.list.size)
  
  #################################################################
  
  if(includeResponse){
    if(yModel=='Categorical'){
      # riskDF<-data.frame("prob"=c(),"cluster"=c(),"category"=c(),"meanProb"=c(),
      # 	"lowerProb"=c(),"upperProb"=c(),"fillColor"=c())
      ##############################################################
      my.list <- vector('list', length(whichClusters)*nCategoriesY)
      z=1
      ##############################################################
      for(k in 1:nCategoriesY){
        
        probMat<-risk[,,k]
        nPoints<-nrow(probMat)
        probMeans<-apply(probMat,2,mean)
        probMean<-sum(probMeans*clusterSizes)/sum(clusterSizes)
        probLower<-apply(probMat,2,quantile,0.05)
        probUpper<-apply(probMat,2,quantile,0.95)
        
        # Get the plot colors
        probColor<-ifelse(probLower>rep(probMean,length(probLower)),"high",
                          ifelse(probUpper<rep(probMean,length(probUpper)),"low","avg"))
        
        for(c in whichClusters){
          # riskDF<-rbind(riskDF,data.frame("prob"=probMat[,c],"cluster"=rep(c,nPoints),
          # 	"category"=rep(k-1,nPoints),
          # 	"meanProb"=rep(probMean,nPoints),
          # 	"lowerProb"=rep(probLower[c],nPoints),
          # 	"upperProb"=rep(probUpper[c],nPoints),
          # 	"fillColor"=rep(probColor[c],nPoints)))
          # 	 rownames(riskDF)<-seq(1,nrow(riskDF),1)
          
          #################################################################
          my.list[[z]] <- data.frame("prob"=probMat[,c],"cluster"=rep(c,nPoints),
                                     "category"=rep(k-1,nPoints),
                                     "meanProb"=rep(probMean,nPoints),
                                     "lowerProb"=rep(probLower[c],nPoints),
                                     "upperProb"=rep(probUpper[c],nPoints),
                                     "fillColor"=rep(probColor[c],nPoints))
          z=z+1
          #################################################################
          
        }
      }
      
      #################################################################
      riskDF <- rbindlist(my.list)
      rownames(riskDF)<-seq(1,nrow(riskDF),1)
      #################################################################
      
      plotObj<-ggplot(riskDF)
      plotObj<-plotObj+facet_wrap(~category,ncol=1,as.table=F,scales="free_y")
      plotObj<-plotObj+geom_hline(aes(yintercept=meanProb))
      plotObj<-plotObj+geom_boxplot(aes(x=as.factor(cluster),y=prob,fill=as.factor(fillColor)),outlier.size=0.5)
      plotObj<-plotObj+geom_point(aes(x=as.factor(cluster),y=lowerProb,colour=as.factor(fillColor)),size=1.5)
      plotObj<-plotObj+geom_point(aes(x=as.factor(cluster),y=upperProb,colour=as.factor(fillColor)),size=1.5)
      plotObj<-plotObj+
        scale_fill_manual(values = c(high ="#CC0033",low ="#0066CC", avg ="#33CC66"))+
        scale_colour_manual(values = c(high ="#CC0033",low ="#0066CC", avg ="#33CC66"))+
        theme(legend.position="none")+labs(x="Cluster")+theme(axis.title.x=element_text(size=10))
      plotObj<-plotObj+labs(y="Probability")+theme(axis.title.y=element_text(size=10,angle=90))
      plotObj<-plotObj+labs(title=ifelse(showRelativeRisk,'Relative Risk','Risk'),plot.title=element_text(size=10))
      # Margin order is (top,right,bottom,left)
      plotObj<-plotObj+theme(plot.margin=unit(c(0.5,0.15,0.5,0.15),'lines'))+
        theme(plot.margin=unit(c(0,0,0,0),'lines'))
      print(plotObj,vp=viewport(layout.pos.row=1:6,layout.pos.col=2))
      #		}else if (yModel=="Survival"&&!weibullFixedShape){
      #			rownames(riskDF)<-seq(1,nrow(riskDF),by=1)
      #
      #			# Create the risk plot
      #			plotObj<-ggplot(riskDF)
      #			plotObj<-plotObj+geom_hline(aes(x=as.factor(cluster),y=risk,yintercept=meanRisk))
      #			plotObj<-plotObj+geom_boxplot(aes(x=as.factor(cluster),y=risk,fill=as.factor(fillColor)),outlier.size=0.5)
      #			plotObj<-plotObj+geom_point(aes(x=as.factor(cluster),y=lowerRisk,colour=as.factor(fillColor)),size=1.5)
      #			plotObj<-plotObj+geom_point(aes(x=as.factor(cluster),y=upperRisk,colour=as.factor(fillColor)),size=1.5)
      #			plotObj<-plotObj+scale_fill_manual(values = c(high ="#CC0033",low ="#0066CC", avg ="#33CC66"))+
      #				scale_colour_manual(values = c(high ="#CC0033",low ="#0066CC", avg ="#33CC66"))+
      #				theme(legend.position="none")+
      #				labs(x="Cluster",y=ifelse(showRelativeRisk,'RR',
      #				ifelse(yModel=="Categorical"||yModel=="Bernoulli"||yModel=="Binomial","Probability","E[Y]")))
      #			plotObj<-plotObj+theme(axis.title.y=element_text(size=10,angle=90),axis.title.x=element_text(size=10))
      #			plotObj<-plotObj+labs(title=ifelse(showRelativeRisk,'Relative Risk','Risk'),plot.title=element_text(size=10))
      #			# Margin order is (top,right,bottom,left)
      #			plotObj<-plotObj+theme(plot.margin=unit(c(0,0,0,0),'lines'))+theme(plot.margin=unit(c(0.5,0.15,0.5,0.15),'lines'))
      #			print(plotObj,vp=viewport(layout.pos.row=1:3,layout.pos.col=2))
      #
      #			rownames(nuDF)<-seq(1,nrow(nuDF),by=1)
      #			# Create the nu plot
      #			plotObj<-ggplot(nuDF)
      #			plotObj<-plotObj+geom_hline(aes(x=as.factor(cluster),y=nu,yintercept=meanNu))
      #			plotObj<-plotObj+geom_boxplot(aes(x=as.factor(cluster),y=nu,fill=as.factor(fillColor)),outlier.size=0.5)
      #			plotObj<-plotObj+geom_point(aes(x=as.factor(cluster),y=lowerNu,colour=as.factor(fillColor)),size=1.5)
      #			plotObj<-plotObj+geom_point(aes(x=as.factor(cluster),y=upperNu,colour=as.factor(fillColor)),size=1.5)
      #			plotObj<-plotObj+scale_fill_manual(values = c(high ="#CC0033",low ="#0066CC", avg ="#33CC66"))+
      #				scale_colour_manual(values = c(high ="#CC0033",low ="#0066CC", avg ="#33CC66"))+
      #				theme(legend.position="none")+
      #				labs(x="Cluster",y="Shape Parameter")
      #			plotObj<-plotObj+theme(axis.title.y=element_text(size=10,angle=90),axis.title.x=element_text(size=10))
      #			plotObj<-plotObj+labs(title="",plot.title=element_text(size=10))
      #			# Margin order is (top,right,bottom,left)
      #			plotObj<-plotObj+theme(plot.margin=unit(c(0,0,0,0),'lines'))+theme(plot.margin=unit(c(0.5,0.15,0.5,0.15),'lines'))
      #			print(plotObj,vp=viewport(layout.pos.row=4:6,layout.pos.col=2))
    } else {
      rownames(riskDF)<-seq(1,nrow(riskDF),by=1)
      
      # Create the risk plot
      plotObj<-ggplot(riskDF)
      plotObj<-plotObj+geom_hline(aes(yintercept=meanRisk))
      plotObj<-plotObj+geom_boxplot(aes(x=as.factor(cluster),y=risk,fill=as.factor(fillColor)),outlier.size=0.5)
      if (!is.null(riskLim)) plotObj<-plotObj+coord_cartesian(ylim = riskLim)
      plotObj<-plotObj+geom_point(aes(x=as.factor(cluster),y=lowerRisk,colour=as.factor(fillColor)),size=1.5)
      plotObj<-plotObj+geom_point(aes(x=as.factor(cluster),y=upperRisk,colour=as.factor(fillColor)),size=1.5)
      plotObj<-plotObj+scale_fill_manual(values = c(high ="#CC0033",low ="#0066CC", avg ="#33CC66"))+
        scale_colour_manual(values = c(high ="#CC0033",low ="#0066CC", avg ="#33CC66"))+
        theme(legend.position="none")+
        labs(x="Cluster",y=ifelse(showRelativeRisk,'RR',
                                  ifelse(yModel=="Categorical"||yModel=="Bernoulli"||yModel=="Binomial","Probability","E[Y]")))
      plotObj<-plotObj+theme(axis.title.y=element_text(size=10,angle=90),axis.title.x=element_text(size=10))
      plotObj<-plotObj+labs(title=ifelse(showRelativeRisk,'Relative Risk','Risk'),plot.title=element_text(size=10))
      # Margin order is (top,right,bottom,left)
      plotObj<-plotObj+theme(plot.margin=unit(c(0,0,0,0),'lines'))+theme(plot.margin=unit(c(0.5,0.15,0.5,0.15),'lines'))
      print(plotObj,vp=viewport(layout.pos.row=1:6,layout.pos.col=2))
    }	
  }
  
  # Create a bar chart of cluster empiricals
  if((!is.null(yModel))){
    if(yModel!="Categorical"){
      plotObj<-ggplot(empiricalDF)
      plotObj<-plotObj+geom_point(aes(x=as.factor(cluster),y=empiricals,colour=as.factor(fillColor)),size=3)
      plotObj<-plotObj+geom_hline(aes(yintercept=meanEmpirical))
      plotObj<-plotObj+scale_colour_manual(values = c(high ="#CC0033",low ="#0066CC", avg ="#33CC66"))+
        theme(legend.position="none")
      plotObj<-plotObj+labs(title='Empirical Data',plot.title=element_text(size=10))
      plotObj<-plotObj+theme(axis.title.x=element_text(size=10),axis.title.y=element_text(size=10,angle=90))
      plotObj<-plotObj+
        labs(y=ifelse(yModel=="Bernoulli","Proportion of cases",
                      ifelse(yModel=="Binomial","Avg Proportion of occurrence",
                             ifelse(yModel=="Poisson","Avg Count",
                                    ifelse(yModel=="Survival","Avg Survival Time",
                                           ifelse(yModel=="Categorical","Avg Proportion of occurrence","Avg Y"))))),x="Cluster")
      plotObj<-plotObj+theme(plot.margin=unit(c(0,0,0,0),'lines'))+theme(plot.margin=unit(c(0.15,0.5,0.5,1),'lines'))
      print(plotObj,vp=viewport(layout.pos.row=1:3,layout.pos.col=1))
    }
  }
  # Create a bar chart of cluster sizes
  plotObj<-ggplot(sizeDF)
  plotObj<-plotObj+geom_point(aes(x=as.factor(cluster),y=clusterSize,colour=as.factor(fillColor)),size=3)
  plotObj<-plotObj+scale_colour_manual(values = c(high ="#CC0033",low ="#0066CC", avg ="#33CC66"))+theme(legend.position="none")
  plotObj<-plotObj+labs(title="Size",plot.title=element_text(size=10))
  plotObj<-plotObj+theme(axis.title.x=element_text(size=10),axis.title.y=element_text(size=10,angle=90))
  plotObj<-plotObj+labs(y="No. of Subjects",x="Cluster")
  plotObj<-plotObj+theme(plot.margin=unit(c(0,0,0,0),'lines'))+theme(plot.margin=unit(c(0.15,0.5,0.5,1),'lines'))
  print(plotObj,vp=viewport(layout.pos.row=4:6,layout.pos.col=1))
  
  # Loop over the covariates
  for(j in 1:nCovariates){
    if(xModel=='Discrete'){
      # profileDF<-data.frame("prob"=c(),"cluster"=c(),"category"=c(),"meanProb"=c(),
      # 	"lowerProb"=c(),"upperProb"=c(),"fillColor"=c())
      #################################################################
      my.list <- vector('list', length(whichClusters)*nCategories[j])
      z=1		
      #################################################################
      for(k in 1:nCategories[j]){
        probMat<-profile[,meanSortIndex,j,k]
        nPoints<-nrow(probMat)
        probMeans<-apply(probMat,2,mean)
        probMean<-sum(probMeans*clusterSizes)/sum(clusterSizes)
        probLower<-apply(probMat,2,quantile,0.05)
        probUpper<-apply(probMat,2,quantile,0.95)
        
        # Get the plot colors
        probColor<-ifelse(probLower>rep(probMean,length(probLower)),"high",
                          ifelse(probUpper<rep(probMean,length(probUpper)),"low","avg"))
        
        
        for(c in whichClusters){
          # profileDF<-rbind(profileDF,data.frame("prob"=probMat[,c],"cluster"=rep(c,nPoints),
          # 	"category"=rep(k-1,nPoints),
          # 	"meanProb"=rep(probMean,nPoints),
          # 	"lowerProb"=rep(probLower[c],nPoints),
          # 	"upperProb"=rep(probUpper[c],nPoints),
          # 	"fillColor"=rep(probColor[c],nPoints)))
          # 	 rownames(profileDF)<-seq(1,nrow(profileDF),1)
          #################################################################
          my.list[[z]] <- data.frame("prob"=probMat[,c],"cluster"=rep(c,nPoints),
                                     "category"=rep(k-1,nPoints),
                                     "meanProb"=rep(probMean,nPoints),
                                     "lowerProb"=rep(probLower[c],nPoints),
                                     "upperProb"=rep(probUpper[c],nPoints),
                                     "fillColor"=rep(probColor[c],nPoints))
          z=z+1
          #################################################################
          
        }
      }
      #################################################################
      profileDF <- rbindlist(my.list)
      rownames(profileDF)<-seq(1,nrow(profileDF),1)
      
      #################################################################
      
      plotObj<-ggplot(profileDF)
      plotObj<-plotObj+facet_wrap(~category,ncol=1,as.table=F,scales="free_y")
      plotObj<-plotObj+geom_hline(aes(yintercept=meanProb))
      plotObj<-plotObj+geom_boxplot(aes(x=as.factor(cluster),y=prob,fill=as.factor(fillColor)),outlier.size=0.5)
      plotObj<-plotObj+geom_point(aes(x=as.factor(cluster),y=lowerProb,colour=as.factor(fillColor)),size=1.5)
      plotObj<-plotObj+geom_point(aes(x=as.factor(cluster),y=upperProb,colour=as.factor(fillColor)),size=1.5)
      plotObj<-plotObj+
        scale_fill_manual(values = c(high ="#CC0033",low ="#0066CC", avg ="#33CC66"))+
        scale_colour_manual(values = c(high ="#CC0033",low ="#0066CC", avg ="#33CC66"))+
        theme(legend.position="none")+labs(x="Cluster")+theme(axis.title.x=element_text(size=10))
      if(j==1){
        plotObj<-plotObj+labs(y="Probability")+theme(axis.title.y=element_text(size=10,angle=90))
      }else{
        plotObj<-plotObj+theme(axis.title.y=element_blank())
      }
      plotObj<-plotObj+labs(title=covNames[j],plot.title=element_text(size=10))
      plotObj<-plotObj+theme(plot.margin=unit(c(0.5,ifelse(j==nCovariates,1,0),0.5,ifelse(j==1,0.5,0)),'lines'))+
        theme(plot.margin=unit(c(0,0,0,0),'lines'))
      
      print(plotObj,vp=viewport(layout.pos.row=1:6,layout.pos.col=j+2))
    }else if(xModel=='Normal'){
      # Plot the means
      # profileDF<-data.frame("mu"=c(),"cluster"=c(),"muMean"=c(),
      # 	"lowerMu"=c(),"upperMu"=c(),"fillColor"=c())
      #################################################################
      my.list <- vector('list', length(whichClusters))
      z=1		
      #################################################################
      muMat<-profile[,meanSortIndex,j]
      muMeans<-apply(muMat,2,mean)
      muMean<-sum(muMeans*clusterSizes)/sum(clusterSizes)
      muLower<-apply(muMat,2,quantile,0.05)
      muUpper<-apply(muMat,2,quantile,0.95)
      # The next line is to avoid outliers spoiling plot scales
      plotMax<-max(muUpper)
      plotMin<-min(muLower)
      
      # Get the plot colors
      muColor<-ifelse(muLower>rep(muMean,length(muLower)),"high",
                      ifelse(muUpper<rep(muMean,length(muUpper)),"low","avg"))
      for(c in whichClusters){
        plotMu<-muMat[,c]
        plotMu<-plotMu[plotMu<=plotMax&plotMu>=plotMin]
        nPoints<-length(plotMu)
        # profileDF<-rbind(profileDF,data.frame("mu"=plotMu,"cluster"=rep(c,nPoints),
        # 	"meanMu"=rep(muMean,nPoints),
        # 	"lowerMu"=rep(muLower[c],nPoints),
        # 	"upperMu"=rep(muUpper[c],nPoints),
        # 	"fillColor"=rep(muColor[c],nPoints)))
        #################################################################
        my.list[[z]] <- data.frame("mu"=plotMu,"cluster"=rep(c,nPoints),
                                   "meanMu"=rep(muMean,nPoints),
                                   "lowerMu"=rep(muLower[c],nPoints),
                                   "upperMu"=rep(muUpper[c],nPoints),
                                   "fillColor"=rep(muColor[c],nPoints))
        z=z+1
        #################################################################	
      }
      #################################################################
      profileDF <- rbindlist(my.list)
      rownames(profileDF)<-seq(1,nrow(profileDF),1)
      #################################################################
      
      plotObj<-ggplot(profileDF)
      plotObj<-plotObj+geom_hline(aes(yintercept=meanMu))
      plotObj<-plotObj+geom_boxplot(aes(x=as.factor(cluster),y=mu,fill=as.factor(fillColor)),outlier.size=0.5)
      plotObj<-plotObj+geom_point(aes(x=as.factor(cluster),y=lowerMu,colour=as.factor(fillColor)),size=1.5)
      plotObj<-plotObj+geom_point(aes(x=as.factor(cluster),y=upperMu,colour=as.factor(fillColor)),size=1.5)
      plotObj<-plotObj+
        scale_fill_manual(values = c(high ="#CC0033",low ="#0066CC", avg ="#33CC66"))+
        scale_colour_manual(values = c(high ="#CC0033",low ="#0066CC", avg ="#33CC66"))+
        theme(legend.position="none")+labs(x="Cluster")+theme(axis.title.x=element_text(size=10))
      if(j==1){
        plotObj<-plotObj+labs(y="Mean")+theme(axis.title.y=element_text(size=10,angle=90))
      }else{
        plotObj<-plotObj+theme(axis.title.y=element_blank())
      }
      plotObj<-plotObj+labs(title=covNames[j],plot.title=element_text(size=10))
      plotObj<-plotObj+
        theme(plot.margin=unit(c(0.5,ifelse(j==nCovariates,1,0),0.5,ifelse(j==1,0.5,0)),'lines'))+
        theme(plot.margin=unit(c(0,0,0,0),'lines'))
      
      print(plotObj,vp=viewport(layout.pos.row=1:3,layout.pos.col=j+2))
      
      # Plot the variances
      # profileDF<-data.frame("sigma"=c(),"cluster"=c(),"sigmaMean"=c(),
      # 	"lowerSigma"=c(),"upperSigma"=c(),"fillColor"=c())
      #################################################################
      my.list <- vector('list', length(whichClusters))
      z=1		
      #################################################################
      if(useIndependentNormal){
        sigmaMat<-profileStdDev[,meanSortIndex,j]
      }else{
        sigmaMat<-profileStdDev[,meanSortIndex,j,j]
      }
      sigmaMeans<-apply(sigmaMat,2,mean)
      sigmaMean<-sum(sigmaMeans*clusterSizes)/sum(clusterSizes)
      sigmaLower<-apply(sigmaMat,2,quantile,0.05)
      sigmaUpper<-apply(sigmaMat,2,quantile,0.95)
      # The next line is to avoid outliers spoiling plot scales
      plotMax<-max(sigmaUpper)
      
      # Get the plot colors
      sigmaColor<-ifelse(sigmaLower>rep(sigmaMean,length(sigmaLower)),"high",
                         ifelse(sigmaUpper<rep(sigmaMean,length(sigmaUpper)),"low","avg"))
      for(c in whichClusters){
        plotSigma<-sigmaMat[,c]
        plotSigma<-plotSigma[plotSigma<plotMax]
        nPoints<-length(plotSigma)
        # profileDF<-rbind(profileDF,data.frame("sigma"=plotSigma,"cluster"=rep(c,nPoints),
        # 	"meanSigma"=rep(sigmaMean,nPoints),
        # 	"lowerSigma"=rep(sigmaLower[c],nPoints),
        # 	"upperSigma"=rep(sigmaUpper[c],nPoints),
        # 	"fillColor"=rep(sigmaColor[c],nPoints)))
        #################################################################
        my.list[[z]] <- data.frame("sigma"=plotSigma,"cluster"=rep(c,nPoints),
                                   "meanSigma"=rep(sigmaMean,nPoints),
                                   "lowerSigma"=rep(sigmaLower[c],nPoints),
                                   "upperSigma"=rep(sigmaUpper[c],nPoints),
                                   "fillColor"=rep(sigmaColor[c],nPoints))
        z=z+1
        #################################################################	
        
      }
      #################################################################
      profileDF <- rbindlist(my.list)
      rownames(profileDF)<-seq(1,nrow(profileDF),1)
      #################################################################
      
      plotObj<-ggplot(profileDF)
      plotObj<-plotObj+geom_hline(aes(yintercept=meanSigma))
      plotObj<-plotObj+geom_boxplot(aes(x=as.factor(cluster),y=sigma,fill=as.factor(fillColor)),outlier.size=0.5)
      plotObj<-plotObj+geom_point(aes(x=as.factor(cluster),y=lowerSigma,colour=as.factor(fillColor)),size=1.5)
      plotObj<-plotObj+geom_point(aes(x=as.factor(cluster),y=upperSigma,colour=as.factor(fillColor)),size=1.5)
      plotObj<-plotObj+
        scale_fill_manual(values = c(high ="#CC0033",low ="#0066CC", avg ="#33CC66"))+
        scale_colour_manual(values = c(high ="#CC0033",low ="#0066CC", avg ="#33CC66"))+
        theme(legend.position="none")+labs(x="Cluster")+theme(axis.title.x=element_text(size=10))
      if(j==1){
        plotObj<-plotObj+labs(y="Std Dev")+theme(axis.title.y=element_text(size=10,angle=90))
      }else{
        plotObj<-plotObj+theme(axis.title.y=element_blank())
      }
      plotObj<-plotObj+
        theme(plot.margin=unit(c(0.5,ifelse(j==nCovariates,1,0),0.5,ifelse(j==1,0.5,0)),'lines'))+
        theme(plot.margin=unit(c(0,0,0,0),'lines'))
      
      print(plotObj,vp=viewport(layout.pos.row=4:6,layout.pos.col=j+2))
    }else if(xModel=='Mixed'){
      if (j<=nDiscreteCovs){
        # profileDF<-data.frame("prob"=c(),"cluster"=c(),"category"=c(),"meanProb"=c(),
        # 	"lowerProb"=c(),"upperProb"=c(),"fillColor"=c())
        
        #################################################################
        my.list <- vector('list', length(whichClusters)*nCategories[j])
        z=1		
        #################################################################
        
        for(k in 1:nCategories[j]){
          if (nDiscreteCovs==1) {
            probMat<-profilePhi[,meanSortIndex,1,k]
          } else {
            probMat<-profilePhi[,meanSortIndex,j,k]
          }
          nPoints<-nrow(probMat)
          probMeans<-apply(probMat,2,mean)
          probMean<-sum(probMeans*clusterSizes)/sum(clusterSizes)
          probLower<-apply(probMat,2,quantile,0.05)
          probUpper<-apply(probMat,2,quantile,0.95)
          
          # Get the plot colors
          probColor<-ifelse(probLower>rep(probMean,length(probLower)),"high",
                            ifelse(probUpper<rep(probMean,length(probUpper)),"low","avg"))
          
          
          
          for(c in whichClusters){
            # profileDF<-rbind(profileDF,data.frame("prob"=probMat[,c],"cluster"=rep(c,nPoints),
            # 	"category"=rep(k-1,nPoints),
            # 	"meanProb"=rep(probMean,nPoints),
            # 	"lowerProb"=rep(probLower[c],nPoints),
            # 	"upperProb"=rep(probUpper[c],nPoints),
            # 	"fillColor"=rep(probColor[c],nPoints)))
            # 	 rownames(profileDF)<-seq(1,nrow(profileDF),1)
            
            #################################################################
            my.list[[z]] <- data.frame("prob"=probMat[,c],"cluster"=rep(c,nPoints),
                                       "category"=rep(k-1,nPoints),
                                       "meanProb"=rep(probMean,nPoints),
                                       "lowerProb"=rep(probLower[c],nPoints),
                                       "upperProb"=rep(probUpper[c],nPoints),
                                       "fillColor"=rep(probColor[c],nPoints))
            z=z+1
            #################################################################
          }
        }
        
        #################################################################
        profileDF <- rbindlist(my.list)
        rownames(profileDF)<-seq(1,nrow(profileDF),1)
        
        #print(str(profileDF))
        #print(head(profileDF))
        #################################################################
        
        
        plotObj<-ggplot(profileDF)
        plotObj<-plotObj+facet_wrap(~category,ncol=1,as.table=F,scales="free_y")
        plotObj<-plotObj+geom_hline(aes(yintercept=meanProb))
        plotObj<-plotObj+geom_boxplot(aes(x=as.factor(cluster),y=prob,fill=as.factor(fillColor)),outlier.size=0.5)
        plotObj<-plotObj+geom_point(aes(x=as.factor(cluster),y=lowerProb,colour=as.factor(fillColor)),size=1.5)
        plotObj<-plotObj+geom_point(aes(x=as.factor(cluster),y=upperProb,colour=as.factor(fillColor)),size=1.5)
        plotObj<-plotObj+
          scale_fill_manual(values = c(high ="#CC0033",low ="#0066CC", avg ="#33CC66"))+
          scale_colour_manual(values = c(high ="#CC0033",low ="#0066CC", avg ="#33CC66"))+
          theme(legend.position="none")+labs(x="Cluster")+theme(axis.title.x=element_text(size=10))
        if(j==1){
          plotObj<-plotObj+labs(y="Probability")+theme(axis.title.y=element_text(size=10,angle=90))
        }else{
          plotObj<-plotObj+theme(axis.title.y=element_blank())
        }
        plotObj<-plotObj+labs(title=covNames[j],plot.title=element_text(size=10))
        plotObj<-plotObj+theme(plot.margin=unit(c(0.5,ifelse(j==nCovariates,1,0),0.5,ifelse(j==1,0.5,0)),'lines'))+
          theme(plot.margin=unit(c(0,0,0,0),'lines'))
        
        print(plotObj,vp=viewport(layout.pos.row=1:6,layout.pos.col=j+2))
      } else {			
        # Plot the means
        # profileDF<-data.frame("mu"=c(),"cluster"=c(),"muMean"=c(),
        # 	"lowerMu"=c(),"upperMu"=c(),"fillColor"=c())
        #################################################################
        my.list <- vector('list', length(whichClusters))
        z=1		
        #################################################################
        if (nContinuousCovs==1){
          muMat<-profileMu[,meanSortIndex,1]
        } else {
          muMat<-profileMu[,meanSortIndex,(j-nDiscreteCovs)]
        }
        muMeans<-apply(muMat,2,mean)
        muMean<-sum(muMeans*clusterSizes)/sum(clusterSizes)
        muLower<-apply(muMat,2,quantile,0.05)
        muUpper<-apply(muMat,2,quantile,0.95)
        # The next line is to avoid outliers spoiling plot scales
        plotMax<-max(muUpper)
        plotMin<-min(muLower)
        
        
        # Get the plot colors
        muColor<-ifelse(muLower>rep(muMean,length(muLower)),"high",
                        ifelse(muUpper<rep(muMean,length(muUpper)),"low","avg"))
        for(c in whichClusters){
          plotMu<-muMat[,c]
          plotMu<-plotMu[plotMu<=plotMax&plotMu>=plotMin]
          nPoints<-length(plotMu)
          # profileDF<-rbind(profileDF,data.frame("mu"=plotMu,"cluster"=rep(c,nPoints),
          # 	"meanMu"=rep(muMean,nPoints),
          # 	"lowerMu"=rep(muLower[c],nPoints),
          # 	"upperMu"=rep(muUpper[c],nPoints),
          # 	"fillColor"=rep(muColor[c],nPoints)))
          #################################################################
          my.list[[z]] <- data.frame("mu"=plotMu,"cluster"=rep(c,nPoints),
                                     "meanMu"=rep(muMean,nPoints),
                                     "lowerMu"=rep(muLower[c],nPoints),
                                     "upperMu"=rep(muUpper[c],nPoints),
                                     "fillColor"=rep(muColor[c],nPoints))
          z=z+1
          #################################################################	
          
        }
        #rownames(profileDF)<-seq(1,nrow(profileDF),1)
        #################################################################
        profileDF <- rbindlist(my.list)
        
        rownames(profileDF)<-seq(1,nrow(profileDF),1)
        
        #print(str(profileDF))
        #print(head(profileDF))
        #################################################################
        plotObj<-ggplot(profileDF)
        plotObj<-plotObj+geom_hline(aes(yintercept=meanMu))
        plotObj<-plotObj+geom_boxplot(aes(x=as.factor(cluster),y=mu,fill=as.factor(fillColor)),outlier.size=0.5)
        plotObj<-plotObj+geom_point(aes(x=as.factor(cluster),y=lowerMu,colour=as.factor(fillColor)),size=1.5)
        plotObj<-plotObj+geom_point(aes(x=as.factor(cluster),y=upperMu,colour=as.factor(fillColor)),size=1.5)
        plotObj<-plotObj+
          scale_fill_manual(values = c(high ="#CC0033",low ="#0066CC", avg ="#33CC66"))+
          scale_colour_manual(values = c(high ="#CC0033",low ="#0066CC", avg ="#33CC66"))+
          theme(legend.position="none")+labs(x="Cluster")+theme(axis.title.x=element_text(size=10))
        if(j==1){
          plotObj<-plotObj+labs(y="Mean")+theme(axis.title.y=element_text(size=10,angle=90))
        }else{
          plotObj<-plotObj+theme(axis.title.y=element_blank())
        }
        plotObj<-plotObj+labs(title=covNames[j],plot.title=element_text(size=10))
        plotObj<-plotObj+
          theme(plot.margin=unit(c(0.5,ifelse(j==nCovariates,1,0),0.5,ifelse(j==1,0.5,0)),'lines'))+
          theme(plot.margin=unit(c(0,0,0,0),'lines'))
        
        print(plotObj,vp=viewport(layout.pos.row=1:3,layout.pos.col=j+2))
        
        # Plot the variances
        # profileDF<-data.frame("sigma"=c(),"cluster"=c(),"sigmaMean"=c(),
        # 	"lowerSigma"=c(),"upperSigma"=c(),"fillColor"=c())
        #################################################################
        my.list <- vector('list', length(whichClusters))
        z=1		
        #################################################################
        if (nContinuousCovs==1){
          if(useIndependentNormal){
            sigmaMat<-profileStdDev[,meanSortIndex,1]
          }else{
            sigmaMat<-profileStdDev[,meanSortIndex,1,1]
          }
        } else {
          if(useIndependentNormal){
            sigmaMat<-profileStdDev[,meanSortIndex,(j-nDiscreteCovs)]
          }else{
            sigmaMat<-profileStdDev[,meanSortIndex,(j-nDiscreteCovs),(j-nDiscreteCovs)]
          }
        }
        sigmaMeans<-apply(sigmaMat,2,mean)
        sigmaMean<-sum(sigmaMeans*clusterSizes)/sum(clusterSizes)
        sigmaLower<-apply(sigmaMat,2,quantile,0.05)
        sigmaUpper<-apply(sigmaMat,2,quantile,0.95)
        # The next line is to avoid outliers spoiling plot scales
        plotMax<-max(sigmaUpper)
        
        # Get the plot colors
        sigmaColor<-ifelse(sigmaLower>rep(sigmaMean,length(sigmaLower)),"high",
                           ifelse(sigmaUpper<rep(sigmaMean,length(sigmaUpper)),"low","avg"))
        for(c in whichClusters){
          plotSigma<-sigmaMat[,c]
          plotSigma<-plotSigma[plotSigma<plotMax]
          nPoints<-length(plotSigma)
          # profileDF<-rbind(profileDF,data.frame("sigma"=plotSigma,"cluster"=rep(c,nPoints),
          # 	"meanSigma"=rep(sigmaMean,nPoints),
          # 	"lowerSigma"=rep(sigmaLower[c],nPoints),
          # 	"upperSigma"=rep(sigmaUpper[c],nPoints),
          # 	"fillColor"=rep(sigmaColor[c],nPoints)))
          #################################################################
          my.list[[z]] <- data.frame("sigma"=plotSigma,"cluster"=rep(c,nPoints),
                                     "meanSigma"=rep(sigmaMean,nPoints),
                                     "lowerSigma"=rep(sigmaLower[c],nPoints),
                                     "upperSigma"=rep(sigmaUpper[c],nPoints),
                                     "fillColor"=rep(sigmaColor[c],nPoints))
          z=z+1
          #################################################################	
        }
        # rownames(profileDF)<-seq(1,nrow(profileDF),1)
        #################################################################
        profileDF <- rbindlist(my.list)
        rownames(profileDF)<-seq(1,nrow(profileDF),1)
        
        #print(str(profileDF))
        #print(head(profileDF))
        #################################################################
        
        plotObj<-ggplot(profileDF)
        plotObj<-plotObj+geom_hline(aes(yintercept=meanSigma))
        plotObj<-plotObj+geom_boxplot(aes(x=as.factor(cluster),y=sigma,fill=as.factor(fillColor)),outlier.size=0.5)
        plotObj<-plotObj+geom_point(aes(x=as.factor(cluster),y=lowerSigma,colour=as.factor(fillColor)),size=1.5)
        plotObj<-plotObj+geom_point(aes(x=as.factor(cluster),y=upperSigma,colour=as.factor(fillColor)),size=1.5)
        plotObj<-plotObj+
          scale_fill_manual(values = c(high ="#CC0033",low ="#0066CC", avg ="#33CC66"))+
          scale_colour_manual(values = c(high ="#CC0033",low ="#0066CC", avg ="#33CC66"))+
          theme(legend.position="none")+labs(x="Cluster")+theme(axis.title.x=element_text(size=10))
        if(j==1){
          plotObj<-plotObj+labs(y="Std Dev")+theme(axis.title.y=element_text(size=10,angle=90))
        }else{
          plotObj<-plotObj+theme(axis.title.y=element_blank())
        }
        plotObj<-plotObj+
          theme(plot.margin=unit(c(0.5,ifelse(j==nCovariates,1,0),0.5,ifelse(j==1,0.5,0)),'lines'))+
          theme(plot.margin=unit(c(0,0,0,0),'lines'))
        
        print(plotObj,vp=viewport(layout.pos.row=4:6,layout.pos.col=j+2))
      }
      
    }
  }
  dev.off()
  return(meanSortIndex)
}





# Calculate predictions, and if possible assess predictive performance
calcPredictions<-function(riskProfObj,predictResponseFileName=NULL, doRaoBlackwell=F,
                          fullSweepPredictions=F,fullSweepLogOR=F,fullSweepHazardRatio=F,referenceClusterOR=NA){
  
  riskProfClusObj=NULL
  clusObjRunInfoObj=NULL
  yModel=NULL
  reportBurnIn=NULL
  nBurn=NULL
  nFilter=NULL
  nSweeps=NULL
  nPredictSubjects=NULL
  fullPredictFile=NULL
  nFixedEffects=NULL
  directoryPath=NULL
  fileStem=NULL
  nCategoriesY=NULL
  nSubjects=NULL
  weibullFixedShape=NULL
  risk=NULL
  
  for (i in 1:length(riskProfObj)) assign(names(riskProfObj)[i],riskProfObj[[i]])
  for (i in 1:length(riskProfClusObj)) assign(names(riskProfClusObj)[i],riskProfClusObj[[i]])
  for (i in 1:length(clusObjRunInfoObj)) assign(names(clusObjRunInfoObj)[i],clusObjRunInfoObj[[i]])
  
  if(yModel=="Poisson"||yModel=="Normal"||yModel=="Quantile"){
    if (fullSweepLogOR==T){
      fullSweepLogOR=F
      cat("Log odds ratio does not make sense for Poisson, Normal or Quantile response\n")
    }
  }
  
  
  if(!yModel=="Survival"){
    if (fullSweepHazardRatio==T){
      fullSweepHazardRatio=F
      cat("Hazard ratio makes sense for Survival response only\n")
    }
    if (!weibullFixedShape){
      doRaoBlackwell=F
      cat("For Survival response with cluster specific shape parameter Rao-Blackwell predictions are not possible.\n")
    }
  }
  
  if (yModel=="Survival"){
    restrictedMeanSurvival<-max(clusObjRunInfoObj$yMat[,1])
  } 
  
  firstLine<-ifelse(reportBurnIn,nBurn/nFilter+2,1)
  lastLine<-(nSweeps+ifelse(reportBurnIn,nBurn+1,0))/nFilter
  nSamples<-lastLine-firstLine+1
  
  # First of all we see if there a covariate file has been supplied
  if(nPredictSubjects==0){
    stop("No prediction subjects processed by C++\n")
  }
  
  # Get the response and fixed effects data if available
  responseProvided<-F
  extraInfoProvided<-F # extra info is denominator for binomial and poisson
  fixedEffectsProvided<-F
  if (is.null(predictResponseFileName)&&fullPredictFile==TRUE){
    predictResponseFileName = file.path(directoryPath,paste(fileStem,'_predictFull.txt',sep=''))
  }
  if(!is.null(predictResponseFileName)){
    predictResponseData<-scan(predictResponseFileName,quiet=T)
    predictResponseMat<-matrix(predictResponseData[2:length(predictResponseData)],
                               nrow=nPredictSubjects,byrow=T)
    predictYMat<-matrix(predictResponseMat[,1],ncol=1)
    if(yModel=="Poisson"||yModel=="Binomial"||yModel=="Survival"){
      predictYMat<-cbind(predictYMat,predictResponseMat[,ncol(predictResponseMat)])
      if(all(predictYMat[,2]>-999)){
        extraInfoProvided<-T
      }
    }
    if(all(predictYMat[,1]>-999)){
      responseProvided<-T
    }
    if(nFixedEffects>0){
      fixedEffectsProvided<-T
      predictWMat<-matrix(predictResponseMat[,2:(nFixedEffects+1)],nrow=nPredictSubjects)
      # Set missing values to their average or reference value
      predictWMat[predictWMat==-999]<-0
    }
  }
  
  if(fixedEffectsProvided){
    betaFileName <-file.path(directoryPath,paste(fileStem,'_beta.txt',sep=''))
    betaFile<-file(betaFileName,open="r")
    betaArray<-array(0,dim=c(nSamples,nFixedEffects,nCategoriesY))
    for(sweep in firstLine:lastLine){
      if(sweep==firstLine){
        skipVal<-firstLine-1
      }else{
        skipVal<-0
      }
      if (yModel=="Categorical") {
        currBetaVector<-scan(betaFile,what=double(),skip=skipVal,n=nFixedEffects*(nCategoriesY-1),quiet=T)
        currBeta<-matrix(currBetaVector,ncol=(nCategoriesY-1),byrow=T)
        currBeta<-cbind(rep(0,dim(currBeta)[1]),currBeta)
      } else {
        currBetaVector<-scan(betaFile,what=double(),skip=skipVal,n=nFixedEffects,quiet=T)
        currBeta<-matrix(currBetaVector,ncol=nCategoriesY,byrow=T)
      }
      betaArray[sweep-firstLine+1,,]<-currBeta
    }
  }
  
  if (yModel=="Survival"){
    if (weibullFixedShape){
      nuFileName<-file.path(directoryPath,paste(fileStem,'_nu.txt',sep=''))
      nuFile<-file(nuFileName,open="r")
      nu<-mean(read.table(nuFile)[,1])
      close(nuFile)
    }
  } 
  
  # Already done the allocation in the C++
  if(doRaoBlackwell){
    # Construct the RB theta file name
    thetaFileName<-file.path(directoryPath,paste(fileStem,'_predictThetaRaoBlackwell.txt',sep=''))
    thetaArray<-array(0,dim=c(nSamples,nPredictSubjects,nCategoriesY))
    # Read the RB theta data
    if (yModel=="Categorical"){
      thetaMat<-matrix(scan(thetaFileName,what=double(),quiet=T),byrow=T,ncol=nPredictSubjects*(nCategoriesY-1))
      for (sweep in firstLine:lastLine){
        thetaArrayRow<-cbind(rep(0,nPredictSubjects),matrix(thetaMat[sweep,],ncol=nCategoriesY-1,byrow=T))
        thetaArray[sweep-firstLine+1,,]<-thetaArrayRow
      }
    } else {
      thetaMat<-matrix(scan(thetaFileName,what=double(),quiet=T),byrow=T,ncol=nPredictSubjects)
      thetaArray[,,1]<-thetaMat[firstLine:nrow(thetaMat),]
    }
  }else{
    # Construct the file names
    zFileName <- file.path(directoryPath,paste(fileStem,'_z.txt',sep=''))
    zFile<-file(zFileName,open="r")
    thetaFileName<-file.path(directoryPath,paste(fileStem,'_theta.txt',sep=''))
    thetaFile<-file(thetaFileName,open="r")
    nClustersFileName<-file.path(directoryPath,paste(fileStem,'_nClusters.txt',sep=''))
    nClustersFile<-file(nClustersFileName,open="r")
    if (yModel=="Survival"&&!weibullFixedShape){
      nuFileName<-file.path(directoryPath,paste(fileStem,'_nu.txt',sep=''))
      nuFile<-file(nuFileName,open="r")
      nuArrayPred<-array(0,dim=c(nSamples,nPredictSubjects))
    }
    # initialise theta and beta arrays
    thetaArray<-array(0,dim=c(nSamples,nPredictSubjects,nCategoriesY))
    for(sweep in firstLine:lastLine){
      if(sweep==firstLine){
        skipVal<-firstLine-1
      }else{
        skipVal<-0
      }
      currZ<-1+scan(zFile,what=integer(),skip=skipVal,n=nSubjects+nPredictSubjects,quiet=T)
      tmpCurrZ<-currZ
      currZ<-currZ[(nSubjects+1):length(currZ)]
      currNClusters<-scan(nClustersFile,skip=skipVal,n=1,what=integer(),quiet=T)
      if (yModel=="Categorical") {
        currThetaVector<-scan(thetaFile,what=double(),skip=skipVal,n=currNClusters*(nCategoriesY-1),quiet=T)
        currTheta<-matrix(currThetaVector,ncol=(nCategoriesY-1),byrow=T)
        currTheta<-cbind(rep(0,dim(currTheta)[1]),currTheta)
      } else {
        currThetaVector<-scan(thetaFile,what=double(),skip=skipVal,n=currNClusters,quiet=T)
        currTheta<-matrix(currThetaVector,ncol=nCategoriesY,byrow=T)
        if (yModel=="Survival"&&!weibullFixedShape) currNu<-scan(nuFile,what=double(),skip=skipVal,n=currNClusters,quiet=T)
      }
      thetaArray[sweep-firstLine+1,,]<-as.matrix(currTheta[currZ,],ncol=nCategoriesY)
      if (yModel=="Survival"&&!weibullFixedShape) nuArrayPred[sweep-firstLine+1,]<-as.vector(currNu[currZ])
      
    }
    close(zFile)
    close(thetaFile)
    close(nClustersFile)
    if (yModel=="Survival"&&!weibullFixedShape) close(nuFile)
  }
  
  
  
  # Use the values of theta to derive the predicted values
  predictedY<-array(0,dim=c(nSamples,nPredictSubjects,nCategoriesY))
  for(sweep in 1:nSamples){
    if(sweep==1||sweep%%1000==0){
      cat(paste("Processing sweep",sweep,"of ",nSamples,"\n"))
    }
    
    lambda<-thetaArray[sweep,,]
    if(fixedEffectsProvided){
      currBeta<-betaArray[sweep,,]
      lambda<-lambda+predictWMat%*%currBeta
    }
    
    if(yModel=='Poisson'){
      if(extraInfoProvided){
        # Add in the offset
        lambda<-lambda+log(predictYMat[,2])
      }
    }
    
    if(yModel=='Bernoulli'){
      predictedY[sweep,,]<-exp(lambda)/(1+exp(lambda))
    }else if(yModel=='Binomial'){
      if(extraInfoProvided){
        predictedY[sweep,,]<-predictYMat[,2]*exp(lambda)/(1+exp(lambda))
      }else{
        predictedY[sweep,,]<-exp(lambda)/(1+exp(lambda))
      }
    }else if(yModel=='Poisson'){
      predictedY[sweep,,]<-exp(lambda)
    }else if(yModel=='Normal'||yModel=='Quantile'){
      predictedY[sweep,,]<-lambda
    }else if(yModel=='Survival'){
      if (!weibullFixedShape) nu<-nuArrayPred[sweep,]
      tmpPredictedY<-1/((exp(lambda))^(1/nu))*gamma(1+1/nu)
      tmpPredictedY[which(tmpPredictedY>restrictedMeanSurvival)]<-restrictedMeanSurvival
      predictedY[sweep,,]<-tmpPredictedY
    }else if(yModel=="Categorical"){
      predictedY[sweep,,]<-exp(lambda)/rowSums(exp(lambda))
    }
  }
  
  if(responseProvided){
    bias<-apply(predictedY,2,median)-predictYMat[,1]
    rmse<-sqrt(mean(bias^2))
    mae<-mean(abs(bias))
    bias<-mean(bias)
    output<-list("bias"=bias,"rmse"=rmse,
                 "observedY"=predictYMat[,1],
                 "predictedY"=apply(predictedY,c(2,3),median),
                 "doRaoBlackwell"=doRaoBlackwell,
                 "mae"=mae)
  }else{
    output<-list("bias"=NA,"rmse"=NA,"observedY"=NA,"predictedY"=apply(predictedY,c(2,3),median),"doRaoBlackwell"=doRaoBlackwell,
                 "mae"=NA)
  }
  if(fullSweepPredictions){
    output$predictedYPerSweep<-predictedY
  }
  if(fullSweepLogOR){
    logORPerSweep<-matrix(0,ncol=ncol(predictedY),nrow=nrow(predictedY))
    for(i in 1:dim(thetaArray)[1]){
      if (is.na(referenceClusterOR)){
        # Always relative to the first prediction subject
        logORPerSweep[i,]<-thetaArray[i,,]-thetaArray[i,1,]
      } else {
        # relative to the cluster with lowest risk 
        logORPerSweep[i,]<-thetaArray[i,,]-risk[i,referenceClusterOR,]
      }
    }
    output$logORPerSweep<-logORPerSweep
  }
  if (fullSweepHazardRatio){
    fullHR<-matrix(0,ncol=ncol(predictedY),nrow=nrow(predictedY))
    for(i in 1:dim(thetaArray)[1]){
      # Always relative to the first prediction subject
      fullHR[i,]<-predictedY[i,,]/predictedY[i,1,]
    }
    output$fullHR<-fullHR
    
  }
  if(fixedEffectsProvided){
    close(betaFile)
  }
  return(output)
}

# Show the continuous hyperparameter for variable selection
summariseVarSelectRho<-function(runInfoObj){
  
  directoryPath=NULL
  fileStem=NULL
  nCovariates=NULL
  reportBurnIn=NULL
  nBurn=NULL
  nFilter=NULL
  nSweeps=NULL
  
  for (i in 1:length(runInfoObj)) assign(names(runInfoObj)[i],runInfoObj[[i]])
  
  # Rho file name
  rhoFileName <- file.path(directoryPath,paste(fileStem,'_rho.txt',sep=''))
  rhoMat<-matrix(scan(rhoFileName,what=double(),quiet=T),ncol=nCovariates,byrow=T)
  
  # Restrict to after burn in
  firstLine<-ifelse(reportBurnIn,nBurn/nFilter+2,1)
  lastLine<-(nSweeps+ifelse(reportBurnIn,nBurn+1,0))/nFilter
  
  rhoMat<-rhoMat[firstLine:lastLine,]
  
  rhoMean<-apply(rhoMat,2,mean)
  rhoMedian<-apply(rhoMat,2,median)
  rhoLowerCI<-apply(rhoMat,2,quantile,0.05)
  rhoUpperCI<-apply(rhoMat,2,quantile,0.95)
  
  return(list("rho"=rhoMat,"rhoMean"=rhoMean,"rhoMedian"=rhoMedian,"rhoLowerCI"=rhoLowerCI,"rhoUpperCI"=rhoUpperCI))
  
}


# Function to compute the marginal model posterior (only for discrete covariates and Bernoulli outcome)
margModelPosterior<-function(runInfoObj,allocation){
  
  xModel=NULL
  yModel=NULL
  varSelect=NULL
  nSubjects=NULL
  xMat=NULL
  directoryPath=NULL
  fileStem=NULL
  includeResponse=NULL
  nFixedEffects=NULL
  reportBurnIn=NULL
  nBurn=NULL
  nFilter=NULL
  nSweeps=NULL
  nProgress=NULL
  dPitmanYor=NULL
  nCovariates=NULL
  
  for (i in 1:length(runInfoObj)) assign(names(runInfoObj)[i],runInfoObj[[i]])
  
  # this function only works for Bernoulli outcome and discrete covariates, so check that it is used correctly
  if (includeResponse) {
    if (xModel!="Discrete"||yModel!="Bernoulli") stop("ERROR: The computation of the marginal model posterior has only been implemented for Bernoulli outcome and discrete covariates.")
  } else {
    if (xModel!="Discrete") stop("ERROR: The computation of the marginal model posterior has only been implemented for Bernoulli outcome and discrete covariates.")
  }
  # no variable selection has been implemented
  if (varSelect==TRUE) print("Warning: Variable selection is not taken into account for the computation of marginal model posterior")
  # the subjects with missing values are simply removed for now
  missingX<-FALSE
  for (k in 1:nSubjects){
    if (nCovariates>1&&sum(xMat[k,]==-999)>0) {
      missingX<-TRUE
      stop("ERROR: No missing value handling technique has been implemented for the marginal model posterior. This function cannot be run if missing values are present.")
    }
    if (nCovariates==1&&sum(xMat[k]==-999)>0) {
      missingX<-TRUE
      stop("ERROR: No missing value handling technique has been implemented for the marginal model posterior. This function cannot be run if missing values are present.")
    }
  }
  # this function only works for dPitmanYor == 0, ie only for Dirichlet process prior
  if (runInfoObj$dPitmanYor !=0) stop("ERROR: the marginal model posterior has only been implemented for Dirichlet process priors.")
  
  # read in value of hyperparameters
  runData<-readLines(file.path(directoryPath,paste(fileStem,'_log.txt',sep='')))
  
  hyperParams<-list()
  
  if (includeResponse==T){
    sigmaTheta<-runData[grep('sigmaTheta',runData)]
    sigmaTheta<-substr(sigmaTheta,regexpr(':',sigmaTheta)+1,nchar(sigmaTheta))
    sigmaTheta<-gsub(' ','',sigmaTheta)
    sigmaTheta<-gsub('\t','',sigmaTheta)
    sigmaTheta<-as.integer(sigmaTheta)
    
    dofTheta<-runData[grep('dofTheta',runData)]
    dofTheta<-substr(dofTheta,regexpr(':',dofTheta)+1,nchar(dofTheta))
    dofTheta<-gsub(' ','',dofTheta)
    dofTheta<-gsub('\t','',dofTheta)
    dofTheta<-as.integer(dofTheta)
    
    hyperParams<-list(sigmaTheta=sigmaTheta,dofTheta=dofTheta)
  }
  
  if (nFixedEffects>0){
    sigmaBeta<-runData[grep('sigmaBeta',runData)]
    sigmaBeta<-substr(sigmaBeta,regexpr(':',sigmaBeta)+1,nchar(sigmaBeta))
    sigmaBeta<-gsub(' ','',sigmaBeta)
    sigmaBeta<-gsub('\t','',sigmaBeta)
    sigmaBeta<-as.integer(sigmaBeta)
    
    dofBeta<-runData[grep('dofBeta',runData)]
    dofBeta<-substr(dofBeta,regexpr(':',dofBeta)+1,nchar(dofBeta))
    dofBeta<-gsub(' ','',dofBeta)
    dofBeta<-gsub('\t','',dofBeta)
    dofBeta<-as.integer(dofBeta)
    
    hyperParams$sigmaBeta<-sigmaBeta
    hyperParams$dofBeta<-dofBeta
  }
  
  
  # set alpha, whether it has been estimated or it is fixed
  # find out if it was fixed or estimated 
  alphaUpdate<-runData[grep('Update alpha',runData)]
  alphaUpdate<-substr(alphaUpdate,regexpr(':',alphaUpdate)+1,nchar(alphaUpdate))
  alphaUpdate<-gsub(' ','',alphaUpdate)
  if (alphaUpdate=="False") {
    alpha<-runData[grep('Fixed alpha: ',runData)]
    alpha<-substr(alpha,regexpr(':',alpha)+1,nchar(alpha))
    alpha<-as.integer(alpha)
  } else {
    # if alpha wasn't fixed, take median value of chain
    firstLine<-ifelse(reportBurnIn,nBurn/nFilter+2,1)
    skipLines<-ifelse(reportBurnIn,nBurn/nFilter+1,0)
    lastLine<-(nSweeps+ifelse(reportBurnIn,nBurn+1,0))/nFilter		
    alphaFileName <- file(file.path(directoryPath,paste(fileStem,'_alpha.txt',sep='')))
    open(alphaFileName)
    alphaValues<-vector()
    alphaValues[1]<-scan(alphaFileName,what=double(),skip=skipLines,nlines=1,quiet=T)
    for (i in (firstLine+1):lastLine){
      alphaValues[i-firstLine]<-scan(alphaFileName,what=double(),skip=0,nlines=1,quiet=T)
    }
    close(alphaFileName)
    alpha<-median(alphaValues)
  }
  runInfoObj$alphaMPP <- alpha
  
  if (xModel=="Discrete"){
    aPhi<-runData[grep('aPhi',runData)]
    aPhi<-substr(aPhi,regexpr(':',aPhi)+1,nchar(aPhi))
    aPhi<-strsplit(aPhi," ")[[1]][-1]
    aPhi<-as.integer(aPhi)
    hyperParams$aPhi<-aPhi
  }
  
  runInfoObj$hyperParams <- hyperParams
  
  # read first allocation iteration after burnin
  firstLine<-ifelse(reportBurnIn,nBurn/nFilter+1,1)
  skipLines<-ifelse(reportBurnIn,nBurn/nFilter,0)
  lastLine<-(nSweeps+ifelse(reportBurnIn,nBurn+1,0))/nFilter	
  
  # open allocation file
  if (missing(allocation)){
    zFileName <- file(file.path(directoryPath,paste(fileStem,'_z.txt',sep='')))
    open(zFileName)
    zAllocCurrent<-scan(zFileName,what=integer(),skip=skipLines,nlines=1,quiet=T)
    zAllocCurrent<-zAllocCurrent[1:nSubjects]
  } else {
    zAllocCurrent<-allocation[1:nSubjects]
    firstLine<-0
    skipLines<-0
    lastLine<-0
  }
  
  
  # initialise output vectors
  margModPost<-rep(0,length=(lastLine-firstLine+1))
  
  clusterSizes<-table(zAllocCurrent)
  nClusters<-length(clusterSizes)
  # set initial values of parameters of interest
  if (nFixedEffects>0){
    parFirstIter<-c(rep(0,nClusters),rep(0,nFixedEffects))
  } else {
    parFirstIter<-c(rep(0,nClusters))
  }
  # compute marginal model posterior
  
  output<-.pZpXpY(zAlloc=zAllocCurrent, par=parFirstIter, clusterSizes=clusterSizes, nClusters=nClusters, runInfoObj=runInfoObj, alpha=alpha)
  margModPost[1]<-output$margModPost
  if (missing(allocation)){
    for (iter in (firstLine+1):lastLine){
      if (iter%%nProgress==0) print(iter)
      # identify allocations for this sweep
      zAllocCurrent<-scan(zFileName,what=integer(),nlines=1,quiet=T)
      zAllocCurrent<-zAllocCurrent[1:nSubjects]
      # parameters
      # number of elements in each cluster
      clusterSizes<-table(zAllocCurrent)
      # number of clusters
      nClusters<-length(clusterSizes)
      # computing the marginal likelihood
      # version using the previous beta mode for next step	
      if (nFixedEffects>0){
        parTmp<-c(rep(0,nClusters),rep(0,nFixedEffects))
      } else {
        parTmp<-c(rep(0,nClusters))
      }
      
      output<-.pZpXpY(zAlloc=zAllocCurrent,par=parTmp, clusterSizes=clusterSizes, nClusters=nClusters, runInfoObj = runInfoObj, alpha=alpha)
      margModPost[iter-firstLine+1]<-output$margModPost
      
    }	
  }
  if (missing(allocation)){
    close(zFileName)
  }
  
  write.table(margModPost,file.path(directoryPath,paste(fileStem,"_margModPost.txt",sep="")), col.names = FALSE,row.names = FALSE)
  return(list("meanMargModPost"=mean(margModPost),"runInfoObj"=runInfoObj))
}

# internal function
# Function to evaluate pYGivenZW for Bernoulli outcome - required for the Laplace approximation
.pYGivenZW_Bernoulli<-function(thetaBeta,zAlloc,nClusters,hyperParams,
                               yMat,wMat,nSubjects,nFixedEffects,nTableNames,constants){
  dimThetaBeta<-length(thetaBeta)
  theta<-head(thetaBeta,nClusters)
  if (nFixedEffects>0){
    beta<-tail(thetaBeta,-nClusters)
    betaW<-as.matrix(wMat)%*%beta
  } else {
    beta<-0
    hyperParams$dofBeta<-0
    hyperParams$sigmaBeta<-0
    betaW<-0
  }
  nTableNames<-as.integer(nTableNames)
  maxNTableNames<-max(nTableNames)
  out<-.Call("pYGivenZW",beta,theta,zAlloc,hyperParams$sigmaBeta,
             hyperParams$sigmaTheta,hyperParams$dofTheta,hyperParams$dofBeta,nSubjects,
             yMat,betaW,nFixedEffects,nTableNames,constants,
             maxNTableNames,PACKAGE = 'PReMiuM')
  return(out)
}

# internal function
# Function to evaluate the gradient of pYGivenZW for Bernoulli outcome - required for the Laplace approximation
.pYGivenZW_Bernoulli_gradient<-function(thetaBeta,zAlloc,nClusters,hyperParams,
                                        yMat,wMat,nSubjects,nFixedEffects,nTableNames,constants){
  dimThetaBeta<-length(thetaBeta)
  theta<-head(thetaBeta,nClusters)
  nTableNames<-as.integer(nTableNames)
  maxNTableNames<-max(nTableNames)
  if (nFixedEffects>0){
    beta<-tail(thetaBeta,-nClusters)
    betaW<-as.matrix(wMat)%*%beta
    yPred<-.Call("GradpYGivenZW",beta,theta,zAlloc,nSubjects,
                 betaW,yMat,nFixedEffects,nTableNames,
                 maxNTableNames,PACKAGE = 'PReMiuM')
    wPred<- as.matrix(wMat)*yPred
  } else {
    beta<-0
    betaW<-0
    yPred<-.Call("GradpYGivenZW",beta,theta,zAlloc,nSubjects,
                 betaW,yMat,nFixedEffects,nTableNames,
                 maxNTableNames,PACKAGE = 'PReMiuM')
  }
  gr.theta<-rep(0,nClusters)
  for (k in 1:nClusters){
    gr.theta[k]<-sum(yPred[zAlloc==nTableNames[k]])
  }
  # contribution from the derivative of theta
  numeratorTheta<-(hyperParams$dofTheta+1)*theta
  paramsTheta<-hyperParams$dofTheta*hyperParams$sigmaTheta^2
  gr.theta<-gr.theta-numeratorTheta/(paramsTheta+theta^2)
  out<- -c(gr.theta)/nSubjects
  if (nFixedEffects>0){
    # contribution from the derivative of beta		
    gr.beta<-rep(0,nFixedEffects)
    gr.beta<-gr.beta+apply(wPred,2,sum)
    numeratorBeta<-(hyperParams$dofBeta+1)*beta
    paramsBeta<-hyperParams$dofBeta*hyperParams$sigmaBeta^2
    gr.beta<-gr.beta-numeratorBeta/(paramsBeta+beta^2)
    out<- c(out,-gr.beta/nSubjects)	
  }
  return(out)
}

# internal function
# Function to evaluate the Hessian matrix of pYGivenZW for Bernoulli outcome - required for the Laplace approximation
.pYGivenZW_Bernoulli_hessianMat<-function(thetaBeta,zAlloc,nClusters,hyperParams,
                                          wMat,nSubjects,nFixedEffects,nSweeps,nTableNames){
  dimThetaBeta<-length(thetaBeta)
  theta<-head(thetaBeta,nClusters)
  if (nFixedEffects>0){
    wMat<-as.matrix(wMat)
    beta<-tail(thetaBeta,-nClusters)
    betaW<-wMat%*%beta
  } else {
    beta<-0
    betaW<-0
  }
  hes.mat<-matrix(0,ncol=dimThetaBeta,nrow=dimThetaBeta)
  exp.predictor<-rep(0,nSubjects)
  denomPred<-rep(0,nSubjects)
  predictor<-rep(0,nSubjects)
  thetaTmp<-rep(0,length=max(as.integer(nTableNames)))
  k<-1
  for (i in as.integer(nTableNames)+1) {
    thetaTmp[i]<-theta[k]	
    k<-k+1
  }
  for (i in 1:nSubjects){
    predictor[i]<-thetaTmp[zAlloc[i]+1]
  }
  if (nFixedEffects>0){
    exp.predictor<-exp(predictor+as.vector(betaW))
  } else {
    exp.predictor<-exp(predictor)
  }
  denomPred<-exp.predictor/(exp.predictor+1)^2
  # second derivatives
  # wrt theta_i, theta_j is = 0
  # wrt theta_i, theta_i is
  paramsTheta<-hyperParams$dofTheta*hyperParams$sigmaTheta^2
  for (k in 1:nClusters){
    diag(hes.mat)[k]<-sum(denomPred[zAlloc==nTableNames[k]]) + (hyperParams$dofTheta+1)*
      (paramsTheta - theta[k]^2)/
      (paramsTheta + theta[k]^2)^2
    
  }
  if (nFixedEffects>0){
    # wrt beta_i, theta_j is
    for (k in 1:nFixedEffects){
      for (j in 1:nClusters){
        indexJ<-zAlloc==nTableNames[j]
        hes.mat[j,(nClusters+k)]<-hes.mat[(nClusters+k),j]<-
          sum(wMat[indexJ,k]*denomPred[indexJ])
      }
    }
    # wrt beta_i, beta_j is
    if (nFixedEffects>1){
      for (k in 1:(nFixedEffects-1)){
        for (j in (k+1):nFixedEffects){
          hes.mat[(nClusters+k),(nClusters+j)]<-hes.mat[(nClusters+j),(nClusters+k)]<-
            sum((wMat[,k]*wMat[,j])*denomPred)
        }
      }
    }
    # wrt beta_i, beta_i is
    betaSq<-beta*beta
    paramsBeta<-hyperParams$dofBeta*hyperParams$sigmaBeta^2
    diag(hes.mat)[(nClusters+1):(dimThetaBeta)]<- 
      apply(wMat^2 * denomPred,2,sum)+(hyperParams$dofBeta+1)*
      (paramsBeta - betaSq)/
      (paramsBeta + betaSq)^2
  }
  return(hes.mat/nSubjects) 
}	


# internal function
# marginal model posterior for one iteration
.pZpXpY<-function(zAlloc,runInfoObj,par=NULL,clusterSizes,nClusters,alpha){
  
  nSweeps=NULL
  nFixedEffects=NULL
  nCategories=NULL
  hyperParams=NULL
  nCovariates=NULL
  xMat=NULL
  includeResponse=NULL
  yMat=NULL
  wMat=NULL
  nSubjects=NULL
  alphaMPP=NULL
  
  for (i in 1:length(runInfoObj)) assign(names(runInfoObj)[i],runInfoObj[[i]])
  
  # marginal likelihood is p(Z|alpha,X,Y)
  
  # p(Z|alpha,X,Y) is prop to p(X|Z)p(Y|Z,W)p(Z|alpha)
  # we are going to call the three quantities pX, pY and pZ
  
  nTableNames<-names(clusterSizes)
  
  # set initial parameters at 0 if a better starting point is not provided	
  if (is.null(par)) par<-c(rep(0,nClusters),rep(0,nFixedEffects))
  
  # number of elements in following clusters
  nPlus<-head(c(rev(cumsum(rev(clusterSizes[-1]))),0),-1)	
  
  # computation of pX+pZ
  pZpX<-.Call("pZpX",nClusters,nCategories,hyperParams$aPhi,clusterSizes,nCovariates, zAlloc, as.vector(as.matrix(xMat)), as.integer(nTableNames), alphaMPP, nPlus, PACKAGE = 'PReMiuM')
  
  # computation of pY
  if (includeResponse==T){
    # constants of pY
    constants<-nClusters*(0.5*(hyperParams$dofTheta+1)*log(hyperParams$dofTheta)-
                            lgamma(0.5*(hyperParams$dofTheta+1))+0.5*log(hyperParams$dofTheta*pi)+lgamma(hyperParams$dofTheta*0.5))
    if (nFixedEffects>0) constants<-constants+nFixedEffects*(0.5*(hyperParams$dofBeta+1)*log(hyperParams$dofBeta)-
                                                               lgamma(0.5*(hyperParams$dofBeta+1))+0.5*log(hyperParams$dofBeta*pi)+lgamma(hyperParams$dofBeta*0.5))
    # computation of min for Laplace approximation		
    laplaceOut <- optim(par = par, # initial parameters
                        fn = .pYGivenZW_Bernoulli, # function to minimise over
                        control=list(fnscale=1), # need to minimise 
                        method="L-BFGS-B", # fastest method according to tests
                        gr=.pYGivenZW_Bernoulli_gradient, # gradient
                        hessian=FALSE, # we don't require the approximate computation of the hessian matrix, it's computed separately and exactly
                        # other parameters necessary for fn above
                        nClusters=nClusters, 
                        zAlloc=zAlloc,hyperParams=hyperParams,
                        yMat=yMat,wMat=wMat,nSubjects=nSubjects,
                        nFixedEffects=nFixedEffects,nTableNames=nTableNames,constants=constants)
    laplaceMode <- laplaceOut$value 
    # computation of the hessian matrix		
    laplaceHessian <- .pYGivenZW_Bernoulli_hessianMat(laplaceOut$par,zAlloc=zAlloc,nClusters=nClusters,hyperParams=hyperParams,
                                                      wMat=wMat,nSubjects=nSubjects,
                                                      nFixedEffects=nFixedEffects,
                                                      nSweeps=nSweeps,nTableNames=nTableNames)
    pY <- -nSubjects * laplaceMode + 0.5*log(det(laplaceHessian)) -
      (nClusters+nFixedEffects)*0.5*log(nSubjects)+(nClusters+nFixedEffects)*0.5*log(2*pi)
    margModPost <- pY+pZpX  
    output<-list(margModPost=margModPost,laplacePar=laplaceOut$par[(nClusters+1):(nClusters+nFixedEffects)],pY=pY)
  } else {
    margModPost <- pZpX  
    output<-list(margModPost=margModPost)
  }
  return(output)
  
}

setHyperparams<-function(shapeAlpha=NULL,rateAlpha=NULL,aPhi=NULL,mu0=NULL,Tau0=NULL,TauIndep0=NULL,R0=NULL,RIndep0=NULL,
                         kappa0=NULL,kappa1=NULL,nu0=NULL,muTheta=NULL,sigmaTheta=NULL,dofTheta=NULL,muBeta=NULL,sigmaBeta=NULL,dofBeta=NULL,
                         shapeTauEpsilon=NULL,rateTauEpsilon=NULL,aRho=NULL,bRho=NULL,atomRho=NULL,shapeSigmaSqY=NULL,scaleSigmaSqY=NULL,pQuantile=NULL,
                         rSlice=NULL,truncationEps=NULL,shapeTauCAR=NULL,rateTauCAR=NULL,shapeNu=NULL,scaleNu=NULL,initAlloc=NULL){
  out<-list()
  if (!is.null(shapeAlpha)){
    out$shapeAlpha<-shapeAlpha
  }
  if (!is.null(rateAlpha)){
    out$rateAlpha<-rateAlpha
  }
  if (!is.null(aPhi)){
    out$aPhi<-aPhi
  }
  if (!is.null(mu0)){
    out$mu0<-mu0
  }
  if (!is.null(Tau0)){
    out$Tau0<-Tau0
  }
  if (!is.null(TauIndep0)){
    out$TauIndep0<-TauIndep0
  }
  if (!is.null(R0)){
    out$R0<-R0
  }
  if (!is.null(RIndep0)){
    out$RIndep0<-RIndep0
  }
  if (!is.null(kappa0)){
    out$kappa0<-kappa0
  }
  if (!is.null(kappa1)){
    out$kappa1<-kappa1
  }
  if (!is.null(nu0)){
    out$nu0<-nu0
  }
  if (!is.null(muTheta)){
    out$muTheta<-muTheta
  }
  if (!is.null(sigmaTheta)){
    out$sigmaTheta<-sigmaTheta
  }
  if (!is.null(dofTheta)){
    out$dofTheta<-dofTheta
  }
  if (!is.null(muBeta)){
    out$muBeta<-muBeta
  }
  if (!is.null(sigmaBeta)){
    out$sigmaBeta<-sigmaBeta
  }
  if (!is.null(dofBeta)){
    out$dofBeta<-dofBeta
  }
  if (!is.null(shapeTauEpsilon)){
    out$shapeTauEpsilon<-shapeTauEpsilon
  }
  if (!is.null(rateTauEpsilon)){
    out$rateTauEpsilon<-rateTauEpsilon
  }
  if (!is.null(aRho)){
    out$aRho<-aRho
  }
  if (!is.null(bRho)){
    out$bRho<-bRho
  }
  if (!is.null(atomRho)){
    out$atomRho<-atomRho
  }
  if (!is.null(shapeSigmaSqY)){
    out$shapeSigmaSqY<-shapeSigmaSqY
  }
  if (!is.null(scaleSigmaSqY)){
    out$scaleSigmaSqY<-scaleSigmaSqY
  }
  if (!is.null(pQuantile)){
    out$pQuantile<-pQuantile
  }
  if (!is.null(rSlice)){
    out$rSlice<-rSlice
  }
  if (!is.null(truncationEps)){
    out$truncationEps<-truncationEps
  }
  if (!is.null(shapeTauCAR)){
    out$shapeTauCAR<-shapeTauCAR
  }
  if (!is.null(rateTauCAR)){
    out$rateTauCAR<-rateTauCAR
  }	
  if (!is.null(shapeNu)){
    out$shapeNu<-shapeNu
  }
  if (!is.null(scaleNu)){
    out$scaleNu<-scaleNu
  }
  if (!is.null(initAlloc)){
    out$initAlloc<-initAlloc
  }
  return(out)
}


# Compute Ratio of variances (for extra variation case)
computeRatioOfVariance<-function(runInfoObj){
  
  directoryPath=NULL
  extraYVar=NULL
  fileStem=NULL
  reportBurnIn=NULL
  nSweeps=NULL
  nFilter=NULL
  nSubjects=NULL
  nPredictSubjects=NULL
  nBurn=NULL
  
  for (i in 1:length(runInfoObj)) assign(names(runInfoObj)[i],runInfoObj[[i]])
  
  if (extraYVar==FALSE) stop("The ratio of variances can only be computed when extra variation in the response is included in the model.")
  
  # Construct the number of clusters file name
  nClustersFileName <- file.path(directoryPath,paste(fileStem,'_nClusters.txt',sep=''))
  # Construct the allocation file name
  zFileName <- file.path(directoryPath,paste(fileStem,'_z.txt',sep=''))
  # Construct the allocation file name
  thetaFileName <- file.path(directoryPath,paste(fileStem,'_theta.txt',sep=''))
  # Construct the allocation file name
  epsilonFileName <- file.path(directoryPath,paste(fileStem,'_epsilon.txt',sep=''))
  
  # Restrict to sweeps after burn in
  firstLine<-ifelse(reportBurnIn,nBurn/nFilter+2,1)
  lastLine<-(nSweeps+ifelse(reportBurnIn,nBurn+1,0))/nFilter
  
  ratioOfVariance<-rep(0,length(lastLine-firstLine+1))
  for(sweep in firstLine:lastLine){
    currMaxNClusters<-scan(nClustersFileName,what=integer(),skip=sweep-1,n=1,quiet=T)
    zCurr<-1+scan(zFileName,what=integer(),skip=sweep-1,n=nSubjects+nPredictSubjects,quiet=T)
    zCurr<-zCurr[1:nSubjects]
    thetaCurr<-scan(thetaFileName,what=double(),skip=sweep-1,n=currMaxNClusters,quiet=T)
    thetaCurr<-thetaCurr[zCurr]
    vTheta<-var(thetaCurr)
    epsilonCurr<-scan(epsilonFileName,what=double(),skip=sweep-1,n=nSubjects,quiet=T)
    vEpsilon<-var(epsilonCurr)
    ratioOfVariance[sweep-firstLine+1]<-vTheta/(vTheta+vEpsilon)
    
  }
  return(ratioOfVariance)
  
}

# Function to convert a vector to a symmetric matrix (upper triangle)
vec2mat<-function (data = NA, nrow = 1) 
{
  nData <- length(data)
  
  nElem <- round(nrow * (nrow + 1)/2)
  
  result <- matrix(NA, nrow = nrow, ncol = nrow)
  
  result[lower.tri(result, diag = FALSE)] <- data
  result[upper.tri(result, diag = FALSE)] <- t(result)[upper.tri(result)]
  diag(result)<-0
  
  return(result)
}

# Function to plot a heatmap representing the dissimilarity matrix
# Some re-ordering of the observations is happening automatically
heatDissMat<-function(dissimObj,main=NULL,xlab=NULL,ylab=NULL)
{
  
  nSbj<-dissimObj$disSimRunInfoObj$nSubjects
  
  col.labels<-c("0","0.5","1")
  colours <- colorRampPalette(c("white","black"))(10)
  
  dissMat<-vec2mat(dissimObj$disSimMat,nrow=nSbj)
  heatmap(1-dissMat, keep.dendro=FALSE,symm=TRUE, Rowv=NA, labRow=FALSE, labCol=FALSE, margins=c(4.5,4.5), col= colours ,main = main, xlab=xlab, ylab=ylab)
  color.legend(0.95,0.7,1,1,legend = col.labels, colours, gradient="y",align="rb")
  
}

# function to plot the trace of the global parameters 
globalParsTrace<-function(runInfoObj, parameters = "nClusters",plotBurnIn=FALSE,whichBeta=1){
  
  directoryPath=NULL
  fileStem=NULL
  nSweeps=NULL
  nFilter=NULL
  nSubjects=NULL
  nBurn=NULL
  reportBurnIn=NULL
  alpha= NULL
  
  for (i in 1:length(runInfoObj)) assign(names(runInfoObj)[i],runInfoObj[[i]])
  
  if (parameters=="mpp") {
    parametersIn<-"margModPost"
  } else {
    parametersIn<-parameters
  }
  
  # Construct the parameter file name
  parFileName <- file.path(directoryPath,paste(fileStem,"_",parametersIn,".txt",sep=""))
  
  # read the data in
  parData<-read.table(parFileName)
  
  if(parameters== "nClusters") ylabPar<- "Number of clusters"
  if(parameters=="mpp") ylabPar<-"Log marginal model posterior"
  if(parameters=="beta") ylabPar<-"beta"
  if(parameters=="alpha") {
    if (alpha < -1) {
      ylabPar<-"alpha"
    } else {
      stop("ERROR: Parameter alpha is random only for the Dirichlet process prior with random alpha.") 
    }
  }
  
  xlabPars<-"Sweeps (after burn in)"
  
  
  if(plotBurnIn==TRUE){
    if (parameters=="mpp") stop("The mpp is only computed after the burn in. Set plotBurnIn=FALSE.")
    if (reportBurnIn==TRUE) {
      
      rangeSweeps<-1:((nBurn+nSweeps)/nFilter)
      rangeParData<-1:(length(rangeSweeps)/nFilter)
      xlabPars<-"Sweeps (including the burn in)"
    } else {
      stop("The function globalParsTrace cannot plot the burn in because the reportBurnIn option in profRegr was not set to TRUE and therefore the burn in has not been recorded.")
    }
  } else {
    if (parameters=="mpp"){
      firstLine<-ifelse(reportBurnIn,nBurn/nFilter+2,1)
      lastLine<-(nSweeps+ifelse(reportBurnIn,nBurn+1,0))/nFilter
      rangeSweeps<-firstLine:lastLine#(nBurn+1):(nBurn+nSweeps)
      rangeParData<-1:(nSweeps)	
    } else {
      if (reportBurnIn==TRUE) {
        firstLine<-ifelse(reportBurnIn,nBurn/nFilter+2,1)
        lastLine<-(nSweeps+ifelse(reportBurnIn,nBurn+1,0))/nFilter
        rangeSweeps<-firstLine:lastLine#(nBurn+1):(nBurn+nSweeps)
        rangeParData<-rangeSweeps
      } else {
        firstLine<-ifelse(reportBurnIn,nBurn/nFilter+2,1)
        lastLine<-(nSweeps+ifelse(reportBurnIn,nBurn+1,0))/nFilter
        rangeSweeps<-(nBurn/nFilter+1):((nBurn+nSweeps)/nFilter)
        rangeParData<-1:(nSweeps/nFilter)	
        
      }
    }
  }
  
  if (parameters=="alpha" || parameters=="nClusters"){
    plot(rangeSweeps,parData[rangeParData,1],type="l",lty=1,col="red",ylab=ylabPar,xlab=xlabPars,cex.axis=1.5,cex.lab=1.5)
  } else if (parameters=="mpp"){
    plot(1:nSweeps,parData[,1],type="l",lty=1,col="red",ylab=ylabPar,xlab=xlabPars,cex.axis=1.5,cex.lab=1.5)
  } else if (parameters=="beta"){
    plot(rangeSweeps,parData[rangeParData,whichBeta],type="l",lty=1,col="red",ylab=ylabPar,xlab=xlabPars,cex.axis=1.5,cex.lab=1.5)
  }
  
}

# plot posterior predictive densities
plotPredictions<-function(outfile,runInfoObj,predictions,logOR=FALSE){
  
  # ignores fixed effects
  
  nPredictedSubjects=NULL	
  directoryPath=NULL
  fileStem=NULL
  yModel=NULL
  xModel=NULL
  logOddsRatio=NULL
  weibullFixedShape=NULL
  
  for (i in 1:length(runInfoObj)) assign(names(runInfoObj)[i],runInfoObj[[i]])
  
  if (yModel!="Bernoulli"&&yModel!="Normal"&&yModel!="Survival"&&yModel!="Quantile") stop("This function has been developed for Bernoulli, Normal, Quantile and Survival response only.")
  #	if (xModel=="Mixed") stop("This function has been developed for Discrete and Normal covariates only.")
  if (yModel=="Normal"||yModel=="Quantile") logOR<-FALSE
  
  #if (runInfoObj$nFixedEffects>0) print("Note that fixed effects are not processed in this function.")
  
  predictResponseFileName = file.path(runInfoObj$directoryPath,paste(runInfoObj$fileStem,'_predict.txt',sep=''))
  relScenarios<-read.table(predictResponseFileName,header=FALSE,skip=1)
  
  # sistemare a seconda se logOR e' stato calcolato o no
  if (logOR==FALSE) {
    preds<-predictions$predictedYPerSweep[,,1]		
  } else {
    if (!is.null(predictions$logORPerSweep)){
      preds<-predictions$logORPerSweep
    } else {
      stop("Log OR (odds ratios) cannot be plotted because they have not been computed by calcPredictions. Re-run calcPredictions with option fullSweepLogOR=TRUE.")
    }
  }
  
  # output file
  pdf(outfile,onefile=TRUE)
  
  # Relevant scenarios
  nPredictSubjects<-runInfoObj$nPredictSubjects
  
  denObj<-vector(mode="list")
  for(i in 1:nPredictSubjects){
    denObj[[i]]<-density(na.omit(preds[,i]))#,bw=0.8) # removed over smoothing of predictions
  }
  
  for(k in 1:nPredictSubjects){
    plotDF<-data.frame('logOddsRatio'=denObj[[k]]$x,'density'=denObj[[k]]$y)
    plotObj<-ggplot(plotDF)
    plotObj<-plotObj+geom_line(aes(x=logOddsRatio,y=density),size=0.2)
    plotObj<-plotObj+theme(legend.position="none")
    plotObj<-plotObj+labs(x=ifelse(logOR==TRUE,"Log OR of response","Response"))+theme(axis.title.x=element_text(size=15))+labs(y="Density")+theme(axis.title.y=element_text(size=15,angle=90))
    plotObj<-plotObj+theme(axis.text.x=element_text(size=15))+theme(axis.text.y=element_text(size=15))
    print(plotObj)
  }
  output<-dev.off()
}

#.onAttach <- function(...) {
#	message("I need to collect case studies of the usage of the package PReMiuM. I would really appreciate if you could email me at liveranis@gmail.com if you use the package.")
# I need to collect case studies of the impact of my work (including developing and maintaining the package PReMiuM). I would really appreciate if you could email me at liveranis@gmail.com if you use the package.\n 
#	packageStartupMessage("Help support the development of PReMiuM: please share how PReMiuM is helping your work. Visit http://www.silvialiverani.com/support-premium/ or email liveranis@gmail.com.")
#\}

#.onLoad <- function(...) {
#	message("message from .onLoad via message")
#	packageStartupMessage("message from .onLoad via packageStartupMessage\n")
#}

simBenchmark <- function(whichModel = "clusSummaryBernoulliDiscrete", 
                         nSweeps = 1000, nBurn = 1000, seedProfRegr = 123){
  
  modelSpecs<-match.fun(whichModel)()
  nClusters<-modelSpecs$nClusters
  clusSizes = rep(1:nClusters,modelSpecs$clusterSizes)
  
  #run the function to generate the sample data using the name of the distribution from the function list.
  inputs = generateSampleDataFile(modelSpecs)
  
  #create a vector 'known' that repeats "Known 1", "Known 2", etc for the total amount in known cluster 1, known cluster 2, etc.
  #
  known = NULL
  for (j in 1:nClusters){
    newdat = rep(paste("Known",j),table(clusSizes)[j])
    known = c(known, newdat)
  }
  
  #multiple 'if' statements to determine which parameters need to be filled in. 
  #Some distributions have more parameters than others.
  #
  if (exists("fixedEffectsNames",where = inputs)){
    runInfoObj<-profRegr(yModel=inputs$yModel, xModel=inputs$xModel, 
                         nSweeps=nSweeps, nBurn=nBurn, data=inputs$inputData, 
                         output="output",covNames = inputs$covNames,
                         fixedEffectsNames = inputs$fixedEffectNames, seed=seedProfRegr)
  } else {
    if (exists("discreteCovs",where = inputs)){
      runInfoObj<-profRegr(yModel=inputs$yModel, xModel=inputs$xModel, 
                           nSweeps=nSweeps, nBurn=nBurn, data=inputs$inputData, 
                           output="output",covNames = inputs$covNames,
                           discreteCovs = inputs$discreteCovs, 
                           continuousCovs = inputs$continuousCovs, seed=seedProfRegr)
    } else {
      runInfoObj<-profRegr(yModel=inputs$yModel, xModel=inputs$xModel, 
                           nSweeps=nSweeps, nBurn=nBurn, data=inputs$inputData, 
                           output="output",covNames = inputs$covNames, seed=seedProfRegr)
    }
  }
  
  #the rest of the PReMiuM steps to get the optimal clustering
  dissimObj<-calcDissimilarityMatrix(runInfoObj)
  clusObj<-calcOptimalClustering(dissimObj, maxNClusters = nClusters*2)
  #riskProfileObj<-calcAvgRiskAndProfile(clusObj)
  #clusterOrderObj<-plotRiskProfile(riskProfileObj,fileName)
  optAlloc<-clusObj$clustering
  
  #creates a data frame with the optimal clusters, Y values for the simulated data, 
  #and the known truth clusters
  output<-data.frame(clusterAllocation=as.factor(optAlloc),outcome=inputs$inputData$outcome,
                     generatingCluster=as.factor(known))
  return(output)
}
Any scripts or data that you put into this service are public.
PReMiuM documentation built on Nov. 14, 2023, 1:08 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
PReMiuM
Dirichlet Process Bayesian Clustering, Profile Regression

R/postProcess.R
In PReMiuM: Dirichlet Process Bayesian Clustering, Profile Regression

Documented in calcAvgRiskAndProfile calcDissimilarityMatrix calcOptimalClustering calcPredictions computeRatioOfVariance globalParsTrace heatDissMat is.wholenumber margModelPosterior plotPredictions plotRiskProfile profRegr setHyperparams simBenchmark summariseVarSelectRho

Try the PReMiuM package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

PReMiuM Dirichlet Process Bayesian Clustering, Profile Regression

R/postProcess.R In PReMiuM: Dirichlet Process Bayesian Clustering, Profile Regression

Documented in calcAvgRiskAndProfile calcDissimilarityMatrix calcOptimalClustering calcPredictions computeRatioOfVariance globalParsTrace heatDissMat is.wholenumber margModelPosterior plotPredictions plotRiskProfile profRegr setHyperparams simBenchmark summariseVarSelectRho

Try the PReMiuM package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

PReMiuM
Dirichlet Process Bayesian Clustering, Profile Regression

R/postProcess.R
In PReMiuM: Dirichlet Process Bayesian Clustering, Profile Regression