R/FunctionsforUser.r

######################################################################################
# Contains extra functions beyond MRH estimation that the user may want for analyzing
#	results.
######################################################################################

##################################################################################
# binOptimize: This function assists the user in finding the optimal number of bins
#	and the optimal time unit for analysis with the MRH library, and returns the
#	new times and censoring variable.
##################################################################################
FindBinWidth = function(time, delta, time.unit, maxStudyTime){

	if(missing(maxStudyTime)){	
		censortime = max(time)	
	} else {
		censortime = maxStudyTime
		print(noquote(paste('With the given study length, ', length(which(time > censortime & delta == 1)),
					' extra observed failures will be censored.')))
		cat(' \n')
		time[time > censortime] = censortime
	}

	if(time.unit == 's'){	
		wordtime.unit = 'seconds'
		censortime = censortime
	} else if(time.unit == 'min'){
		wordtime.unit = 'minutes'
		censortime = censortime*60
	} else if(time.unit == 'h'){
		wordtime.unit = 'hours'
		censortime = censortime*60*60
	} else if(time.unit == 'd'){
		wordtime.unit = 'days'
		censortime = censortime*24*60*60
	} else if(time.unit == 'w'){	
		wordtime.unit = 'weeks'
		censortime = censortime*7*24*60*60
	} else if(time.unit == 'mon'){	
		wordtime.unit = 'months'
		censortime = censortime*365.25/12*24*60*60
	} else if(time.unit == 'y'){	
		wordtime.unit = 'years'
		censortime = censortime*365.25*24*60*60
	}	
		
	print(noquote(paste('The mean failure time is ', round(mean(time), 3), wordtime.unit)))
	print(noquote(paste('The median failure time is ', round(median(time), 3), wordtime.unit)))
	print(noquote(paste('The range of failure times is ', round(min(time), 3), 'to',
				round(max(time), 3), wordtime.unit)))
	cat(' \n')
	
	Mpossible = 2:10
	time.perbin = censortime/2^Mpossible
	
	orig.perbin = as.data.frame(cbind(time.perbin, time.perbin/60, time.perbin/(60*60),
									  time.perbin/(60*60*24), time.perbin/(60*60*24*7),
									  time.perbin/(60*60*24*365.25/12), time.perbin/(60*60*24*365.25)))
	names(orig.perbin) = c('secs', 'mins', 'hours', 'days', 'weeks', 'months', 'years')
	row.names(orig.perbin) = paste('M', Mpossible, sep = '')
	
	return(orig.perbin)
	
}

###########################################################################################
# The AnalyzeMultiple function calculates the estimates and bounds 
###########################################################################################
AnalyzeMultiple = function(datalist, fileNames, alpha.level = 0.05, maxStudyTime, GR = TRUE){
	
	censortime = maxStudyTime
	
	if(missing(datalist) & missing(fileNames)){ stop("Need a list of data sets or the file names 
													 for the data sets.") }
	if(missing(fileNames)){	numsets = length(datalist)	
	} else { numsets = length(fileNames)	}

	if(missing(fileNames)){	pdata = MRH(datalist[[1]])
	} else {	pdata = MRH(read.table(fileNames[1], header = TRUE))	}

	nonph.model = TRUE
	if(substr(colnames(pdata)[2], 3, 3) == ""){	nonph.model = FALSE	}
	pdata = pdata[,-c(1,ncol(pdata))]
	if(nonph.model == FALSE){		
		M = log(length(which(substr(colnames(pdata), 1, 1) == 'd')))/log(2)
		numhazards = 1
		numphbetas = ncol(pdata)-2^M*2
		if(numphbetas > 0){ phbetanames = names(pdata)[(2^M+1):(ncol(pdata)-2^M-1)]	}
		length.uniques = sapply(1:ncol(pdata), function(x) length(unique(pdata[,x])))
		pruned.Rmps = which(length.uniques == 1 & pdata[1,] == 0.5 & substr(colnames(pdata), 1, 3) == 'Rmp')
		if(length(pruned.Rmps) > 0){
            if(length(which(substr(colnames(pdata), 1, 3) == 'gam')) > 0){
                pruned.Rmps = c(pruned.Rmps, pruned.Rmps+2^M-1)
            }
			gr.keep.index = c((2^M+1):ncol(pdata))[-c(pruned.Rmps-2^M)]
		} else {	gr.keep.index = c((2^M+1):ncol(pdata))	}
	} else {
		firstgroup = substr(colnames(pdata)[2], 4, 4)
		dnames = colnames(pdata)[which(substr(colnames(pdata), 1, 1) == 'd')]
		numbins = length(which(unlist(strsplit(dnames, '_'))[1:length(dnames)*2] == firstgroup))
		M = log(numbins)/log(2)
		hazgroups = unique(unlist(strsplit(dnames, '_'))[1:length(dnames)*2])
		numhazards = length(hazgroups)
		numphbetas = ncol(pdata)-(2^M*2*numhazards + 2^M*(numhazards-1))-1
		if(numphbetas > 0){ phbetanames = names(pdata)[(2^M*numhazards+1):(ncol(pdata)-2^M*numhazards-1)]	}
		length.uniques = sapply(1:ncol(pdata), function(x) length(unique(pdata[,x])))
		pruned.Rmps = which(length.uniques == 1 & pdata[1,] == 0.5 & substr(colnames(pdata), 1, 3) == 'Rmp')
		if(length(pruned.Rmps) > 0){
            if(length(which(substr(colnames(pdata), 1, 3) == 'gam')) > 0){
                pruned.Rmps = c(pruned.Rmps, pruned.Rmps+(2^M-1)*numhazards)
            }
			gr.keep.index = c((2^M*numhazards+1):ncol(pdata))[-c(pruned.Rmps-2^M*numhazards)]
			keep.names = colnames(pdata)[gr.keep.index]
			beta.nph.start = which(substr(keep.names, nchar(keep.names)-3, nchar(keep.names))=='bin1')[1]
			gr.keep.index = gr.keep.index[-c(beta.nph.start:(beta.nph.start-1+2^M*(numhazards-1)))]
		} else {
			gr.keep.index = c((2^M*numhazards+1):ncol(pdata))
			keep.names = colnames(pdata)[gr.keep.index]
			beta.nph.start = which(substr(keep.names, nchar(keep.names)-3, nchar(keep.names))=='bin1')[1]
			gr.keep.index = gr.keep.index[-(beta.nph.start:(beta.nph.start-1+2^M*(numhazards-1)))]
		}
	}
	
	hazrate = survrate = cumulhaz = d = H = Rmp = beta = betanph = k = gamma = mcmctemplist = burnIn = numIters = NULL
	gr.datalist = NULL
	for(filectr in 1:numsets){
		
		#### Store the parameter file in pdata ###
		if(missing(fileNames)){		temp.pdata = datalist[[filectr]]
		} else {	temp.pdata = read.table(fileNames[filectr], header = TRUE)	}
		pdata = MRH(temp.pdata)
		gr.datalist = c(gr.datalist, list(temp.pdata[,-c(1,ncol(temp.pdata))][,gr.keep.index]))
	
		### Keep track of the burn in for each data set.	
		### This is needed for the Gelman-Rubin test.
		burnIn = c(burnIn, pdata[1,1])
		numIters = c(numIters, pdata[nrow(pdata),1])
		thinval = pdata[2,1]-pdata[1,1]
		
		#### Calculate the medians and alpha-level quantiles ####	
		pdata.summary = summary(pdata, alpha.level = alpha.level, maxStudyTime = maxStudyTime)
		hazrate = cbind(hazrate, as.matrix(pdata.summary$hazardRate))
		survrate = cbind(survrate, as.matrix(pdata.summary$SurvivalCurve))
		cumulhaz = cbind(cumulhaz, as.matrix(pdata.summary$CumulativeHazard))
		if('beta' %in% names(pdata.summary)){	beta = cbind(beta, as.matrix(pdata.summary$beta))	
		} else if('betaPH' %in% names(pdata.summary)){
			beta = cbind(beta, as.matrix(pdata.summary$betaPH))
			betanph = cbind(betanph, as.matrix(pdata.summary$betaNPH))
		}
		d = cbind(d, as.matrix(pdata.summary$d))
		H = cbind(H, as.matrix(pdata.summary$H))
		Rmp = cbind(Rmp, as.matrix(pdata.summary$Rmp))
		if('k' %in% names(pdata.summary)){	k = cbind(k, as.matrix(pdata.summary$k))	}
		if('gamma' %in% names(pdata.summary)){	gamma = cbind(gamma, as.matrix(pdata.summary$gamma))	}
			
	}
	
	hazrate.full = cbind(sapply(1:nrow(hazrate), function(x) median(hazrate[x,(1:numsets-1)*3+1])),
						 sapply(1:nrow(hazrate), function(x) median(hazrate[x,(1:numsets-1)*3+2])),
						 sapply(1:nrow(hazrate), function(x) median(hazrate[x,(1:numsets-1)*3+3])))
	survrate.full = cbind(sapply(1:nrow(survrate), function(x) median(survrate[x,(1:numsets-1)*3+1])),
						  sapply(1:nrow(survrate), function(x) median(survrate[x,(1:numsets-1)*3+2])),
						  sapply(1:nrow(survrate), function(x) median(survrate[x,(1:numsets-1)*3+3])))
	cumulhaz.full = cbind(sapply(1:nrow(cumulhaz), function(x) median(cumulhaz[x,(1:numsets-1)*3+1])),
						  sapply(1:nrow(cumulhaz), function(x) median(cumulhaz[x,(1:numsets-1)*3+2])),
						  sapply(1:nrow(cumulhaz), function(x) median(cumulhaz[x,(1:numsets-1)*3+3])))
	
	d.full = cbind(sapply(1:nrow(d), function(x) median(d[x,(1:numsets-1)*3+1])),
				   sapply(1:nrow(d), function(x) median(d[x,(1:numsets-1)*3+2])),
				   sapply(1:nrow(d), function(x) median(d[x,(1:numsets-1)*3+3])))
	H.full = cbind(sapply(1:nrow(H), function(x) median(H[x,(1:numsets-1)*3+1])),
				   sapply(1:nrow(H), function(x) median(H[x,(1:numsets-1)*3+2])),
				   sapply(1:nrow(H), function(x) median(H[x,(1:numsets-1)*3+3])))
	Rmp.full = cbind(sapply(1:nrow(Rmp), function(x) median(Rmp[x,(1:numsets-1)*3+1])),
					 sapply(1:nrow(Rmp), function(x) median(Rmp[x,(1:numsets-1)*3+2])),
					 sapply(1:nrow(Rmp), function(x) median(Rmp[x,(1:numsets-1)*3+3])))
	if('beta' %in% names(pdata.summary)){
		beta.full = cbind(sapply(1:nrow(beta), function(x) median(beta[x,(1:numsets-1)*3+1])),
						  sapply(1:nrow(beta), function(x) median(beta[x,(1:numsets-1)*3+2])),
						  sapply(1:nrow(beta), function(x) median(beta[x,(1:numsets-1)*3+3])))
	} else if('betaPH' %in% names(pdata.summary)){
		beta.full = rbind(cbind(sapply(1:nrow(beta), function(x) median(beta[x,(1:numsets-1)*3+1])),
								sapply(1:nrow(beta), function(x) median(beta[x,(1:numsets-1)*3+2])),
								sapply(1:nrow(beta), function(x) median(beta[x,(1:numsets-1)*3+3]))),
						  cbind(sapply(1:nrow(betanph), function(x) median(betanph[x,(1:numsets-1)*3+1])),
								sapply(1:nrow(betanph), function(x) median(betanph[x,(1:numsets-1)*3+2])),
								sapply(1:nrow(betanph), function(x) median(betanph[x,(1:numsets-1)*3+3]))))
	}
	if('k' %in% names(pdata.summary)){
		k.full = cbind(sapply(1:nrow(k), function(x) median(k[x, (1:numsets-1)*3+1])),
					   sapply(1:nrow(k), function(x) median(k[x, (1:numsets-1)*3+2])),
					   sapply(1:nrow(k), function(x) median(k[x, (1:numsets-1)*3+3])))
	}
	if('gamma' %in% names(pdata.summary)){
		gamma.full = cbind(sapply(1:nrow(gamma), function(x) median(gamma[x, (1:numsets-1)*3+1])),
						   sapply(1:nrow(gamma), function(x) median(gamma[x, (1:numsets-1)*3+2])),
						   sapply(1:nrow(gamma), function(x) median(gamma[x, (1:numsets-1)*3+3])))
	}
	
	############ Create the summary table #################
	colnames(hazrate.full) = colnames(pdata.summary$hazardRate)
	rownames(hazrate.full) = rownames(pdata.summary$hazardRate)
	colnames(survrate.full) = colnames(pdata.summary$SurvivalCurve)
	rownames(survrate.full) = rownames(pdata.summary$SurvivalCurve)
	colnames(cumulhaz.full) = colnames(pdata.summary$CumulativeHazard)
	rownames(cumulhaz.full) = rownames(pdata.summary$CumulativeHazard)
	colnames(d.full) = colnames(pdata.summary$d)
	rownames(d.full) = rownames(pdata.summary$d)
	colnames(H.full) = colnames(pdata.summary$H)
	rownames(H.full) = rownames(pdata.summary$H)
	colnames(Rmp.full) = colnames(pdata.summary$Rmp)
	rownames(Rmp.full) = rownames(pdata.summary$Rmp)
	if('beta' %in% names(pdata.summary)){
		colnames(beta.full) = colnames(pdata.summary$beta)
		rownames(beta.full) = rownames(pdata.summary$beta)
	} else if('betaPH' %in% names(pdata.summary)){
		colnames(beta.full) = colnames(pdata.summary$betaPH)
		rownames(beta.full) = c(rownames(pdata.summary$betaPH), rownames(pdata.summary$betaNPH))
	}

	if(GR == TRUE){
		if(length(unique(burnIn)) > 1){
			warning("Burn-in number is not equal across all data sets.  Gelman-Rubin diagnostic test
				 cannot be performed")
			gr = NULL
		}
		if(length(unique(numIters)) > 1){
			warning("Number of MCMC iterations is not equal across all data sets.  Gelman-Rubin diagnostic test
					cannot be performed")
			gr = NULL
		} else {
			gr = gelman.scale(gr.datalist, number.inchain = (numIters[1]-burnIn[1])/thinval, numsets = numsets)
		}
	} 
	output = list(hazardRate = hazrate.full)
	if('beta' %in% names(pdata.summary)){	output = c(output, list(beta = beta.full))	}
	output = c(output, list(SurvivalCurve = survrate.full, CumulativeHazard = cumulhaz.full, d = d.full, 
							H = H.full, Rmp = Rmp.full))
	if('gamma' %in% names(pdata.summary)){	output = c(output, list(gamma = gamma.full))	}
	if('k' %in% names(pdata.summary)){	output = c(output, list(k = k.full))	}
	if(GR == TRUE){	output = c(output, list(gelman.rubin = gr))	}
	
	return(output)
}

##########################################################################################
# The DIC function calculates DIC using the log likelihood.
##########################################################################################
DIC = function(mrhobject, n){
	
	if(is.MRH(mrhobject) == FALSE){ stop("Object is not an MRH object")	}
	if(is.null(colnames(mrhobject))){ if(names(mrhobject)[1] == 'summary'){
		mrhobject = MRH(read.table(paste(mrhobject$outfolder, '/MCMCchains.txt', sep = ''), header = TRUE))
	}}
	nonph.model = TRUE
	if(substr(colnames(mrhobject)[2], 3, 3) == ""){	nonph.model = FALSE	}
	mrhobject = mrhobject[,-1]
	
	if(nonph.model == FALSE){		
		M = log(length(which(substr(colnames(mrhobject), 1, 1) == 'd')))/log(2)
		numhazards = 1
	} else {
		firstgroup = substr(colnames(mrhobject)[2], 4, 4)
		dnames = colnames(mrhobject)[which(substr(colnames(mrhobject), 1, 1) == 'd')]
		numbins = length(which(unlist(strsplit(dnames, '_'))[1:length(dnames)*2] == firstgroup))
		M = log(numbins)/log(2)
		hazgroups = unique(unlist(strsplit(dnames, '_'))[1:length(dnames)*2])
		numhazards = length(hazgroups)
	}
	
	numparams = ncol(mrhobject) - 1 - 2^M*numhazards - 2^M*(numhazards-1)
	Rmp.index = which(substr(colnames(mrhobject), 1, 3) == 'Rmp')
	pruned.Rmps = which(colMeans(mrhobject[,Rmp.index]) == 0.5 &
						sapply(Rmp.index, function(x) var(mrhobject[,x])) == 0)
	numparams = numparams - length(pruned.Rmps)

	neg2loglik = mrhobject[,ncol(mrhobject)]
	neg2loglik.summ = as.data.frame(matrix(summary(neg2loglik), ncol = 1))
    row.names(neg2loglik.summ) = names(summary(neg2loglik))
    names(neg2loglik.summ) = 'value'
    
	DIC = .5*var(neg2loglik) + mean(neg2loglik)
	AIC = max(2*numparams + neg2loglik)
	BIC = NA
	if(!missing(n)){
		BIC = max(neg2loglik + numparams*log(n))
	} else {
		warning("Need number of subjects in data set (n) to calculate BIC. This number can be found in the MCMCInfo.txt file in the output folder.")
	}
	ICtable = as.data.frame(matrix(c(DIC, AIC, BIC), ncol = 1))
	row.names(ICtable) = c('DIC', 'AIC', 'BIC')
    names(ICtable) = 'value'
	return(list(neg2loglik.summ = neg2loglik.summ, ICtable = ICtable))
	
}		

##########################################################################################
# The CalcFunction function calculates the hazard rate, survival rate, and/or 
#	cumulative hazard rate given an MRH object.
##########################################################################################
CalcFunction = function(mrhobject, function.type = c('h', 'H', 'S'), maxStudyTime, alpha.level = 0.05){

	censortime = NULL
	if(!missing(maxStudyTime)){	censortime = maxStudyTime	}
	################ Find M, get the chains #############
	if(is.MRH(mrhobject) == FALSE){ stop("Object is not an MRH object")	}
	if(is.null(colnames(mrhobject))){ if(names(mrhobject)[1] == 'summary'){
		censortime = mrhobject$maxStudyTime
		mrhobject = MRH(read.table(paste(mrhobject$outfolder, '/MCMCchains.txt', sep = ''), header = TRUE))
	}}
	if(is.null(censortime) & 'h' %in% function.type){ stop("Maximum study time needed for hazard rate calculation")	}
	nonph.model = TRUE
	if(substr(colnames(mrhobject)[2], 3, 3) == ""){	nonph.model = FALSE	}
	mrhobject = mrhobject[,-1]
	
	if(nonph.model == FALSE){		
		M = log(length(which(substr(colnames(mrhobject), 1, 1) == 'd')))/log(2)
		numhazards = 1
	} else {
		firstgroup = substr(colnames(mrhobject)[2], 4, 4)
		dnames = colnames(mrhobject)[which(substr(colnames(mrhobject), 1, 1) == 'd')]
		numbins = length(which(unlist(strsplit(dnames, '_'))[1:length(dnames)*2] == firstgroup))
		M = log(numbins)/log(2)
		hazgroups = unique(unlist(strsplit(dnames, '_'))[1:length(dnames)*2])
		numhazards = length(hazgroups)
	}
	q.probs = c(alpha.level/2, .5, 1-alpha.level/2)
	if('h' %in% function.type){	binwidth = censortime/2^M	}
	
	############# Calculate hazard rate ##############
	if('h' %in% function.type){
		hazrate = sapply(1:(2^M*numhazards), function(x) quantile(mrhobject[,x], probs = q.probs))/binwidth
		dnames = paste('h.bin', 1:2^M, sep = '')
		if(numhazards > 1){
			dnames = paste(rep(dnames, numhazards), rep(paste('.group', 1:numhazards, sep = ''), each = 2^M), sep = '')
		}
		colnames(hazrate) = dnames
		rownames(hazrate) = paste('q', c(alpha.level/2, .5, 1-alpha.level/2), sep = '.')
		output = list(hazrate = hazrate)
	} 
	############# Calculate the cumulative hazard and/or survival functions ##############
	if('H' %in% function.type || 'S' %in% function.type){
		mcmc.cumulhaz = NULL
		for(hazctr in 1:numhazards){
			mcmc.cumulhaz = cbind(mcmc.cumulhaz, matrix(0, ncol = 1, nrow = nrow(mrhobject)))
			for(colctr in 1:2^M){
				mcmc.cumulhaz = cbind(mcmc.cumulhaz, mcmc.cumulhaz[,colctr+(hazctr-1)*(2^M+1)]+mrhobject[,colctr+(hazctr-1)*2^M])
			}
		}
		if('H' %in% function.type){
			cumulhaz = sapply(1:((2^M+1)*numhazards), function(x) quantile(mcmc.cumulhaz[,x], probs = q.probs))
			cumulnames = c('H.start', paste('H.bin', 1:2^M, sep = ''))
			if(numhazards > 1){
				cumulnames = paste(rep(cumulnames, numhazards), rep(paste('.group', 1:numhazards, sep = ''), each = 2^M+1), sep = '')
			}
			colnames(cumulhaz) = cumulnames
			rownames(cumulhaz) = paste('q', c(alpha.level/2, .5, 1-alpha.level/2), sep = '.')
			if('h' %in% function.type){	output = c(output, list(cumulhaz = cumulhaz))
			} else {	output = list(cumulhaz = cumulhaz)	}
		}
		if('S' %in% function.type){
			mcmc.St = exp(-mcmc.cumulhaz)
			survfxn = sapply(1:((2^M+1)*numhazards), function(x) quantile(mcmc.St[,x], probs = q.probs))
			survnames = c('S.start', paste('S.bin', 1:2^M, sep = ''))
			if(numhazards > 1){
				survnames = paste(rep(survnames, numhazards), rep(paste('.group', 1:numhazards, sep = ''), each = 2^M+1), sep = '')
			}
			colnames(survfxn) = survnames
			rownames(survfxn) = paste('q', c(alpha.level/2, .5, 1-alpha.level/2), sep = '.')
			if(length(function.type) > 1){ output = c(output, list(survfunction = survfxn))
			} else {	output = list(survfunction = survfxn)	}
		}
	}
	return(output)
}

Try the MRH package in your browser

Any scripts or data that you put into this service are public.

MRH documentation built on May 2, 2019, 11:10 a.m.