R/reports-peaks.R

Defines functions pack.admin.input.peaks locus.likes.peaks checkStutter combineStutter getPeakContributors plotline simplify.locus.names CSP.heights.plot representation meanRfu get.representation.rfu reference.tables.peaks formatRef formatAllele isSingleRefReplicatedAll isSingleRefReplicatedCertain callLocusPeaks callReplicatePeaks getLocusCertains getReplicateCertains filterCertains unattribForRefs unattributablePeaks plotUnnatributablePeaks hypothesis.generator.peaks removeContribs local.likelihood.table.reformatter.peaks chosen.parameter.table.reformatter.peaks file.inputs.table.reformatter.peaks seedTable getDNAcontDeg DNAcontDeg create.hypothesis.string.peaks create.contributor.table common.report.section.peaks locus.unusual unusual.alleles.peaks getHeightsQ getQpeaks pack.genetics.for.peaks.reports getHeightsUnattrib minorAsDropin allele.report.peaks checkParamNearBoundaries output.report.peaks

Documented in allele.report.peaks CSP.heights.plot locus.likes.peaks output.report.peaks pack.admin.input.peaks

pack.admin.input.peaks <- function(peaksFile, refFile, caseName='dummy',databaseFile=NULL, kit=NULL, linkageFile=NULL, detectionThresh=20, outputPath=getwd() ) {
	# Packs and verifies administrative information.
	# Documentation in man directory.
    	paths <- c(peaksFile, refFile) 
	if(!is.null(databaseFile)) paths <- c(databaseFile, paths, recursive=TRUE)
	if(!is.null(linkageFile)) paths <- c(linkageFile, paths, recursive=TRUE)
	for(path in paths) {
		if(!file.exists(path))
			stop(paste(path, "does not exist."))
		else { 
			info <- file.info(path)
			if(info$isdir) stop(paste(path, "is not a file."))
      		}
    		} # loop over files.
	if(file.exists(outputPath) & !file.info(outputPath)$isdir) 
	stop(paste(outputPath, " exists and is not a directory."))
	if(!is.null(kit))
		{
		if(!kit%in%c("DNA17","Identifiler","SGMplus"))
			{
			stop(paste0(kit, " is not a default database supplied with likeLTD."))
			}
		}
	admin <- list(caseName=caseName,
                databaseFile=databaseFile,
                linkageFile=linkageFile,
                peaksFile=peaksFile,
                refFile=refFile,
                outputPath=outputPath,
		kit=kit,
		detectionThresh = detectionThresh )
	return(admin)}

locus.likes.peaks <- function(hypothesis,results,...){
	# Generate locus likelihoods from overall likelihood
	# 	hypothesis: generated by either defence.hypothesis() or 
      #     prosecution.hypothesis()
	# results: results from do.call(optim,params)
	model <- create.likelihood.vectors.peaks(hypothesis)
	arguments <- relistArguments.peaks(results$optim$bestmem, hypothesis, ...)
	likes <- do.call(model,arguments)
	likes <- likes$objectives * likes$penalties
	}


# function to check if stutter
checkStutter = function(x,alleles,heights,stutterPos=-1,upperThresh=0.15,lowerThresh=0.05)
	{
	thisIndex = which(alleles==x)
	parentIndex = which(alleles==x-stutterPos)
	if(length(parentIndex)>0)
		{
		# x>15% allelic
		if(heights[thisIndex]/heights[parentIndex]>=upperThresh) return(2)
		# 5%<x<15% uncertain
		if((heights[thisIndex]/heights[parentIndex]<upperThresh)&(heights[thisIndex]/heights[parentIndex]>=lowerThresh)) return(1)
		# x<5% = non-alleleic
		if(heights[thisIndex]/heights[parentIndex]<lowerThresh) return(0)
		} else {
		# no parent peak (not in stutter position)
		return(NA)
		}
	}
# combine stutter calls for overall call
combineStutter = function(stutterCall,doubleCall,overCall)
	{
	calls = c(stutterCall,doubleCall,overCall)
	if(any(calls==0,na.rm=TRUE)) return(0)
	if(any(calls==1,na.rm=TRUE)) return(1)
	return(2)
	}

getPeakContributors = function(locusPeaks,locusRefs)
	{
	conts = list()
	for(i in 1:length(locusPeaks))
		{
		conts[[length(conts)+1]] = which(sapply(locusRefs,FUN=function(x) any(as.numeric(x)%in%as.numeric(locusPeaks[i]))))
		}
	return(conts)
	}

# function to plot a line with multiple colours depending on the known contributors
plotline = function(height,size,contributors,colours)
	{
	if(length(contributors)==0) 
		{
		lines(c(size,size),c(0,height),col="black",lwd=3,lend=1)
		} else {
		# solid line
		lines(c(size,size),c(0,height),col=colours[contributors[1]],lwd=3,lend=1)
		if(length(contributors)>1)
			{
			# hexadecimal goes to 15
			max = 15
			# number of contributors at this peak
			ncont = length(contributors)
			# how large we can make each subline
			factor = floor(max/(ncont-1))
			factor = 2
			for(i in 1:(ncont-1))
				{
				# hexadecimal code for lines on/off scheme for each contributor
				lineScheme = paste(c(toString(as.hexmode((ncont-i)*factor)),toString(as.hexmode(i*factor))),collapse="",sep="")
				# plot the subline for this individual
				lines(c(size,size),c(0,height),col=colours[contributors[i+1]],lwd=3,lend=1,lty=lineScheme)
				}
			}
		}
	}


   
simplify.locus.names = function(names)
    {
    index = grep("D[0-9]{1,}S{1}",names)
    names[index] = sapply(names[index],FUN=function(a) strsplit(a,split="S")[[1]][1])
    return(names)
    }

# function to plot the CSP data as RFU against mean BP for each locus
CSP.heights.plot = function(csp,refs,dbFile=NULL,kit=NULL,outputFile=NULL,
				toPlot=NULL,detectThresh=NULL,
				uncThresh=0.05,stutterThresh=0.15,
				doStutter=FALSE,replicate=1,...)
	{
	# get database
    if(is.null(dbFile)&is.null(kit)) kit = "DNA17"
    alleleDb = load.allele.database(dbFile,kit)
	# get K profiles
	Q = refs[which(unlist(refs[,1])),-1]
	# make Q the first individual
	refIndex = which(unlist(refs[,1]))
	refs = rbind(refs[refIndex,],refs[-refIndex,])
	# colours
	contColours = rainbow(nrow(refs))
	nK = length(contColours)
	cspProfile = sapply(csp$alleles,FUN=convert.to.binary,simplify=FALSE)
        cspProfile = cspProfile = t(sapply(cspProfile,FUN=function(x) sapply(x,FUN=unlist,simplify=FALSE)))
        alleleDb = ethnic.database.lus(colnames(alleleDb)[5], colnames(cspProfile), alleleDb)
        alleleDb = missing.alleles.peaks(alleleDb, cspProfile, refs[which(unlist(refs[,1])),-1,drop=FALSE], refs)
	# locus names
	if(is.null(toPlot))
		{
		loci = rownames(csp$alleles[[replicate]])
		} else {
		loci = toPlot
		}
	# make detectionThresh for each locus
	if(length(detectThresh)==1) 
		{
		detectThresh = rep(list(detectThresh),times=length(loci))
		names(detectThresh) = loci
		}
	# stutter calls 
	if(doStutter) 
		{
		stutters = make.allelic.calls(csp,stutterThresh)
		}
	# set plotting parameters that are constant across loci
	dims = rep(ceiling(sqrt(length(loci))),times=2)
	YLIM = c(0,max(as.numeric(unlist(csp$heights[[replicate]][loci,])),na.rm=TRUE))
	YLIM = YLIM + c(0,YLIM[2]/10)
	if(is.na(YLIM[2])|is.infinite(YLIM[2])) YLIM[2] = max(unlist(detectThresh))*2
	if(!is.null(outputFile)) pdf(outputFile)
	par(mfrow=dims,mar=c(2,2,2,0.5))
	# loop over loci
	for(j in 1:length(loci))
		{
		print(paste0("...",loci[j],"..."))
		# remove NAs
		index = which(!is.na(csp$alleles[[replicate]][loci[j],]))
		alleles = as.numeric(unlist(csp$alleles[[replicate]][loci[j],][index]))
		heights = as.numeric(unlist(csp$heights[[replicate]][loci[j],][index]))
		thisDB = alleleDb[[which(names(alleleDb)==loci[j])]]
		dbIndex = sapply(alleles,FUN=function(x) which(rownames(thisDB)==x))
		# set plotting parameters that are specific to a locus
		if(length(alleles)==0)
			{
			XLIM = c(0,1)
			} else {
			XLIM = range(thisDB[dbIndex,2],na.rm=TRUE)
			}
		XLIM = XLIM + c(-2,2)
		flag = (j-1)%%dims[1]==0
		# make an empty plot
		plot(NA,ylim=YLIM,xlim=XLIM,ylab="RFU",xlab="Size",main=loci[j],yaxt=ifelse(flag,'s','n'),...)
		# add baseline
		abline(h=0)
		# add detection threshold
		if(!is.null(detectThresh)) abline(h=detectThresh[[loci[j]]],col="red",lty=3)
		# only bother plotting peaks if there are peaks
		if(length(alleles>0))
			{
			if(!is.null(refs))
				{
				# get which K contributes to each peak
				peakContributors = getPeakContributors(alleles,refs[,loci[j]])
				# add peaks to plot
				mapply(heights,thisDB[dbIndex,2],peakContributors,FUN=function(a,b,c) plotline(height=a,size=b,contributors=c,contColours))
				} else {
				mapply(heights,thisDB[dbIndex,2],FUN=function(a,b) lines(x=c(b,b),y=c(0,a),col="black",lwd=2))
				}
			# add peak labels to plot
			if(!doStutter)
				{
				# with no predicted stutter
				mapply(thisDB[dbIndex,2],heights,alleles,FUN=function(x,y,a) text(x,y+(YLIM[2]/12),a,col="blue"))
				} else {
				# with predicted stutter
				# stutter
				stutter = sapply(alleles,FUN=checkStutter,alleles=alleles,heights=heights)
				doubleStutter = sapply(alleles,FUN=checkStutter,alleles=alleles,heights=heights,stutterPos=-2,upperThresh=0.1)
				overStutter = sapply(alleles,FUN=checkStutter,alleles=alleles,heights=heights,stutterPos=1,upperThresh=0.1)
				calls = mapply(FUN=combineStutter,stutter,doubleStutter,overStutter)
				labelCols = vector(length=length(calls))
				certIndex = which(calls==2)
				labelCols[certIndex] = "green3"
				uncIndex = which(calls==1)
				labelCols[uncIndex] = "orange1"
				labelCols[-c(certIndex,uncIndex)] = "gray48"
				# with predicted stutter (crude)
				mapply(thisDB[dbIndex,2],heights,alleles,labelCols,FUN=function(x,y,a,c) text(x,y+(YLIM[2]/12),a,col=c))
				}
			}
		}		
    if(!is.null(outputFile)) dev.off()
	}

representation = function(K,alleles,heights,loci)
    {
    representReplicates = sapply(1:length(alleles), FUN=function(y) sapply(loci,FUN=function(x) K[[x]]%in%alleles[[y]][x,],simplify=FALSE),simplify=FALSE)
    representLoci = do.call(Map,c(rbind,representReplicates))
    representOverall = do.call(cbind,representLoci)

    representation = apply(representOverall,MARGIN=1,FUN=function(a) sum(a)/length(a))
    representation = c(representation,sum(representOverall)/prod(dim(representOverall)))
    names(representation) = c(names(alleles),"Overall")
    return(representation)
    }

meanRfu = function(K,alleles,heights,loci)
    {
    rfu = sapply(1:length(alleles), FUN=function(z) sapply(loci,FUN=function(x) sapply(K[[x]], FUN=function(y) ifelse(!y%in%alleles[[z]][x,],0,as.numeric(heights[[z]][x,which(alleles[[z]][x,]==y)])))))
    rfu = c(colMeans(rfu),mean(rfu))
    names(rfu) = c(names(alleles),"Overall")
    return(rfu)
    }


# function to get the mean RFU and estimate of representation for each contributor
get.representation.rfu = function(csp,refs)
    {
    loci = colnames(refs)[-1]
    repres = sapply(1:nrow(refs),FUN=function(x) representation(refs[x,-1],csp$alleles,csp$heights,loci))
    rownames(repres)[-nrow(repres)] = paste0("Replicate: ", rownames(repres)[-nrow(repres)])
    repres = cbind(paste0("{\\b ",rownames(repres),"}"),repres)
    colnames(repres) = c("Reference profile",rownames(refs))
    rfu = sapply(1:nrow(refs),FUN=function(x) meanRfu(refs[x,-1],csp$alleles,csp$heights,loci))
    rownames(rfu)[-nrow(rfu)] = paste0("Replicate: ", rownames(rfu)[-nrow(rfu)])
    rfu = cbind(paste0("{\\b ",rownames(rfu),"}"),rfu)
    colnames(rfu) = c("Reference profile",rownames(refs))
    #rownames(info) = c("representation","meanRFU")
    #colnames(info) = rownames(refs)
    return(list(repres=repres,rfu=rfu))
    }



# function to create a table of reference profiles for peaks reports
reference.tables.peaks = function(refs,csp,certains)
	{
	refCountsAll = sapply(1:nrow(refs),FUN=function(x) isSingleRefReplicatedAll(refs[x,-1],csp))
	refCountsCertain = sapply(1:nrow(refs),FUN=function(x) isSingleRefReplicatedCertain(refs[x,-1],certains))
	formattedAll = sapply(1:nrow(refs),FUN=function(x) formatRef(refs[x,-1],refCountsAll[,x]))
	formattedCertain = sapply(1:nrow(refs),FUN=function(x) formatRef(refs[x,-1],refCountsCertain[,x]))
	formattedAll = rbind(rownames(refs),formattedAll)
	formattedCertain = rbind(rownames(refs),formattedCertain)
	rownames(formattedAll) = rownames(formattedCertain) = c("Profile",colnames(refs)[-1])
	return(list(all=formattedAll,certains=formattedCertain))
	}

formatRef = function(ref,counts)
	{
	sapply(1:length(ref), FUN=function(x)
		paste(sapply(1:length(ref[[x]]), FUN=function(y) 
			formatAllele(ref[[x]][y],counts[[x]][y])),collapse=",")
			)
	}

formatAllele = function(allele,count)
	{
	if(length(count)==0) return(allele)
	for(i in 1:length(allele))
		{
		if(count[i]==0)
			{
			allele[i] = paste0('{\\chbrdr\\brdrs ',allele[i],'}')
			} else if(count[i]==1) {
			allele[i] = paste0('{\\i ',allele[i],'}')
			} else {
			allele[i] = paste0('{\\b ',allele[i],'}')
			}
		}
	return(allele)
	}

isSingleRefReplicatedAll = function(reference,csp)
	{
	loci=names(reference)
	sapply(loci, FUN=function(x)
		sapply(reference[[x]], FUN=function(y) 
			length(which(as.numeric(unlist(sapply(csp$alleles,FUN=function(a) a[x,],simplify=FALSE)))==y))
			),simplify=FALSE)
	}

isSingleRefReplicatedCertain = function(reference,certain)
	{
	loci=names(reference)
	sapply(loci, FUN=function(x)
		sapply(reference[[x]], FUN=function(y) 
			length(which(as.numeric(unlist(sapply(certain,FUN=function(a) a[[x]],simplify=FALSE)))==y))
			),simplify=FALSE)
	}




# function to determine whether peaks are certain (2), uncertain (1) or non-allelic (0) 
callLocusPeaks = function(alleles,heights)
	{
	alleles = as.numeric(alleles)
	heights = as.numeric(heights)
	naIndex = which(is.na(alleles))
	if(length(naIndex)>0) 
		{
		alleles = alleles[-naIndex]
		heights = heights[-naIndex]
		}
	stutter = sapply(alleles,FUN=checkStutter,alleles=alleles,heights=heights)
	doubleStutter = sapply(alleles,FUN=checkStutter,alleles=alleles,heights=heights,stutterPos=-2,upperThresh=0.1)
	overStutter = sapply(alleles,FUN=checkStutter,alleles=alleles,heights=heights,stutterPos=1,upperThresh=0.1)
	calls = mapply(FUN=combineStutter,stutter,doubleStutter,overStutter)
	return(calls)
	}

callReplicatePeaks = function(alleles,heights)
	{
	loci = rownames(alleles)
	return(sapply(loci, FUN=function(y) callLocusPeaks(alleles[y,],heights[y,]),simplify=FALSE))
	}

# function to filter down from allelic calls to just keep the certain alleles
getLocusCertains = function(alleles,calls)
	{
	alleles = as.numeric(alleles)
	naIndex = which(is.na(alleles))
	if(length(naIndex)>0) 
		{
		alleles = alleles[-naIndex]
		}
	return(alleles[which(calls==2)])
	}

getReplicateCertains = function(alleles,calls)
	{
	loci = rownames(alleles)
	return(sapply(loci, FUN=function(y) getLocusCertains(alleles[y,],calls[[y]]),simplify=FALSE))
	}

filterCertains = function(csp,ref)
	{
	return(csp[which(!csp%in%as.numeric(unlist(ref)))])
	}

# function to add unnatributable peaks to reference tables
unattribForRefs = function(csp,refs,certains)
	{
	# with estimation of stutters etc
	# find the unnatributable alleles
	unattributable1 = sapply(1:length(certains), FUN=function(x) 
				sapply(names(certains[[x]]), FUN=function(y) 
					filterCertains(certains[[x]][[y]],refs[,y]),
					simplify=FALSE),simplify=FALSE)
	# unattributable counts with estimation
	counts1 = sapply(do.call(Map,c(c,unattributable1)),FUN=table)
	withEstimation = sapply(1:length(counts1),FUN=function(a) paste(formatAllele(names(counts1[[a]]),counts1[[a]]),collapse=","))
	# without estimation of stutters etc
	unattributable2 = sapply(1:length(csp$alleles), FUN=function(x) 
				sapply(names(certains[[x]]), FUN=function(y) 
					filterCertains(unlist(csp$alleles[[x]][y,][which(!is.na(csp$alleles[[x]][y,]))]),refs[,y]),
					simplify=FALSE),simplify=FALSE)
	# unattributable counts without estimation
	counts2 = sapply(do.call(Map,c(c,unattributable2)),FUN=table)
	withoutEstimation = sapply(1:length(counts2),FUN=function(a) paste(formatAllele(names(counts2[[a]]),counts2[[a]]),collapse=","))
	return(list(withEstimation=withEstimation,withoutEstimation=withoutEstimation,simple=unattributable1))
	}

# function to determine which peaks are unnatributable while attempting to ignore stutter peaks
# not perfect
unattributablePeaks = function(csp,refs,certains)
	{
	# find the unnatributable alleles
	unnatributable = sapply(1:length(certains), FUN=function(x) 
				sapply(names(certains[[x]]), FUN=function(y) 
					filterCertains(certains[[x]][[y]],refs[,y]),
					simplify=FALSE),simplify=FALSE)
	if(length(unnatributable)>1) 
		{
		together = do.call(Map,c(c,unnatributable))
		} else {
		together = unnatributable[[1]]
		}
	counts = sapply(together,table,simplify=FALSE)
	replicated = cbind(sapply(counts,FUN=function(a) length(which(a==1))),sapply(counts,FUN=function(a) length(which(a>1))))
	rownames(replicated) = colnames(refs)[-1]
	colnames(replicated) = c("Unreplicated","Replicated")
	return(replicated)
	}

# function to plot replicated and unreplicated unnatributable alleles determined by assuming stutter rates
plotUnnatributablePeaks = function(replicated)
	{
	par(mai=c(1,1.2,1,1))
	barplot(t(replicated),legend.text=c("Unreplicated","Replicated"),xlab="Frequency",horiz=TRUE,las=1,xlim=c(0,max(rowSums(replicated))+2),col=c("gray69","gray30"))
	abline(v=seq(from=0,to=max(rowSums(replicated)),by=2)[-1],lty=2)
	}

# function to generate hypotheses suggestions for peak heights
hypothesis.generator.peaks = function(unattrib,nReps)
	{
	out = matrix(NA,ncol=3,nrow=0)
	colnames(out) = c("nU","doDropin","Guidance")
	nU = ceiling(max(rowSums(unattrib))/2)
	dropin = "FALSE"
	if(nU==3)
		{
		rec = "Can only be evaluated by removing the additional U from defence"
		} else if(nU>3) {
		rec = "Too many U's to evaluate"
		} else {
		rec = "Recommended"
		}	
	out = rbind(out,c(nU,dropin,rec))
	if(nU==0) return(out)
	# check if dropin could be sensible 
	# (counts replicated and unreplicated unattributables with nU-1)
	testUnattrib = t(apply(unattrib,MARGIN=1,FUN=function(a) removeContribs(a,nU-1)))
	if(any(testUnattrib[,2]>0)|sum(testUnattrib[,1])>(2*nReps)) 
		{
		# if replicated of too many dropins, dont suggest dropin
		return(out)
		} else {
		if(sum(testUnattrib[,1])>nReps)
			{
			if((nU-1)<3) 
				{
				# if between 1 and 2 dropins per replicate consider
				out = rbind(out,c(nU-1,"TRUE","Worth considering"))
				}
			} else {
			if((nU-1)<3) 
				{
				# if < 1 dropin per replicate suggest
				out = rbind(out,c(nU-1,"TRUE","Recommended"))
				if(rec=="Recommended") out[1,3] = ""
				}
			}
		return(out)
		}
	}

# remove a single contributor to check if a dropin hypothesis is sensible
removeContribs = function(x,toRemove)
	{
	toRemove = toRemove*2
	if((sum(x)-toRemove)<0)
		{
		return(c(0,0))
		} else {
		if(x[2]>=toRemove)
			{
			x[2] = x[2]-toRemove
			toRemove = 0
			} else {
			toRemove = toRemove - x[2]
			x[2] = 0
			}
		if(x[1]>=toRemove)
			{
			x[1] = x[1]-toRemove
			toRemove = 0
			} else {
			toRemove = toRemove - x[1]
			x[1] = 0
			}	
		}
	return(x)
	}

# function to return locus likelihoods and LRs for peak heights
local.likelihood.table.reformatter.peaks <- function(prosecutionHypothesis,defenceHypothesis,prosecutionResults,defenceResults)
    {
	P <- log10(locus.likes.peaks(prosecutionHypothesis,prosecutionResults))
	D <- log10(locus.likes.peaks(defenceHypothesis,defenceResults))
	table  <- rbind(P,D,(P-D),10^(P-D))
	#table <- sprintf("%.2f",round(table,2))
	table <- cbind(c("Prosecution.log10","Defence.log10","Ratio.log10","Ratio"),table)
	rownames(table) = c("Prosecution.log10","Defence.log10","Ratio.log10","Ratio")
	colnames(table)[1] =  "Likelihood"
	table[,-1] = sprintf("%.2f",round(as.numeric(table[,-1]),2))
    return(table)
    }
    

# function to return table of user input parameters
chosen.parameter.table.reformatter.peaks <- function(prosecutionHypothesis)
    {
    chosen = c("nUnknowns","ethnic","adj","fst","relatedness","relationship","doDropin","doDoubleStutter","doOverStutter","detectionThresh")
	keep <- sapply(chosen,FUN=function(a) grep(a,names(prosecutionHypothesis)))
	table <- as.data.frame(unlist(prosecutionHypothesis[unlist(keep)]))
	extra <- data.frame(Parameter= rownames(table))
	combined <- cbind(extra,table)
	colnames(combined) <- c('Parameter','User input')
    return(combined)
    }

# function to produce table of file inputs
file.inputs.table.reformatter.peaks <- function(prosecutionHypothesis)
	{
	tmp = strsplit(prosecutionHypothesis$peaksFile,split="/")[[1]]
	cspFile = tmp[length(tmp)]
	tmp = strsplit(prosecutionHypothesis$refFile,split="/")[[1]]
	refFile = tmp[length(tmp)]
	if(is.null(prosecutionHypothesis$databaseFile)&is.null(prosecutionHypothesis$kit))
		{
		databaseFile = "DNA17-db.txt (Default)"
		} else if (is.null(prosecutionHypothesis$databaseFile)) {
        databaseFile = paste0(prosecutionHypothesis$kit,".txt (Default)")
		} else {
		tmp = strsplit(prosecutionHypothesis$databaseFile,split="/")[[1]]
		databaseFile = tmp[length(tmp)]
		}
	table = data.frame(file=c("CSP","Reference","Database"),input=c(cspFile,refFile,databaseFile))
	colnames(table) = c("File","Used")
	return(table)
	}

# function to display the seed used
seedTable <- function(results)
    {
    if(is.null(results$seed.input)) 
        {
        extra="Randomly generated"
        } else {
        extra = "User input"
        }
    out = cbind(toString(results$seed.used),extra)
    colnames(out) = c("Seed","Origin")
    return(out)
    }

# DNAcont and deg estimates for a single hypothesis
getDNAcontDeg = function(estimates,repNames,knownProfs,hyp)
    {
    DNAcontIndex = grep("DNAcont",names(estimates))
    repIndex = grep("repAdjust",names(estimates))
    degIndex = grep("degradation",names(estimates))
    DNAcont = estimates[DNAcontIndex]
    if(length(repIndex)>0)
        {
        DNAcont = sapply(DNAcont,FUN=function(a) a*c(1,estimates[repIndex]))
        }
    out = rbind(round(DNAcont,2),round(10^estimates[degIndex],5))
    out = cbind(c(paste0("{\\b Replicate: ",repNames,"}"),"{\\b Degradation}"),out)
    contNames = vector(length=length(DNAcontIndex))
    if(nrow(knownProfs)==0)
        {
        namesK = c()
        } else {
        namesK = rownames(knownProfs)[which(!unlist(knownProfs[,1]))]
        }
    nU = length(DNAcontIndex)-nrow(knownProfs)
    if(nU>0) 
        {
        namesU = paste0("U",1:nU)
        if(hyp=="defence") namesU[length(namesU)] = "X"
        } else {
        namesU = c()
        }
    if(hyp=="prosecution") 
        {
        namesQ = rownames(knownProfs)[which(unlist(knownProfs[,1]))]
        outNames = c(namesU,namesK,namesQ)
        } else {
        outNames = c(namesU,namesK)
        }
    colnames(out) = c(ifelse(hyp=="prosecution","Prosecution","Defence"),outNames)
    rownames(out) = 1:nrow(out)
    return(out)
    }

# function to create a table summarising DNA contribution estimates and degradation estimates
DNAcontDeg = function(results,repNames,knownProfsP,knownProfsD)
    {
    DNAcontP = getDNAcontDeg(results$Pros$optim$bestmem,repNames,knownProfsP,"prosecution")
    DNAcontD = getDNAcontDeg(results$Def$optim$bestmem,repNames,knownProfsD,"defence")
    return(list(pros=DNAcontP,def=DNAcontD))
    }

# function to create a string that describes the pros and def hypotheses
create.hypothesis.string.peaks = function(hypP)
	{
	nameQ = paste0(rownames(hypP$knownProfs)[which(unlist(hypP$knownProfs[,1]))]," (Q)")
	nameK = rownames(hypP$knownProfs)[which(!unlist(hypP$knownProfs[,1]))]
	if(hypP$nUnknowns>0)
		{	
		nameU = paste0("U",1:hypP$nUnknowns)
		nameU = paste(nameU,collapse=" + ")
		} else {
		nameU = c()
		}
	pros = paste0("Prosecution hypothesis: ",nameQ)
	def = "Defence hypothesis: Unknown (X)"
	if(length(nameK)>0)
		{
		nameK = paste0(nameK," (K",1:length(nameK),")")
		nameK = paste(nameK,collapse=" + ")
		pros = paste(pros,nameK,sep=" + ",collapse=" + ")
		def = paste(def,nameK,sep=" + ",collapse=" + ")
		}
	if(length(nameU)>0)
		{
		pros = paste(pros,nameU,sep=" + ",collapse=" + ")
		def = paste(def,nameU,sep=" + ",collapse=" + ")
		}
	if(hypP$doDropin)
		{
		pros = paste0(pros," + dropin")
		def = paste0(def," + dropin")
		}
	return(list(pros=pros,def=def))
	}

# function to output alleles for each K
create.contributor.table = function(refs)
	{
	refLabels = NULL
	refNames = NULL
	refNames = c(refNames, rownames(refs)[which(unlist(refs[,1]))])
	refLabels = c(refLabels, "Q")
	Kindex = which(!unlist(refs[,1]))
	if(length(Kindex)>0)
		{
		refNames = c(refNames, rownames(refs)[Kindex])
		refLabels = c(refLabels,paste0("K",1:length(Kindex)))
		}
	out = cbind(refLabels, refNames)
	colnames(out) = c("Label","Reference profile")
	return(out)
	}


# function to create the parts of the report documents that are present in both allele report and output report
common.report.section.peaks = function(names,gen,admin,warnings=NULL,hypothesisString=NULL,resTable=NULL,figRes=300)
	{
	# Create a new Docx. 
	doc <- RTF(names$filename, width=11,height=8.5,omi=c(1,1,1,1))
	# add title
	print("Creating title...")
	addParagraph(doc, line)
	spacer(doc,3)
	addHeader( doc, title=substr(names$title,1,nchar(names$title)-1), subtitle=names$subtitle, font.size=fs0 )
	addParagraph(doc, line)
	if(!is.null(hypothesisString))
		{
		addHeader(doc, hypothesisString$pros, TOC.level=1,font.size=fs1)
		addHeader(doc, hypothesisString$def, TOC.level=1,font.size=fs1)
		} else {
		addTable(doc, create.contributor.table(gen$refs), col.justify='C', header.col.justify='C',font.size=fs3)
		}
	# warnings
	errors = NULL
	loci = rownames(gen$unattributable)
if(length(admin$detectionThresh)==1)
	{
  if(any(as.numeric(unlist(gen$csp$heights))<admin$detectionThresh,na.rm=TRUE))
	errors = c(errors,
    "Observed peak height below detection threshold.")
	} else {
	loci = rownames(gen$unattributable)
	tmp = sapply(loci,FUN=function(x) 
		sapply(1:length(gen$csp$heights), FUN=function(y) 
			if(any(na.omit(unlist(gen$csp$heights[[y]][x,]))<
				admin$detectionThresh[[x]])) 
    					paste0("WARNING: Observed peak height below detection threshold at ", 
					x, " replicate ", y)
			)
		)
	errors = c(errors, unlist(tmp))
	}
    if(length(errors)>0) sapply(1:length(errors),FUN=function(a) addParagraph(doc, errors[a]))
	if(length(warnings)>0) sapply(1:length(warnings),FUN=function(a) addParagraph(doc, warnings[a]))
	# result
	if(!is.null(resTable))
		{
		addHeader(doc, "Overall Likelihood", TOC.level=2, font.size=fs2)
		addTable(doc, resTable ,col.justify='C', header.col.justify='C')
		spacer(doc,3)
		}
	addPageBreak(doc, width=11,height=8.5,omi=c(1,1,1,1) )
	# add hypothesis
	# plot CSP
	print("Plotting CSP...")	
	addHeader(doc, "Crime scene profiles (CSP)",TOC.level=2,font.size=fs2)
	sapply(1:length(gen$csp$alleles),FUN=function(y) 
		{
		addText(doc,paste0("Replicate: ",names(gen$csp$alleles)[y]),bold=TRUE);
		addNewLine(doc)
		addPlot( doc, plot.fun = print, width = 5.5, height = 5, res=figRes, x = 
			CSP.heights.plot(csp=gen$csp,refs=gen$refs,dbFile=admin$databaseFile,
					kit=admin$kit,detectThresh=admin$detectionThresh,
					doStutter=TRUE,replicate=y))
		if(y==length(gen$csp$alleles)) addParagraph( doc, "The peak heights in RFU (y-axis) and mean adjusted allele length in base pairs (x-axis), with peaks at alleles in the profile of Q coloured in red, peaks at alleles of other assumed contributors shown with other colours, while black peaks are not attributable to Q or any other assumed contributor. Allele labels are coloured according to their possible allelic status (this is intended as a guide and is not assumed by the software): green=allelic, orange=uncertain, grey=non-allelic.")
		addPageBreak(doc, width=11,height=8.5,omi=c(1,1,1,1) )
		})
	# CSP heights
	print("Printing CSP/ref tables...")
	addHeader(doc,"CSP alleles and peak heights", TOC.level=2, font.size=fs2)
	if(!all(is.na(unlist(gen$csp$alleles))))
		{
		for(i in 1:length(gen$csp$alleles))
		{
		  addNewLine(doc)
		  addText(doc,names(gen$csp$alleles)[i],bold=TRUE,font.size=fs2)
		  for(j in 1:nrow(gen$csp$alleles[[i]]))
		  {
		    addNewLine(doc)
		    locus = rownames(gen$csp$alleles[[i]])[j]
		    # get CSP info for this locus
		    index = which(!is.na(gen$csp$alleles[[i]][locus,]))
		    if(length(index)==0)
			{
			addText(doc,paste0("No observed alleles in ",locus),bold=TRUE)
			addNewLine(doc)
			next
			}
		    allelesTmp=gen$csp$alleles[[i]][locus,index]
		    heightsTmp=gen$csp$heights[[i]][locus,index]
		    colnames(heightsTmp)= colnames(allelesTmp)
		    # combine allele and heights
		    toPrint = rbind(allelesTmp,heightsTmp)
		    if(ncol(toPrint)==0) toPrint=matrix(NA,nrow=2,ncol=0)
		    toPrint = cbind(c("Allele","Height"),toPrint)
		    toPrint = as.data.frame(toPrint)
		    colnames(toPrint)[1] = locus
		    colnames(toPrint)[-1] = rep(" ",ncol(toPrint)-1)
	    # get whether K has each allele
	    #tmpK = t(sapply(gen$refs[,locus],FUN=function(x) allelesTmp%in%x))
		    tmpK = t(sapply(gen$refs[,locus],FUN=function(x) sapply(allelesTmp,FUN=function(y) c(NA,"Het","Hom")[sum(x==y)+1])))
		    if(!identical(dim(tmpK),c(nrow(gen$refs),length(allelesTmp)))) tmpK = t(tmpK) # matrix wrong way round when nK>1 & nAllele=1
		    tmpK = as.data.frame(cbind(rownames(gen$refs),tmpK))
		    colnames(tmpK)[1] = locus
		    colnames(tmpK)[-1] = rep(" ",ncol(tmpK)-1)
		    # combine
		    toPrint = rbind(toPrint,tmpK)
		    addTable(doc,  toPrint, col.justify='L', header.col.justify='L')
		  }
		}
		} else {
		addNewLine(doc)
	  addText(doc,"***WARNING***: There are no peaks above the detection threshold in this CSP. Please check input files/parameters if this is unintended.",bold=TRUE)
		}
	if(gen$belowThreshold)
	{
	  addNewLine(doc)
	  addText(doc,"***WARNING***: Some peaks in the provided CSP were below the specified detection threshold. These have been removed from the CSP displayed here, and will not be used for further calculation. If this is unexpected, please review the provided CSP.",bold=TRUE)
	}
	# table of reference profiles
	#print("references")
	#addHeader(doc, "Reference profiles", TOC.level=2, font.size=fs2 )
	#addText(doc,"All peaks in the provided profiles",bold=TRUE)
	#addNewLine(doc)
	#addTable(doc, t(gen$refTables$all), col.justify='C', header.col.justify='C',font.size=fs3)
	#addText(doc,"After removal of peaks that are possibly non-allelic (intended as a guide only - not assumed by software)",bold=TRUE)
	#addNewLine(doc)
	#addTable(doc, t(gen$refTables$certains), col.justify='C', header.col.justify='C',font.size=fs3)
	#addParagraph(doc,"{\\chbrdr\\brdrs Unobserved}, {\\i unreplicated} and {\\b replicated} peaks in provided reference profiles and those unattributable to any reference profile.")
	#addNewLine(doc)
	#addPageBreak(doc, width=11,height=8.5,omi=c(1,1,1,1) )
	#print("known heights")
	#addPageBreak(doc, width=11,height=8.5,omi=c(1,1,1,1) )
	#addHeader(doc, "Peak heights for profiled individuals", TOC.level=1, font.size=fs1)
	#for(i in 1:length(gen$Kheights))
	#{
	#  addNewLine(doc)
	#  addText(doc, names(gen$Kheights)[i], bold=TRUE)
	#  addNewLine(doc)
	#  heightsK = NULL
	#  for(j in 1:length(gen$Kheights[[i]]))
	#  {
	    #addText(doc, names(gen$csp$alleles)[j], bold=TRUE)
	    #addNewLine(doc)
	#    toAdd = rbind(sapply(gen$Kheights[[i]][[j]],FUN=function(y) paste(names(y),collapse=",")),
	#                  sapply(gen$Kheights[[i]][[j]],FUN=function(y) paste(y,collapse=",")))
	#    rownames(toAdd) = paste0(names(gen$csp$alleles)[j],": ",c("Profile Alleles","Heights"))
	    #colnames(toAdd) = sapply(colnames(toAdd),FUN=function(x) strsplit(x,split="S")[[1]][1])
	#    heightsK = rbind(heightsK,toAdd)
	#  }
	#  heightsK = rbind(colnames(heightsK),heightsK)
	#  rownames(heightsK)[1] = "Locus"
	#  addTable(doc,  t(heightsK), col.justify='L', header.col.justify='L')
	#}
	# summary
	#print("summary")
	addNewLine(doc)
	addHeader(doc, "Summary", TOC.level=1,font.size=fs1)
	# representation
	print("Calculating representation...")
	addTable(doc, gen$repTables$repres, col.justify='C', header.col.justify='C',font.size=fs3)
	addParagraph( doc, "Approximate representation (observed/total) for each reference profile per replicate and overall.")
	addNewLine(doc)
	#addTable(doc, gen$repTables$rfu, col.justify='C', header.col.justify='C',font.size=fs3)
	#addParagraph( doc, "Mean RFU for each reference profile per replicate and overall. This may be an over-estimate of the average DNA contribution of an assumed contributor due to sharing of alleles with other contributors. These values are for information purposes only, they are not used by the software.")
	addPageBreak(doc, width=11,height=8.5,omi=c(1,1,1,1) )
	# unnatributable
	#print("unnatributable")
	#addHeader(doc, "Unattributable alleles", TOC.level=1,font.size=fs1)
	#addPlot( doc, plot.fun = print, width = 9, height = 4, res=figRes, x = plotUnnatributablePeaks(gen$unattributable))
	#addParagraph( doc, "Number of unreplicated (light grey) and replicated (dark grey) unattributable alleles per locus, for the  likely-allelic peaks (green allele labels shown in the CSP plots).")
	# unusual
	print("Generating unusual alleles table...")
	addHeader(doc, "Alleles that are rare in at least one database", TOC.level=1,font.size=fs1)
	if(nrow(gen$unusuals)>0)
		{
		addTable(doc, gen$unusuals, col.justify='C', header.col.justify='C',font.size=fs3)
		} else {
		addParagraph( doc, "No unusual alleles.")
		}
	return(doc)
	}

# unusual alleles at a locus
locus.unusual = function(csp,refs,alleleDb,locus)
	{
	thisDB = alleleDb[which(alleleDb$Marker==locus),]
	thisCSP = sapply(csp$alleles,FUN=function(a) a[locus,],simplify=FALSE)
	thisRefs = refs[,locus]
	ethnicIndex = which(!colnames(thisDB)%in%c("Marker","Allele","LUS","BP"))
	out = matrix(NA,ncol=5+length(ethnicIndex),nrow=0)
	colnames(out) = c("Locus","File","Profile","Allele","Issue",colnames(thisDB)[ethnicIndex])
	repNames = names(csp$alleles)
	Knames = rownames(refs)
	# CSP alleles
	# loop over each replicate
	for(i in 1:length(thisCSP))
		{
		# loop over each allele in each replicate
		for(j in 1:length(thisCSP[[i]]))
			{
			thisAllele = as.numeric(thisCSP[[i]][j])
			if(is.na(thisAllele)) next
			index = which(thisDB$Allele==thisAllele)
			# missing alleles
			if(length(index)==0)
				{
				out = rbind(out,c(locus,"CSP",repNames[i],thisAllele,"Not in database",thisDB[index,ethnicIndex]))
				next
				}
			# duplicate alleles
			if(length(index)>1)
				{
				out = rbind(out,c(locus,"CSP",repNames[i],thisAllele,"Duplicate allele",thisDB[index,ethnicIndex]))
				next
				}
			# rare alleles
			if(any(thisDB[index,ethnicIndex]<2))
				{
				out = rbind(out,c(locus,"CSP",repNames[i],thisAllele,"Rare allele",thisDB[index,ethnicIndex]))
				}
			}
		}
	# Reference alleles
	# loop over reference profiles
	for(i in 1:length(thisRefs))
		{
		# loop over each allele in each reference profile
		for(j in 1:length(thisRefs[[i]]))
			{
			thisAllele = as.numeric(thisRefs[[i]][j])
			index = which(thisDB$Allele==thisAllele)
			# missing alleles
			if(length(index)==0)
				{
				out = rbind(out,c(locus,"Reference",Knames[i],thisAllele,"Not in database",thisDB[index,ethnicIndex]))
				next
				}
			# duplicate alleles
			if(length(index)>1)
				{
				out = rbind(out,c(locus,"Reference",Knames[i],thisAllele,"Duplicate allele",thisDB[index,ethnicIndex]))
				next
				}
			# rare alleles
			if(any(thisDB[index,ethnicIndex]<2))
				{
				out = rbind(out,c(locus,"Reference",Knames[i],thisAllele,"Rare allele",thisDB[index,ethnicIndex]))
				}
			}
		}
	return(out)
	}

# unusual alleles for peak heights
unusual.alleles.peaks = function(csp,refs,alleleDb)
	{
	loci = colnames(refs)[-1]
	unusuals = sapply(loci,FUN=function(a) locus.unusual(csp,refs,alleleDb,a))
	return(do.call(rbind,unusuals))
	}

# get peak heights for Q at a single locus and replicate
getHeightsQ = function(heights,alleles,Q,replace=0)
	{
	index = alleles%in%Q
	out = as.numeric(heights[which(index)])
	names(out) = alleles[which(index)]
	index = which(Q%in%alleles)
	if(length(index)==1) 
	  {
	  out = c(out,replace)
	  names(out)[2] = Q[which(!Q%in%alleles)]
	  }
	if(length(index)==0&length(unique(Q))==2) 
	  {
	  out = rep(replace,times=2)
	  names(out) = Q
	  }
	if(length(index)==0&length(unique(Q))==1) 
	  {
	  out = replace
	  names(out) = unique(Q)
	  }
	return(out)
	}

# get peak heights for Q across replicates and loci
getQpeaks = function(csp,Qref,replace=0)
	{
	heightsQ = sapply(1:length(csp$alleles),FUN=function(x) sapply(rownames(csp$alleles[[x]]),FUN=function(y) getHeightsQ(csp$heights[[x]][y,],csp$alleles[[x]][y,],Qref[[y]],replace=replace)),simplify=FALSE)
	return(heightsQ)
}

# function to get useful info for reports
pack.genetics.for.peaks.reports = function(cspFile,refFile,csp=NULL,refs=NULL,dbFile=NULL,kit=NULL,threshold=20)
    {
    belowThreshold=FALSE
    # get csp
    if(is.null(csp)) 
      {
      cspPre = read.peaks.profile(cspFile)
      if(is.null(unlist(cspPre$alleles))) return(NA) # empty CSP
      csp = filter.below.thresh.peaks(cspPre,threshold)
      if(!identical(cspPre,csp)) belowThreshold=TRUE
      if(all(is.na(unlist(csp$alleles)))) return(NA) # empty CSP
      }
    # get references
    if(is.null(refs)) refs = read.known.profiles(refFile)
    # get allele database
    if(is.null(dbFile)&is.null(kit)) kit = "DNA17"
    alleleDb = load.allele.database(dbFile,kit)
    # unusual alleles
    unusuals = unusual.alleles.peaks(csp,refs,alleleDb)
    # make allelic calls
    calls = sapply(1:length(csp$alleles), FUN=function(x) callReplicatePeaks(csp$alleles[[x]],csp$heights[[x]]),simplify=FALSE)
    # list of only certain calls
    certains = sapply(1:length(csp$alleles), FUN=function(x) getReplicateCertains(csp$alleles[[x]],calls[[x]]),simplify=FALSE)
    # unnatributable, with estimation of which peaks are stutters etc
    unattributable = unattributablePeaks(csp,refs,certains)
    # reference tables
    refTables = reference.tables.peaks(refs,csp,certains)
    # unnatributable to add to reference tables
    unattrib = unattribForRefs(csp,refs,certains)
    refTables$certains = cbind(refTables$certains,t(cbind("{\\i Other}",t(unattrib$withEstimation))))
    refTables$all = cbind(refTables$all,t(cbind("{\\i Other}",t(unattrib$withoutEstimation))))
    # representation tables
    repTables = get.representation.rfu(csp,refs)
    # Q peak heights
    Qheights = getQpeaks(csp,refs[which(unlist(refs[,"queried"])),-1])
    # peak heights of each contributor
    Kheights = sapply(1:nrow(refs), FUN=function(x) getQpeaks(csp,refs[x,-1],replace=NA),simplify=FALSE)
    names(Kheights) = rownames(refs)
    return(list(csp=csp,refs=refs,calls=calls,
                certains=certains,unattributable=unattributable,
                refTables=refTables,repTables=repTables,unusuals=unusuals,
                unattribAlleles=unattrib$simple,Qheights=Qheights,Kheights=Kheights,belowThreshold=belowThreshold))
    }

# heights of unattributable alleles
getHeightsUnattrib = function(heights,alleles,unattrib)
	{
	heights[which(alleles%in%unattrib)]
	}

# how many unknowns might be explainable as dropin
minorAsDropin = function(csp,Qheights,unattributable,nU,refs,dropinThresh=3,minAllelesQ=5)
	{
	if(nU==0) return(NULL)
	unattribHeights = sapply(1:length(csp$alleles),FUN=function(x) sapply(rownames(csp$alleles[[x]]),FUN=function(y) getHeightsUnattrib(csp$heights[[x]][y,],csp$alleles[[x]][y,],unattributable[[x]][[y]])),simplify=FALSE)
	# if have known profiles, only compute Qmean on alleles unshared between Q and Ks
	if(any(!unlist(refs[,1])))
	{
	# number of alleles of Q (2 for hom, 1 for het)
	nQ = sapply(Qheights,FUN=function(x) 
	    sapply(x,FUN=function(y) rep((length(y)==1)+1,length(y))),simplify=FALSE)
	# allele of Q in K
	inK = sapply(1:length(Qheights),FUN=function(x) 
	  sapply(1:length(Qheights[[x]]),FUN=function(y) 
	    names(Qheights[[x]][[y]])%in%unlist(refs[which(!unlist(refs[,1])),names(Qheights[[x]])[y]])),
	  simplify=FALSE)
	# sum over whole CSP
	totalHeight = 0
	nAlleles = 0
	for(i in 1:length(Qheights))
	{
	  for(j in 1:length(Qheights[[i]]))
	  {
	    totalHeight = totalHeight + sum(Qheights[[i]][[j]][which(!inK[[i]][[j]])])
	    nAlleles = nAlleles + sum(nQ[[i]][[j]][which(!inK[[i]][[j]])])
	  }
	}
	# only use unshared if there are enough
	if(nAlleles>=minAllelesQ)
	  {
	  Qmean = totalHeight/nAlleles
	  } else {
	 # otherwise use whole profile
	 Qmean = sum(unlist(Qheights))/(length(Qheights)*length(Qheights[[1]])*2)
	  }
	} else {
	  # if no known profiles, use whole profile of Q
	  Qmean = sum(unlist(Qheights))/(length(Qheights)*length(Qheights[[1]])*2) 
	}
	
if(nU==1) 
		{
		kMeans = mean(as.numeric(unlist(unattribHeights)))
		} else {
		#cluster = kmeans(unlist(unattribHeights),nU)
		kMeans = multiKmeans(unattribHeights,nU)
		}
	neededU = sum(Qmean/kMeans<=dropinThresh)
	out = round(c(Qmean,kMeans,neededU))
	names(out) = c("Mean RFU Q",paste0("Mean RFU U",1:nU),"estimated # U required with dropin")
	out = as.data.frame(t(out))
	return(out)
	}

# function to generate the allele report
allele.report.peaks = function(admin,file=NULL,figRes=300,dropinThresh=3)
    {
    # get genetics
    gen = pack.genetics.for.peaks.reports(cspFile=admin$peaksFile,refFile=admin$refFile,kit=admin$kit,threshold=admin$detectionThresh,dbFile=admin$databaseFile)
    if(is.na(gen))
	{
	stop("ERROR: No above threshold peaks in CSP. Please check input.")
	}
    # file name
    names <- filename.maker(admin$outputPath,admin$caseName,file,type='allele')
    names$subtitle <- admin$caseName
    # section common to allele and output report
    doc <- common.report.section.peaks(names,gen,admin,figRes=figRes)
    # section specific to the allele report
    print("Generating suggested parameters...")
    # get suggested hypotheses
    hyps =  hypothesis.generator.peaks(gen$unattributable,length(gen$csp$alleles))
    if(any(hyps[,"Guidance"]=="Recommended"))
    {
    nU = hyps[which(hyps[,"Guidance"]=="Recommended"),"nU"]
    } else {
      nU = min(hyps[,"nU"])
    }
    # get suggested number of contributors who can be explained by dropin
    minorsDropin = minorAsDropin(gen$csp,gen$Qheights,gen$unattribAlleles,nU,gen$refs,dropinThresh)
    if(!is.null(minorsDropin))
	{
      # if some minors as dropin
      minnU = minorsDropin[,"estimated # U required with dropin"]
      if(minnU!=as.numeric(nU))
      {
	toReplace = ifelse(hyps[which(hyps[,3]=="Recommended"),2],"Inefficient approximation","Estimated U without dropin")
	hyps[,3] = gsub("Recommended",toReplace,hyps[,3])
      for(i in minnU:(as.numeric(nU)-1))
      {
	toAdd = ifelse(i==minnU,"Estimated U if dropin modelled","Inefficient approximation") 
        # modify suggested hypotheses
        hyps = rbind(hyps,c(i,TRUE,toAdd))
      }
      }
      # print suggested hypotheses
      addNewLine(doc)
      addHeader(doc, "Suggested parameter values", TOC.level=1, font.size=fs1)
      addTable(doc,hyps, col.justify='C', header.col.justify='C')
      addParagraph( doc, "If an nU value >2 is indicated, an approximate result can be obtained using nU=2 and doDropin=TRUE. Please check the allele designations shown in the CSP plots that were used to generate these hypotheses; if you disagree with the suggested designations the recommendations here may need to be altered.")
      # print minors as dropin recommendation in full
      print("Calculating minor as dropin...")
        addNewLine(doc)
        addHeader(doc, "Minor as dropin", TOC.level=1, font.size=fs1)
        addTable(doc, minorsDropin, col.justify='C', header.col.justify='C')
	addParagraph( doc, paste0("Mean peak height for Q, clustered mean peak heights for unknowns using k-means clustering with ",nU," clusters, and the minimum number of unknowns required to explain the CSP with dropin (mean Q peak height/mean U peak height <= ",dropinThresh,")." ))
    } else {
      # if no minors as dropin, print suggested hypotheses
      addNewLine(doc)
      addHeader(doc, "Suggested parameter values", TOC.level=1, font.size=fs1)
      addTable(doc,hyps, col.justify='C', header.col.justify='C')
      addParagraph( doc, "If an nU value >2 is indicated, an approximate result can be obtained using nU=2 and doDropin=TRUE. Please check the allele designations shown in the CSP plots that were used to generate these hypotheses; if you disagree with the suggested designations the recommendations here may need to be altered.")
	}
    addPageBreak(doc, width=11,height=8.5,omi=c(1,1,1,1) )
    # system info
    print("Printing system info...")
    addHeader(doc, "System information", TOC.level=1,font.size=fs1)
    addTable(doc,  system.info(), col.justify='L', header.col.justify='L')
    done(doc)
    #rtf.formater(names$filename)	
}

# check whether any parameters are close to boundaries
checkParamNearBoundaries = function(results)
{
  vals = results$optim$bestmem
  lower = results$member$lower
  upper = results$member$upper
  range = upper-lower
  checkLower = vals<(lower+(0.001*range))
  checkUpper = vals>(upper-(0.001*range))
  return(checkLower|checkUpper)
}


# function to generate the output report
output.report.peaks <- function(prosecutionHypothesis,defenceHypothesis,results,file=NULL,figRes=300)
    {
    prosecutionResults = results$Pros
    defenceResults = results$Def
    # some checks
    print("Checking warnings...")
    warnings = NULL
    WoE = results$WoE[length(results$WoE)]
    maxWoE = log10(matchProb(prosecutionHypothesis,prosecutionHypothesis$relatedness,prosecutionHypothesis$fst))
    if(maxWoE<WoE) warnings = c(warnings,paste0("WARNING: WoE>max(WoE) by ",round(WoE-maxWoE,3)," bans"))
    # overall likelihood
    resTable =  overall.likelihood.table.reformatter(prosecutionResults,defenceResults)
    # get genetics
    print("Preprocessing genetics...")
    gen = pack.genetics.for.peaks.reports(cspFile=prosecutionHypothesis$peaksFile,
                                          refFile=prosecutionHypothesis$refFile,
                                          csp=list(alleles=prosecutionHypothesis$peaksProfile,
                                                   heights=prosecutionHypothesis$heightsProfile),
                                          refs=prosecutionHypothesis$knownProfs,
                                          threshold=prosecutionHypothesis$detectionThreshold,
                                          dbFile=prosecutionHypothesis$databaseFile,
                                          kit=prosecutionHypothesis$kit)
    # file name
    print("Generating file names...")
    names <- filename.maker(prosecutionHypothesis$outputPath,prosecutionHypothesis$caseName,file,type='results')
    names$subtitle <- prosecutionHypothesis$caseName
    # hypotheses names
    print("Generating hypotheses...")
    hypNames = create.hypothesis.string.peaks(prosecutionHypothesis)
    # section common to allele and output report
    print("Running common documentation...")
    doc = common.report.section.peaks(names,gen,list(databaseFile=prosecutionHypothesis$databaseFile,kit=prosecutionHypothesis$kit,detectionThresh=prosecutionHypothesis$detectionThresh),warnings,hypNames,resTable,figRes=figRes)
    # section specific to the output report
    # locus likelihoods
    print("Printing locus likelihoods...")
    addHeader(doc, "Likelihoods at each locus", TOC.level=2, font.size=fs2)
	addTable(doc, local.likelihood.table.reformatter.peaks(prosecutionHypothesis,defenceHypothesis,prosecutionResults,defenceResults) ,col.justify='C', header.col.justify='C',font.size=8)
	spacer(doc,3)
    # max LR
print("Printing max likelihood...")
	addHeader(doc, "Theoretical maximum LR = Inverse Match Probability (IMP)", TOC.level=2, font.size=fs2)
	addTable(doc, ideal(defenceHypothesis), col.justify='C', header.col.justify='C')
	spacer(doc,3)
	# DNAcont and degradation
print("Printing DNAcont & Deg...")
	DNAcontTables = DNAcontDeg(results,names(prosecutionHypothesis$peaksProfile),prosecutionHypothesis$knownProfs,defenceHypothesis$knownProfs)
    addHeader(doc, "DNA contribution (RFU) and degradation estimates", TOC.level=2, font.size=fs2)
	addTable(doc, DNAcontTables$pros, col.justify='C', header.col.justify='C')
	addTable(doc, DNAcontTables$def, col.justify='C', header.col.justify='C')
	spacer(doc,3)	
	# dropin
print("Printing dropin...")
	addHeader(doc, "Dropin parameter estimates", TOC.level=2, font.size=fs2)
	addTable(doc, overall.dropin.table.reformatter(prosecutionResults,defenceResults), col.justify='C', header.col.justify='C')
	checkPros = abs(prosecutionResults$member$upper["dropin"]-
	                  prosecutionResults$optim$bestmem["dropin"])<0.1
	checkDef = abs(defenceResults$member$upper["dropin"]-
	                 defenceResults$optim$bestmem["dropin"])<0.1
	if(!is.na(checkPros)&!is.na(checkDef))
	  {
	  if(checkPros|checkDef)
	    {
	    addNewLine(doc)
	    addText(doc,"***WARNING***: Dropin parameter estimated close to maximum allowed dropin value. Consider re-running with a higher maximum dropin value.",bold=TRUE)
	    }
	}
	spacer(doc,3)
    # user defined parameters
print("Printing user defined parameters...")
	addHeader(doc, "User defined parameters", TOC.level=1, font.size=fs1)
	addTable(doc, chosen.parameter.table.reformatter.peaks(prosecutionHypothesis), col.justify='L', header.col.justify='L')
	spacer(doc,3)
    # input files
print("Printing input files...")
	addHeader(doc, "Input files", TOC.level=1, font.size=fs1)
	addTable(doc, file.inputs.table.reformatter.peaks(prosecutionHypothesis), col.justify='L', header.col.justify='L')
	spacer(doc,3)
	# seed used
print("Printing seed...")
	addHeader(doc, "Seed used", TOC.level=1, font.size=fs1)
	addTable(doc, seedTable(results), col.justify='L', header.col.justify='L')
    # optimised parameters
print("Printing optimised params...")
	addHeader(doc, "Optimised parameters", TOC.level=1, font.size=fs1)
	# prosecution params
	addHeader(doc, "Prosecution parameters", TOC.level=2, font.size=fs2)
	addTable(doc, optimised.parameter.table.reformatter(prosecutionHypothesis,prosecutionResults), col.justify='L', header.col.justify='L')
	spacer(doc,3)
          # warning if parameters near boundary
        nearBound = checkParamNearBoundaries(prosecutionResults)
        if(any(nearBound))
                {
                addText(doc,paste("***WARNING***: The following prosecution parameters are close to their upper or lower boundary. Consider rerunning with relaxed boundaries.",paste(names(nearBound)[which(nearBound)],collapse=";"),sep=" "),bold=TRUE)
                spacer(doc,3)
                }
	# defence params
	addHeader(doc, "Defence parameters", TOC.level=2, font.size=fs2)
	addTable(doc, optimised.parameter.table.reformatter(defenceHypothesis,defenceResults), col.justify='L', header.col.justify='L')
	spacer(doc,3)
	# warning if parameters near boundary
	nearBound = checkParamNearBoundaries(defenceResults)
        if(any(nearBound))
                {
                addText(doc,paste("***WARNING***: The following defence parameters are close to their upper or lower boundary. Consider rerunning with relaxed boundaries.",paste(names(nearBound)[which(nearBound)],collapse=";"),sep=" "),bold=TRUE)
                spacer(doc,3)
                }
	
# runtime
    if(!is.null(results$runtime))
	{
	addHeader(doc, "Runtime", TOC.level=1,font.size=fs1)
	runtime = results$runtime
	runInfo = data.frame(Parameter=names(runtime),Time=c(paste(round(runtime$elapsed,3),
					units(runtime$elapsed),sep=" "),
					toString(runtime$start),
					toString(runtime$end)))
	addTable(doc,  runInfo, 
			col.justify='L', header.col.justify='L')
	}
    # system information
	addHeader(doc, "System information", TOC.level=1,font.size=fs1)
	addTable(doc,  system.info(), col.justify='L', header.col.justify='L')
    done(doc)
    #rtf.formater(names$filename)	
}

Try the likeLTD package in your browser

Any scripts or data that you put into this service are public.

likeLTD documentation built on May 1, 2019, 7:58 p.m.