R/reports.R

Defines functions output.report allele.report common.report.section spacer ideal matchProb het hom file.inputs.table.reformatter chosen.parameter.table.reformatter optimised.parameter.table.reformatter overall.dropin.table.reformatter overall.dropout.table.reformatter dropDeg calc.dropout rcontConvert overall.likelihood.table.reformatter rtf.formater summary.helper summary.generator estimates.reformatter estimate.csp queried.vs.known compare.hypothesis.inputs pack.genetics.for.output.report pack.genetics.for.allele.report locus.likes hyp.D hyp.P filename.maker system.info hypothesis.generator local.likelihood.table.reformatter reference.table.reformatter csp.table.reformatter unusual.alleles unusual.alleles.per.table table.collapser unattributable.plot.maker load.allele.database pack.admin.input latex.maker latex.table.footer ref.table.to.latex csp.table.to.latex latex.table.header round.3 round.1 round.0 rounder

Documented in allele.report load.allele.database locus.likes output.report pack.admin.input

#--------------------------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------------------------
# 15th March 2014, Adrian Timpson

# All functions required to produce both main reports. Utilising package 'rtf' to produce .doc outputs.
# A premininary 'allele report' is generated by allele.report(), using only 'admin'. This is intended to assist the user in choosing parameter values for a proper run of the likeLTD program.
# Once the user has obtained Pros and Def results, a full output report may be generated by output.report().
# Many sections are common to both reports, so for simplicity these are performed by common.report.section()
# Most other functions perform a specific task in preparing a section of the report

# The exception to this is latex.maker() and its three helper functions, which outputs a separate .tex file containing the two allele tables (crime scene and references)

# The admin report draws its administrative data (default parameters, filepaths etc) from the 'admin' object
# In contrast, the final output report draws its administrative data from the values that were ACTUALLY used (since there are various opportunities downstream of the 'admin' object for these to be changed by the user).
# This means that the object 'genetics' generated by pack.genetics.for.allele.report() for the allele report is different to the 'genetics' object generated by pack.genetics.for.output.report() for the final report.
#--------------------------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------------------------
rounder <- function(x,n){
	# deals with three repetitive issues (that are more insipid than might be expected! :
	# numerics are best converted to text, to prevent MS Word from formatting them
	# rounding

	# rounding a number with a few zeros results in 0 when we actually want 0.00 
	if(is.numeric(x))x <- round(x,n) # method for vectors
	if(is.data.frame(x)){	# method for data.frames, each column at a time
		for(c in 1:ncol(x))if(is.numeric(x[,c]))x[,c] <- round(x[,c],n)
		}
	result <- format(x,nsmall=n,scientific=F)
return(result)}
#--------------------------------------------------------------------------------------------------------------------
# shorthand to avoid bothering with the argument n, for clarity in the code. Can handle vectors and data.frames
round.0 <- function(x)rounder(x,0)
round.1 <- function(x)rounder(x,1)
round.3 <- function(x)rounder(x,3)

#--------------------------------------------------------------------------------------------------------------------
latex.table.header <- function(genetics){
	# helper function for latex.maker()
	N <- ncol(genetics$summary$table$latex)+1
	text <- c('
	\\documentclass{article}\n
	\\usepackage{rotating}\n
	\\begin{document}\n
	\\begin{sidewaystable}[p]\n
	%\\setcounter{table}{1}\n
	',
	paste('\\begin{center}\\begin{tabular}{|',paste(rep('c|',N),collapse=''),'}\\hline',sep=''),
	paste('&',paste(names(genetics$summary$table$latex),collapse='&'),'\\\\\\hline',sep=''),
	paste('\\multicolumn{',N,'}{|l|}{Crime scene profiles}\\\\\\hline',sep=''))
return(text)}

#--------------------------------------------------------------------------------------------------------------------
csp.table.to.latex <- function(genetics){
	# helper function for latex.maker()
	# formats the CSP table of alleles for latex
	table.csp <- table.collapser(genetics$cspData)
	table.unc <- table.collapser(genetics$uncData)
	table <- NULL; 
	for(n in 1:genetics$nrep){
		table <- rbind(table,table.csp[n,])
		table <- rbind(table,table.unc[n,])
		}
	colnames(table) <- colnames(genetics$cspData)
	row.names(table) <- rep(c('alleles','uncertain'),genetics$nrep)
	text = c()
	N.col <- ncol(table)
	N.row <- nrow(table)
	for(row in 1:N.row){
		text.line <- character(N.col)
		for(col in 1:N.col){
			if(as.character(table[row,col])=='')text.line[col] <- '--'
			if(as.character(table[row,col])!='')text.line[col] <- gsub(' ',',',table[row,col])
			}
		if(row%%2==1)text=c(text,paste(paste(row.names(table)[row],paste(text.line,collapse='&'),sep='&'),'\\\\',sep=''))
		if(row%%2==0)text=c(text,paste(paste(row.names(table)[row],paste(text.line,collapse='&'),sep='&'),'\\\\\\hline',sep=''))
		}
return(text)}

#--------------------------------------------------------------------------------------------------------------------
ref.table.to.latex <- function(genetics){
	# helper function for latex.maker()
	# table: Reference table, from genetics$summary$table$latex
	table <- genetics$summary$table$latex
	text = paste('\\multicolumn{',ncol(table)+1,'}{|l|}{Summary:}\\\\\\hline',sep='')
	for(row in 1:nrow(table)){
		text=c(text,paste(paste(row.names(table)[row],paste(table[row,],collapse='&'),sep='&'),'\\\\\\hline',sep=''))
		}
return(text)}

#--------------------------------------------------------------------------------------------------------------------
latex.table.footer <- function(){
	# helper function for latex.maker()
	text <- c('
	\n
	\\end{tabular}\n
	\\caption{Alleles that are \\textbf{replicated} \\textit{unreplicated} and \\fbox{absent} in the crime scene profile}\n
	\\end{center}\n
	\\end{sidewaystable}\n
	\\end{document}\n
	')
return(text)}

#--------------------------------------------------------------------------------------------------------------------	
latex.maker <- function(genetics,filename){
# table1: CSP table produced by allele.table() 
# table2: Reference table, from genetics$summary$table$latex
	text.1 <- latex.table.header(genetics)
	text.2 <- csp.table.to.latex(genetics)
	text.3 <- ref.table.to.latex(genetics)
	text.4 <- latex.table.footer()
	text <- c(text.1,text.2,text.3,text.4)

	file <- file(filename)
	writeLines(text,file)
	close(file)}

#--------------------------------------------------------------------------------------------------------------------
pack.admin.input <- function(cspFile, refFile, caseName='dummy',databaseFile=NULL, kit=NULL, linkageFile=NULL, outputPath=getwd() ) {
	# Packs and verifies administrative information.
	# Documentation in man directory.
    	paths <- c(cspFile, refFile) 
	if(!is.null(databaseFile)) paths <- c(databaseFile, paths, recursive=TRUE)
	if(!is.null(linkageFile)) paths <- c(linkageFile, paths, recursive=TRUE)
	for(path in paths) {
		if(!file.exists(path))
			stop(paste(path, "does not exist."))
		else { 
			info <- file.info(path)
			if(info$isdir) stop(paste(path, "is not a file."))
      		}
    		} # loop over files.
	if(file.exists(outputPath) & !file.info(outputPath)$isdir) 
	stop(paste(outputPath, " exists and is not a directory."))
	admin <- list( caseName=caseName,
                databaseFile=databaseFile,
                linkageFile=linkageFile,
                cspFile=cspFile,
                refFile=refFile,
                outputPath=outputPath,
		kit=kit )
	return(admin)}

#--------------------------------------------------------------------------------------------------------------------
load.allele.database <- function(path=NULL,kit=NULL) {
	# Loads allele database
	# Documentation is in man directory.
	if(is.null(path)) { # Load default database
	if(is.null(kit)) kit = "DNA17"
	dummyEnv <- new.env()
	data(list=paste0(kit,'-db'), package='likeLTD', envir=dummyEnv)
	return(dummyEnv[[paste0(kit,'-db')]])
	}
	if(!file.exists(path)) stop(paste(path, "does not exist."))
	read.table(path, sep="\t", header=TRUE)
	}

#--------------------------------------------------------------------------------------------------------------------
unattributable.plot.maker <- function(genetics){
# define ggplot variables to avoid NOTE from CRAN
aes <- geom_bar <- scale_fill_grey <- NULL
	loci <- counts <- status <- NULL
	plot <- ggplot(data=genetics$summary$counts, aes(x=loci,y=counts,fill=status))+
		 geom_bar(stat='identity')+
		scale_fill_grey()	
return(plot)}

#--------------------------------------------------------------------------------------------------------------------
table.collapser <- function(table){
	# collapses a table so that fields stored as lists become comma separated strings
	result <- array(,dim(table))
	for(row in 1:nrow(table)){
		for(locus in 1:ncol(table)){
			result[row,locus] <- paste(unlist(table[row,locus]),collapse=',')
			}}
return(result)}

#--------------------------------------------------------------------------------------------------------------------
unusual.alleles.per.table <- function(table,afreq){
	# finds unusual alleles in any specific table, (provided in original listed format)
	# Get names of populations in database
	startCol = grep("BP",colnames(afreq))+1
	pops = colnames(afreq)[startCol:ncol(afreq)]	# first 3 columns are Marker, Allele, BP
	rare <- matrix(ncol=3+length(pops),nrow=0)
	colnames(rare) = c("locus","allele",paste(pops,".freq",sep=""),"error")
	loci <- colnames(table); loci <- loci[loci!='queried']# ref table has a unwanted column 'queried'
	for(locus in loci)
		{
		if(!locus%in%afreq$Marker)
			{ # check the loci are even in the database
			frame <- matrix(c(locus,NA,rep(NA,times=length(pops),"Entire locus missing from database")),ncol=3+length(pops))
			colnames(frame) = c("locus","allele",paste(pops,".freq",sep=""),"error")
		        rare <- rbind(rare, frame)
			}

		if(locus%in%afreq$Marker)
			{ # only continue if the locus is in the database
			for(row in 1:nrow(table))
				{
				alleles <- unique(unlist(table[row,locus]))
				for(allele in alleles[!is.na(alleles)])
					{ # ignore NAs in the csv file
					condition <- afreq$Marker==locus & afreq$Allele==allele
					x <- afreq[condition,]
					if(nrow(x)==1)
						{ # if the allele is present once in the database (should be!)
						# get whether each population is less than 2
						sizeCondition = NULL
						for(i in 1:length(pops))
							{
							eval(parse(text=paste("sizeCondition = c(sizeCondition, x$",pops[i],"<2)",sep="")))
							}
						if(any(sizeCondition)) 
							{
							print(paste(locus,allele,sep="_",collapse="_"))
							frame <- matrix(c(locus,allele,x[startCol:length(x)],'NA'),ncol=3+length(pops))
							colnames(frame) = c("locus","allele",paste(pops,".freq",sep=""),"error")
        	    					rare <- rbind(rare, frame)
							}
        					}
					if(nrow(x)==0)
						{ # if the allele is absent from database it is probably a typo	
						frame <- matrix(c(locus,allele,rep(NA,times=length(pops)),"Allele absent from database,check for typo"),ncol=3+length(pops))
						colnames(frame) = c("locus","allele",paste(pops,".freq",sep=""),"error")
            					rare <- rbind(rare, frame)
          					}
					if(nrow(x)>1)
						{ # if the allele is more than once there is a problem with the database!
						frame <- matrix(c(locus,allele,rep(NA,times=length(pops)),"Allele present multiple times in database"),ncol=3+length(pops))
						colnames(frame) = c("locus","allele",paste(pops,".freq",sep=""),"error")
            					rare <- rbind(rare, frame)
          					}
        				}  # loop over alleles
     				} # loop over replicates
			}} # loop over loci				
return(unique(rare))}

#--------------------------------------------------------------------------------------------------------------------
unusual.alleles <- function(genetics){
	#  creates and formats a combined table of unusual alleles

	t1.tmp <- unusual.alleles.per.table(genetics$refData,genetics$afreq)
	t1 <- cbind(rep('Reference profiles',nrow(t1.tmp)),t1.tmp)
	t2.tmp <- unusual.alleles.per.table(genetics$cspData,genetics$afreq)
	t2 <- cbind(rep('Crime scene certain',nrow(t2.tmp)),t2.tmp)
	t3.tmp <- unusual.alleles.per.table(genetics$uncData,genetics$afreq)
	t3 <- cbind(rep('Crime scene uncertain',nrow(t3.tmp)),t3.tmp)
	colnames(t1)[1]=colnames(t2)[1]=colnames(t3)[1]="source"
	table <- as.data.frame(rbind(t1,t2,t3))
	if(nrow(table)==0)table <- data.frame(status='no unusual alleles present')
return(table)}

#--------------------------------------------------------------------------------------------------------------------
csp.table.reformatter <- function(genetics){
	table.csp <- table.collapser(genetics$cspData)
	table.unc <- table.collapser(genetics$uncData)
	table <- NULL; 
	for(n in 1:genetics$nrep){
		table <- rbind(table,table.csp[n,])
		table <- rbind(table,table.unc[n,])
		}
	colnames(table) <- colnames(genetics$cspData)
	extra <- data.frame(run=c(rbind(1:genetics$nrep,rep(NA,genetics$nrep))),status=rep(c("certain","uncertain"),genetics$nrep))
	combined <- cbind(extra,table)
return(combined)}

#--------------------------------------------------------------------------------------------------------------------
reference.table.reformatter <- function(genetics){
	table <- genetics$summary$table$rtf
	extra <- data.frame(profile=row.names(table))
	combined <- cbind(extra,table)
return(combined)}

#--------------------------------------------------------------------------------------------------------------------
local.likelihood.table.reformatter <- function(prosecutionHypothesis,defenceHypothesis,prosecutionResults,defenceResults){
	P <- log10(locus.likes(prosecutionHypothesis,prosecutionResults))
	D <- log10(locus.likes(defenceHypothesis,defenceResults))
	table  <- t(data.frame(Prosecution.log10=P,Defence.log10=D,Ratio.log10=(P-D),Ratio=10^(P-D)))
	extra <- data.frame(Likelihood=row.names(table))
	result <- round.3(cbind(extra,table))
return(result)}

#--------------------------------------------------------------------------------------------------------------------
hypothesis.generator <- function(genetics){
	# replicated alleles may not be explained by dropin, therefore this dictates minimum number of Us (minU)
	# maximum Us determined by whichever locus has the most unattributables (rep and unrep combined)
	# reports all U + dropin combos between minU and maxU
	table <- genetics$summary$counts
	rep <- subset(table,table$status=='replicated')$counts
	unrep <- subset(table,table$status=='unreplicated')$counts
	minU <- ceiling(max(rep)/2)
	maxU <- ceiling(max(rep+unrep)/2) 
	unknowns <- minU:maxU
	N <- length(unknowns)
	dropins <- numeric(N)
	dropins.logic <- character(N)
	recommendation <- character(N)
	for(n in 1:N){
		dropins[n] <- sum(pmax(0,rep+unrep-2*unknowns[n]))
		if(dropins[n]<=2)recommendation[n]<-"recommended"
		if(dropins[n]==3)recommendation[n]<- "worth considering"
		if(unknowns[n]==3)recommendation[n]<-"Can only be evaluated by removing the additional U from defence"
		if(unknowns[n]>3)recommendation[n]<-"Too many U's to evaluate"
		}
	dropins.logic <- as.character(dropins!=0)
	result <- data.frame(nUnknowns=unknowns, doDropin=dropins.logic, Recommendation=recommendation)
return(result)}

#--------------------------------------------------------------------------------------------------------------------
system.info <- function(){
	date <- date()
	package.info <- sessionInfo()$otherPkgs$likeLTD
	sys.info <- Sys.info()
	Details <- t(data.frame(Details=c(date,package.info,sys.info) ))
	Details  <- gsub("\n","",Details ,fixed=T)
	Details  <- gsub("   ","",Details ,fixed=T)
	Type <- data.frame(Type=c('Date report generated:',names(package.info),names(sys.info)))
	all <- cbind(Type,Details)
return(all)}

#--------------------------------------------------------------------------------------------------------------------
filename.maker <- function(outputPath,caseName,filename,type=NULL){
	# type: report type, one of 'allele' or 'results'

	# construct title 
	if(is.null(type)) title <- paste0(caseName,'-Report-') # shouldn't be possible to remain as NULL, but just incase
	if(type=='allele')title <- paste0(caseName,'-Allele-Report-')
	if(type=='results')title <- paste0(caseName,'-Evaluation-Report-')

	# assign a filename, if NULL was handed down
	if(is.null(filename)){
		n <- 1
		filename <- file.path(outputPath,paste(title,n,'.doc',sep=''))
	
		# prevent overwriting
		while(file.exists(filename)){
			n <- n + 1
			filename <- file.path(outputPath,paste(title,n,'.doc',sep=''))
			}
		}
return(list(filename=filename,title=title))}

#--------------------------------------------------------------------------------------------------------------------
hyp.P <- function(genetics){
	Q <- paste(genetics$nameQ,'(Q)') 
	U <- paste(genetics$P.hyp$nUnknowns,'U',sep='')
	HP <- paste('Prosecution Hypothesis:',paste(c(Q,genetics$nameK,U),collapse=' + ')  )
	if(!is.null(genetics$doDropin))
	    {
	    if(genetics$doDropin)
		    {
		    HP = paste0(HP," + dropin")
		    }
		}
	if(is.null(genetics$nameK))HP <- NULL # genetics object is different for allele report or final report
return(HP)}

hyp.D <- function(genetics){
	X <- 'Unknown (X)'
	U <- paste(genetics$D.hyp$nUnknowns-1,'U',sep='')
	HD <- paste('Defence Hypothesis:',paste(c(X,genetics$nameK,U),collapse=' + ') )
	if(!is.null(genetics$doDropin))
	    {
	    if(genetics$doDropin)
		    {
		    HD = paste0(HD," + dropin")
		    }
		}
	if(is.null(genetics$nameK))HD <- NULL # genetics object is different for allele report or final report
return(HD)}

#--------------------------------------------------------------------------------------------------------------------
locus.likes <- function(hypothesis,results,...){
	# Generate locus likelihoods from overall likelihood
	# 	hypothesis: generated by either defence.hypothesis() or 
      #     prosecution.hypothesis()
	# results: results from do.call(optim,params)
	model <- create.likelihood.vectors(hypothesis)
	arguments <- relistArguments(results$optim$bestmem, hypothesis, ...)
	likes <- do.call(model,arguments)
	likes <- likes$objectives * likes$penalties
	}

#--------------------------------------------------------------------------------------------------------------------
pack.genetics.for.allele.report <- function(admin){
	# packs together all the genetic information required for the allele.report()

	# primary objects
	cspData <- read.csp.profile(admin$cspFile)
	uncData <- read.unc.profile(admin$cspFile)
	refData <- read.known.profiles(admin$refFile)
	afreq <- load.allele.database(admin$databaseFile,admin$kit)

	# secondary objects
	summary <- summary.generator(refData, cspData)
	estimates <- estimate.csp(refData, cspData)
	nrep	<- nrow(cspData)
	allele.report.genetics <- list( 
	cspData = cspData, 
	uncData = uncData,
	refData = refData,
	afreq = afreq,
	summary = summary,
	estimates = estimates,
	nrep = nrep)
return(allele.report.genetics)}

#--------------------------------------------------------------------------------------------------------------------	
pack.genetics.for.output.report <- function(P.hyp,D.hyp){
	# packs together all the genetic information required for the output.report()

	# Comparison checks between P.hyp and D.hyp 
	compare.hypothesis.inputs(P.hyp,D.hyp)

	# primary objects
	cspData <- read.csp.profile(P.hyp$cspFile)
	uncData <- read.unc.profile(P.hyp$cspFile)
	refData <- read.known.profiles(P.hyp$refFile)
	afreq <- load.allele.database(P.hyp$databaseFile,P.hyp$kit)

	# secondary objects
	summary <- summary.generator(refData, cspData)
	estimates <- estimate.csp(refData, cspData)
	nrep	<- nrow(cspData)
	QvK <- queried.vs.known(P.hyp$refFile)
	nameQ <- row.names(refData)[QvK]
	nameK <- row.names(refData)[!QvK]
	
	doDropin = P.hyp$doDropin

	output.report.genetics <- list( 
	cspData = cspData, 
	uncData = uncData,
	refData = refData,
	afreq = afreq,
	summary = summary,
	estimates = estimates,
	nrep = nrep,
	nameQ = nameQ,
	nameK = nameK, 
	P.hyp = P.hyp,
	D.hyp = D.hyp,
	doDropin=doDropin)  
return(output.report.genetics)}

#--------------------------------------------------------------------------------------------------------------------
compare.hypothesis.inputs <- function(P.hyp,D.hyp){
	# Checks the input data and parameters were the same for P and D
	if(!identical(P.hyp$cspFile,D.hyp$cspFile))warning("P and D hypotheses were constructed using two different crime scene files!!")
	if(!identical(P.hyp$refFile,D.hyp$refFile))warning("P and D hypotheses were constructed using two different reference files!!")
	if(!identical(P.hyp$databaseFile,D.hyp$databaseFile))warning("P and D hypotheses were constructed using two different allele database files!!")
	if(!identical(P.hyp$outputPath,D.hyp$outputPath))warning("P and D hypotheses were given two different output paths!!")
	if(!identical(P.hyp$ethnic,D.hyp$ethnic))warning("P and D hypotheses were constructed using two different ethnic codes!!")
	if(!identical(P.hyp$ethnic,D.hyp$ethnic))warning("P and D hypotheses were constructed using two different ethnic codes!!")
	if(!identical(P.hyp$adj,D.hyp$adj))warning("P and D hypotheses were constructed using two different locus adjustment parameter vectors!!")
	if(!identical(P.hyp$fst,D.hyp$fst))warning("P and D hypotheses were constructed using two different fsts!!")
	}

#--------------------------------------------------------------------------------------------------------------------
queried.vs.known <- function(path) {
	# Reads profile from path and returns queried vs known column.
	# Args:
	#	path: Path to file with the profile. 
	raw <- read.table(path, header=T, colClasses='character', row.names=1, sep=',', quote = "\"")
	if(is.null(raw$known.queried))stop("Reference csv must contain a column 'known/queried'. This column must contain one field 'queried'. ")
	if(sum(raw$known.queried=='queried')!=1)stop("The reference csv column 'known/queried' must contain one field 'queried'. ")
	return(raw$known.queried == 'queried')
	}

#--------------------------------------------------------------------------------------------------------------------
estimate.csp <- function(refData, cspData) {
  # Estimate how well each reference profile is represented in the CSP

	nrep <- nrow(cspData)
	# Constructs the result data frame.
	result <- data.frame(array(0, c(nrow(refData), nrep+1)), row.names=rownames(refData))
	colnames(result)[1:nrep] = sapply(1:nrep, function(n) {paste('Rep', n)})
	colnames(result)[nrep+1] = 'Total' 
  
	# now add data.
	for(person in row.names(refData)){
		for(rep in 1:nrep){
			# number of alleles in common for given person and CSP replicate, across all loci
			represented <- c()
			for(locus in 1:ncol(cspData)){
				if(!is.na(cspData[rep,locus])) {
					# figure out unique alleles in reference 
					ref.alleles  <- unique(unlist(refData[person,locus+1]))# +1 ignores first column(queried)
					csp.alleles <- unique(unlist(cspData[rep,locus]))
					# figure out how many of these allele are in CSP
					represented <- c(represented, ref.alleles %in% csp.alleles)
					}
				}
			if(length(represented) > 0) 
			result[person, rep] <- 100*sum(represented)/length(represented)
			}
  		}
	if(nrep==1)  result[, nrep+1] <- result[, nrep]
	else         result[, nrep+1] <- round.0(rowSums(result)/nrep)
   
	# Reorders rows 
	result <- round.0(result[order(result[, nrep+1], decreasing=T), ])
return(result)}

#--------------------------------------------------------------------------------------------------------------------
estimates.reformatter <- function(genetics){
	table <- genetics$estimates
	extra <- data.frame(Contributor=row.names(table))
	result <- cbind(extra,table)
return(result)}

#--------------------------------------------------------------------------------------------------------------------
summary.generator <- function(refData, cspData){
	# summary table for Q and K, showing which of their alleles are replicated, unreplicated, or absent
	table <- as.data.frame(array(,c(nrow(refData)+1,ncol(cspData))))
	colnames(table) <- colnames(cspData)
	row.names(table) <- c(row.names(refData),'Other')

	# two identical tables, will be slightly different formats for rtf and latex
	table.rtf <- table.latex <- table

	# count of unattributable alleles, to establish if they are replicated or not
	rep.counts <- unrep.counts <- c()

	# generate the results 
	for(locus in colnames(cspData)){
		# for unattributable alleles are rep, unrep, absent
		cspAlleles <- unlist(cspData[,locus])
		cspAlleles <- cspAlleles[!is.na(cspAlleles)] # remove NAs
		refAlleles <- unique(unlist(refData[,locus]))
		unattributableAlleles <- cspAlleles[!cspAlleles%in%refAlleles]
		table.rtf['Other',locus] <- summary.helper(unattributableAlleles,cspAlleles)$rtf
		table.latex['Other',locus] <- summary.helper(unattributableAlleles,cspAlleles)$latex

		# for unattributable alleles calculate how many are replicated /unreplicated
		rep.counts <- c(rep.counts, summary.helper(unattributableAlleles,cspAlleles)$rep )
		unrep.counts <- c(unrep.counts, summary.helper(unattributableAlleles,cspAlleles)$unrep )

		# for each contributor calculate which alleles are rep, unrep, absent
		for(name in row.names(refData)){
			refAlleles <- refData[name,locus][[1]]
			table.rtf[name,locus] <- summary.helper(refAlleles,cspAlleles)$rtf
			table.latex[name,locus] <- summary.helper(refAlleles,cspAlleles)$latex
			}}	

		# reformat the counts table (c.)
		c.rep  <- data.frame(loci=colnames(cspData), counts=rep.counts, status='replicated')
		c.unrep <- data.frame(loci=colnames(cspData), counts=unrep.counts, status='unreplicated')
		counts <- rbind(c.rep,c.unrep)
		table <- list(rtf=table.rtf,latex=table.latex)		
		summary <- list(table=table,counts=counts)
return(summary)}

#--------------------------------------------------------------------------------------------------------------------
summary.helper <- function(refAlleles,cspAlleles){
	# intricate and irritating operations required to check each allele 
	rep <- unrep <- absent <- NULL
	for(allele in unique(refAlleles)){
		condition <- sum(cspAlleles%in%allele)
		if(condition>1)rep <- c(rep,allele)
		if(condition==1)unrep <- c(unrep,allele)
		if(condition==0)absent <- c(absent,allele)
		}
	
	# separate by commas, add escape functions such that replicated=bold, and unreplicated=italic, absent=box (underline in rtf)
	# different codes for rtf and latex
	rep.rtf <- rep.latex <- unrep.rtf <- unrep.latex <- absent.rtf <- absent.latex <- NULL

	if(!is.null(rep)){
		rep.rtf <- paste('\\B',paste(rep,collapse=','),'}',sep='')
		rep.latex <- paste('\\textbf{',paste(rep,collapse=','),'}',sep='')
		}
	if(!is.null(unrep)){
		unrep.rtf <- paste('\\I',paste(unrep,collapse=','),'}',sep='')
		unrep.latex <- paste('\\textit{',paste(unrep,collapse=','),'}',sep='')
		}
	if(!is.null(absent)){
		absent.rtf <- paste('\\X',paste(absent,collapse=','),'}',sep='')
		absent.latex <- paste('\\fbox{',paste(absent,collapse=','),'}',sep='')
		}

	rtf <- paste(c(rep.rtf,unrep.rtf,absent.rtf),collapse=',')
	latex <- paste(c(rep.latex,unrep.latex,absent.latex),collapse=',')

return(list(rtf=rtf,latex=latex,rep=length(rep),unrep=length(unrep)))}

#--------------------------------------------------------------------------------------------------------------------
rtf.formater <- function(file){
	# Think of this function as a debugger for the rtf functions.
	# 1. 'TRUE' and 'FALSE' are irritatingly converted into 'Yes' and 'No'
	# 2. the addTable() function in package rtf uses the text length to choose the column width. This means that the 
	# additional escape characters to encode bold or italic or boxes causes havoc, making those columns too wide.
	# This inelegant solution uses my own invented escape characters \B for bold, \I for italic and \X for box, 
	# to keep the extra text to a minimum. This function then converts to the correct rtf coding AFTER the document is created.
	# note the additional '\' is required in R to escape the '\' in rtf
	data <- readLines(file)
	for(n in 1:length(data)){
		line <- data[n]
		line <- gsub('\\B','{\\b ',line,fixed=T)
		line <- gsub('\\I','{\\i ',line,fixed=T)
		line <- gsub('\\X','{\\chbrdr\\brdrs ',line,fixed=T)
		line <- gsub('Yes','TRUE',line,fixed=T)
		line <- gsub('No','FALSE',line,fixed=T)
		data[n] <- line
		}
	write(data,file=file)}

#--------------------------------------------------------------------------------------------------------------------
overall.likelihood.table.reformatter <- function(prosecutionResults,defenceResults){
	P <- -prosecutionResults$optim$bestval
	D <- -defenceResults$optim$bestval
	table <- cbind(
		round.1(data.frame(Prosecution.log10=P,Defence.log10=D,Ratio.log10=P-D)),
		round.0(data.frame(Ratio=toString(signif(10^(P-D),3))))
		)
	table  <- t(table); colnames(table) <- 'estimate'
	extra <- data.frame(calculation=row.names(table))
	result <- cbind(extra,table)
return(result)}

#--------------------------------------------------------------------------------------------------------------------
rcontConvert <- function(refIndiv,rcont){
	# Convert rcont to full rcont (including ref individual)
	# 	refIndiv: reference individual specified in args
	#	rcont: rcont parameters from do.call(optim,params)
	if(refIndiv == 1) rcont = c(1, rcont)
      else if(refIndiv > length(rcont)) rcont = c(rcont, 1)
      else rcont = c(rcont[1:refIndiv-1], 1, rcont[refIndiv:length(rcont)])
return(rcont)}

#--------------------------------------------------------------------------------------------------------------------
calc.dropout = function(results, hypothesis){ 
	# Calculates dropout rates for every contributor subject to dropout and for every replicate
	# 	hypothesis: generated by either defence.hypothesis() or  prosecution.hypothesis()
	#	results: results from do.call(optim,params)

	# Number of contributors subject to dropout + number of unknowns + 1 (dropin)
	N <- nrow(hypothesis$dropoutProfs) + hypothesis$nUnknowns + 1
	nrep <- nrow(hypothesis$cspProfile)
	do <- results$optim$bestmem[grep("dropout",names(results$optim$bestmem))]
	rcont <- results$optim$bestmem[grep("rcont",names(results$optim$bestmem))]
	rcont <- rcontConvert(hypothesis$refIndiv,rcont)
	BB <- results$optim$bestmem[grep("power",names(results$optim$bestmem))]
	drout <- matrix(0,N-1,nrep)
	if(N>1) for(x in 1:(N-1)) for(z in 1:nrep) drout[x,z] <- do[z]/(do[z]+rcont[x]^-BB*(1-do[z]))
return(drout)}

#--------------------------------------------------------------------------------------------------------------------
dropDeg <- function(hypothesis,results,genetics){
	# Output tables for dropout and degradation
	# 	hypothesis: generated by either defence.hypothesis() or prosecution.hypothesis() 
	#	results: results from do.call(optim,params)

	dropoutsLogical <- determine.dropout(genetics$refData,genetics$cspData)
	Qdrop <- dropoutsLogical[names(dropoutsLogical)==genetics$nameQ]
	knownDropoutsLogical <- dropoutsLogical[names(dropoutsLogical)!=rownames(hypothesis$queriedProfile)]
	# Number of Known contributors with No Dropout
	Nknd <- length(which(!knownDropoutsLogical)) 

	# create the row names, are arranged into the correct order: K, Q/X, U
	names.Dropout <- names(which(knownDropoutsLogical))
	names.NoDropout <- names(which(!knownDropoutsLogical))
	nameK <- c(names.NoDropout,names.Dropout)	
	Names=c()
	if(length(nameK)>0)for (n in 1:length(nameK)) Names[n]=paste(nameK[n],' (K',n,')',sep='')
	Names[length(c(genetics$nameQ,nameK))] = paste(genetics$nameQ,'(Q)')
	nU <- hypothesis$nUnknowns
	if(hypothesis$hypothesis=="defence"){
		Names[length(c(genetics$nameQ,nameK))] = 'X' 
		nU <- nU -1 # it already contains an extra one for X
		}
	if(nU>0)for(n in 1:nU)Names <- c(Names,paste('U',n,sep=''))

	# column names for the table
	runNames = c();for(rName in 1:genetics$nrep)runNames[rName]=paste('Dropout',paste('(Run ',rName,')',sep=''))

	# dropout values
	h <- h1 <- round.3(calc.dropout(results, hypothesis))	
	# degradation values
	d <- d1 <- 10^results$optim$bestmem[grep("degradation",names(results$optim$bestmem))]
	# under pros, if Q is not subject to dropout, an extra 0 needs to be added to both dropout and degradation for Q
	if(hypothesis$hypothesis=="prosecution"){
		if(Qdrop==F){
			h = rbind(h[0:nrow(hypothesis$dropoutProfs),,drop=F],0)
			d = c(d[0:nrow(hypothesis$dropoutProfs)],0)
			if(hypothesis$nUnknowns>0){
				h = rbind(h,h1[(nrow(hypothesis$dropoutProfs)+1):length(d1),,drop=F])
				d = c(d,d1[(nrow(hypothesis$dropoutProfs)+1):length(d1)])
				}
			}
		}

	# Dropout is assumed zero for fully represented profiles, therefore suffixed on the dropout table
	if(Nknd>0) for(n in 1:Nknd){
		h = rbind(0,h)
		d = c(0,d)
		}
	Dropout = round.3(data.frame(h,d))
	colnames(Dropout)= c(runNames[1:genetics$nrep],'Degradation (overall)')
	row.names(Dropout) = Names
return(Dropout)}

#--------------------------------------------------------------------------------------------------------------------
overall.dropout.table.reformatter <- function(prosecutionHypothesis,defenceHypothesis,prosecutionResults,defenceResults,genetics){
	P.dropDeg <- dropDeg(prosecutionHypothesis,prosecutionResults,genetics)
	P.extra <- data.frame(hypothesis=rep('Prosecution',nrow(P.dropDeg)),contributor=rownames(P.dropDeg))
	P.combined <- cbind(P.extra,P.dropDeg)
	D.dropDeg <- dropDeg(defenceHypothesis,defenceResults,genetics)
	D.extra <- data.frame(hypothesis=rep('Defence',nrow(D.dropDeg)),contributor=rownames(D.dropDeg))
	D.combined <- cbind(D.extra,D.dropDeg)
	combined <- rbind(P.combined,D.combined)
return(combined)}

#--------------------------------------------------------------------------------------------------------------------
overall.dropin.table.reformatter <- function(prosecutionResults,defenceResults){
	P.dropin <- prosecutionResults$optim$bestmem["dropin"]
	D.dropin <- defenceResults$optim$bestmem["dropin"]
	table <- round.3(data.frame(hypothesis=c('Prosecution','Defence'),dropin=c(P.dropin,D.dropin)))
return(table)}

#--------------------------------------------------------------------------------------------------------------------
optimised.parameter.table.reformatter <- function(hypothesis,result){
	table <- t(rbind(result$optim$bestmem,result$member$lower,result$member$upper))
	extra <- data.frame(parameter=rownames(table))
	combined <- round.3(cbind(extra,table))
	colnames(combined) <- c('parameter','estimate','lower bound','upper bound')
return(combined)}

#--------------------------------------------------------------------------------------------------------------------
chosen.parameter.table.reformatter <- function(prosecutionHypothesis){
	keep <- c(7,8,9,11,12,13,14,15)
	table <- as.data.frame(unlist(prosecutionHypothesis[keep]))
	extra <- data.frame(Parameter= rownames(table))
	combined <- cbind(extra,table)
	colnames(combined) <- c('Parameter','User input')
return(combined)}

#--------------------------------------------------------------------------------------------------------------------

file.inputs.table.reformatter <- function(prosecutionHypothesis)
	{
	tmp = strsplit(prosecutionHypothesis$cspFile,split="/")[[1]]
	cspFile = tmp[length(tmp)]
	tmp = strsplit(prosecutionHypothesis$refFile,split="/")[[1]]
	refFile = tmp[length(tmp)]
	if(is.null(prosecutionHypothesis$databaseFile))
		{
		databaseFile = "DNA17-db.txt (Default)"
		} else {
		tmp = strsplit(prosecutionHypothesis$databaseFile,split="/")[[1]]
		databaseFile = tmp[length(tmp)]
		}
	table = data.frame(file=c("CSP","Reference","Database"),input=c(cspFile,refFile,databaseFile))
	colnames(table) = c("File","Used")
	return(table)
	}

#--------------------------------------------------------------------------------------------------------------------


# homozygous match probability
hom <- function(pa, fst=0.02)

    {

    numerator = ((2*fst)+((1-fst)*pa))*((3*fst)+((1-fst)*pa))

    denominator = (1+fst)*(1+(2*fst))

    return(numerator/denominator)

    }

# heterozygous match probability
het <- function(pa, pb, fst=0.02)

    {

    numerator = (fst+((1-fst)*pa))*(fst+((1-fst)*pb))

    denominator = (1+fst)*(1+(2*fst))

    return(2*(numerator/denominator))

    }


#singleFst = function(p,fst)
#	{
#	(fst+(1-fst)*p)/(1+fst)
#	}

matchProb = function(hypothesis,rr,fst=0.02,sep=FALSE)
    {
	ideal.match <- c()
	for(j in 1:ncol(hypothesis$queriedProfile))
	    {
	    print(paste0("...",colnames(hypothesis$queriedProfile)[j],"..."))
		af = hypothesis$alleleDb[j][[1]]
		kn = hypothesis$queriedProfile[,j][[1]]
		p1 = af[row.names(af)==kn[1],1]
		p2 = af[row.names(af)==kn[2],1]

        # undo most of fst adjustment
        p1 = (p1*(1+fst))-fst
        p2 = (p2*(1+fst))-fst

        if(kn[1]==kn[2])
            {
            # undo last bit of fst adjustment
            p1 = (p1-fst)/(1-fst)
            p2 = (p2-fst)/(1-fst)
            # get match new fst adjusted match prob
            ideal.match = c(ideal.match,rr[2]+(rr[1]*((2*fst+(1-fst)*p1)/(1+fst)))+((1-sum(rr))*hom(p1,fst=fst)))
            } else {
            # undo last bit of fst adjustment
            p1 = p1/(1-fst)
            p2 = p2/(1-fst)
            # get match new fst adjusted match prob
            ideal.match = c(ideal.match,rr[2]+(rr[1]*((fst+(1-fst)*((p1+p2)/2))/(1+fst)))+((1-sum(rr))*het(p1,p2,fst=fst)))
            }
		}
	if(sep)
		{
		return(1/ideal.match)
		} else {
		return(1/prod(ideal.match))
		}
    }

ideal <- function(hypothesis)
    {
	# Calculates idealised likelihood assuming Q is perfect match
	# Parameters:
	# 	hypothesis: generated by either defence.hypothesis() or prosecution.hypothesis(), although rr only makes sense under defence
	#	rr: relatedness arguments from args
	ideal.match = getMatchProb(hypothesis)

	result <- data.frame(calculation =c('likelihood ratio','Log10 likelihood ratio'),estimate=c(toString(signif(ideal.match,3)),round.1(log10(ideal.match))))
return(result)}

#--------------------------------------------------------------------------------------------------------------------
# a few odds and sods to simplify the reports
line <- paste(rep('-',95),collapse='-') # creates a straight line
spacer <- function(doc,n=1) for(x in 1:n)addNewLine(doc) # adds blank lines
fs0 <- 26 # font size for header (main)
fs1 <- 20 # font size for header (sub1)
fs2 <- 15 # font size for header (sub2)
fs3 <- 8 # tiny, for the big tables
#--------------------------------------------------------------------------------------------------------------------
common.report.section <- function(names,genetics,resTable=NULL){
	# objects common to both the allele report and the final output report are done once here, for consistency, and saves repeating code

	# Create a new Docx. 
	doc <- RTF(names$filename, width=11,height=8.5,omi=c(1,1,1,1))

	addParagraph(doc, line)
	spacer(doc,3)
	addHeader( doc, title=substr(names$title,1,nchar(names$title)-1), subtitle=names$subtitle, font.size=fs0 )
	addHeader( doc, hyp.P(genetics), font.size=fs2 )
	addHeader( doc, hyp.D(genetics), font.size=fs2 )
	addParagraph(doc, line)
	if(!is.null(resTable))
		{
		addHeader(doc, "Overall Likelihood", TOC.level=2, font.size=fs2)
		addTable(doc, resTable ,col.justify='C', header.col.justify='C')
		spacer(doc,3)
		}
	addPageBreak(doc, width=11,height=8.5,omi=c(1,1,1,1) )
#	addTOC(doc)
#	addPageBreak(doc, width=11,height=8.5,omi=c(1,1,1,1) )

	addHeader(doc, "Data provided by forensic scientist", TOC.level=1,font.size=fs1)
	addHeader(doc, "Crime scene profiles (CSP)",TOC.level=2,font.size=fs2)
	addTable(doc, csp.table.reformatter(genetics),col.justify='C', header.col.justify='C',font.size=fs3)
	spacer(doc,3)

	addHeader(doc, "Reference profiles", TOC.level=2, font.size=fs2 )
	addTable(doc, reference.table.reformatter(genetics), col.justify='C', header.col.justify='C',font.size=fs3)
	spacer(doc,1)
	addParagraph(doc, "Alleles that are \\Breplicated}, \\Iunreplicated} or \\Xabsent} in the crime scene profile, using the certain designations only." ) # will use rtf.formater() to convert '\\B','\\I' and '\\X' into rtf encoding
	spacer(doc,3)

	addHeader(doc, "Summary", TOC.level=1,font.size=fs1)
	addHeader(doc, "Unattributable alleles", TOC.level=2, font.size=fs2)
	plot.function <- unattributable.plot.maker(genetics)
	addPlot( doc, plot.fun = print, width = 9, height = 2.8, x = plot.function )
	addParagraph( doc, "The number of 'certain' alleles that cannot be attributed to the known profile(s).")
	spacer(doc,3)

	addHeader(doc, "Unusual alleles", TOC.level=2, font.size=fs2 )
	addTable(doc, unusual.alleles(genetics), col.justify='C', header.col.justify='C')
	spacer(doc,1)
	addParagraph( doc, "Alleles are automatically checked against the database. An error will be reported if an allele is absent from the database, or present more than once, or if a locus is absent.")
	spacer(doc,3)

	addHeader(doc, "Approximate representation", TOC.level=2, font.size=fs2)
	addTable(doc, estimates.reformatter(genetics), col.justify='C', header.col.justify='C')
	spacer(doc,1)
	addParagraph( doc, "The fraction of an individual's alleles (as a percentage) that have been designated as 'certain' alleles in each replicate. This estimate is not used by likeLTD, and is intended to assist informal assessments of possible known contributors to the CSP. A more formal approach is to do a likeLTD run to compute the likelihood ratio (LR) for that individual contributor.")
	spacer(doc,3)

return(doc)}

#--------------------------------------------------------------------------------------------------------------------
allele.report <- function(admin,file=NULL){

 	# admin: List containing administration data, as packed by pack.admin.input()
	# file: defaults to creating its own sequential file names (to avoid overwriting)

	# create genetics information
	genetics <- pack.genetics.for.allele.report(admin)

	# Latex output
	latex.maker(genetics, file.path(admin$outputPath,'table.tex')  )

	names <- filename.maker(admin$outputPath,admin$caseName,file,type='allele')
	names$subtitle <- admin$caseName

	doc <- common.report.section(names,genetics)

	addHeader(doc, "Suggested parameter values", , TOC.level=2, font.size=fs2)
	addTable(doc, hypothesis.generator(genetics), col.justify='C', header.col.justify='C')
	spacer(doc,1)
	addParagraph( doc, "Recommended values for 'nUnknowns', choose from 0,1 or 2 (likeLTD automatically adds and additional unknown X to the defence hypothesis in place of the queried profile Q).")
	addParagraph( doc, "Recommended values for 'doDropin', choose from 'TRUE' or 'FALSE'.")
	addParagraph( doc, "All the attributable alleles must either come from an unknown or dropin.")
	spacer(doc,3)

	addHeader(doc, "System information", TOC.level=1,font.size=fs1)
	addTable(doc,  system.info(), col.justify='L', header.col.justify='L')

done(doc)
rtf.formater(names$filename)
}

#--------------------------------------------------------------------------------------------------------------------
output.report <- function(prosecutionHypothesis,defenceHypothesis,results,file=NULL){

 	# prosecutionHypothesis: generated by prosecution.hypothesis()
 	# defenceHypothesis: generated by defence.hypothesis()
	# results: results from enumerate(prosecutionParams, defenceParams)
	# file: defaults to creating its own sequential file names (to avoid overwriting)
	
	prosecutionResults = results$Pros
	defenceResults = results$Def

	# create genetics information 
	genetics <- pack.genetics.for.output.report(prosecutionHypothesis,defenceHypothesis)
	names <- filename.maker(prosecutionHypothesis$outputPath,prosecutionHypothesis$caseName,file,type='results')
	names$subtitle <- prosecutionHypothesis$caseName

	resTable = overall.likelihood.table.reformatter(prosecutionResults,defenceResults)
	doc <- common.report.section(names,genetics,resTable)

	addHeader(doc, "Likelihoods at each locus", TOC.level=2, font.size=fs2)
	addTable(doc, local.likelihood.table.reformatter(prosecutionHypothesis,defenceHypothesis,prosecutionResults,defenceResults) ,col.justify='C', header.col.justify='C',font.size=8)
	spacer(doc,3)

	addHeader(doc, "Theoretical maximum LR", TOC.level=2, font.size=fs2)
	addTable(doc, ideal(defenceHypothesis), col.justify='C', header.col.justify='C')
	spacer(doc,3)

	addHeader(doc, "Dropout and degradation parameter estimates", TOC.level=2, font.size=fs2)
	addTable(doc, overall.dropout.table.reformatter(prosecutionHypothesis,defenceHypothesis,prosecutionResults,defenceResults,genetics), col.justify='C', header.col.justify='C')
	spacer(doc,3)

	addHeader(doc, "Dropin parameter estimates", TOC.level=2, font.size=fs2)
	addTable(doc, overall.dropin.table.reformatter(prosecutionResults,defenceResults), col.justify='C', header.col.justify='C')
	spacer(doc,3)

	addHeader(doc, "User defined parameters", TOC.level=1, font.size=fs1)
	addTable(doc, chosen.parameter.table.reformatter(prosecutionHypothesis), col.justify='L', header.col.justify='L')
	spacer(doc,3)

	addHeader(doc, "Input files", TOC.level=1, font.size=fs1)
	addTable(doc, file.inputs.table.reformatter(prosecutionHypothesis), col.justify='L', header.col.justify='L')
	spacer(doc,3)

	# seed used
	addHeader(doc, "Seed used", TOC.level=1, font.size=fs1)
	addTable(doc, seedTable(results), col.justify='L', header.col.justify='L')

	addHeader(doc, "Optimised parameters", TOC.level=1, font.size=fs1)
	addHeader(doc, "Prosecution parameters", TOC.level=2, font.size=fs2)
	addTable(doc, optimised.parameter.table.reformatter(prosecutionHypothesis,prosecutionResults), col.justify='L', header.col.justify='L')
	spacer(doc,3)

	addHeader(doc, "Defence parameters", TOC.level=2, font.size=fs2)
	addTable(doc, optimised.parameter.table.reformatter(defenceHypothesis,defenceResults), col.justify='L', header.col.justify='L')
	spacer(doc,3)

	addHeader(doc, "System information", TOC.level=1,font.size=fs1)
	addTable(doc,  system.info(), col.justify='L', header.col.justify='L')

done(doc)
rtf.formater(names$filename)	
}
#--------------------------------------------------------------------------------------------------------------------

Try the likeLTD package in your browser

Any scripts or data that you put into this service are public.

likeLTD documentation built on Feb. 11, 2018, 3:10 p.m.