Nothing
#--------------------------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------------------------
# 15th March 2014, Adrian Timpson
# All functions required to produce both main reports. Utilising package 'rtf' to produce .doc outputs.
# A premininary 'allele report' is generated by allele.report(), using only 'admin'. This is intended to assist the user in choosing parameter values for a proper run of the likeLTD program.
# Once the user has obtained Pros and Def results, a full output report may be generated by output.report().
# Many sections are common to both reports, so for simplicity these are performed by common.report.section()
# Most other functions perform a specific task in preparing a section of the report
# The exception to this is latex.maker() and its three helper functions, which outputs a separate .tex file containing the two allele tables (crime scene and references)
# The admin report draws its administrative data (default parameters, filepaths etc) from the 'admin' object
# In contrast, the final output report draws its administrative data from the values that were ACTUALLY used (since there are various opportunities downstream of the 'admin' object for these to be changed by the user).
# This means that the object 'genetics' generated by pack.genetics.for.allele.report() for the allele report is different to the 'genetics' object generated by pack.genetics.for.output.report() for the final report.
#--------------------------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------------------------
rounder <- function(x,n){
# deals with three repetitive issues (that are more insipid than might be expected! :
# numerics are best converted to text, to prevent MS Word from formatting them
# rounding
# rounding a number with a few zeros results in 0 when we actually want 0.00
if(is.numeric(x))x <- round(x,n) # method for vectors
if(is.data.frame(x)){ # method for data.frames, each column at a time
for(c in 1:ncol(x))if(is.numeric(x[,c]))x[,c] <- round(x[,c],n)
}
result <- format(x,nsmall=n,scientific=F)
return(result)}
#--------------------------------------------------------------------------------------------------------------------
# shorthand to avoid bothering with the argument n, for clarity in the code. Can handle vectors and data.frames
round.0 <- function(x)rounder(x,0)
round.1 <- function(x)rounder(x,1)
round.3 <- function(x)rounder(x,3)
#--------------------------------------------------------------------------------------------------------------------
latex.table.header <- function(genetics){
# helper function for latex.maker()
N <- ncol(genetics$summary$table$latex)+1
text <- c('
\\documentclass{article}\n
\\usepackage{rotating}\n
\\begin{document}\n
\\begin{sidewaystable}[p]\n
%\\setcounter{table}{1}\n
',
paste('\\begin{center}\\begin{tabular}{|',paste(rep('c|',N),collapse=''),'}\\hline',sep=''),
paste('&',paste(names(genetics$summary$table$latex),collapse='&'),'\\\\\\hline',sep=''),
paste('\\multicolumn{',N,'}{|l|}{Crime scene profiles}\\\\\\hline',sep=''))
return(text)}
#--------------------------------------------------------------------------------------------------------------------
csp.table.to.latex <- function(genetics){
# helper function for latex.maker()
# formats the CSP table of alleles for latex
table.csp <- table.collapser(genetics$cspData)
table.unc <- table.collapser(genetics$uncData)
table <- NULL;
for(n in 1:genetics$nrep){
table <- rbind(table,table.csp[n,])
table <- rbind(table,table.unc[n,])
}
colnames(table) <- colnames(genetics$cspData)
row.names(table) <- rep(c('alleles','uncertain'),genetics$nrep)
text = c()
N.col <- ncol(table)
N.row <- nrow(table)
for(row in 1:N.row){
text.line <- character(N.col)
for(col in 1:N.col){
if(as.character(table[row,col])=='')text.line[col] <- '--'
if(as.character(table[row,col])!='')text.line[col] <- gsub(' ',',',table[row,col])
}
if(row%%2==1)text=c(text,paste(paste(row.names(table)[row],paste(text.line,collapse='&'),sep='&'),'\\\\',sep=''))
if(row%%2==0)text=c(text,paste(paste(row.names(table)[row],paste(text.line,collapse='&'),sep='&'),'\\\\\\hline',sep=''))
}
return(text)}
#--------------------------------------------------------------------------------------------------------------------
ref.table.to.latex <- function(genetics){
# helper function for latex.maker()
# table: Reference table, from genetics$summary$table$latex
table <- genetics$summary$table$latex
text = paste('\\multicolumn{',ncol(table)+1,'}{|l|}{Summary:}\\\\\\hline',sep='')
for(row in 1:nrow(table)){
text=c(text,paste(paste(row.names(table)[row],paste(table[row,],collapse='&'),sep='&'),'\\\\\\hline',sep=''))
}
return(text)}
#--------------------------------------------------------------------------------------------------------------------
latex.table.footer <- function(){
# helper function for latex.maker()
text <- c('
\n
\\end{tabular}\n
\\caption{Alleles that are \\textbf{replicated} \\textit{unreplicated} and \\fbox{absent} in the crime scene profile}\n
\\end{center}\n
\\end{sidewaystable}\n
\\end{document}\n
')
return(text)}
#--------------------------------------------------------------------------------------------------------------------
latex.maker <- function(genetics,filename){
# table1: CSP table produced by allele.table()
# table2: Reference table, from genetics$summary$table$latex
text.1 <- latex.table.header(genetics)
text.2 <- csp.table.to.latex(genetics)
text.3 <- ref.table.to.latex(genetics)
text.4 <- latex.table.footer()
text <- c(text.1,text.2,text.3,text.4)
file <- file(filename)
writeLines(text,file)
close(file)}
#--------------------------------------------------------------------------------------------------------------------
pack.admin.input <- function(cspFile, refFile, caseName='dummy',databaseFile=NULL, kit=NULL, linkageFile=NULL, outputPath=getwd() ) {
# Packs and verifies administrative information.
# Documentation in man directory.
paths <- c(cspFile, refFile)
if(!is.null(databaseFile)) paths <- c(databaseFile, paths, recursive=TRUE)
if(!is.null(linkageFile)) paths <- c(linkageFile, paths, recursive=TRUE)
for(path in paths) {
if(!file.exists(path))
stop(paste(path, "does not exist."))
else {
info <- file.info(path)
if(info$isdir) stop(paste(path, "is not a file."))
}
} # loop over files.
if(file.exists(outputPath) & !file.info(outputPath)$isdir)
stop(paste(outputPath, " exists and is not a directory."))
admin <- list( caseName=caseName,
databaseFile=databaseFile,
linkageFile=linkageFile,
cspFile=cspFile,
refFile=refFile,
outputPath=outputPath,
kit=kit )
return(admin)}
#--------------------------------------------------------------------------------------------------------------------
load.allele.database <- function(path=NULL,kit=NULL) {
# Loads allele database
# Documentation is in man directory.
if(is.null(path)) { # Load default database
if(is.null(kit)) kit = "DNA17"
dummyEnv <- new.env()
data(list=paste0(kit,'-db'), package='likeLTD', envir=dummyEnv)
return(dummyEnv[[paste0(kit,'-db')]])
}
if(!file.exists(path)) stop(paste(path, "does not exist."))
read.table(path, sep="\t", header=TRUE)
}
#--------------------------------------------------------------------------------------------------------------------
unattributable.plot.maker <- function(genetics){
# define ggplot variables to avoid NOTE from CRAN
aes <- geom_bar <- scale_fill_grey <- NULL
loci <- counts <- status <- NULL
plot <- ggplot(data=genetics$summary$counts, aes(x=loci,y=counts,fill=status))+
geom_bar(stat='identity')+
scale_fill_grey()
return(plot)}
#--------------------------------------------------------------------------------------------------------------------
table.collapser <- function(table){
# collapses a table so that fields stored as lists become comma separated strings
result <- array(,dim(table))
for(row in 1:nrow(table)){
for(locus in 1:ncol(table)){
result[row,locus] <- paste(unlist(table[row,locus]),collapse=',')
}}
return(result)}
#--------------------------------------------------------------------------------------------------------------------
unusual.alleles.per.table <- function(table,afreq){
# finds unusual alleles in any specific table, (provided in original listed format)
# Get names of populations in database
startCol = grep("BP",colnames(afreq))+1
pops = colnames(afreq)[startCol:ncol(afreq)] # first 3 columns are Marker, Allele, BP
rare <- matrix(ncol=3+length(pops),nrow=0)
colnames(rare) = c("locus","allele",paste(pops,".freq",sep=""),"error")
loci <- colnames(table); loci <- loci[loci!='queried']# ref table has a unwanted column 'queried'
for(locus in loci)
{
if(!locus%in%afreq$Marker)
{ # check the loci are even in the database
frame <- matrix(c(locus,NA,rep(NA,times=length(pops),"Entire locus missing from database")),ncol=3+length(pops))
colnames(frame) = c("locus","allele",paste(pops,".freq",sep=""),"error")
rare <- rbind(rare, frame)
}
if(locus%in%afreq$Marker)
{ # only continue if the locus is in the database
for(row in 1:nrow(table))
{
alleles <- unique(unlist(table[row,locus]))
for(allele in alleles[!is.na(alleles)])
{ # ignore NAs in the csv file
condition <- afreq$Marker==locus & afreq$Allele==allele
x <- afreq[condition,]
if(nrow(x)==1)
{ # if the allele is present once in the database (should be!)
# get whether each population is less than 2
sizeCondition = NULL
for(i in 1:length(pops))
{
eval(parse(text=paste("sizeCondition = c(sizeCondition, x$",pops[i],"<2)",sep="")))
}
if(any(sizeCondition))
{
print(paste(locus,allele,sep="_",collapse="_"))
frame <- matrix(c(locus,allele,x[startCol:length(x)],'NA'),ncol=3+length(pops))
colnames(frame) = c("locus","allele",paste(pops,".freq",sep=""),"error")
rare <- rbind(rare, frame)
}
}
if(nrow(x)==0)
{ # if the allele is absent from database it is probably a typo
frame <- matrix(c(locus,allele,rep(NA,times=length(pops)),"Allele absent from database,check for typo"),ncol=3+length(pops))
colnames(frame) = c("locus","allele",paste(pops,".freq",sep=""),"error")
rare <- rbind(rare, frame)
}
if(nrow(x)>1)
{ # if the allele is more than once there is a problem with the database!
frame <- matrix(c(locus,allele,rep(NA,times=length(pops)),"Allele present multiple times in database"),ncol=3+length(pops))
colnames(frame) = c("locus","allele",paste(pops,".freq",sep=""),"error")
rare <- rbind(rare, frame)
}
} # loop over alleles
} # loop over replicates
}} # loop over loci
return(unique(rare))}
#--------------------------------------------------------------------------------------------------------------------
unusual.alleles <- function(genetics){
# creates and formats a combined table of unusual alleles
t1.tmp <- unusual.alleles.per.table(genetics$refData,genetics$afreq)
t1 <- cbind(rep('Reference profiles',nrow(t1.tmp)),t1.tmp)
t2.tmp <- unusual.alleles.per.table(genetics$cspData,genetics$afreq)
t2 <- cbind(rep('Crime scene certain',nrow(t2.tmp)),t2.tmp)
t3.tmp <- unusual.alleles.per.table(genetics$uncData,genetics$afreq)
t3 <- cbind(rep('Crime scene uncertain',nrow(t3.tmp)),t3.tmp)
colnames(t1)[1]=colnames(t2)[1]=colnames(t3)[1]="source"
table <- as.data.frame(rbind(t1,t2,t3))
if(nrow(table)==0)table <- data.frame(status='no unusual alleles present')
return(table)}
#--------------------------------------------------------------------------------------------------------------------
csp.table.reformatter <- function(genetics){
table.csp <- table.collapser(genetics$cspData)
table.unc <- table.collapser(genetics$uncData)
table <- NULL;
for(n in 1:genetics$nrep){
table <- rbind(table,table.csp[n,])
table <- rbind(table,table.unc[n,])
}
colnames(table) <- colnames(genetics$cspData)
extra <- data.frame(run=c(rbind(1:genetics$nrep,rep(NA,genetics$nrep))),status=rep(c("certain","uncertain"),genetics$nrep))
combined <- cbind(extra,table)
return(combined)}
#--------------------------------------------------------------------------------------------------------------------
reference.table.reformatter <- function(genetics){
table <- genetics$summary$table$rtf
extra <- data.frame(profile=row.names(table))
combined <- cbind(extra,table)
return(combined)}
#--------------------------------------------------------------------------------------------------------------------
local.likelihood.table.reformatter <- function(prosecutionHypothesis,defenceHypothesis,prosecutionResults,defenceResults){
P <- log10(locus.likes(prosecutionHypothesis,prosecutionResults))
D <- log10(locus.likes(defenceHypothesis,defenceResults))
table <- t(data.frame(Prosecution.log10=P,Defence.log10=D,Ratio.log10=(P-D),Ratio=10^(P-D)))
extra <- data.frame(Likelihood=row.names(table))
result <- round.3(cbind(extra,table))
return(result)}
#--------------------------------------------------------------------------------------------------------------------
hypothesis.generator <- function(genetics){
# replicated alleles may not be explained by dropin, therefore this dictates minimum number of Us (minU)
# maximum Us determined by whichever locus has the most unattributables (rep and unrep combined)
# reports all U + dropin combos between minU and maxU
table <- genetics$summary$counts
rep <- subset(table,table$status=='replicated')$counts
unrep <- subset(table,table$status=='unreplicated')$counts
minU <- ceiling(max(rep)/2)
maxU <- ceiling(max(rep+unrep)/2)
unknowns <- minU:maxU
N <- length(unknowns)
dropins <- numeric(N)
dropins.logic <- character(N)
recommendation <- character(N)
for(n in 1:N){
dropins[n] <- sum(pmax(0,rep+unrep-2*unknowns[n]))
if(dropins[n]<=2)recommendation[n]<-"recommended"
if(dropins[n]==3)recommendation[n]<- "worth considering"
if(unknowns[n]==3)recommendation[n]<-"Can only be evaluated by removing the additional U from defence"
if(unknowns[n]>3)recommendation[n]<-"Too many U's to evaluate"
}
dropins.logic <- as.character(dropins!=0)
result <- data.frame(nUnknowns=unknowns, doDropin=dropins.logic, Recommendation=recommendation)
return(result)}
#--------------------------------------------------------------------------------------------------------------------
system.info <- function(){
date <- date()
package.info <- sessionInfo()$otherPkgs$likeLTD
sys.info <- Sys.info()
Details <- t(data.frame(Details=c(date,package.info,sys.info) ))
Details <- gsub("\n","",Details ,fixed=T)
Details <- gsub(" ","",Details ,fixed=T)
Type <- data.frame(Type=c('Date report generated:',names(package.info),names(sys.info)))
all <- cbind(Type,Details)
return(all)}
#--------------------------------------------------------------------------------------------------------------------
filename.maker <- function(outputPath,caseName,filename,type=NULL){
# type: report type, one of 'allele' or 'results'
# construct title
if(is.null(type)) title <- paste0(caseName,'-Report-') # shouldn't be possible to remain as NULL, but just incase
if(type=='allele')title <- paste0(caseName,'-Allele-Report-')
if(type=='results')title <- paste0(caseName,'-Evaluation-Report-')
# assign a filename, if NULL was handed down
if(is.null(filename)){
n <- 1
filename <- file.path(outputPath,paste(title,n,'.doc',sep=''))
# prevent overwriting
while(file.exists(filename)){
n <- n + 1
filename <- file.path(outputPath,paste(title,n,'.doc',sep=''))
}
}
return(list(filename=filename,title=title))}
#--------------------------------------------------------------------------------------------------------------------
hyp.P <- function(genetics){
Q <- paste(genetics$nameQ,'(Q)')
U <- paste(genetics$P.hyp$nUnknowns,'U',sep='')
HP <- paste('Prosecution Hypothesis:',paste(c(Q,genetics$nameK,U),collapse=' + ') )
if(!is.null(genetics$doDropin))
{
if(genetics$doDropin)
{
HP = paste0(HP," + dropin")
}
}
if(is.null(genetics$nameK))HP <- NULL # genetics object is different for allele report or final report
return(HP)}
hyp.D <- function(genetics){
X <- 'Unknown (X)'
U <- paste(genetics$D.hyp$nUnknowns-1,'U',sep='')
HD <- paste('Defence Hypothesis:',paste(c(X,genetics$nameK,U),collapse=' + ') )
if(!is.null(genetics$doDropin))
{
if(genetics$doDropin)
{
HD = paste0(HD," + dropin")
}
}
if(is.null(genetics$nameK))HD <- NULL # genetics object is different for allele report or final report
return(HD)}
#--------------------------------------------------------------------------------------------------------------------
locus.likes <- function(hypothesis,results,...){
# Generate locus likelihoods from overall likelihood
# hypothesis: generated by either defence.hypothesis() or
# prosecution.hypothesis()
# results: results from do.call(optim,params)
model <- create.likelihood.vectors(hypothesis)
arguments <- relistArguments(results$optim$bestmem, hypothesis, ...)
likes <- do.call(model,arguments)
likes <- likes$objectives * likes$penalties
}
#--------------------------------------------------------------------------------------------------------------------
pack.genetics.for.allele.report <- function(admin){
# packs together all the genetic information required for the allele.report()
# primary objects
cspData <- read.csp.profile(admin$cspFile)
uncData <- read.unc.profile(admin$cspFile)
refData <- read.known.profiles(admin$refFile)
afreq <- load.allele.database(admin$databaseFile,admin$kit)
# secondary objects
summary <- summary.generator(refData, cspData)
estimates <- estimate.csp(refData, cspData)
nrep <- nrow(cspData)
allele.report.genetics <- list(
cspData = cspData,
uncData = uncData,
refData = refData,
afreq = afreq,
summary = summary,
estimates = estimates,
nrep = nrep)
return(allele.report.genetics)}
#--------------------------------------------------------------------------------------------------------------------
pack.genetics.for.output.report <- function(P.hyp,D.hyp){
# packs together all the genetic information required for the output.report()
# Comparison checks between P.hyp and D.hyp
compare.hypothesis.inputs(P.hyp,D.hyp)
# primary objects
cspData <- read.csp.profile(P.hyp$cspFile)
uncData <- read.unc.profile(P.hyp$cspFile)
refData <- read.known.profiles(P.hyp$refFile)
afreq <- load.allele.database(P.hyp$databaseFile,P.hyp$kit)
# secondary objects
summary <- summary.generator(refData, cspData)
estimates <- estimate.csp(refData, cspData)
nrep <- nrow(cspData)
QvK <- queried.vs.known(P.hyp$refFile)
nameQ <- row.names(refData)[QvK]
nameK <- row.names(refData)[!QvK]
doDropin = P.hyp$doDropin
output.report.genetics <- list(
cspData = cspData,
uncData = uncData,
refData = refData,
afreq = afreq,
summary = summary,
estimates = estimates,
nrep = nrep,
nameQ = nameQ,
nameK = nameK,
P.hyp = P.hyp,
D.hyp = D.hyp,
doDropin=doDropin)
return(output.report.genetics)}
#--------------------------------------------------------------------------------------------------------------------
compare.hypothesis.inputs <- function(P.hyp,D.hyp){
# Checks the input data and parameters were the same for P and D
if(!identical(P.hyp$cspFile,D.hyp$cspFile))warning("P and D hypotheses were constructed using two different crime scene files!!")
if(!identical(P.hyp$refFile,D.hyp$refFile))warning("P and D hypotheses were constructed using two different reference files!!")
if(!identical(P.hyp$databaseFile,D.hyp$databaseFile))warning("P and D hypotheses were constructed using two different allele database files!!")
if(!identical(P.hyp$outputPath,D.hyp$outputPath))warning("P and D hypotheses were given two different output paths!!")
if(!identical(P.hyp$ethnic,D.hyp$ethnic))warning("P and D hypotheses were constructed using two different ethnic codes!!")
if(!identical(P.hyp$ethnic,D.hyp$ethnic))warning("P and D hypotheses were constructed using two different ethnic codes!!")
if(!identical(P.hyp$adj,D.hyp$adj))warning("P and D hypotheses were constructed using two different locus adjustment parameter vectors!!")
if(!identical(P.hyp$fst,D.hyp$fst))warning("P and D hypotheses were constructed using two different fsts!!")
}
#--------------------------------------------------------------------------------------------------------------------
queried.vs.known <- function(path) {
# Reads profile from path and returns queried vs known column.
# Args:
# path: Path to file with the profile.
raw <- read.table(path, header=T, colClasses='character', row.names=1, sep=',', quote = "\"")
if(is.null(raw$known.queried))stop("Reference csv must contain a column 'known/queried'. This column must contain one field 'queried'. ")
if(sum(raw$known.queried=='queried')!=1)stop("The reference csv column 'known/queried' must contain one field 'queried'. ")
return(raw$known.queried == 'queried')
}
#--------------------------------------------------------------------------------------------------------------------
estimate.csp <- function(refData, cspData) {
# Estimate how well each reference profile is represented in the CSP
nrep <- nrow(cspData)
# Constructs the result data frame.
result <- data.frame(array(0, c(nrow(refData), nrep+1)), row.names=rownames(refData))
colnames(result)[1:nrep] = sapply(1:nrep, function(n) {paste('Rep', n)})
colnames(result)[nrep+1] = 'Total'
# now add data.
for(person in row.names(refData)){
for(rep in 1:nrep){
# number of alleles in common for given person and CSP replicate, across all loci
represented <- c()
for(locus in 1:ncol(cspData)){
if(!is.na(cspData[rep,locus])) {
# figure out unique alleles in reference
ref.alleles <- unique(unlist(refData[person,locus+1]))# +1 ignores first column(queried)
csp.alleles <- unique(unlist(cspData[rep,locus]))
# figure out how many of these allele are in CSP
represented <- c(represented, ref.alleles %in% csp.alleles)
}
}
if(length(represented) > 0)
result[person, rep] <- 100*sum(represented)/length(represented)
}
}
if(nrep==1) result[, nrep+1] <- result[, nrep]
else result[, nrep+1] <- round.0(rowSums(result)/nrep)
# Reorders rows
result <- round.0(result[order(result[, nrep+1], decreasing=T), ])
return(result)}
#--------------------------------------------------------------------------------------------------------------------
estimates.reformatter <- function(genetics){
table <- genetics$estimates
extra <- data.frame(Contributor=row.names(table))
result <- cbind(extra,table)
return(result)}
#--------------------------------------------------------------------------------------------------------------------
summary.generator <- function(refData, cspData){
# summary table for Q and K, showing which of their alleles are replicated, unreplicated, or absent
table <- as.data.frame(array(,c(nrow(refData)+1,ncol(cspData))))
colnames(table) <- colnames(cspData)
row.names(table) <- c(row.names(refData),'Other')
# two identical tables, will be slightly different formats for rtf and latex
table.rtf <- table.latex <- table
# count of unattributable alleles, to establish if they are replicated or not
rep.counts <- unrep.counts <- c()
# generate the results
for(locus in colnames(cspData)){
# for unattributable alleles are rep, unrep, absent
cspAlleles <- unlist(cspData[,locus])
cspAlleles <- cspAlleles[!is.na(cspAlleles)] # remove NAs
refAlleles <- unique(unlist(refData[,locus]))
unattributableAlleles <- cspAlleles[!cspAlleles%in%refAlleles]
table.rtf['Other',locus] <- summary.helper(unattributableAlleles,cspAlleles)$rtf
table.latex['Other',locus] <- summary.helper(unattributableAlleles,cspAlleles)$latex
# for unattributable alleles calculate how many are replicated /unreplicated
rep.counts <- c(rep.counts, summary.helper(unattributableAlleles,cspAlleles)$rep )
unrep.counts <- c(unrep.counts, summary.helper(unattributableAlleles,cspAlleles)$unrep )
# for each contributor calculate which alleles are rep, unrep, absent
for(name in row.names(refData)){
refAlleles <- refData[name,locus][[1]]
table.rtf[name,locus] <- summary.helper(refAlleles,cspAlleles)$rtf
table.latex[name,locus] <- summary.helper(refAlleles,cspAlleles)$latex
}}
# reformat the counts table (c.)
c.rep <- data.frame(loci=colnames(cspData), counts=rep.counts, status='replicated')
c.unrep <- data.frame(loci=colnames(cspData), counts=unrep.counts, status='unreplicated')
counts <- rbind(c.rep,c.unrep)
table <- list(rtf=table.rtf,latex=table.latex)
summary <- list(table=table,counts=counts)
return(summary)}
#--------------------------------------------------------------------------------------------------------------------
summary.helper <- function(refAlleles,cspAlleles){
# intricate and irritating operations required to check each allele
rep <- unrep <- absent <- NULL
for(allele in unique(refAlleles)){
condition <- sum(cspAlleles%in%allele)
if(condition>1)rep <- c(rep,allele)
if(condition==1)unrep <- c(unrep,allele)
if(condition==0)absent <- c(absent,allele)
}
# separate by commas, add escape functions such that replicated=bold, and unreplicated=italic, absent=box (underline in rtf)
# different codes for rtf and latex
rep.rtf <- rep.latex <- unrep.rtf <- unrep.latex <- absent.rtf <- absent.latex <- NULL
if(!is.null(rep)){
rep.rtf <- paste('\\B',paste(rep,collapse=','),'}',sep='')
rep.latex <- paste('\\textbf{',paste(rep,collapse=','),'}',sep='')
}
if(!is.null(unrep)){
unrep.rtf <- paste('\\I',paste(unrep,collapse=','),'}',sep='')
unrep.latex <- paste('\\textit{',paste(unrep,collapse=','),'}',sep='')
}
if(!is.null(absent)){
absent.rtf <- paste('\\X',paste(absent,collapse=','),'}',sep='')
absent.latex <- paste('\\fbox{',paste(absent,collapse=','),'}',sep='')
}
rtf <- paste(c(rep.rtf,unrep.rtf,absent.rtf),collapse=',')
latex <- paste(c(rep.latex,unrep.latex,absent.latex),collapse=',')
return(list(rtf=rtf,latex=latex,rep=length(rep),unrep=length(unrep)))}
#--------------------------------------------------------------------------------------------------------------------
rtf.formater <- function(file){
# Think of this function as a debugger for the rtf functions.
# 1. 'TRUE' and 'FALSE' are irritatingly converted into 'Yes' and 'No'
# 2. the addTable() function in package rtf uses the text length to choose the column width. This means that the
# additional escape characters to encode bold or italic or boxes causes havoc, making those columns too wide.
# This inelegant solution uses my own invented escape characters \B for bold, \I for italic and \X for box,
# to keep the extra text to a minimum. This function then converts to the correct rtf coding AFTER the document is created.
# note the additional '\' is required in R to escape the '\' in rtf
data <- readLines(file)
for(n in 1:length(data)){
line <- data[n]
line <- gsub('\\B','{\\b ',line,fixed=T)
line <- gsub('\\I','{\\i ',line,fixed=T)
line <- gsub('\\X','{\\chbrdr\\brdrs ',line,fixed=T)
line <- gsub('Yes','TRUE',line,fixed=T)
line <- gsub('No','FALSE',line,fixed=T)
data[n] <- line
}
write(data,file=file)}
#--------------------------------------------------------------------------------------------------------------------
overall.likelihood.table.reformatter <- function(prosecutionResults,defenceResults){
P <- -prosecutionResults$optim$bestval
D <- -defenceResults$optim$bestval
table <- cbind(
round.1(data.frame(Prosecution.log10=P,Defence.log10=D,Ratio.log10=P-D)),
round.0(data.frame(Ratio=toString(signif(10^(P-D),3))))
)
table <- t(table); colnames(table) <- 'estimate'
extra <- data.frame(calculation=row.names(table))
result <- cbind(extra,table)
return(result)}
#--------------------------------------------------------------------------------------------------------------------
rcontConvert <- function(refIndiv,rcont){
# Convert rcont to full rcont (including ref individual)
# refIndiv: reference individual specified in args
# rcont: rcont parameters from do.call(optim,params)
if(refIndiv == 1) rcont = c(1, rcont)
else if(refIndiv > length(rcont)) rcont = c(rcont, 1)
else rcont = c(rcont[1:refIndiv-1], 1, rcont[refIndiv:length(rcont)])
return(rcont)}
#--------------------------------------------------------------------------------------------------------------------
calc.dropout = function(results, hypothesis){
# Calculates dropout rates for every contributor subject to dropout and for every replicate
# hypothesis: generated by either defence.hypothesis() or prosecution.hypothesis()
# results: results from do.call(optim,params)
# Number of contributors subject to dropout + number of unknowns + 1 (dropin)
N <- nrow(hypothesis$dropoutProfs) + hypothesis$nUnknowns + 1
nrep <- nrow(hypothesis$cspProfile)
do <- results$optim$bestmem[grep("dropout",names(results$optim$bestmem))]
rcont <- results$optim$bestmem[grep("rcont",names(results$optim$bestmem))]
rcont <- rcontConvert(hypothesis$refIndiv,rcont)
BB <- results$optim$bestmem[grep("power",names(results$optim$bestmem))]
drout <- matrix(0,N-1,nrep)
if(N>1) for(x in 1:(N-1)) for(z in 1:nrep) drout[x,z] <- do[z]/(do[z]+rcont[x]^-BB*(1-do[z]))
return(drout)}
#--------------------------------------------------------------------------------------------------------------------
dropDeg <- function(hypothesis,results,genetics){
# Output tables for dropout and degradation
# hypothesis: generated by either defence.hypothesis() or prosecution.hypothesis()
# results: results from do.call(optim,params)
dropoutsLogical <- determine.dropout(genetics$refData,genetics$cspData)
Qdrop <- dropoutsLogical[names(dropoutsLogical)==genetics$nameQ]
knownDropoutsLogical <- dropoutsLogical[names(dropoutsLogical)!=rownames(hypothesis$queriedProfile)]
# Number of Known contributors with No Dropout
Nknd <- length(which(!knownDropoutsLogical))
# create the row names, are arranged into the correct order: K, Q/X, U
names.Dropout <- names(which(knownDropoutsLogical))
names.NoDropout <- names(which(!knownDropoutsLogical))
nameK <- c(names.NoDropout,names.Dropout)
Names=c()
if(length(nameK)>0)for (n in 1:length(nameK)) Names[n]=paste(nameK[n],' (K',n,')',sep='')
Names[length(c(genetics$nameQ,nameK))] = paste(genetics$nameQ,'(Q)')
nU <- hypothesis$nUnknowns
if(hypothesis$hypothesis=="defence"){
Names[length(c(genetics$nameQ,nameK))] = 'X'
nU <- nU -1 # it already contains an extra one for X
}
if(nU>0)for(n in 1:nU)Names <- c(Names,paste('U',n,sep=''))
# column names for the table
runNames = c();for(rName in 1:genetics$nrep)runNames[rName]=paste('Dropout',paste('(Run ',rName,')',sep=''))
# dropout values
h <- h1 <- round.3(calc.dropout(results, hypothesis))
# degradation values
d <- d1 <- 10^results$optim$bestmem[grep("degradation",names(results$optim$bestmem))]
# under pros, if Q is not subject to dropout, an extra 0 needs to be added to both dropout and degradation for Q
if(hypothesis$hypothesis=="prosecution"){
if(Qdrop==F){
h = rbind(h[0:nrow(hypothesis$dropoutProfs),,drop=F],0)
d = c(d[0:nrow(hypothesis$dropoutProfs)],0)
if(hypothesis$nUnknowns>0){
h = rbind(h,h1[(nrow(hypothesis$dropoutProfs)+1):length(d1),,drop=F])
d = c(d,d1[(nrow(hypothesis$dropoutProfs)+1):length(d1)])
}
}
}
# Dropout is assumed zero for fully represented profiles, therefore suffixed on the dropout table
if(Nknd>0) for(n in 1:Nknd){
h = rbind(0,h)
d = c(0,d)
}
Dropout = round.3(data.frame(h,d))
colnames(Dropout)= c(runNames[1:genetics$nrep],'Degradation (overall)')
row.names(Dropout) = Names
return(Dropout)}
#--------------------------------------------------------------------------------------------------------------------
overall.dropout.table.reformatter <- function(prosecutionHypothesis,defenceHypothesis,prosecutionResults,defenceResults,genetics){
P.dropDeg <- dropDeg(prosecutionHypothesis,prosecutionResults,genetics)
P.extra <- data.frame(hypothesis=rep('Prosecution',nrow(P.dropDeg)),contributor=rownames(P.dropDeg))
P.combined <- cbind(P.extra,P.dropDeg)
D.dropDeg <- dropDeg(defenceHypothesis,defenceResults,genetics)
D.extra <- data.frame(hypothesis=rep('Defence',nrow(D.dropDeg)),contributor=rownames(D.dropDeg))
D.combined <- cbind(D.extra,D.dropDeg)
combined <- rbind(P.combined,D.combined)
return(combined)}
#--------------------------------------------------------------------------------------------------------------------
overall.dropin.table.reformatter <- function(prosecutionResults,defenceResults){
P.dropin <- prosecutionResults$optim$bestmem["dropin"]
D.dropin <- defenceResults$optim$bestmem["dropin"]
table <- round.3(data.frame(hypothesis=c('Prosecution','Defence'),dropin=c(P.dropin,D.dropin)))
return(table)}
#--------------------------------------------------------------------------------------------------------------------
optimised.parameter.table.reformatter <- function(hypothesis,result){
table <- t(rbind(result$optim$bestmem,result$member$lower,result$member$upper))
extra <- data.frame(parameter=rownames(table))
combined <- round.3(cbind(extra,table))
colnames(combined) <- c('parameter','estimate','lower bound','upper bound')
return(combined)}
#--------------------------------------------------------------------------------------------------------------------
chosen.parameter.table.reformatter <- function(prosecutionHypothesis){
keep <- c(7,8,9,11,12,13,14,15)
table <- as.data.frame(unlist(prosecutionHypothesis[keep]))
extra <- data.frame(Parameter= rownames(table))
combined <- cbind(extra,table)
colnames(combined) <- c('Parameter','User input')
return(combined)}
#--------------------------------------------------------------------------------------------------------------------
file.inputs.table.reformatter <- function(prosecutionHypothesis)
{
tmp = strsplit(prosecutionHypothesis$cspFile,split="/")[[1]]
cspFile = tmp[length(tmp)]
tmp = strsplit(prosecutionHypothesis$refFile,split="/")[[1]]
refFile = tmp[length(tmp)]
if(is.null(prosecutionHypothesis$databaseFile))
{
databaseFile = "DNA17-db.txt (Default)"
} else {
tmp = strsplit(prosecutionHypothesis$databaseFile,split="/")[[1]]
databaseFile = tmp[length(tmp)]
}
table = data.frame(file=c("CSP","Reference","Database"),input=c(cspFile,refFile,databaseFile))
colnames(table) = c("File","Used")
return(table)
}
#--------------------------------------------------------------------------------------------------------------------
# homozygous match probability
hom <- function(pa, fst=0.02)
{
numerator = ((2*fst)+((1-fst)*pa))*((3*fst)+((1-fst)*pa))
denominator = (1+fst)*(1+(2*fst))
return(numerator/denominator)
}
# heterozygous match probability
het <- function(pa, pb, fst=0.02)
{
numerator = (fst+((1-fst)*pa))*(fst+((1-fst)*pb))
denominator = (1+fst)*(1+(2*fst))
return(2*(numerator/denominator))
}
#singleFst = function(p,fst)
# {
# (fst+(1-fst)*p)/(1+fst)
# }
matchProb = function(hypothesis,rr,fst=0.02,sep=FALSE)
{
ideal.match <- c()
for(j in 1:ncol(hypothesis$queriedProfile))
{
print(paste0("...",colnames(hypothesis$queriedProfile)[j],"..."))
af = hypothesis$alleleDb[j][[1]]
kn = hypothesis$queriedProfile[,j][[1]]
p1 = af[row.names(af)==kn[1],1]
p2 = af[row.names(af)==kn[2],1]
# undo most of fst adjustment
p1 = (p1*(1+fst))-fst
p2 = (p2*(1+fst))-fst
if(kn[1]==kn[2])
{
# undo last bit of fst adjustment
p1 = (p1-fst)/(1-fst)
p2 = (p2-fst)/(1-fst)
# get match new fst adjusted match prob
ideal.match = c(ideal.match,rr[2]+(rr[1]*((2*fst+(1-fst)*p1)/(1+fst)))+((1-sum(rr))*hom(p1,fst=fst)))
} else {
# undo last bit of fst adjustment
p1 = p1/(1-fst)
p2 = p2/(1-fst)
# get match new fst adjusted match prob
ideal.match = c(ideal.match,rr[2]+(rr[1]*((fst+(1-fst)*((p1+p2)/2))/(1+fst)))+((1-sum(rr))*het(p1,p2,fst=fst)))
}
}
if(sep)
{
return(1/ideal.match)
} else {
return(1/prod(ideal.match))
}
}
ideal <- function(hypothesis)
{
# Calculates idealised likelihood assuming Q is perfect match
# Parameters:
# hypothesis: generated by either defence.hypothesis() or prosecution.hypothesis(), although rr only makes sense under defence
# rr: relatedness arguments from args
ideal.match = getMatchProb(hypothesis)
result <- data.frame(calculation =c('likelihood ratio','Log10 likelihood ratio'),estimate=c(toString(signif(ideal.match,3)),round.1(log10(ideal.match))))
return(result)}
#--------------------------------------------------------------------------------------------------------------------
# a few odds and sods to simplify the reports
line <- paste(rep('-',95),collapse='-') # creates a straight line
spacer <- function(doc,n=1) for(x in 1:n)addNewLine(doc) # adds blank lines
fs0 <- 26 # font size for header (main)
fs1 <- 20 # font size for header (sub1)
fs2 <- 15 # font size for header (sub2)
fs3 <- 8 # tiny, for the big tables
#--------------------------------------------------------------------------------------------------------------------
common.report.section <- function(names,genetics,resTable=NULL){
# objects common to both the allele report and the final output report are done once here, for consistency, and saves repeating code
# Create a new Docx.
doc <- RTF(names$filename, width=11,height=8.5,omi=c(1,1,1,1))
addParagraph(doc, line)
spacer(doc,3)
addHeader( doc, title=substr(names$title,1,nchar(names$title)-1), subtitle=names$subtitle, font.size=fs0 )
addHeader( doc, hyp.P(genetics), font.size=fs2 )
addHeader( doc, hyp.D(genetics), font.size=fs2 )
addParagraph(doc, line)
if(!is.null(resTable))
{
addHeader(doc, "Overall Likelihood", TOC.level=2, font.size=fs2)
addTable(doc, resTable ,col.justify='C', header.col.justify='C')
spacer(doc,3)
}
addPageBreak(doc, width=11,height=8.5,omi=c(1,1,1,1) )
# addTOC(doc)
# addPageBreak(doc, width=11,height=8.5,omi=c(1,1,1,1) )
addHeader(doc, "Data provided by forensic scientist", TOC.level=1,font.size=fs1)
addHeader(doc, "Crime scene profiles (CSP)",TOC.level=2,font.size=fs2)
addTable(doc, csp.table.reformatter(genetics),col.justify='C', header.col.justify='C',font.size=fs3)
spacer(doc,3)
addHeader(doc, "Reference profiles", TOC.level=2, font.size=fs2 )
addTable(doc, reference.table.reformatter(genetics), col.justify='C', header.col.justify='C',font.size=fs3)
spacer(doc,1)
addParagraph(doc, "Alleles that are \\Breplicated}, \\Iunreplicated} or \\Xabsent} in the crime scene profile, using the certain designations only." ) # will use rtf.formater() to convert '\\B','\\I' and '\\X' into rtf encoding
spacer(doc,3)
addHeader(doc, "Summary", TOC.level=1,font.size=fs1)
addHeader(doc, "Unattributable alleles", TOC.level=2, font.size=fs2)
plot.function <- unattributable.plot.maker(genetics)
addPlot( doc, plot.fun = print, width = 9, height = 2.8, x = plot.function )
addParagraph( doc, "The number of 'certain' alleles that cannot be attributed to the known profile(s).")
spacer(doc,3)
addHeader(doc, "Unusual alleles", TOC.level=2, font.size=fs2 )
addTable(doc, unusual.alleles(genetics), col.justify='C', header.col.justify='C')
spacer(doc,1)
addParagraph( doc, "Alleles are automatically checked against the database. An error will be reported if an allele is absent from the database, or present more than once, or if a locus is absent.")
spacer(doc,3)
addHeader(doc, "Approximate representation", TOC.level=2, font.size=fs2)
addTable(doc, estimates.reformatter(genetics), col.justify='C', header.col.justify='C')
spacer(doc,1)
addParagraph( doc, "The fraction of an individual's alleles (as a percentage) that have been designated as 'certain' alleles in each replicate. This estimate is not used by likeLTD, and is intended to assist informal assessments of possible known contributors to the CSP. A more formal approach is to do a likeLTD run to compute the likelihood ratio (LR) for that individual contributor.")
spacer(doc,3)
return(doc)}
#--------------------------------------------------------------------------------------------------------------------
allele.report <- function(admin,file=NULL){
# admin: List containing administration data, as packed by pack.admin.input()
# file: defaults to creating its own sequential file names (to avoid overwriting)
# create genetics information
genetics <- pack.genetics.for.allele.report(admin)
# Latex output
latex.maker(genetics, file.path(admin$outputPath,'table.tex') )
names <- filename.maker(admin$outputPath,admin$caseName,file,type='allele')
names$subtitle <- admin$caseName
doc <- common.report.section(names,genetics)
addHeader(doc, "Suggested parameter values", , TOC.level=2, font.size=fs2)
addTable(doc, hypothesis.generator(genetics), col.justify='C', header.col.justify='C')
spacer(doc,1)
addParagraph( doc, "Recommended values for 'nUnknowns', choose from 0,1 or 2 (likeLTD automatically adds and additional unknown X to the defence hypothesis in place of the queried profile Q).")
addParagraph( doc, "Recommended values for 'doDropin', choose from 'TRUE' or 'FALSE'.")
addParagraph( doc, "All the attributable alleles must either come from an unknown or dropin.")
spacer(doc,3)
addHeader(doc, "System information", TOC.level=1,font.size=fs1)
addTable(doc, system.info(), col.justify='L', header.col.justify='L')
done(doc)
rtf.formater(names$filename)
}
#--------------------------------------------------------------------------------------------------------------------
output.report <- function(prosecutionHypothesis,defenceHypothesis,results,file=NULL){
# prosecutionHypothesis: generated by prosecution.hypothesis()
# defenceHypothesis: generated by defence.hypothesis()
# results: results from enumerate(prosecutionParams, defenceParams)
# file: defaults to creating its own sequential file names (to avoid overwriting)
prosecutionResults = results$Pros
defenceResults = results$Def
# create genetics information
genetics <- pack.genetics.for.output.report(prosecutionHypothesis,defenceHypothesis)
names <- filename.maker(prosecutionHypothesis$outputPath,prosecutionHypothesis$caseName,file,type='results')
names$subtitle <- prosecutionHypothesis$caseName
resTable = overall.likelihood.table.reformatter(prosecutionResults,defenceResults)
doc <- common.report.section(names,genetics,resTable)
addHeader(doc, "Likelihoods at each locus", TOC.level=2, font.size=fs2)
addTable(doc, local.likelihood.table.reformatter(prosecutionHypothesis,defenceHypothesis,prosecutionResults,defenceResults) ,col.justify='C', header.col.justify='C',font.size=8)
spacer(doc,3)
addHeader(doc, "Theoretical maximum LR", TOC.level=2, font.size=fs2)
addTable(doc, ideal(defenceHypothesis), col.justify='C', header.col.justify='C')
spacer(doc,3)
addHeader(doc, "Dropout and degradation parameter estimates", TOC.level=2, font.size=fs2)
addTable(doc, overall.dropout.table.reformatter(prosecutionHypothesis,defenceHypothesis,prosecutionResults,defenceResults,genetics), col.justify='C', header.col.justify='C')
spacer(doc,3)
addHeader(doc, "Dropin parameter estimates", TOC.level=2, font.size=fs2)
addTable(doc, overall.dropin.table.reformatter(prosecutionResults,defenceResults), col.justify='C', header.col.justify='C')
spacer(doc,3)
addHeader(doc, "User defined parameters", TOC.level=1, font.size=fs1)
addTable(doc, chosen.parameter.table.reformatter(prosecutionHypothesis), col.justify='L', header.col.justify='L')
spacer(doc,3)
addHeader(doc, "Input files", TOC.level=1, font.size=fs1)
addTable(doc, file.inputs.table.reformatter(prosecutionHypothesis), col.justify='L', header.col.justify='L')
spacer(doc,3)
# seed used
addHeader(doc, "Seed used", TOC.level=1, font.size=fs1)
addTable(doc, seedTable(results), col.justify='L', header.col.justify='L')
addHeader(doc, "Optimised parameters", TOC.level=1, font.size=fs1)
addHeader(doc, "Prosecution parameters", TOC.level=2, font.size=fs2)
addTable(doc, optimised.parameter.table.reformatter(prosecutionHypothesis,prosecutionResults), col.justify='L', header.col.justify='L')
spacer(doc,3)
addHeader(doc, "Defence parameters", TOC.level=2, font.size=fs2)
addTable(doc, optimised.parameter.table.reformatter(defenceHypothesis,defenceResults), col.justify='L', header.col.justify='L')
spacer(doc,3)
addHeader(doc, "System information", TOC.level=1,font.size=fs1)
addTable(doc, system.info(), col.justify='L', header.col.justify='L')
done(doc)
rtf.formater(names$filename)
}
#--------------------------------------------------------------------------------------------------------------------
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.