Nothing
#' Read a set of profiles from a file
#'
#' Reads a set of profiles from a file
#'
#' The alleles are recorded integers rather than the STR repeat numbers. This
#' speeds up computation immensely when calculating any of the LRs or IBS.
#'
#' @param fileName a path to the profile file.
#' @param freqs A list containing two lists labelled loci and freqs. The second
#' list is a list of vectors containing the allele frequencies of each allele
#' at each locus in the multiplex. If this is left \code{NULL}, then it is calculated from
#' the profile file.
#' @param sep a character that delimits the fields in the profile file.
#' @param header a boolean which is \code{TRUE} if the profile file has a column header line.
#' @param id a column number indicating which column the profile id's are stored. If \code{id == -1}, then this means there is no id information.
#' @param discardMissing if \code{TRUE}, then all profiles which have alleles which cannot be matched to the frequency file are ommitted and returned
#' in the return list.
#' @return a list containing a \code{data.frame} of profiles where the alleles have been recoded
#' to the allele index number, rather than the allele itself, and a set of frequencies in the same
#' format as you would get from \code{\link{readFreqs}}. If \code{freqs} have been supplied, then this
#' will just be the same set of frequencies, if they have not, then this will be calculated
#' from the profiles. Given that the profiles generally do not have any locus name information
#' the loci will just be labelled Locus1, Locus2, \ldots. If there are missing values then the raw missing profiles are returned
#' @author James M. Curran
#'
#' @importFrom utils read.table
#' @importFrom stats complete.cases
#' @export readProfiles
readProfiles = function(fileName,
freqs = NULL,
sep = "\t",
header = FALSE,
id = 1,
discardMissing = TRUE) {
if (header) {
rawProfiles = read.table(fileName,
sep = sep,
header = FALSE,
skip = 1)
} else{
rawProfiles = read.table(fileName, sep = sep, header = FALSE)
}
cat(paste0("Read ", nrow(rawProfiles), " profiles from ", fileName, "\n"))
prof = rawProfiles
idCol = NULL
if (id != -1) {
idCol = rawProfiles[, id]
prof = prof[, -id]
}
if(is.null(freqs)){
cat("No frequency file supplied so compiling from raw profiles\n")
if(ncol(rawProfiles) %% 2 != 0){
stop("The profile file must contain an even number of columns excluding the id column\n")
}else{
numLoci = ncol(rawProfiles) / 2
freqs$loci = paste("Locus", 1:numLoci, sep = "_")
freqs$freqs = vector(mode = "list", length = numLoci)
names(freqs$freqs) = freqs$loci
for(nloc in 1:numLoci){
i1 = 2 * nloc - 1
i2 = i1 + 1
freqs$freqs[[nloc]] = table(c(rawProfiles[,i1], rawProfiles[,i2]))
freqs$freqs[[nloc]] = freqs$freqs[[nloc]] / sum(freqs$freqs[[nloc]])
}
}
}
numLoci = length(freqs$freqs)
for (loc in 1:numLoci) {
locus = freqs[["freqs"]][[loc]]
Alleles = as.numeric(names(locus))
i1 = 2 * loc - 1
i2 = i1 + 1
prof[, i1] = match(prof[, i1], Alleles)
prof[, i2] = match(prof[, i2], Alleles)
}
if(is.null(idCol)){
rownames(prof) = 1:nrow(prof)
}else{
rownames(prof) = idCol
}
colnames(prof) = paste0(rep(freqs$loci, rep(2, length(freqs$loci))),
rep(c("a", "b"), rep(length(freqs$loci))))
missing = !complete.cases(prof)
if(any(missing)){
cat(paste0("File contained ", sum(missing), " profiles with alleles that couldn't be resovled from frequency file\n"))
cat("These profiles will be ommitted from analysis, but are returned in is object as $missing\n")
result = list(profiles = prof[!missing, ], freqs = freqs, missing = list(raw = rawProfiles[missing,], mapped = prof[missing, ]))
}else{
result = list(profiles = prof, freqs = freqs)
}
class(result) = "relSimDB"
return(result)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.