Nothing
#' Read IBD probabilities from file
#'
#' Reads IBD probabilities from a plain text, tab-delimited .txt or .ibd file.
#' Information about the file format can be found in the vignette (
#' \code{vignette("IBDFileFormat", package = "statgenIBD")}). A data.frame with
#' the map must be specified as well.
#'
#' @param infile A character string specifying the path of the input file.
#' Compressed files with extension ".gz" or ".bz2" are supported as well.
#' @param map A data.frame with columns \code{chr} for chromosome and
#' \code{pos} for position. Positions should be in centimorgan (cM). They
#' should not be cumulative over the chromosomes. Other columns are ignored.
#' Marker names should be in the row names. These should match the marker names
#' in the input file.
#'
#' @return An object of class \code{IBDprob}.
#'
#' @examples
#' ## Read map for Steptoe Morex.
#' SxMmap <- read.delim(system.file("extdata/SxM", "SxM_map.txt",
#' package = "statgenIBD"), header = FALSE)
#' rownames(SxMmap) <- SxMmap$V1
#' SxMmap <- SxMmap[, -1]
#' colnames(SxMmap) <- c("chr", "pos")
#'
#'
#' ## Read IBD probabilities for Steptoe Morex.
#' SxMIBD <- readIBDs(system.file("extdata/SxM", "SxM_IBDs.txt",
#' package = "statgenIBD"),
#' map = SxMmap)
#'
#' ## Print summary.
#' summary(SxMIBD)
#'
#' @importFrom utils hasName read.table
#' @export
readIBDs <- function(infile,
map) {
if (missing(infile) || !is.character(infile) || length(infile) > 1 ||
file.access(infile, mode = 4) == -1 ||
## Compressed .csv files can be read by fread and should be
## allowed as inputs as well.
!(tools::file_ext(infile) %in% c("txt", "ibd") ||
(tools::file_ext(infile) %in% c("gz", "bz2") &&
tools::file_ext(tools::file_path_sans_ext(infile)) %in% c("txt", "ibd")))) {
stop("infile should be a character string indicating a readable ",
".txt or .ibd file.\n")
}
if (!is.data.frame(map)) {
stop("map should be a data.frame.\n")
}
if (!all(hasName(x = map, name = c("chr", "pos")))) {
## chr and pos are obligatory cols.
stop("chr and pos should be columns in map.\n")
}
if (!is.numeric(map[["pos"]])) {
stop("pos should be a numeric column in map.\n")
}
## Extract columns and order.
chkNum <- tryCatch(as.numeric(map[["chr"]]), warning = function(w) w)
if (is.numeric(map[["chr"]]) || inherits(chkNum, "warning")) {
map <- map[order(map[["chr"]], map[["pos"]]), c("chr", "pos")]
} else {
map <- map[order(chkNum, map[["pos"]]), c("chr", "pos")]
}
if (!is.character(map[["chr"]])) {
map[["chr"]] <- as.character(map[["chr"]])
}
## Read file.
inDat <- data.table::fread(infile, sep = "\t", header = TRUE)
## Check that data has required columns.
if (!all(colnames(inDat)[1:2] == c("Marker", "Genotype"))) {
stop("First two columns in infile should be named Marker and Genotype.\n")
}
if (ncol(inDat) < 4) {
stop("At least two parent columns should be present in input.\n")
}
inDat[, -c(1,2)] <- apply(inDat[, -c(1,2)], 2, function(x) as.numeric(x))
## Check that inDat and map contain the same markers.
missMrkMap <- rownames(map)[!rownames(map) %in% inDat[["Marker"]]]
if (length(missMrkMap) > 0) {
warning(length(missMrkMap), " markers in map are not in infile.\n",
call. = FALSE)
map <- map[!rownames(map) %in% missMrkMap, ]
}
missMrkDat <- unique(inDat[["Marker"]])[!unique(inDat[["Marker"]]) %in%
rownames(map)]
if (length(missMrkDat) > 0) {
warning(length(missMrkDat), " markers in infile are not in map.\n",
call. = FALSE)
inDat <- inDat[!inDat[["Marker"]] %in% missMrkDat, ]
}
genoNamesIn <- unique(inDat[["Genotype"]])
markerNamesIn <- unique(inDat[["Marker"]])
## Sort input data to get everything in expected order.
inDat <- inDat[order(inDat[["Marker"]], inDat[["Genotype"]]), ]
genoNames <- unique(inDat[["Genotype"]])
markerNames <- unique(inDat[["Marker"]])
parents <- colnames(inDat)[-c(1, 2)]
nGeno <- length(genoNames)
nMarkers <- length(markerNames)
nPar <- length(parents)
markers <- array(NA_real_,
dim = c(nGeno, nMarkers, nPar),
dimnames = list(genoNames, markerNames, parents)
)
for (geno in genoNames) {
for (parent in parents) {
markers[geno, , parent] <- inDat[inDat$Genotype == geno, parent]
}
markers[geno, , ] <- markers[geno, , ] / rowSums(markers[geno, , ])
}
markers <- markers[genoNamesIn, markerNamesIn, ]
res <- structure(list(map = map,
markers = markers,
popType = NULL,
parents = parents),
class = c("IBDprob", "list"))
return(res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.