Nothing
#' Read multiple idat files.
#'
#' This function is a wrapper to for illuminaio's \code{readIDAT} function.
#' A dictionary will translate the standard IDs from the IDAT file into SNP names.
#' A position file, will filter out all SNPs where the position is unknown.
#' This might save a lot of memory and allow to read in more samples at once.
#' This function is provided to simplify the use of the example data.
#' For data produced with other arrays use respective functions available in CRAN or Bioconductor.
#'
#' @param files A vector of filenames.
#' Two consecutive lines per sample.
#' The order (e.g. green,red) must be consistent for all samples.
#' @param dict A dataframe with columns "idatID" and "name".
#' @param cnames Vector of characters as column names.
#' @param pos Position object with three columns "name", "chromosome" and "position".
#' Can be used to filter out SNPs where the position is not known.
#' @param beads Logical, if beads should be read in.
#' @param sd Logical, if standard deviation should be read in.
#' @return An object containing the identities from the idat files.
#' @examples
#' if(require(brassicaData)){
#' files <- list.files(system.file("extdata", package = "brassicaData"),
#' full.names = TRUE, pattern = "idat")
#' samples <- read_sample_sheets(files = list.files(system.file("extdata",
#' package = "brassicaData"), full.names = TRUE, pattern = "csv"))
#' column_names <- sapply(strsplit(files,split="/"), FUN=function(x) x[length(x)])
#' data("dictionary", package = "brassicaData", envir = environment())
#' data("chrPos", package = "brassicaData", envir = environment())
#' raw_data <- read_intensities(files = files, dict = dictionary,
#' cnames = column_names, pos = chrPos)
#' }
#' @export
read_intensities <-
function(files, dict = NULL, cnames = NULL, pos = NULL,
beads = FALSE, sd = FALSE) {
if (is.data.frame(pos) && ncol(pos) >= 3 && !names(pos) == c("name", "chromosome", "position"))
stop("pos object needs to be data.fram with columns name, chromosome and position.")
require_package("illuminaio")
out <-
list(
chr = c(), pos = c(), raw = matrix(), snps = c(), beads = matrix(), sd = matrix()
)
cols <- 1
if (sd)
cols <- c(cols, 2)
if (beads)
cols <- c(cols, 3)
if (is.null(dict)) {
out$raw <-
matrix(unlist(
lapply(
files,FUN = function(x)
illuminaio::readIDAT(x)$Quants[, cols]
)
),
ncol = length(files), byrow = FALSE)
}else{
if (!is.null(pos)) {
dict <- dict[dict$name %in% pos$name,]
if (is.vector(pos$chromosome) & is.vector(pos$position)) {
tmp <- pos$name %in% dict$name
out$chr <- pos$chromosome[tmp]
out$pos <- pos$position[tmp]
rm(tmp)
}
}
out$raw <-
matrix(unlist(
lapply(
files,FUN = function(x)
illuminaio::readIDAT(x)$Quants[as.character(dict$idatID), cols]
)
),
ncol <- length(files) * length(cols), byrow = FALSE)
out$snps <- dict$name
}
if (beads & sd) {
out$sd <- out$raw[, seq(2, ncol(out$raw), 3)]
out$raw <- out$raw[,-seq(2, ncol(out$raw), 3)]
out$beads <- out$raw[, seq(2, ncol(out$raw), 2)]
out$raw <- out$raw[,-seq(2, ncol(out$raw), 2)]
}else if (sd) {
out$sd <- out$raw[, seq(2, ncol(out$raw), 2)]
out$raw <- out$raw[,-seq(2, ncol(out$raw), 2)]
}else if (beads) {
out$beads <- out$raw[, seq(2, ncol(out$raw), 2)]
out$raw <- out$raw[,-seq(2, ncol(out$raw), 2)]
}
if (!is.null(cnames)) {
out$samples <- cnames
}
class(out) <- "raw_data"
out
}
#' Read sample sheet(s)
#'
#' Read one or multiple sample sheets in csv format.
#' The information is used to translate between cryptic names
#' (e.g. provided by service provider) and names that can be interpreted.
#' As an alternative to this function, the information can be provided as data.frame with the two columns
#' "Name" and "ID".
#'
#' @param files Path to sample sheet.
#' @param skip Integer, lines to skip, before data is read.
#' If not provided, the program looks for entry __[Data]__
#' @param cols Character vector, column names to use. First one is the Sample ID, Second and third
#' ones are barcode and position.
#' @return A data.frame containing the idat names and the meaningful names of the samples.
#' @examples
#' if(require(brassicaData)){
#' samples <- read_sample_sheets(files = list.files(system.file("extdata",
#' package = "brassicaData"), full.names = TRUE, pattern = "csv"))
#' }
#' @export
read_sample_sheets <- function(files, skip = NULL, cols =
c("Sample_ID", "SentrixBarcode_A", "SentrixPosition_A")) {
tab <- c()
for (i in files) {
if (missing(skip)) {
lines1 <- readLines(i)
skip_loc <- grep(lines1, pattern = "\\[Data\\]")
}else{
skip_loc <- skip
}
tab2 <-
utils::read.table(
i, skip = skip_loc, sep = ";", header = TRUE, stringsAsFactors = FALSE, check.names = FALSE
)
if (ncol(tab2) < length(cols)) {
tab2 <-
utils::read.table(
i, skip = skip_loc, sep = ",", header = TRUE, stringsAsFactors = FALSE, check.names = FALSE
)
}
if (!is.null(tab) & is.null(cols)) {
both <- intersect(colnames(tab), colnames(tab2))
if (length(both) < length(cols))
stop("Not all files have the required column names.")
tab <- tab[, both]
tab2 <- tab2[, both]
}else if (!is.null(tab)) {
tab <- tab[, cols]
tab2 <- tab2[, cols]
colnames(tab) <- colnames(tab2) <- cols
}else if (is.null(tab)) {
tab2 <- tab2[, cols]
colnames(tab2) <- cols
}
tab <- rbind(tab, tab2)
}
return(data.frame(
Names = tab[, cols[1]], ID = paste(tab[, cols[2]], tab[, cols[3]], sep = "_"), stringsAsFactors = FALSE
))
}
#' Check raw data
#'
#' Before preprocessing it is advised to check the data for failed samples.
#' This function allows to visualize the signals and return sample indices of failed samples.
#'
#' @param raw Matrix with raw data.
#' Two consecutive columns per sample.
#' The order (e.g. green,red) must be consistent for all samples.
#' @param plot Logical. If TRUE data will be plotted.
#' @param thresh Threshold for filtering.
#' @param ... arguments are forwarded to \code{hist()}.
#' @return Vector containing Samples below threshold.
#' @examples
#' if(require(brassicaData)){
#' data("raw_napus", package = "brassicaData", envir = environment())
#' to_filter <- check_raw(raw_napus, thresh = 28000, breaks = 20)
#' }
#' @export
check_raw <- function(raw, plot = TRUE, thresh = 0, ...) {
cmeans <- colMeans(raw$raw)
cmeans <-
cmeans[seq(1,ncol(raw$raw),2)] + cmeans[seq(2,ncol(raw$raw),2)]
if (plot) {
graphics::hist(cmeans, main = "Histogram", xlab = "Mean signal per sample", ...)
graphics::abline(v = thresh, col = "red")
}
out <- which(cmeans < thresh) * 2
sort(c(out - 1,out))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.