#' Extract scan date
#'
#' Extracts date of scanning from original array files (e.g. CEL for Affymetrix
#' and idat for Illumina arrays).
#'
#' Currently only implemented for Affymetrix arrays.
#'
#' @param se
#' [\code{\link[SummarizedExperiment]{SummarizedExperiment-class}} object]
#' study data set
#' @param col.sample.id [character(1)] Column in colData() with
#' sample accession information (default: "geo_accession")
#' @param array.type [character(1)] type of array ("affy" (default), "agilent"
#' or "illumina")
#' @param temp.dir [character(1)] directory where temporary files should be
#' stored
#' @param tryFormats [character(n)] Possible format strings to try when
#' converting date as character to an object of class
#' \code{\link[base]{Date}}
#'
#' @return \code{\link[SummarizedExperiment]{SummarizedExperiment-class}}
#' object with scan date added as column scan.date in colData.
#'
#' @export
extract_scan_date <- function(
se,
col.sample.id = "geo_accession",
array.type = "affy",
temp.dir = tempdir(),
tryFormats = c("%Y-%m-%d",
"%Y/%m/%d",
"%m/%d/%Y",
"%m/%d/%y",
"%m-%d-%Y",
"%d-%b-%Y")) {
if (!(col.sample.id %in%
colnames(SummarizedExperiment::colData(se)))) {
stop(paste("column", col.sample.id, "not available in colData!"))
}
accession.samples = as.character(
SummarizedExperiment::colData(se)[, col.sample.id])
if (!(array.type %in% c("affy", "agilent", "illumina"))) {
stop("array.type needs to be 'affy', 'agilent' or 'illumina'")
}
scan.date.all = rep(NA, length(accession.samples))
if (array.type == "affy") {
for (i in seq_len(length(accession.samples))) {
## extract scan date from header of CEL file
cel.file = download_cel_file(
sample = accession.samples[i],
temp.dir = temp.dir)
scan.info = affyio::read.celfile.header(
file = cel.file,
info = "full")
scan.date = ifelse(length(scan.info$ScanDate) == 0,
NA, scan.info$ScanDate)
scan.date.all[i] = scan.date
}
} else {
stop(paste(array.type, "not implemented yet!"))
}
## convert to Date
scan.date.formats = lapply(tryFormats, function(x) {
as.Date(scan.date.all,
tryFormats = x,
optional = TRUE)
})
no.na = vapply(X = scan.date.formats,
FUN = function(x) {sum(is.na(x))},
FUN.VALUE = integer(1))
ind.min = which.min(no.na)
if (no.na[ind.min] > sum(is.na(scan.date.all))) {
warning("some dates could not be correctly converted!")
}
se$scan.date = scan.date.formats[[ind.min]]
return(se)
}
#' Define batches based on scan date
#'
#' Groups samples into batches based on scan date. Samples run within a short
#' time interval can be defined to belong to the same batch.
#'
#' @param se
#' [\code{\link[SummarizedExperiment]{SummarizedExperiment-class}} object]
#' study data set
#' @param col.scan.date [character(1)] Column in colData() with scan
#' dates (default: scan.date as generated by the function
#' \code{\link{extract_scan_date}}
#' @param diff.ignore [numeric(1)] Time difference (in days) defined as
#' negligible (default: 1)
#'
#' @return \code{\link[SummarizedExperiment]{SummarizedExperiment-class}}
#' object with batch information added as column batch in colData()
#'
#' @export
define_batches <- function(
se,
col.scan.date = "scan.date",
diff.ignore = 1) {
if (!(col.scan.date %in%
colnames(SummarizedExperiment::colData(se)))) {
stop(paste("column", col.scan.date, "not available in column data!"))
}
if ("batch" %in%
colnames(SummarizedExperiment::colData(se))) {
stop(paste("column batch already available in column data!"))
}
scan.date = SummarizedExperiment::colData(se)[, col.scan.date]
if (!methods::is(scan.date, "Date")) {
stop(paste("column", col.scan.date, "needs to be of type Date!"))
}
names(scan.date) = colnames(se)
scan.date = sort(scan.date)
diff = c(0, as.numeric(diff(scan.date)))
diff[diff == diff.ignore] = 0
batch.num = as.numeric(as.factor(cumsum(diff)))
## add leading zeros to enable numeric sorting of strings
batch = formatC(
batch.num,
width = max(nchar(batch.num)),
format = "d",
flag = "0")
names(batch) = names(scan.date)
se$batch = batch[colnames(se)]
return(se)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.