# helper variable that maps between R and perseus types
.typeMap <- function(readPerseus = FALSE, writePerseus = FALSE,
additionalMatrices = FALSE){
if (additionalMatrices){
if (readPerseus){
return(list(Perseus = c('N', 'E', 'C', 'T', 'M'),
R = c('numeric', 'character',
'factor', 'character',
'character')))
} else if (writePerseus){
return(list(Perseus = c('N', 'C', 'T', 'M'),
R = c('numeric', 'factor',
'character', 'character')))
}
} else {
return(list(Perseus = c('N', 'E', 'C', 'T', 'M'),
R = c('numeric', 'numeric',
'factor', 'character',
'character')))
}
}
#' @importFrom plyr mapvalues
map_perseus_types <- function(typeAnnotation, typeMap) {
plyr::mapvalues(typeAnnotation,
from = typeMap$Perseus,
to = typeMap$R,
warn_missing = FALSE)
}
#' Infer Perseus type annotation row from DataFrame column classes
#'
#' @param df The data.frame
#' @param typeMap A list with elements 'Perseus' and 'R'. The ordering determines the mapping
#' @importFrom plyr laply
#' @return A vector with perseus type annotations
#' @seealso Based on \code{\link{mapvalues}}
infer_perseus_annotation_types <- function(df, typeMap) {
colClasses <- plyr::laply(df, class)
if (length(colClasses) == 0) return(colClasses)
plyr::mapvalues(colClasses,
from = typeMap$R,
to = typeMap$Perseus,
warn_missing = FALSE)
}
#' Create annotation rows
#'
#' Create the annotation rows data.frame from the list
#' of comment rows parsed from the input file and the main columns indicator
#' @param commentRows list of comment rows
#' @param isMain logical array indicating all main columns
#' @seealso used by \code{\link{read.perseus}}
create_annotRows <- function(commentRows, isMain) {
annotRows <- list()
for (name in names(commentRows)) {
if (startsWith(name, 'C:')) {
annotRows[[substring(name, 3)]] <- factor(commentRows[[name]][isMain])
}
else if (startsWith(name, 'N:')) {
annotRows[[substring(name, 3)]] <- as.numeric(commentRows[[name]][isMain])
}
else {
warning("Found unrecognized annotation row: ", name)
}
}
return(as.data.frame(annotRows))
}
#' @describeIn read.perseus Returns a list used internally to generate all other outputs
#' @importFrom stringr str_split
#' @export
read.perseus.default <- function(con, check = TRUE, additionalMatrices = FALSE) {
if (is.character(con)) {
con <- file(con, open = 'r')
} else if (!isSeekable(con)) {
fileCon <- file()
writeLines(readLines(con), fileCon)
close(con)
con <- fileCon
}
invisible(strsplit(readLines(con, n = 1), '\t')[[1]])
commentRows <- list()
while (startsWith(oneLine <- readLines(con, n = 1), '#!')) {
name <- stringr::str_split(substring(oneLine, 4), '[}]')[[1]][1]
rowStr <- substring(oneLine, nchar(name) + 5)
rowValues <- stringr::str_split(rowStr, '\t')[[1]]
commentRows[[name]] <- rowValues
}
types <- commentRows$Type
descr <- commentRows$Description
commentRows[c('Type', 'Description')] <- NULL
if (additionalMatrices){
colClasses <- map_perseus_types(types, .typeMap(readPerseus = TRUE, additionalMatrices = TRUE))
} else {
colClasses <- map_perseus_types(types, .typeMap(readPerseus = TRUE))
}
seek(con, 0)
df <- utils::read.table(con, header = TRUE,
sep = '\t', comment.char = '#',
colClasses = colClasses, fill = TRUE,
quote = "")
close(con)
isMain <- types == 'E'
main <- df[isMain]
imputeData <- matrix('False', ncol = ncol(main), nrow = nrow(main))
qualityData <- matrix(0, ncol = ncol(main), nrow = nrow(main))
if (additionalMatrices) {
for (i in 1:nrow(main)){
for (j in 1:ncol(main)){
mainDataList <- unlist(strsplit(main[i, j], ';'))
if (length(mainDataList) == 1){
} else {
main[i, j] <- mainDataList[1]
imputeData[i, j] <- mainDataList[2]
qualityData[i, j] <- mainDataList[3]
}
}
}
}
imputeData <- as.data.frame(imputeData)
colnames(imputeData) <- colnames(main)
qualityData <- as.data.frame(qualityData)
colnames(qualityData) <- colnames(main)
main <- as.data.frame(sapply(main, as.numeric))
annotCols <- df[!isMain]
annotRows <- create_annotRows(commentRows, isMain)
if (is.null(descr)) {
descr <- character(0)
}
if ('Name' %in% colnames(df)) {
rowNames <- make.names(df$Name, unique = T)
} else {
rowNames <- as.character(seq_len(nrow(df)))
}
colNames <- colnames(main)
rownames(main) <- rowNames
rownames(annotCols) <- rowNames
if (all(dim(annotRows) != 0)) {
# This fixes a bug where import would fail on matrices without annot. rows
rownames(annotRows) <- colNames
}
perseus.list <- list(main = main,
annotCols = annotCols,
annotRows = annotRows,
description = descr,
imputeData = imputeData,
qualityData = qualityData)
if (check) MatrixDataCheck(perseus.list)
return(perseus.list)
}
#' @describeIn read.perseus Returns explicitly as a list
#' @export
read.perseus.as.list <- function(con, check = TRUE) {
return(read.perseus.default(con, check = check))
}
#' @describeIn read.perseus Returns explicitly as a specialized matrix data object
#' @export
read.perseus.as.matrixData <- function(con, check = TRUE, additionalMatrices = FALSE) {
perseus.list <- read.perseus.default(con, check = check, additionalMatrices = additionalMatrices)
return(matrixData(main = perseus.list$main,
annotCols = perseus.list$annotCols,
annotRows = perseus.list$annotRows,
description = perseus.list$descr,
imputeData = perseus.list$imputeData,
qualityData = perseus.list$qualityData))
}
#' @describeIn read.perseus Returns a bioconductor expression set object
#' @export
read.perseus.as.ExpressionSet <- function(con, check = TRUE) {
if (!requireNamespace("Biobase", quietly = TRUE)) {
stop('This function requires the Biobase package, please install it in the bioconductor repository')
}
perseus.list <- read.perseus.default(con, check = check)
if (max(dim(perseus.list$annotRows)) > 0) {
eSet <- Biobase::ExpressionSet(
assayData = data.matrix(perseus.list$main),
phenoData = methods::new('AnnotatedDataFrame',
perseus.list$annotRows),
annotation = perseus.list$descr,
featureData = methods::new('AnnotatedDataFrame',
perseus.list$annotCols),
imputeData = perseus.list$imputeData,
qualityData = perseus.list$qualityData)
} else {
eSet <- Biobase::ExpressionSet(
assayData = data.matrix(perseus.list$main),
annotation = perseus.list$descr,
featureData = methods::new('AnnotatedDataFrame',
perseus.list$annotCols),
imputeData = perseus.list$imputeData,
qualityData = perseus.list$qualityData)
}
return(eSet)
}
#' Read Perseus matrix files
#'
#' Read the custom Perseus matrix file format *.txt into R.
#'
#' @note Limitations to column names in R still apply. Column names valid
#' in Perseus, such as 'Column 1' will be changed to 'Column.1'
#'
#' @param con A \code{\link{connection}} object or the path to input file
#' @param check Logical indicating whether to check for the validity of the exported object (slightly slower)
#' @param additionalMatrices Logical indication whether to write out quality and imputation matrices in perseus format
#'
#' @return Defaults to a \code{\link{matrixData}} object.
#'
#' @note If the provided connection \code{con} is a character string, it will assumed
#' to be a file path. A \code{\link{connection}} which is not seekable (see \code{\link{isSeekable}})
#' will be written to a temporary file. Any connection will be closed when \code{read.perseus} exits.
#' \code{read.perseus.as.list}, \code{read.perseus.as.matrixData} and \code{read.perseus.as.ExpressionSet} are also available depending on the class desired as an output
#' @examples
#' tmp <- tempfile(fileext = ".txt")
#' write('Column_1\tColumn_2\tColumn_3
#' #!{Description}\t\t
#' #!{Type}E\tE\tE
#' -1.860574\t-0.3910594\t0.2870352
#' NaN\t-0.4742951\t0.849998', file=tmp)
#' mdata <- read.perseus(tmp)
#'
#' @rdname read.perseus
#' @aliases read.perseus
#' @seealso \code{\link{write.perseus}}
#' @seealso \code{\link{matrixData}}
#'
#' @export
read.perseus <- read.perseus.as.matrixData
#' Write data to a perseus text file or connection
#'
#' @title write.perseus: function to generate a perseus-readable text document
#'
#' @param object an expressionSet, matrixData, list or table-like object.
#'
#' @return writes to disk a perseus-interpretable text representation of an R object
#' @rdname write.perseus
#' @examples
#' df <- matrixData(
#' main=data.frame(a=1:3, b=6:8),
#' annotCols=data.frame(b=c('a','b','c')),
#' annotRows=data.frame(x=factor(c('1','1'))),
#' description=c('a','a','b'))
#' con <- textConnection('df1', 'w')
#' write.perseus(df, con)
#' close(con)
#' @export write.perseus
write.perseus <- function(object = NULL, con = NULL, ...) {
if (is.character(object)) {
stop("First argument should be the object to write to file.")
}
UseMethod("write.perseus", object)
}
#' Write Data to Perseus matrix format
#'
#' Write Data to file in the custom Perseus matrix file format.
#'
#' @param main a data frame containing
#' @param annotCols a df containing columns containing metadata (about the rows)
#' @param annotRows a df containing columns containing metadata (about the columns)
#' @param descr a character vector that describes the columns in main and in annotCols (in that order)
#' @param imputeData a df containing imputations -- True or False of main data frame
#' @param qualityData a df containing quality values of main data frame
#' @param con A \code{\link{connection}} object or the path to output file
#' @param ... additional arguments passed to other functions
#' @seealso \code{\link{read.perseus}} \code{\link{matrixData}}
#' @inheritParams write.perseus
#'
#' @rdname write.perseus
#' @method write.perseus default
#'
#' @export
write.perseus.default <- function(object = NULL, con = NULL, main, annotCols = NULL,
annotRows = NULL, descr = NULL, imputeData = NULL,
qualityData = NULL, ...) {
stopifnot(is.data.frame(main) | is.data.frame(annotCols))
if (is.null(annotCols)) assign('annotCols', value = data.frame())
if ((!plyr::empty(imputeData)) || (!plyr::empty(qualityData))) {
imputeVector <-as.vector(t(imputeData))
qualityVector <-as.vector(t(qualityData))
if ((length(unique(imputeVector)) != 1) || (length(unique(qualityVector)) != 1)) {
importAdditionalMatrix = TRUE
} else {
importAdditionalMatrix = FALSE
}
if (importAdditionalMatrix) {
for (i in 1:nrow(main)){
for (j in 1:ncol(main)){
mergeMain <- unlist(list(main[i, j], as.character(imputeData[i, j]), qualityData[i, j]))
main[i, j] <- paste(mergeMain, collapse = ';')
}
}
}
}
columns <- c(names(main), names(annotCols))
df <- main
closeAtEnd <- FALSE
if (is.character(con)) {
con <- file(con, open = 'w')
closeAtEnd <- TRUE
}
writeLines(paste0(columns, collapse = '\t'), con)
if (length(descr) != 0) {
descr[1] <- paste0('#!{Description}', descr[1])
writeLines(paste0(descr, collapse = '\t'), con)
}
if ((!plyr::empty(imputeData)) || (!plyr::empty(qualityData))) {
type <- c(rep('E', ncol(main)),
infer_perseus_annotation_types(annotCols, .typeMap(writePerseus = TRUE, additionalMatrices = TRUE)))
} else {
type <- c(rep('E', ncol(main)),
infer_perseus_annotation_types(annotCols, .typeMap(writePerseus = TRUE)))
}
type[1] <- paste0('#!{Type}', type[1])
writeLines(paste0(type, collapse = '\t'), con)
for (name in names(annotRows)) {
values <- annotRows[[name]]
line <- paste0(c(as.character(values), rep('', ncol(annotCols))), collapse = '\t')
if (is.numeric(values)) {
writeLines(sprintf('#!{N:%s}%s', name, line), con)
}
else {
writeLines(sprintf('#!{C:%s}%s', name, line), con)
}
}
if (nrow(annotCols) != 0) {
if (nrow(main) == 0) {
df <- annotCols
}
else {
df <- cbind(main, annotCols)
}
}
utils::write.table(df, con, sep = '\t', quote = FALSE,
row.names = FALSE, col.names = FALSE,
na = 'NaN')
if (closeAtEnd) close(con)
return()
}
#' @return \code{NULL}
#'
#' @inheritParams write.perseus.default
#'
#' @export
#' @method write.perseus matrixData
#' @rdname write.perseus
write.perseus.matrixData <- function(object, con , ...) {
descr <- description(object)
annotRows <- as.list(annotRows(object))
main <- main(object)
annotCols <- annotCols(object)
imputeData <- imputeData(object)
qualityData <- qualityData(object)
(function(...){
write.perseus.default(main = main, annotCols = annotCols,
annotRows = annotRows, descr = descr,
imputeData = imputeData, qualityData = qualityData,
con = con)})(...)
}
#' @return \code{NULL}
#'
#' @inheritParams write.perseus.default
#'
#' @rdname write.perseus
#' @method write.perseus list
#'
#' @export
write.perseus.list <- function(object, con, ...) {
stopifnot(any(c('main', 'annotCols') %in% names(object)))
object$con <- con
do.call(write.perseus.default, c(list(...), object))
}
#' @return \code{NULL}
#'
#' @inheritParams write.perseus.default
#'
#' @rdname write.perseus
#' @method write.perseus data.frame
#'
#' @export
write.perseus.data.frame <- function(object, con, annotCols = NULL, ...) {
stopifnot(is.data.frame(object))
numeric_cols <- plyr::laply(object, is.numeric)
main <- subset.data.frame(object, select = numeric_cols, subset = T)
if (is.null(annotCols)) {
annotCols <- subset.data.frame(object, select = !numeric_cols, subset = T)
}
(function(...){
write.perseus.default(main = main,
annotCols = annotCols,
con = con, ...)})(...)
}
#' @return \code{NULL}
#'
#' @inheritParams write.perseus.default
#'
#' @rdname write.perseus
#' @method write.perseus matrix
#'
#' @export
write.perseus.matrix <- function(object, con, annotCols = NULL, ...) {
if (is.null(annotCols) & !is.null(rownames(object))) {
annotCols <- as.data.frame(rownames(object))
colnames(annotCols) <- 'Names'
}
(function(...){
write.perseus.default(main = as.data.frame(object),
annotCols = annotCols,
con = con, ...)})(...)
}
#' @return \code{NULL}
#'
#' @inheritParams write.perseus.default
#'
#' @rdname write.perseus
#' @method write.perseus ExpressionSet
#'
#' @export
write.perseus.ExpressionSet <- function(object, con, ...) {
mainDF <- data.frame(Biobase::exprs(object))
annotationRows <- methods::as(object@phenoData, 'data.frame')
descriptions <- Biobase::annotation(object)
annotationCols <- methods::as(object@featureData, 'data.frame')
(function(...){
write.perseus.default(main = mainDF, annotCols = annotationCols,
annotRows = annotationRows, descr = descriptions,
con = con)})(...)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.