#' An R6 class representing a single dataset.
#'
#' EDADat is a container class representing a single dataset (matrix or data
#' frame) along with some basic information about its indexing, orientation,
#' and representation.
#'
#' The purpose of this class is to provide a way for each atomic dataset to
#' carry its own relevant metadata, as needed by the other major `eda` classes.
#' This class is different from all other `eda` classes in that it does not
#' inherit from `AbstractMultiDataSet`, and, aside from being passed into other
#' `eda` class constructors, it is not intended to be used directly by the user.
#'
#' @section Arguments:
#' - `dat`: A data frame or matrix
#' - `key`: Character string or number indicating the row or column containg
#' the dataset primary keys. If 'rownames', or 'colnames', row or column
#' names will be used, respectively. If NULL, defaults to 'rownames' or
#' 'colnames', depending on orientation.
#' - `transposed`: Logical indicating whether the dataset orientation is
#' transposed, relative to the primary dataset. For example, if a primary
#' and secondary dataset both share the same column identifiers, but the
#' second dataset is oriented with those id's as rows, then transposed
#' should be set to TRUE to indicate such.
#' `xlab`: Character string containing x-axis label
#' `ylab`: Character string containing y-axis label
#'
#' @section Fields:
#' - `dat`: Formatted data frame or matrix
#'
#' @importFrom R6 R6Class
#' @name EDADat
#' @export
#'
NULL
EDADat <- R6Class("EDADat",
# ------------------------------------------------------------------------
# public
# ------------------------------------------------------------------------
public = list(
# public properties
xid = NULL,
yid = NULL,
xlab = NULL,
ylab = NULL,
# style properties
row_color = NULL,
row_shape = NULL,
row_label = NULL,
row_edat = NULL,
col_color = NULL,
col_shape = NULL,
col_label = NULL,
col_edat = NULL,
# EDADat constructor
initialize = function(dat, xid=NULL, yid=NULL,
row_names='rownames', col_names='colnames',
row_color=NULL, row_shape=NULL, row_label=NULL, row_edat=NULL,
col_color=NULL, col_shape=NULL, col_label=NULL, col_edat=NULL,
xlab=NULL, ylab=NULL) {
# assign random axis identifiers if none specified
if (is.null(xid)) {
xid <- private$get_hash()
}
if (is.null(yid)) {
yid <- private$get_hash()
}
# public properties
self$xid <- xid
self$yid <- yid
self$xlab <- xlab
self$ylab <- ylab
self$row_color <- row_color
self$row_shape <- row_shape
self$row_label <- row_label
self$row_edat <- row_edat
self$col_color <- col_color
self$col_shape <- col_shape
self$col_label <- col_label
self$col_edat <- col_edat
# store data
private$data <- private$format_data(dat, row_names, col_names)
},
# returns the vector for the specified axis and row/column name
# "other_axis" indicates that the row/column to be retrieved is
# on the opposite axis as to that which was specified by the "axis"
# parameter.
# If not axis name is specified, row or column names are returned
get = function(axis, name=NULL, other_axis=FALSE) {
# make sure valid axis identifier specified
if (!axis %in% c(self$xid, self$yid)) {
stop("Invalid axis ID specified.")
}
# determine whether target entry is stored as a row or column
row_target <- axis == self$xid
if (other_axis) {
row_target <- !row_target
}
if (is.data.frame(private$data) && private$transposed) {
row_target <- !row_target
}
# return target row or column names
if (row_target) {
# if no variable name specified column ids
if (is.null(name)) {
colnames(private$data)
} else {
# otherwise return requested row
as.vector(unlist(private$data[name, ]))
}
} else {
# return target column or row names
if (is.null(name)) {
rownames(private$data)
} else {
# otherwise return requested column
as.vector(private$data[, name])
}
}
},
# subsamples dataset rows and/or columns in-place
subsample = function(row_n=NULL, col_n=NULL, row_ratio=NULL, col_ratio=NULL) {
# get underlying data
dat <- private$data
# indices to sample from
if (is.data.frame(private$data) && private$transposed) {
# for transposed data frames, swap row and column indices
row_ind <- 1:ncol(dat)
col_ind <- 1:nrow(dat)
} else {
# otherwise operate as-is
row_ind <- 1:nrow(dat)
col_ind <- 1:ncol(dat)
}
# subsample rows
if (!is.null(row_n)) {
row_ind <- sample(row_ind, row_n)
} else if (!is.null(row_ratio)) {
row_ind <- sample(row_ind, round(row_ratio * length(row_ind)))
}
# subsample columns
if (!is.null(col_n)) {
col_ind <- sample(col_ind, col_n)
} else if (!is.null(col_ratio)) {
col_ind <- sample(col_ind, round(col_ratio * length(col_ind)))
}
# preserve row and column order
row_ind <- sort(row_ind)
col_ind <- sort(col_ind)
# update data
if (is.data.frame(private$data) && private$transposed) {
# transposed data frames
private$data <- dat[col_ind, row_ind, drop = FALSE]
} else {
# everything else
private$data <- dat[row_ind, col_ind, drop = FALSE]
}
},
# transpose data;
# matrices are transposed in-places while data frames are left alone,
# but have a transposition flag toggled
transpose = function() {
# for data frames, keep track of transposition status
if (is.data.frame(private$data)) {
private$transposed = !private$transposed
}
# transpose matrix data
if (is.matrix(private$data)) {
private$data <- t(private$data)
}
# swap axis ids and labels
xid <- self$xid
self$xid <- self$yid
self$yid <- xid
xlab <- self$xlab
self$xlab <- self$ylab
self$ylab <- xlab
# swap row and column style elements
row_color <- self$row_color
row_shape <- self$row_shape
row_label <- self$row_label
row_edat <- self$row_edat
self$row_color <- self$col_color
self$row_shape <- self$col_shape
self$row_label <- self$col_label
self$row_edat <- self$col_edat
self$col_color <- row_color
self$col_shape <- row_shape
self$col_label <- row_label
self$col_edat <- row_edat
}
),
# ------------------------------------------------------------------------
# private
# ------------------------------------------------------------------------
private = list(
# Parameters
data = NULL,
transposed = FALSE,
get_names_index = function(dat, key, names_fxn=colnames) {
# column number containing row ids specified, or
# row number containing column ids
if (is.numeric(key)) {
key
} else if (key %in% names_fxn(dat)) {
# column name containing row ids specified, or
# row name containing column ids
which(names_fxn(dat) == key)
}
},
# returns data with row / column names formatted as expected
format_data = function(dat, row_names, col_names) {
# convert tibbles to normal data frames
if ('tbl' %in% class(dat)) {
dat <- as.data.frame(dat)
}
# normalize rownames
if (row_names != 'rownames') {
ind <- private$get_names_index(dat, row_names, colnames)
if (sum(duplicated(dat[, ind])) > 0) {
stop("Row identifiers must be unique.")
}
rownames(dat) <- dat[, ind]
dat <- dat[, -ind]
}
# normalize colnames
if (col_names != 'colnames') {
ind <- private$get_names_index(dat, col_names, rownames)
if (sum(duplicated(dat[ind, ])) > 0) {
stop("Column identifiers must be unique.")
}
colnames(dat) <- dat[ind, ]
dat <- dat[-ind, ]
}
# ensure that formatted data has both row and column names;
# required by some functions.
if (is.null(rownames(dat))) {
rownames(dat) <- paste0('row_', 1:nrow(dat))
}
if (is.null(colnames(dat))) {
colnames(dat) <- paste0('col_', 1:ncol(dat))
}
dat
},
get_hash = function(len=6) {
paste0(sample(c(0:9, letters, toupper(letters)), len), collapse='')
}
),
# ------------------------------------------------------------------------
# active
# ------------------------------------------------------------------------
active = list(
# return data in expected orientation
dat = function(value) {
# return data
if (missing(value)) {
# for transposed data frames, transpose on the fly
if (is.data.frame(private$data) && private$transposed) {
rn <- rownames(private$data)
cn <- colnames(private$data)
dat <- data.table::transpose(private$data)
rownames(dat) <- cn
colnames(dat) <- rn
return(dat)
} else {
# otherwise, return as-is
return(private$data)
}
} else {
# store updated matrix / data frame
private$data <- value
# for data frames, reset transposed flag
if (is.data.frame(value)) {
private$transposed <- FALSE
}
}
},
# return data in transposed order
tdat = function() {
self$transpose()
dat <- self$dat
self$transpose()
dat
}
)
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.