Nothing
#' @name PgR6
#' @title PgR6 basic class
#' @description A basic \code{PgR6} class constructor. It contains basic fields
#' and subset functions to handle a pangenome. Final users should use \code{\link{pagoo}}
#' instead of this, since is more easy to understand.
#' @importFrom R6 R6Class
#' @importFrom S4Vectors DataFrame
#' @importFrom reshape2 dcast
# #' @importFrom data.table as.data.table setcolorder dcast
#' @export
PgR6 <- R6Class('PgR6',
# Private fields #
private = list(
version = NULL,
.data = NULL,
.panmatrix = NULL,
.organisms = NULL,
.clusters = NULL,
.dropped = NULL,
.level = NULL,
.sep = NULL
),
# Public functions #
public = list(
#' @description A basic \code{PgR6} class constructor. It contains basic fields
#' and subset functions to handle a pangenome.
#' @param data A \code{data.frame} or \code{\link[S4Vectors:DataFrame-class]{DataFrame}} containing at least the
#' following columns: \code{gene} (gene name), \code{org} (organism name to which the gene belongs to),
#' and \code{cluster} (group of orthologous to which the gene belongs to). More columns can be added as metadata
#' for each gene.
#' @param org_meta (optional) A \code{data.frame} or \code{\link[S4Vectors:DataFrame-class]{DataFrame}}
#' containing additional metadata for organisms. This \code{data.frame} must have a column named "org" with
#' valid organisms names (that is, they should match with those provided in \code{data}, column \code{org}), and
#' additional columns will be used as metadata. Each row should correspond to each organism.
#' @param cluster_meta (optional) A \code{data.frame} or \code{\link[S4Vectors:DataFrame-class]{DataFrame}}
#' containing additional metadata for clusters. This \code{data.frame} must have a column named "cluster" with
#' valid organisms names (that is, they should match with those provided in \code{data}, column \code{cluster}), and
#' additional columns will be used as metadata. Each row should correspond to each cluster.
#' @param core_level The initial core_level (that's the percentage of organisms a core cluster must be in to be
#' considered as part of the core genome). Must be a number between 100 and 85, (default: 95). You can change it
#' later by using the \code{$core_level} field once the object was created.
#' @param sep A separator. By default is '__'(two underscores). It will be used to
#' create a unique \code{gid} (gene identifier) for each gene. \code{gid}s are created by pasting
#' \code{org} to \code{gene}, separated by \code{sep}.
#' @param verbose \code{logical}. Whether to display progress messages when loading class.
#' @param DF Deprecated. Use \code{data} instead.
#' @param group_meta Deprecated. Use \code{cluster_meta} instead.
#' @return An R6 object of class PgR6. It contains basic fields and methods for
#' analyzing a pangenome.
initialize = function(data,
org_meta,
cluster_meta,
core_level = 95,
sep = '__',
verbose = TRUE,
DF,
group_meta){
# Deprecated args
if (!missing(DF)){
data <- DF
warning('Argument "DF" is deprecated. You should use "data" instead. ',
'This still exists for compatibility with previous versions, ',
'but will throw an error in the future.',
immediate. = TRUE, noBreaks. = TRUE)
}
if (!missing(group_meta)){
cluster_meta <- group_meta
warning('Argument "group_meta" is deprecated. You should use "cluster_meta" instead. ',
'This still exists for compatibility with previous versions, ',
'but will throw an error in the future.',
immediate. = TRUE, noBreaks. = TRUE)
}
# Check data input
# 1. Is data.frame or DataFrame?
if (verbose) message('Checking class.')
cl <- class(data)
if (! cl %in% c('data.frame', 'DataFrame', 'DFrame'))
stop('"data" is not a data.frame.')
# 2. Are colnames correct?
if (verbose) message('Checking dimnames.')
dn <- dimnames(data)[[2]]
if (!all(c('cluster', 'org', 'gene') %in% dn))
stop('colnames must have "cluster", "org", and "gene".')
# 3. To DataFrame if not yet.
if(cl[1]=='data.frame'){
data <- DataFrame(data)
}
# 4. Create gid
if (verbose) message('Creating gid (gene ids).')
data$gid <- paste(data$org, data$gene, sep = sep)
if (any(duplicated(data$gid)))
stop('Duplicated gene names in data.')
# 5. Column order, just for format
dn <- dimnames(data)[[2]]
req <- c('cluster', 'org', 'gene', 'gid')
extra <- dn[which(!dn%in%req)]
data <- data[, c(req, extra)]
# # 6. Change class
data$cluster <- factor(data$cluster)
# data[, group := factor(group)]
orgs <- unique(data$org)
data$org <- factor(data$org, levels = orgs)
data$gene <- factor(data$gene)
# data[, org := factor(org, levels = orgs)]
# Create organism field. Add metadata (if provided)
names(orgs) <- seq_along(orgs)
orgs_data <- DataFrame(org=factor(orgs, levels = orgs), row.names = seq_along(orgs))
if (!missing(org_meta)){
if (verbose) message('Checking provided organism metadata.')
if (!class(org_meta)%in%c('data.frame', 'DataFrame', 'DFrame'))
stop('"org_meta" should be a data.frame.')
if('org'%in%colnames(org_meta)){
ma <- match(orgs_data$org, org_meta$org)
if (any(is.na(ma))) stop('org_meta$org do not match with data$org')
org_meta <- org_meta[ma, ]
oc <- which(colnames(org_meta)=='org')
orgs_data <- cbind(orgs_data, DataFrame(org_meta[, -oc, drop=F]))
}else{
warning('"org_meta" should contain an "org" column. Ignoring this parameter.')
}
}
#create group field. Add metadata (if provided)
clus_lvls <- levels(data$cluster)
cluster_data <- DataFrame(cluster=factor(clus_lvls, levels = clus_lvls))
if (!missing(cluster_meta)){
if (verbose) message('Checking provided cluster metadata.')
if (!class(cluster_meta)%in%c('data.frame', 'DataFrame', 'DFrame'))
stop('"cluster_meta" should be a data.frame')
if ('cluster'%in%colnames(cluster_meta)){
ma <- match(cluster_data$cluster, cluster_meta$cluster)
if (any(is.na(ma))) stop('cluster_meta$cluster do not match with data$cluster')
cluster_meta <- cluster_meta[ma, ]
oc <- which(colnames(cluster_meta)=='cluster')
cluster_data <- cbind(cluster_data, DataFrame(cluster_meta[, -oc, drop = F]))
}else{
warning('"cluster_meta" should contain a "cluster" column. Ignoring this parameter.')
}
}
# Create panmatrix #
if (verbose) message('Creating panmatrix.')
panmatrix <- dcast(as.data.frame(data),
org~cluster,
value.var = 'gene',
fun.aggregate = length)
rn <- panmatrix$org
panmatrix$org <- NULL
panmatrix <- as.matrix(panmatrix)
rownames(panmatrix) <- rn
# Populate private$ #
if (verbose) message('Populating class.')
private$version <- packageVersion('pagoo')
private$.data <- data
private$.organisms <- orgs_data
private$.clusters <- cluster_data
private$.panmatrix <- panmatrix
private$.level <- core_level
private$.sep <- sep
},
# # Print method #
# print = function(){
# clss <- class(self)[1]
# cat(paste0('<',clss,'> object. Package pgr6, version: ',
# private$version,
# '\n'))
# },
#' @description
#' Add metadata to the object. You can add metadata to each organism, to each
#' group of orthologous (cluster), or to each gene. Elements with missing data should be filled
#' by \code{NA} (dimensions of the provided data.frame must be coherent with object
#' data).
#' @param map \code{character} identifying the metadata to map. Can
#' be one of \code{"org"}, \code{"cluster"}, or \code{"gid"}.
#' @param data \code{data.frame} or \code{DataFrame} with the metadata to
#' add. For each case, a column named as \code{"map"} must exists, which should
#' contain identifiers for each element. In the case of adding gene (\code{gid})
#' metadata,each gene should be referenced by the name of the organism and the name
#' of the gene as provided in the \code{"data"} data.frame, separated by the
#' \code{"sep"} argument.
#' @return \code{self} invisibly, but with additional metadata.
add_metadata = function(map = 'org', data){
map <- match.arg(map, choices = c('org', 'cluster', 'gid'))
cl <- class(data)
if (!cl%in%c('data.frame', 'DataFrame')){
stop('data should be a data.frame .')
}
if (map == 'org'){
if('org'%in%colnames(data)){
ma <- match(data$org, private$.organisms$org)
if (any(is.na(ma))) stop('data$org do not match with object organisms.')
oc <- which(colnames(data)=='org')
nwcls <- colnames(data)[-oc]
repcol <- nwcls %in% colnames(private$.organisms)
invisible(lapply(which(repcol), function(x){
wr <- paste0("Column '", nwcls[x], "' already exists, overwriting.")
warning(wr, immediate. = TRUE)
private$.organisms[[nwcls[x]]] <- NULL
}))
for (i in seq_along(nwcls)){
dd <- data[[nwcls[i]]]
cl <- class(dd)
na <- switch(cl,
"character" = NA_character_,
"integer" = NA_integer_,
"numeric" = NA_real_,
"logical" = NA
)
nas <- rep(na, dim(private$.organisms)[1])
private$.organisms[[nwcls[i]]] <- nas
private$.organisms[[nwcls[i]]][ma] <- dd
}
}else{
stop('"data" should contain an "org" column.')
}
}else if (map == 'cluster'){
if('cluster'%in%colnames(data)){
ma <- match(data$cluster, private$.clusters$cluster)
if (any(is.na(ma))) stop('data$cluster do not match with object "cluster" column.')
oc <- which(colnames(data)=='cluster')
nwcls <- colnames(data)[-oc]
repcol <- nwcls %in% colnames(private$.clusters)
invisible(lapply(which(repcol), function(x){
wr <- paste0("Column '", nwcls[x], "' already exists, overwriting.")
warning(wr, immediate. = TRUE)
private$.clusters[[nwcls[x]]] <- NULL
}))
for (i in seq_along(nwcls)){
dd <- data[[nwcls[i]]]
cl <- class(dd)
na <- switch(cl,
"character" = NA_character_,
"integer" = NA_integer_,
"numeric" = NA_real_,
"logical" = NA
)
nas <- rep(na, dim(private$.clusters)[1])
private$.clusters[[nwcls[i]]] <- nas
private$.clusters[[nwcls[i]]][ma] <- dd
}
}else{
stop('"data" should contain a "cluster" column.')
}
}else{
if('gid'%in%colnames(data)){
ma <- match(data$gid, private$.data$gid)
if (any(is.na(ma))) stop('data$gid do not match with object gid.')
oc <- which(colnames(data) %in% c("cluster", "org", "gene", "gid"))
nwcls <- colnames(data)[-oc]
repcol <- nwcls %in% colnames(private$.data)
invisible(lapply(which(repcol), function(x){
wr <- paste0("Column '", nwcls[x], "' already exists, overwriting.")
warning(wr, immediate. = TRUE)
private$.data[[nwcls[x]]] <- NULL
}))
for (i in seq_along(nwcls)){
dd <- data[[nwcls[i]]]
cl <- class(dd)
na <- switch(cl,
"character" = NA_character_,
"integer" = NA_integer_,
"numeric" = NA_real_,
"logical" = NA
)
nas <- rep(na, dim(private$.data)[1])
private$.data[[nwcls[i]]] <- nas
private$.data[[nwcls[i]]][ma] <- dd
}
}else{
stop('"data" should contain a "gid" column.')
}
}
invisible(self)
},
# Basic Subset Methods #
# Drop organisms from dataset
#' @description
#' Drop an organism from the dataset. This method allows to hide an organism
#' from the real dataset, ignoring it in downstream analyses. All the fields and
#' methods will behave as it doesn't exist. For instance, if you decide to drop
#' organism 1, the \code{$pan_matrix} field (see below) would not show it when
#' called.
#' @param x \code{character} or \code{numeric}. The name of the
#' organism wanted to be dropped, or its numeric id as returned in
#' \code{$organism} field (see below).
#' @return \code{self} invisibly, but with \code{x} dropped. It isn't necessary
#' to assign the function call to a new object, nor to re-write it as R6 objects
#' are mutable.
drop = function(x){
orgs <- as.character(private$.organisms[['org']])
orgs <- setNames(orgs, seq_along(orgs))
if (is.numeric(x)){
vec <- c(private$.dropped, orgs[x])
un <- vec[unique(names(vec))]
dp <- un[!is.na(un)]
}else if (is.character(x)){
vec <- c(private$.dropped, orgs[orgs%in%x])
un <- vec[unique(names(vec))]
dp <- un[!is.na(un)]
}else{
stop('x must be numeric or character')
}
if (length(dp)) private$.dropped <- dp else private$.dropped <- NULL
invisible(self)
},
# Recover from trash previously dropped organisms
#' @description
#' Recover a previously \code{$drop()}ped organism (see above). All fields
#' and methods will start to behave considering this organism again.
#' @param x \code{character} or \code{numeric}. The name of the
#' organism wanted to be recover, or its numeric id as returned in
#' \code{$dropped} field (see below).
#' @return \code{self} invisibly, but with \code{x} recovered. It isn't necessary
#' to assign the function call to a new object, nor to re-write it as R6 objects
#' are mutable.
recover = function(x){
orgs <- as.character(private$.organisms[['org']])
orgs <- setNames(orgs, seq_along(orgs))
if (is.numeric(x)){
dp <- private$.dropped[!names(private$.dropped)%in%x]
} else if (is.character(x)){
dp <- private$.dropped[!private$.dropped%in%x]
}
if (length(dp)) private$.dropped <- dp else private$.dropped <- NULL
invisible(self)
},
# Method for saving the pangenome as flat files (text)
#' @description
#' Write the pangenome data as flat tables (text). Is not the most recommended way
#' to save a pangenome, since you can loose information as numeric precision,
#' column classes (factor, numeric, integer), and the state of the object itself
#' (i.e. dropped organisms, or core_level), loosing reproducibility. Use
#' \code{$save_pangenomeRDS} for a more precise way of saving a pagoo object.
#' Still, it is useful if you want to work with the data outside R, just keep
#' the above in mind.
#' @param dir The non-existing directory name where to put the data files. Default
#' is "pangenome".
#' @param force \code{logical}. Whether to overwrite the directory if it already
#' exists. Default: \code{FALSE}.
#' @return A directory with at least 3 files. "data.tsv" contain the basic
#' pangenome data as it is provided to the \code{data} argument in the
#' initialization method (\code{$new(...)}). "clusters.tsv" contain any metadata
#' associated to the clusters. "organisms.tsv" contain any metadata associated to
#' the organisms. The latter 2 files will contain a single column if no metadata
#' was provided.
write_pangenome = function(dir = "pangenome", force = FALSE){
dexist <- dir.exists(dir)
if (dexist) {
if (force) {
unlink(dir, recursive = TRUE)
dir.create(dir)
} else {
stop("dir already exists. Use force = TRUE to overwrite.")
}
} else {
dir.create(dir)
}
dir <- paste0(normalizePath(dir), "/")
# Genes
dn <- dimnames(self$pan_matrix)
ogs <- dn[[2]]
orgs <- dn[[1]]
data <- private$.data
act <- which(data$cluster%in%ogs & data$org%in%orgs)
x <- data[act, -4L, drop=FALSE] #Don't include "gid" column (4) as it's generated by pagoo
write.table(x = x,
file = paste0(dir, "data.tsv"),
quote = FALSE,
row.names = FALSE,
col.names = TRUE,
sep = "\t")
rm(x)
# Clusters
write.table(self$clusters,
file = paste0(dir, "clusters.tsv"),
quote = FALSE,
row.names = FALSE,
col.names = TRUE,
sep = "\t")
# Organisms
write.table(self$organisms,
file = paste0(dir, "organisms.tsv"),
quote = FALSE,
row.names = FALSE,
col.names = TRUE,
sep = "\t")
},
#' @description
#' Save a pagoo pangenome object. This function provides a method for saving a pagoo
#' object and its state into a "RDS" file. To load the pangenome, use the
#' \code{load_pangenomeRDS} function in this package. It *should* be compatible between
#' pagoo versions, so you could update pagoo and still recover the same pangenome. Even
#' \code{sep} and \code{core_level} are restored unless the user provides those
#' arguments in \code{load_pangenomeRDS}. \code{dropped} organisms also kept hidden, as
#' you where working with the original object.
#' @param file The name of the file to save. Default: "pangenome.rds".
#' @return Writes a list with all the information needed to restore the object by
#' using the load_pangenomeRDS function, into an RDS (binary) file.
save_pangenomeRDS = function(file = "pangenome.rds"){
clss <- class(self)
dn <- dimnames(self$pan_matrix)
ogs <- dn[[2]]
orgs <- dn[[1]]
data <- private$.data[, -4, drop=FALSE] #Don't include "gid" column (4) as it's generated by pagoo
data <- as.data.frame(data)
cluster_meta <- as.data.frame(private$.clusters)
org_meta <- as.data.frame(private$.organisms)
core_level <- self$core_level
sep <- private$.sep
dropped <- self$dropped
version <- private$version
pg_data <- list()
pg_data$data <- data
pg_data$sep <- sep
pg_data$dropped <- dropped
pg_data$core_level <- core_level
if (dim(cluster_meta)[2] > 1){
pg_data$cluster_meta <- cluster_meta
}
if (dim(org_meta)[2] > 1){
pg_data$org_meta <- org_meta
}
if (!is.null(private$.sequences)){
sqs <- private$.sequences
spl <- split(sqs, mcols(sqs)$org)
pg_data$sequences <- lapply(spl, function(x) {
patt <- paste0("^.+", sep, collapse = "")
names(x) <- sub(patt, "", names(x))
as.character(x)
})
has_seqs <- TRUE
} else {
has_seqs <- FALSE
}
# attr(pg_data, "package") <- eval(parse(text=paste0(clss[1], "$parent_env$.packageName")))
attr(pg_data, "package") <- "pagoo"
attr(pg_data, "parent_package") <- environmentName(parent.env(self$.__enclos_env__))
attr(pg_data, "has_seqs") <- has_seqs
attr(pg_data, "class") <- clss
attr(pg_data, "version") <- version
saveRDS(pg_data, file = file)
}
),
# Active binding variables #
active = list(
#' @field pan_matrix The panmatrix. Rows are organisms, and
#' columns are groups of orthologous. Cells indicates the presence (>=1) or
#' absence (0) of a given gene, in a given organism. Cells can have values
#' greater than 1 if contain in-paralogs.
pan_matrix = function(){
pdp <- private$.dropped
if (length(pdp)){
pm2 <- private$.panmatrix[ -as.integer(names(pdp)), , drop = FALSE]
cs0 <- colSums(pm2)==0
if (any(cs0)){
pm2[, -which(cs0), drop = FALSE]
}else{
pm2
}
}else{
private$.panmatrix
}
},
#' @field organisms A \code{\link[S4Vectors:DataFrame-class]{DataFrame}} with available
#' organism names, and organism number identifier as \code{rownames()}. (Dropped
#' organisms will not be displayed in this field, see \code{$dropped} below).
#' Additional metadata will be shown if provided, as additional columns.
organisms = function(){
orgs <- private$.organisms
drp <- private$.dropped
if (length(drp)>0){
idx <- as.integer(names(drp))
orgs[-idx, ,drop = FALSE]
}else{
orgs
}
},
#' @field genes A \code{\link[IRanges:DataFrameList-class]{SplitDataFrameList}} object with
#' one entry per cluster. Each element contains a \code{\link[S4Vectors:DataFrame-class]{DataFrame}}
#' with gene ids (\code{<gid>}) and additional metadata, if provided. \code{gid} are
#' created by \code{paste}ing organism and gene names, so duplication in gene names
#' are avoided.
genes = function(){
dn <- dimnames(self$pan_matrix)
ogs <- dn[[2]]
orgs <- dn[[1]]
data <- private$.data
act <- which(data$cluster%in%ogs & data$org%in%orgs)
data <- data[act, ]
split(data[, , drop = FALSE], f = data$cluster, drop = TRUE)
},
#' @field clusters A \code{\link[S4Vectors:DataFrame-class]{DataFrame}} with the groups
#' of orthologous (clusters). Additional metadata will be shown as additional columns,
#' if provided before. Each row corresponds to each cluster.
clusters = function(){
dn <- dimnames(self$pan_matrix)
ogs <- dn[[2]]
data <- private$.clusters
act <- which(data$cluster%in%ogs)
data <- data[act, , drop = FALSE]
data
},
#' @field core_level The percentage of organisms a gene must be in
#' to be considered as part of the coregenome. \code{core_level = 95} by default.
#' Can't be set above 100, and below 85 raises a warning.
core_level = function(value){
if (missing(value)) return(private$.level)
if (value>100) stop("can't set 'core_level' > 100%.")
if (value<85) warning("setting 'core_level' under 85%")
private$.level <- value
private$.level
},
#' @field core_genes Like \code{genes}, but only showing core genes.
core_genes = function(){
ln <- length(self$organisms[['org']])
co <- floor(private$.level * ln / 100)
pm <- self$pan_matrix
orgs <- dimnames(pm)[[1]]
ogs <- dimnames(pm)[[2]]
pm[which(pm>1L, arr.ind = TRUE)] <- 1L
ogs <- ogs[which(colSums(pm) >= co)]
data <- private$.data
act <- which(data$cluster%in%ogs & data$org%in%orgs)
data <- data[act, ]
split(data[, , drop = FALSE], f = data$cluster, drop = TRUE)
},
#' @field core_clusters Like \code{$clusters}, but only showing core
#' clusters.
core_clusters = function(){
ln <- length(self$organisms[['org']])
co <- floor(private$.level * ln / 100)
pm <- self$pan_matrix
pm[which(pm>1L, arr.ind = TRUE)] <- 1L
wh <- which(colSums(pm) >= co)
self$clusters[wh, ,drop = FALSE]
# dimnames(self$pan_matrix)[[2]][wh]
},
#' @field cloud_genes Like \code{genes}, but only showing cloud genes.
#' These are defined as those clusters which contain a single gene (singletons), plus
#' those which have more than one but its organisms are probably clonal due to identical
#' general gene content. Colloquially defined as strain-specific genes.
cloud_genes = function(){
pm <- self$pan_matrix
orgs <- dimnames(pm)[[1]]
pm[which(pm>1L, arr.ind = TRUE)] <- 1L
dups <- duplicated(pm, MARGIN = 1)
wdups <- which(dups)
if (length(wdups)){
pm <- pm[-wdups, ]
}
ogs <- dimnames(pm)[[2]]
ogs <- ogs[which(colSums(pm) == 1L)]
data <- private$.data
act <- which(data$cluster%in%ogs & data$org%in%orgs)
data <- data[act, ]
split(data[, , drop = FALSE], f = data$cluster, drop = TRUE)
},
#' @field cloud_clusters Like \code{$clusters}, but only showing cloud
#' clusters as defined above.
cloud_clusters = function(){
pm <- self$pan_matrix
pm[which(pm>1L, arr.ind = TRUE)] <- 1L
dups <- duplicated(pm, MARGIN = 1)
wdups <- which(dups)
if (length(wdups)){
pm <- pm[-wdups, ]
}
wh <- which(colSums(pm) == 1L)
self$clusters[wh, , drop = FALSE]
},
#' @field shell_genes Like \code{genes}, but only showing shell genes.
#' These are defined as those clusters than don't belong neither to the core genome,
#' nor to cloud genome. Colloquially defined as genes that are present in some but not
#' all strains, and that aren't strain-specific.
shell_genes = function(){
data <- private$.data
dn <- dimnames(self$pan_matrix)
orgs <- dn[[1]]
ogs <- dn[[2]]
core <- self$core_clusters$cluster
cloud <- self$cloud_clusters$cluster
shell <- ogs[which(!ogs %in% unlist(list(core,cloud)))]
act <- which(data$cluster%in%shell & data$org%in%orgs)
data <- data[act, ]
split(data[, , drop = FALSE], f = data$cluster, drop = TRUE)
},
#' @field shell_clusters Like \code{$clusters}, but only showing shell
#' clusters, as defined above.
shell_clusters = function(){
ogs <- dimnames(self$pan_matrix)[[2]]
core <- self$core_clusters$cluster
cloud <- self$cloud_clusters$cluster
wh <- which(!ogs %in% unlist(list(core,cloud)))
self$clusters[wh, , drop = FALSE]
},
#' @field summary_stats A \code{\link[S4Vectors:DataFrame-class]{DataFrame}} with
#' information about the number of core, shell, and cloud clusters, as well as the
#' total number of clusters.
summary_stats = function(){
total <- dim(self$pan_matrix)[2]
core <- dim(self$core_clusters)[1]
cloud <- dim(self$cloud_clusters)[1]
shell <- total - core - cloud
DataFrame(Category = c('Total',
'Core',
'Shell',
'Cloud'),
Number = c(total,
core,
shell,
cloud))
},
#' @field random_seed The last \code{.Random.seed}. Used for
#' reproducibility purposes only.
random_seed = function(){
if(!exists('.Random.seed')) set.seed(NULL)
.Random.seed
},
#' @field dropped A \code{character} vector with dropped organism
#' names, and organism number identifier as \code{names()}
dropped = function(){
private$.dropped
}
)
)
# #' @importFrom data.table `[.data.table`
PgR6_subset <- R6::R6Class('PgR6_subset',
private = list(.x = NULL,
.i = NULL,
.j = NULL,
.simple = NULL
),
public = list(
initialize = function(x, i, j, .simple){
private$.simple <- .simple
nnclu <- dimnames(x$pan_matrix)[[2]]
names(nnclu) <- seq_along(nnclu)
nnorg <- x$organisms[['org']]
# Simple subset
if(.simple){
j <- i
private$.i <- nnorg
if (is.numeric(j)){
private$.j <- nnclu[j]
}else{
wh <- which(nnclu%in%j)
private$.j <- nnclu[wh]
}
# Double arg
}else{
## Take all orgs, subset clusters
if (missing(i)){
private$.i <- nnorg
if (is.numeric(j)){
private$.j <- nnclu[j]
}else{
wh <- which(nnclu%in%j)
private$.j <- nnclu[wh]
}
}
## Take all cluster, subset orgs
if (missing(j)){
private$.j <- nnclu
if (is.numeric(i)){
private$.i <- nnorg[i]
}else{
wh <- which(nnorg%in%i)
private$.i <- nnorg[wh]
}
}
## Subset cluster and orgs
if (!missing(i) & !missing(j)){
if (is.numeric(i)){
private$.i <- nnorg[i]
}else{
wh <- which(nnorg%in%i)
private$.i <- nnorg[wh]
}
if (is.numeric(j)){
private$.j <- nnclu[j]
}else{
wh <- which(nnclu%in%j)
private$.j <- nnclu[wh]
}
}
}
private$.x <- x
}
),
cloneable = FALSE, class = FALSE, portable = TRUE
)
## S3 Subset Methods for some active bindings
#' @export
`[.PgR6` <- function(self, i, j){
Nargs <- nargs()
mj <- missing(j)
isSimple <- (Nargs + !(mj))<3
asst <- names(.active_subset)
cl_abind <- which(sapply(names(self), bindingIsActive, env = self))
avail <- which(names(cl_abind) %in% asst)
addm <- names(cl_abind)[avail]
lapply(addm, function(x) PgR6_subset$set('active', x, .active_subset[[x]], overwrite = TRUE))
on.exit(lapply(addm, function(x) PgR6_subset$active[[x]] <- NULL ))
rr <- PgR6_subset$new(x = self, i, j, .simple = isSimple)
invisible(rr)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.