#' initialize a \code{\link{BNDataset}} object.
#'
#' @name BNDataset
#' @rdname BNDataset-class
#' @docType method
#' @aliases initialize,BNDataset-method
#'
#' @param .Object an empty BNDataset.
#'
#' @return a BNDataset object.
setMethod("initialize",
"BNDataset", function(.Object)
{
validObject(.Object)
return(.Object)
})
#' @export
BNDataset <- function(data, discreteness, variables = NULL, node.sizes = NULL, ...)
{
dataset <- new("BNDataset")
# # this is here for 2 purposes:
# # 1. spare changes all over the package in order to remove name field
# # 2. keep a suggestion on how to get variable name
# # name(dataset) <- deparse(substitute(dataset))
# above: seems heavy...
name(dataset) <- "BNDataset"
# The presence of ONLY data and discreteness, and them being 2 strings, mean that two files are passed:
# - data file (data)
# - header file (discreteness)
if ( is.null(variables) && is.null(node.sizes) &&
!is.null(data) && !is.null(discreteness) &&
length(discreteness) == 1 && is.character(discreteness) &&
length(data) == 1 && is.character(data) ) {
dataset <- read.dataset(dataset, data, discreteness, ...)
validObject(dataset)
return(dataset)
}
other.args <- list(...)
if ("starts.from" %in% names(other.args))
starts.from <- other.args$starts.from
else
starts.from <- 1
if ("num.time.steps" %in% names(other.args))
num.time.steps <- other.args$num.time.steps
else
num.time.steps <- 1
num.time.steps(dataset) <- num.time.steps
if(length(variables) > 1)
{
vars <- variables
if (length(vars) == ncol(as.matrix(data))) {
variables(dataset) <- vars
} else if (num.time.steps > 1 && length(vars) * num.time.steps == ncol(as.matrix(data))) {
copyvars <- c()
for (t in 1:num.time.steps) {
for (w in vars) {
copyvars <- c(copyvars, paste(w, as.character(t), sep='_t'))
}
}
variables(dataset) <- copyvars
} else {
stop("Incoherent number of variables in the dataset header.")
}
num.variables(dataset) <- length(variables(dataset))
}
if (length(node.sizes) > 1) {
if (length(node.sizes) == ncol(as.matrix(data))) {
node.sizes(dataset) <- node.sizes
} else if (num.time.steps > 1 && length(node.sizes) * num.time.steps == ncol(as.matrix(data))) {
node.sizes(dataset) <- rep(node.sizes, num.time.steps)
} else {
stop("Incoherent number of variables in the dataset definition.")
}
}
if (length(discreteness) > 1) {
for (d in 1:length(discreteness)) {
if (discreteness[d] %in% c("d","D","T","TRUE")) discreteness[d] <- 'D'
else if (discreteness[d] %in% c("c","C","F","FALSE")) discreteness[d] <- 'C'
else {
bnstruct.log("Unrecognized status for variable ",variables(dataset)[d],", converting it to discrete.")
discreteness[d] <- 'D'
}
}
if (length(discreteness) == ncol(as.matrix(data))) {
discreteness(dataset) <- discreteness
} else if (num.time.steps > 1 && length(discreteness) * num.time.steps == ncol(as.matrix(data))) {
discreteness(dataset) <- rep(discreteness, num.time.steps)
} else {
stop("Incoherent number of variables in the dataset definition.")
}
}
if (!is.null(data))
{
raw.data(dataset) <- as.matrix(data) + (1 - starts.from)
if (is.null(variables)) {
variables(dataset) <- rownames(data)
warning("Variable names guessed from data. Please check for consistency with your actual data.")
}
if (is.null(node.sizes)) {
node.sizes <- rep(0, length(variables))
for (v in 1:length(variables))
{
node.sizes[v] <- max(data[,v][which(!is.na(data[,v]))]) - min(data[,v][which(!is.na(data[,v]))]) + 1
}
warning("Variable cardinalities guessed from data. Please check for consistency with your actual data. Otherwise, execution may terminate with errors later.")
}
}
num.items(dataset) <- nrow(dataset@raw.data)
validObject(dataset)
if(length(dataset@variables) > 0 && has.raw.data(dataset))
colnames(dataset@raw.data) <- dataset@variables
return(dataset)
}
# validator
setValidity("BNDataset",
function(object)
{
retval <- NULL
if (object@num.variables > 0 && length(object@variables) > 0 && length(object@variables) != object@num.variables)
{
retval <- c(retval, "incoherent number of variable names")
}
if (object@has.raw.data && ncol(object@raw.data) != object@num.variables)
{
retval <- c(retval, "incoherent number of variables in raw dataset")
}
if (object@has.imputed.data && ncol(object@imputed.data) != object@num.variables)
{
retval <- c(retval, "incoherent number of variables in imputed dataset")
}
if(object@num.variables > 0 && length(object@discreteness) > 1 &&
length(object@discreteness) != object@num.variables)
{
retval <- c(retval, "incoherent number of variable statuses")
}
if (object@num.variables > 0 && length(object@node.sizes) == object@num.variables && object@has.raw.data)
{
warn <- c()
halt <- c()
for (var in 1:object@num.variables)
{
if ( object@discreteness[var] &&
(min(object@raw.data[,var][which(!is.na(object@raw.data[,var]))]) > 1 ||
max(object@raw.data[,var][which(!is.na(object@raw.data[,var]))]) < object@node.sizes[var]))
{
warn <- c(warn, var)
}
if ( object@discreteness[var] &&
(min(object@raw.data[,var][which(!is.na(object@raw.data[,var]))]) < 1 ||
max(object@raw.data[,var][which(!is.na(object@raw.data[,var]))]) > object@node.sizes[var]))
{
halt <- c(halt, var)
}
}
if (length(halt) > 0)
{
wrongs <- strcat("Dataset contains values out of bounds for variables ", halt, sep=" ")
retval <- c(retval, wrongs)
} else if (length(warn) > 0)
{
wrongs <- strcat("Not all of the possible values have been observed for variables ", warn, sep = " ")
warning(wrongs)
}
}
if (object@num.variables > 0 && length(object@node.sizes) == object@num.variables && object@has.imputed.data)
{
warn <- c()
halt <- c()
for (var in 1:object@num.variables)
{
if ( object@discreteness[var] &&
(min(object@imputed.data[,var][which(!is.na(object@imputed.data[,var]))]) > 1 ||
max(object@imputed.data[,var][which(!is.na(object@imputed.data[,var]))]) < object@node.sizes[var]))
{
warn <- c(warn, var)
}
if ( object@discreteness[var] &&
(min(object@imputed.data[,var][which(!is.na(object@imputed.data[,var]))]) < 1 ||
max(object@imputed.data[,var][which(!is.na(object@imputed.data[,var]))]) > object@node.sizes[var]))
{
halt <- c(halt, var)
}
}
if (length(halt) > 0)
{
wrongs <- strcat("Dataset contains values out of bounds for variables ", halt, sep=" ")
retval <- c(retval, wrongs)
} else if (length(warn) > 0)
{
wrongs <- strcat("Not all of the possible values have been observed for variables ", warn, sep= " ")
warning(wrongs)
}
}
if (object@num.time.steps < 1) {
retval <- c(retval, "impossible number of time steps in the dataset")
}
if (length(object@quantiles) > 1 && length(object@quantiles) != length(object@variables)) {
retval <- c(retval, "incorrect list of quantiles")
}
if (is.null(retval)) return (TRUE)
return(retval)
}
)
#' @rdname name
#' @aliases name,BNDataset
setMethod("name", "BNDataset", function(x) { return(slot(x, "name")) } )
#' @rdname num.variables
#' @aliases num.variables,BNDataset
setMethod("num.variables", "BNDataset", function(x) { return(slot(x, "num.variables")) } )
#' @rdname variables
#' @aliases variables,BNDataset
setMethod("variables", "BNDataset", function(x) { return(slot(x, "variables")) } )
#' @rdname discreteness
#' @aliases discreteness,BNDataset
setMethod("discreteness",
"BNDataset",
function(x)
{
return(slot(x, "discreteness"))
})
#' @aliases quantiles,BNDataset
#' @rdname quantiles
setMethod("quantiles",
"BNDataset",
function(x)
{
return(slot(x, "quantiles"))
})
#' @rdname node.sizes
#' @aliases node.sizes,BNDataset
setMethod("node.sizes", "BNDataset", function(x) { return(slot(x, "node.sizes")) } )
#' @rdname header.file
#' @aliases header.file,BNDataset
setMethod("header.file", "BNDataset", function(x) return(slot(x, "header.file")))
#' @rdname data.file
#' @aliases data.file,BNDataset
setMethod("data.file", "BNDataset", function(x) return(slot(x, "data.file")))
#' @rdname num.variables
#' @aliases num.variables,BNDataset
setMethod("num.variables","BNDataset", function(x) return(slot(x, "num.variables")))
#' @rdname num.items
#' @aliases num.items,BNDataset
setMethod("num.items", "BNDataset", function(x) return(slot(x, "num.items")))
#' @rdname has.boots
#' @aliases has.boots,BNDataset
setMethod("has.boots", "BNDataset", function(x) return(slot(x, "has.boots")))
#' @rdname has.imputed.boots
#' @aliases has.imputed.boots,BNDataset
setMethod("has.imputed.boots", "BNDataset", function(x) return(slot(x, "has.imputed.boots")))
#' @rdname boots
#' @aliases boots,BNDataset
setMethod("boots", "BNDataset", function(x) return(slot(x, "boots")))
#' @rdname imp.boots
#' @aliases imp.boots,BNDataset
setMethod("imp.boots", "BNDataset", function(x) return(slot(x, "imp.boots")))
#' @rdname num.boots
#' @aliases num.boots,BNDataset
setMethod("num.boots", "BNDataset", function(x) return(slot(x, "num.boots")))
#' @rdname num.time.steps
#' @aliases num.time.steps,BNDataset
setMethod("num.time.steps", "BNDataset", function(x) return(slot(x, "num.time.steps")))
#' @name name<-
#' @aliases name<-,BNDataset-method
#' @docType methods
#' @rdname name-set
setReplaceMethod("name",
"BNDataset",
function(x, value)
{
slot(x, "name") <- value
validObject(x)
return(x)
})
#' @name variables<-
#' @aliases variables<-,BNDataset-method
#' @docType methods
#' @rdname variables-set
setReplaceMethod("variables",
"BNDataset",
function(x, value)
{
slot(x, "variables") <- value
num.variables(x) <- length(value)
validObject(x)
return(x)
})
#' @name discreteness<-
#' @aliases discreteness<-,BNDataset-method
#' @docType methods
#' @rdname discreteness-set
setReplaceMethod("discreteness",
"BNDataset",
function(x, value)
{
if (is.logical(value))
slot(x, "discreteness") <- value
if (is.integer(value) || is.numeric(value))
{
d <- rep(F, num.variables(x))
d[value] <- T
slot(x, "discreteness") <- value
}
if (is.character(value))
slot(x, "discreteness") <- sapply(1:length(value), FUN=function(i){ !is.na(match(value[i],c('d',"D"))) })
validObject(x)
return(x)
})
#' @name quantiles<-
#' @aliases quantiles<-,BNDataset-method
#' @docType methods
#' @rdname quantiles-set
setReplaceMethod("quantiles",
"BNDataset",
function(x, value)
{
slot(x, "quantiles") <- value
validObject(x)
return(x)
})
#' @name node.sizes<-
#' @aliases node.sizes<-,BNDataset-method
#' @docType methods
#' @rdname node.sizes-set
setReplaceMethod("node.sizes",
"BNDataset",
function(x, value)
{
slot(x, "node.sizes") <- value
validObject(x)
return(x)
})
#' @rdname has.raw.data
#' @aliases has.raw.data,BNDataset
setMethod("has.raw.data",
"BNDataset",
function(x)
{
return(slot(x, "has.raw.data"))
})
#' @rdname has.imputed.data
#' @aliases has.imputed.data,BNDataset
setMethod("has.imputed.data",
"BNDataset",
function(x)
{
return(slot(x, "has.imputed.data"))
})
#' @rdname raw.data
#' @aliases raw.data,BNDataset
setMethod("raw.data",
"BNDataset",
function(x)
{
if (has.raw.data(x))
return (x@raw.data)
stop("The dataset contains no data.")
})
#' @rdname imputed.data
#' @aliases imputed.data,BNDataset
setMethod("imputed.data",
"BNDataset",
function(x)
{
if (has.imputed.data(x))
return (x@imputed.data)
stop("The dataset contains no imputed data. ",
"Please impute data before learning.\nSee > ?impute for help.")
})
#' @name header.file<-
#' @aliases header.file<-,BNDataset-method
#' @docType methods
#' @rdname header.file-set
setReplaceMethod("header.file",
"BNDataset",
function(x, value)
{
slot(x, "header.file") <- value
return(x)
})
#' @name data.file<-
#' @aliases data.file<-,BNDataset-method
#' @docType methods
#' @rdname data.file-set
setReplaceMethod("data.file",
"BNDataset",
function(x, value)
{
slot(x, "data.file") <- value
return(x)
})
#' @name num.variables<-
#' @aliases num.variables<-,BNDataset-method
#' @docType methods
#' @rdname num.variables-set
setReplaceMethod("num.variables",
"BNDataset",
function(x, value)
{
slot(x, "num.variables") <- value
validObject(x)
return(x)
})
#' @name num.items<-
#' @aliases num.items<-,BNDataset-method
#' @docType methods
#' @rdname num.items-set
setReplaceMethod("num.items",
"BNDataset",
function(x, value)
{
slot(x, "num.items") <- value
validObject(x)
return(x)
})
#' @name boots<-
#' @aliases boots<-,BNDataset-method
#' @docType methods
#' @rdname boots-set
setReplaceMethod("boots",
"BNDataset",
function(x, value)
{
slot(x, "boots") <- value
slot(x, "num.boots") <- length(value)
slot(x, "has.boots") <- TRUE
validObject(x)
return(x)
})
#' @name num.boots<-
#' @aliases num.boots<-,BNDataset-method
#' @docType methods
#' @rdname num.boots-set
setReplaceMethod("num.boots",
"BNDataset",
function(x, value)
{
slot(x, "num.boots") <- value
validObject(x)
return(x)
})
#' @name num.time.steps<-
#' @aliases num.time.steps<-,BNDataset-method
#' @docType methods
#' @rdname num.time.steps-set
setReplaceMethod("num.time.steps",
"BNDataset",
function(x, value)
{
slot(x, "num.time.steps") <- value
validObject(x)
return(x)
})
#' @name imp.boots<-
#' @aliases imp.boots<-,BNDataset-method
#' @docType methods
#' @rdname imp.boots-set
setReplaceMethod("imp.boots",
"BNDataset",
function(x, value)
{
slot(x, "imp.boots") <- value
slot(x, "num.boots") <- length(value)
slot(x, "has.imputed.boots") <- TRUE
validObject(x)
return(x)
})
#' @name raw.data<-
#' @aliases raw.data<-,BNDataset-method
#' @docType methods
#' @rdname raw.data-set
setReplaceMethod("raw.data",
"BNDataset",
function(x, value)
{
slot(x, "raw.data") <- value
slot(x, "has.raw.data") <- TRUE
num.items(x) <- nrow(value)
validObject(x)
return(x)
})
#' @name imputed.data<-
#' @aliases imputed.data<-,BNDataset-method
#' @docType methods
#' @rdname imputed.data-set
setReplaceMethod("imputed.data",
"BNDataset",
function(x, value)
{
slot(x, "imputed.data") <- value
slot(x, "has.imputed.data") <- TRUE
num.items(x) <- nrow(value)
validObject(x)
return(x)
})
#' @rdname complete
#' @aliases complete,BNDataset
setMethod("complete",
"BNDataset",
function(x, complete.vars=seq_len(num.variables(x)))
{
y <- x
rd <- raw.data(y)
raw.data(y) <- rd[complete.cases(rd[,complete.vars]),]
num.items(y) <- nrow(raw.data(y))
slot(y, "imputed.data") <- matrix(c(0))
slot(y, "has.imputed.data") <- FALSE
slot(y, "boots") <- list(NULL)
slot(y, "has.boots") <- FALSE
slot(y, "imp.boots") <- list(NULL)
slot(y, "has.imputed.boots") <- FALSE
slot(y, "num.boots") <- 0
validObject(y)
return(y)
})
# redefinition of print() for BNDataset objects
# ' print a \code{\link{BNDataset}} to \code{stdout}.
#'
#' @method print BNDataset
#' @name print
#'
# ' @param x a \code{\link{BNDataset}}.
#' @param show.raw.data if \code{x} is a \code{\link{BNDataset}}, print also raw dataset, if available.
#' @param show.imputed.data if \code{x} is a \code{\link{BNDataset}}, print also imputed dataset, if available.
# ' @param ... potential other arguments.
#'
#' @rdname print
#' @aliases print,BNDataset print.BNDataset,BNDataset
#' @export
#setMethod("print.BNDataset",
# "BNDataset",
print.BNDataset <- function(x, show.raw.data = FALSE, show.imputed.data = FALSE, ...)
{
str <- "\nDataset: \n"
#str <- paste(str, name(x), sep = '')
#str <- paste(str, "\n", sep = '')
cat(str)
str <- "\nnum.variables "
str <- paste(str, num.variables(x), sep = '')
str <- paste(str, "\n", sep = '')
cat(str)
str <- "\nvariables\n"
cat(str)
cat(variables(x))
str <- "\ndiscreteness\n"
cat(str)
cat(discreteness(x))
str <- "\nnode.sizes\n"
cat(str)
cat(node.sizes(x))
str <- "\nnum.items\n"
cat(str)
cat(num.items(x))
str <- "\nimputation\n"
cat(str)
cat(has.imputed.data(x))
str <- "\nhas.boots\n"
cat(str)
cat(has.boots(x))
str <- "\nhas.imputed.boots\n"
cat(str)
cat(has.imputed.boots(x))
str <- "\nnum.boots\n"
cat(str)
cat(num.boots(x))
if (num.time.steps(x) > 1) {
str <- "\ntime steps\n"
cat(str)
cat(num.time.steps(x))
}
if (show.raw.data == TRUE && has.raw.data(x))
{
cat("\nRaw data:\n")
print(raw.data(x))
}
if (show.imputed.data == TRUE && has.imputed.data(x))
{
cat("\nImputed data:\n")
print(imputed.data(x))
}
cat("\n")
}#)
#' @rdname impute
#' @aliases impute,BNDataset
setMethod("impute",
"BNDataset",
function(object, k.impute = 10)
{
# assumes raw data is ok
bnstruct.start.log("performing imputation ...")
object@imputed.data <- knn.impute(object@raw.data, k.impute,
setdiff(1:length(object@node.sizes), c()))
object@has.imputed.data <- TRUE
bnstruct.end.log("imputation finished.")
return(object)
})
#' @rdname bootstrap
#' @aliases bootstrap,BNDataset
setMethod("bootstrap",
"BNDataset",
function(object, num.boots = 100, seed = 0, imputation = FALSE, k.impute = 10)
{
if (imputation)
bnstruct.start.log("Generating bootstrap samples with imputation ...")
else
bnstruct.start.log("Generating bootstrap samples ...")
# assumes raw data is ok
object@has.boots <- TRUE
object@num.boots <- num.boots
set.seed(seed)
if (num.boots >= 1)
{
boot.sample <- matrix(sample.int(object@num.items,
size = num.boots * object@num.items,
replace=TRUE),
object@num.items, num.boots)
if (imputation)
object@has.imputed.boots <- TRUE
for (i in 1:num.boots)
{
object@boots[[i]] <- object@raw.data[boot.sample[,i],]
if (imputation)
object@imp.boots[[i]] <- knn.impute(object@boots[[i]],
k.impute,
setdiff(1:length(object@node.sizes),c()) )
}
}
bnstruct.end.log("Bootstrap samples generated.")
return(object)
})
#' @rdname boot
#' @aliases boot,BNDataset
setMethod("boot",
c("BNDataset", "numeric"),
function(dataset, index, use.imputed.data = FALSE)
{
if (!use.imputed.data && !dataset@has.boots)
stop('No bootstrap samples available for dataset.')
if (use.imputed.data && !dataset@has.imputed.boots)
stop('No imputed bootstrap samples available for dataset. ',
"Please impute data before learning.\nSee > ?impute for help.")
if (index <= 0 || index > dataset@num.boots)
stop('Sample index out of range for dataset.\n')
if (use.imputed.data)
return(dataset@imp.boots[[index]])
return(dataset@boots[[index]])
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.