# removed option to provide y response vector, this will be used later in a
# different S3 ModelTrain method
ReadInData <- function(data, ycol, xcols, idcol, type = "ANALYZE") {
if (is.na(idcol)) {
if (!all(c(ycol, xcols) <= ncol(data)))
stop("there is a column number larger than number of columns in 'data'")
} else {
if (!all(c(ycol, xcols, idcol) <= ncol(data)))
stop("there is a column number larger than number of columns in 'data'")
}
# might still need this for prediction
#-----Read in data
if (!is.na(ycol)) {
work.data.response <- subset(data, select = ycol)
work.data <- subset(data, select = xcols)
work.data <- cbind(work.data.response, work.data)
rm(work.data.response)
}
else {
work.data <- subset(data, select = xcols)
}
if (!is.na(idcol)) {
row.names(work.data) <- data[, idcol]
}
names(work.data)[1] <- "y"
rm.y <- is.na(work.data$y)
if (sum(rm.y) > 0) {
warning(paste("WARNING.....", sum(rm.y),
"missing responses were provided and will not be used"))
work.data <- subset(work.data, !rm.y)
rm.y.idx <- which(rm.y)
} else {
rm.y.idx <- c()
}
rm.desc <- apply(work.data, 1, function(x) sum(is.na(x)) > 0)
if (sum(rm.desc) > 0) {
warning(paste("WARNING.....", sum(rm.desc),
"rows with missing responses or descriptors were provided and will not be used"))
work.data <- subset(work.data, !rm.desc)
rm.desc.idx <- which(rm.desc)
} else {
rm.desc.idx <- c()
}
if (sum(!(apply(work.data, 1, is.numeric))) > 0) {
stop(paste(
"ERROR..... non-numeric responses or descriptors were found,
responses and descriptors must be numeric",
dim(work.data)[1]))
}
if (type == "ANALYZE") {
if (length(unique(work.data$y)) == 1) {
stop(paste("ERROR..... at least 2 different responses are required for analysis"))
}
# removing constant columns
rm.cols <- (apply(work.data, 2, var) == 0)
if (sum(rm.cols) > 0) {
warning(paste("WARNING.....", sum(rm.cols),
"constant descriptor columns were provided and will not be used"))
work.data <- subset(work.data, select = (!rm.cols))
}
} else {
rm.cols <- 0
}
if (type == "PREDICTION") {
work.data <- cbind(rep(NA,nrow(work.data)), work.data)
names(work.data)[1] <- 'y'
}
return(list(work.data, rm.cols, c(rm.y.idx, rm.desc.idx)))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.