Nothing
#' Generates a number of datasets from one metadata scenario
#'
#' @param name The path to the setup file
#' @param setnr The metadata scenario, as taken from the info table
#' @param draws The number of datasets that are drawn from the metadata scenario
#' @param seedinfo The random number generator seed parameters
#' @param metaseedinfo If necessary, a separate set of random number generator parameters for the metadata (e.g. cluster centers)
#' @param file A custom file name for the output database. Defaults to the pattern setupname_setnr_seed.db
#' @param seedincrement The random number seed will by default increase by 1 for each draw from the base seed given in seedinfo unless specified otherwise here
#' @return An SQLite database that contains the desired number of data sets drawn from a certain metadata scenario
#' @examples
#' \dontrun{
#' source(system.file("dangl2014.R", package="bdlp"))
#' generateDatabase(name="dangl2014.R", setnr=1, draws=10)
#' unlink("dangl2014_set_1_seed_100.db")
#' }
#' @export
#' @importFrom utils capture.output txtProgressBar setTxtProgressBar
generateDatabase <- function(name = NULL, setnr = NULL, draws = 1,
seedinfo = list(100,
paste(R.version$major, R.version$minor, sep = "."),
RNGkind()),
metaseedinfo = list(100,
paste(R.version$major, R.version$minor, sep = "."),
RNGkind()),
file = NULL, seedincrement = 1){
if(!is.null(name)) source(name)
spl <- strsplit(name, "/")
spl <- unlist(spl)
name <- spl[length(spl)]
name <- unlist(strsplit(name, ".R"))
pb <- txtProgressBar(min = 0, max = draws, style = 3)
if(is.null(file)){
dbname <- paste(name, "_set_", setnr, "_", "seed_", seedinfo[[1]] ,".db", sep="")
file.create(dbname)
}
else {
dbname <- file
file.create(dbname)
}
for(i in 1:draws){
seedinfo[[1]] <- seedinfo[[1]] + seedincrement
metadata <- readMetadata(name = name, setnr = setnr, seedinfo = seedinfo, metaseedinfo = metaseedinfo)
output <- generateData(metadata)
writeDatabase(output, dbname, i)
Sys.sleep(0.1)
setTxtProgressBar(pb, i)
}
close(pb)
cat("\n")
cat(paste(draws, "version(s) of set no.", setnr, "of", name, "experimental setup generated.\n"))
cat(paste("Base seed ", seedinfo[[1]]-draws, " was used and is included in filename.\n", sep=""))
}
#' Generate a dataset from a metadata object
#'
#' @param m A metadata object
#' @return A dataset as specified by the metadata object
#' @examples
#' require(MASS)
#' m <- new("metadata.metric",
#' clusters = list(c1 = list(n = 25, mu = c(4,5), Sigma=diag(1,2)),
#' c2 = list(n = 25, mu = c(-1,-2), Sigma=diag(1,2))),
#' genfunc = mvrnorm)
#' generateData(m)
#' @export
setGeneric("generateData", function(m) {standardGeneric("generateData")})
#' Generate a dataset from a metadata object
#'
#' @param m A metadata object
#' @return A dataset as specified by the metadata object
#' @export
setMethod("generateData", signature(m = "metadata.metric"),
function(m){
set.seed(m@seedinfo[[1]])
RNGversion(m@seedinfo[[2]])
RNGkind(m@seedinfo[[3]][1], m@seedinfo[[3]][2])
total_n <- sum(unlist(lapply(m@clusters, function(x) x$n)))
k <- length(m@clusters)
#zum Ermitteln der Variablenanzahl - noch zu Verbessern
for(i in 1:length(m@clusters[[1]])){
if(is.matrix(m@clusters[[1]][[i]]))
vars <- ncol(m@clusters[[1]][[i]])
}
samp <- m@genfunc
datamatrix <- matrix(0, nrow=total_n, ncol=vars)
clus <- list()
for(i in 1:k){
clus[[i]] <- do.call(samp, m@clusters[[i]])
}
datamatrix <- do.call(rbind, clus)
if(m@standardization == "NONE")
as.data.frame(datamatrix)
else {
stdz <- get(m@standardization)
as.data.frame(stdz(datamatrix))
}
})
#' Generate a dataset from a metadata object
#'
#' @param m A metadata object
#' @return A dataset as specified by the metadata object
#' @export
setMethod("generateData", signature(m = "metadata.functional"),
function(m){
set.seed(m@seedinfo[[1]])
RNGversion(m@seedinfo[[2]])
RNGkind(m@seedinfo[[3]][1], m@seedinfo[[3]][2])
nf <- length(m@functions)
gridMatrix <- m@gridMatrix
xvalvector <- yvalvector <- cluster <- vector()
xvals <- seq(m@interval[1], m@interval[2], by=1/(m@resolution-1))
pointsPerFunc <- rowSums(gridMatrix)
curves <- rep(1:m@total_n, pointsPerFunc)
sddist <- get(m@sd_distribution)
for(i in 1:m@total_n) xvalvector <- c(xvalvector, xvals[gridMatrix[i,] == 1])
for(i in 1:nf){
upper <- floor(m@total_n*i/nf)
for(j in (floor(m@total_n*(i-1)/nf) + 1):upper){
yvals <- m@functions[[i]](xvals[gridMatrix[j,] == 1])
yvalvector <- c(yvalvector, yvals + sddist(yvals, sd=m@sd))
cluster[j] <- i
}
}
data <- cbind(curves, xvalvector, yvalvector)
as.data.frame(data)
})
#' Generate a dataset from a metadata object
#'
#' @param m A metadata object
#' @return A dataset as specified by the metadata object
#' @export
setMethod("generateData", signature(m = "metadata.ordinal"),
function(m){
set.seed(m@seedinfo[[1]])
RNGversion(m@seedinfo[[2]])
RNGkind(m@seedinfo[[3]][1], m@seedinfo[[3]][2])
samp <- m@genfunc
total_n <- sum(unlist(lapply(m@clusters, function(x) x$n)))
k <- length(m@clusters)
for(i in 1:length(m@clusters$c1)){
if(is.matrix(m@clusters$c1[[i]]))
vars <- ncol(m@clusters$c1[[i]])
}
datamatrix <- matrix(0, nrow=total_n, ncol=vars)
clus <- list()
for(i in 1:k){
clus[[i]] <- do.call(samp, m@clusters[[i]])
}
datamatrix <- do.call(rbind, clus)
as.data.frame(datamatrix)
})
#' Generate a dataset from a metadata object
#'
#' @param m A metadata object
#' @return A dataset as specified by the metadata object
#' @export
setMethod("generateData", signature(m = "metadata.binary"),
function(m){
set.seed(m@seedinfo[[1]])
RNGversion(m@seedinfo[[2]])
RNGkind(m@seedinfo[[3]][1], m@seedinfo[[3]][2])
samp <- m@genfunc
total_n <- sum(unlist(lapply(m@clusters, function(x) x$n)))
k <- length(m@clusters)
for(i in 1:length(m@clusters$c1)){
if(is.matrix(m@clusters$c1[[i]]))
vars <- ncol(m@clusters$c1[[i]])
}
datamatrix <- matrix(0, nrow=total_n, ncol=vars)
clus <- list()
for(i in 1:k){
clus[[i]] <- do.call(samp, m@clusters[[i]])
}
datamatrix <- do.call(rbind, clus)
as.data.frame(datamatrix)
})
#setMethod("generate.data", signature(m = "metadata.wordnet"),
#function(m){
# set.seed(m@seedinfo[[1]])
# RNGversion(m@seedinfo[[2]])
# RNGkind(m@seedinfo[[3]][1], m@seedinfo[[3]][2])
#
# require("wordnet")
#
# clus <- list()
# for(i in 1:length(m@clusters)) {
# filter <- getTermFilter(m@filtertype, m@clusters[[i]]$word, m@case_ignore)
# terms <- getIndexTerms(m@clusters[[i]]$wordtype, m@clusters[[i]]$n, filter)
# clus[[i]] <- sapply(terms, getLemma)
# }
#
# data.frame(wordlist = unlist(clus))
#})
#' Generate a dataset from a metadata object
#'
#' @param m A metadata object
#' @return A dataset as specified by the metadata object
#' @export
setMethod("generateData", signature(m = "metadata.randomstring"),
function(m){
set.seed(m@seedinfo[[1]])
RNGversion(m@seedinfo[[2]])
RNGkind(m@seedinfo[[3]][1], m@seedinfo[[3]][2])
samp <- m@genfunc
clus <- list()
for(i in 1:length(m@clusters)) {
clus[[i]] <- do.call(samp, m@clusters[[i]])
}
data.frame(stringlist = unlist(clus))
})
#' Generates random strings
#'
#' @param center Reference string, i.e. the cluster center
#' @param maxdist The maximum allowed string distance
#' @param length The length of the string
#' @param n Number of strings to be generated
#' @param method The string distance method used to calculate the string, defaults to Levensthein distance
#' @return A character string
#' @examples
#' getRandomstrings(center="hello", maxdist = 2, n = 5)
#' @export
getRandomstrings <- function(center = NULL, maxdist = NULL, length = nchar(center), n = 1, method = "lv"){
l <- vector()
l[1:n] <- ""
for(i in 1:n){
while(TRUE){
str <- paste(sample(letters, length, T), collapse="")
if(stringdist::stringdist(center, str, method) <= maxdist) {
l[i] <- str
break
}
}
}
return(l)
}
writeDatabase <- function(output, dbname, draw){
requireNamespace("RSQLite")
driver <- DBI::dbDriver("SQLite")
con <- RSQLite::dbConnect(driver, dbname)
if(draw == 1)
RSQLite::dbWriteTable(con, paste("draw", draw, sep="_"), output, row.names=FALSE)
else
RSQLite::dbWriteTable(con, paste("draw", draw, sep="_"), output, append=T, row.names=FALSE)
RSQLite::dbDisconnect(con)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.