Nothing
# INTERPRET ################### interpret takes a vector and makes a data.frame out of it (to be used in e.g. make.ovariable).
### It also changes abbreviations into probability samples.
# Lognormal distribution parametrization functions
lmean <- function(parmean, parsd) {return(log(parmean)-log(1+(parsd^2)/(parmean^2))/2)}
lsd <- function(parmean, parsd) {return(log(1+(parsd^2)/(parmean^2)))}
# Actual interpretation function. Takes already pre-processed information and returns a distribution. This is a separate function because
# it isn't vectorizable (easily anyway).
interpf <- function(
n,
res.char,
brackets.pos,
brackets.length,
minus.length,
minus.exists,
plusminus,
plusminus.length,
plusminus.pos,
doublePoint,
minus.relevant,
fromzero,
dbug = FALSE
) {
if(doublePoint[1] > 0) {
tempArgs <- sort(as.numeric(unlist(strsplit(res.char, "\\:"))))
if(dbug) cat("Triangular distribution. \n")
if (n == 0) {
if (tempArgs[1] + tempArgs[3] == 2 * tempArgs[2]) {
return(tempArgs[2])
} else if (tempArgs[1] + tempArgs[3] > 2 * tempArgs[2]) {
return(tempArgs[1] + ((tempArgs[3]-tempArgs[1])(tempArgs[2]-tempArgs[1])/2)^(1/2))
} else {
return(tempArgs[3] - ((tempArgs[3]-tempArgs[1])(tempArgs[3]-tempArgs[2])/2)^(1/2))
}
} else {
return(rtriangle(n,tempArgs[1],tempArgs[3],tempArgs[2]))
}
}
if(brackets.pos > 0) {
n.minus.inside.brackets <- sum(minus.relevant > brackets.pos & minus.relevant < brackets.pos + brackets.length)
imean <- as.numeric(substr(res.char, 1, brackets.pos - 1))
if (n == 0) {
return(imean)
}
if(n.minus.inside.brackets == 1) {
ici <- c(as.numeric(substr(res.char, brackets.pos + 1, minus.relevant[minus.relevant > brackets.pos] - 1)), as.numeric(substr(res.char,
minus.relevant[minus.relevant > brackets.pos] + 1, brackets.pos + brackets.length - 2)))
if((ici[2] - imean) / (imean - ici[1]) < 1.5) {
if(dbug) cat("Normal distribution. \n")
isd <- sum(abs(ici - imean) / 2) / qnorm(0.975)
return(rnorm(n, imean, isd))
} else {
if(dbug) cat("Lognormal distribution. \n")
isd <- sum(abs(log(ici) - log(imean)) / 2) / qnorm(0.975)
return(exp(rnorm(n, log(imean), isd)))
}
}
if(n.minus.inside.brackets %in% c(2,3)) {
ici <- c(as.numeric(substr(res.char, brackets.pos + 1, minus.relevant[minus.relevant > brackets.pos][2] - 1)), as.numeric(substr(res.char,
minus.relevant[minus.relevant > brackets.pos][2] + 1, brackets.pos + brackets.length - 2)))
isd <- sum(abs(ici - imean) / 2) / qnorm(0.975)
if(dbug) cat("Normal distribution. \n")
return(rnorm(n, imean, isd))
}
warning(paste("Unable to interpret \"", res.char, "\"", sep = ""))
return(rep(NA, n))
}
if(plusminus.pos > 0) {
imean <- as.numeric(substr(res.char, 1, plusminus.pos - 1))
if (n == 0) {
return(imean)
}
if(dbug) cat("Normal distribution. \n")
return(rnorm(n, imean, as.numeric(substr(res.char, plusminus.pos + plusminus.length, nchar(res.char)))))
}
if(minus.exists) {
if(length(minus.relevant) == 1) {
a <- as.numeric(substr(res.char, 1, minus.relevant - 1))
b <- as.numeric(substr(res.char, minus.relevant + 1, nchar(res.char)))
if(a / b >= 1/100 | a == 0) {
if(dbug) cat("Uniform distribution. \n")
if (n == 0) {
return((a+b)/2)
}
return(runif(n, a, b))
} else {
if(dbug) cat("Loguniform distribution. \n")
if (n == 0) {
return(exp((log(a)+log(b))/2))
}
return(exp(runif(n, log(a), log(b))))
}
}
if(length(minus.relevant) %in% c(2,3)) { # If there is more than one '-' we're porbably dealing with negative boundaries. (More than 3 will produce NAs.)
if(dbug) cat("Uniform distribution. \n")
# Assume that negative number is always first.
a <- as.numeric(substr(res.char, 1, minus.relevant[2] - 1))
b <- as.numeric(substr(res.char, minus.relevant[2] + 1, nchar(res.char)))
if (n == 0) {
return((a+b)/2)
}
return(runif(n, a, b))
}
}
if(sum(unlist(strsplit(res.char, ""))==";") > 0) {
if(dbug) cat("Discrete random samples. \n")
a <- as.numeric(unlist(strsplit(res.char, ";")))
if (n == 0) {
return(mean(a))
}
return(sample(a, n, replace = TRUE))
}
if(fromzero[[1]][1] == 1) {
temp <- interpret(
paste("0-", substr(res.char, 2, nchar(res.char)), sep = ""), n, dbug = dbug
)
return(
temp$Result
)
}
warning(paste("Unable to interpret \"", res.char, "\"", sep = ""))
return(rep(NA, n))
}
# The following function processes character strings and loops the interpretation function.
input.interp <- function(res.char, n = 1000, dbug = FALSE) {
res.char <- gsub(" ", "", res.char)
res.char <- gsub(",", ".", res.char)
plusminus <- gregexpr(paste("\\+-|", rawToChar(as.raw(177)), sep = ""), res.char)
plusminus.length <- as.numeric(unlist(sapply(plusminus, attributes)))
plusminus.pos <- unlist(plusminus)
minus <- gregexpr("-", res.char)
e <- gregexpr("e-|E-", res.char) # ignore negative signs in exponents when data is given in form 1e-27
for (i in 1:length(minus)){
minus[[i]] <- minus[[i]][!(minus[[i]] %in% (e[[i]] + 1))]
}
minus.length <- sapply(minus, length)
minus.exists <- unlist(minus)[cumsum(c(0, minus.length[-length(minus.length)])) + 1] > 0
brackets <- gregexpr("\\(.*\\)", res.char) # matches for brackets "(...)"
brackets.length <- as.numeric(unlist(sapply(brackets, attributes)[1,]))
brackets.pos <- unlist(brackets)
doublePoint <- gregexpr(":", res.char)
fromzero <- gregexpr("<", res.char)
out <- list()
for(i in 1:length(res.char)) {
if(res.char[i] %in% c("NA") | nchar(gsub(" ", "", res.char[i])) == 0) {
out[[i]] <- NA
} else {
val <- suppressWarnings(as.numeric(res.char[i]))
if(is.na(val)) {
minus.relevant <- unlist(minus)[(cumsum(c(0, minus.length)) + 1)[i]:cumsum(minus.length)[i]]
out[[i]] <- interpf(n, res.char[i], brackets.pos[i], brackets.length[i], minus.length[i], minus.exists[i], plusminus[[i]],
plusminus.length[i], plusminus.pos[i], doublePoint[[i]], minus.relevant, fromzero[i], dbug
)
} else out[[i]] <- val
}
}
if (any(sapply(out, length) > 1)) {
for(i in 1:length(res.char)) {
if (length(out[[i]]) == 1) {
out[[i]] <- rep(out[[i]], n)
}
}
}
out
}
# Assisting function for data.frame wrapper.
f.iter <- function(x) {
1:x
}
# Data.frame wrapper for the functions.
interpret <- function(idata, N = NULL, rescol = "Result", dbug = FALSE, ...) {
if (length(N) == 0) N <- get("N", envir = openv) # use custom environment variable N if not given
if (!is.data.frame(idata)) idata <- as.data.frame(idata)
if (ncol(idata) == 0) stop("Empty data.frame!")
if (!rescol %in% colnames(idata)) stop(paste("No \"", rescol, "\" column found", sep = ""))
temp <- input.interp(idata[[rescol]], N, dbug)
temp.lengths <- sapply(temp, length)
if (ncol(idata) == 1) {
out <- list()
out[[rescol]] <- unlist(temp)
out <- as.data.frame(out)
} else {
out <- data.frame(idata[rep(1:nrow(idata), times = temp.lengths),])
out[[rescol]] <- unlist(temp)
}
if (prod(temp.lengths) > 1) {
dim(temp.lengths) <- length(temp.lengths)
out$Iter<- c(apply(temp.lengths, 1, f.iter))
}
return(out)
}
setGeneric("interpret")
setMethod(
f = "interpret",
signature = signature(idata = "character"),
definition = function(idata, N = NULL, dbug = FALSE) {
callGeneric(data.frame(Result = idata), N = N, dbug = dbug)
}
)
setMethod(
f = "interpret",
signature = signature(idata = "numeric"),
definition = function(idata, N = NULL, dbug = FALSE) {
return(data.frame(Iter = 1:length(idata), Result = idata))
}
)
#setMethod(
# f = "interpret",
# signature = signature(idata = "interpret"),
# definition = function(idata, N = 1000, dbug = FALSE) {
# callGeneric(data.frame(Result = idata), N = N, dbug = dbug)
# }
#)
# Interpreting empty locations in indices
# fillna takes a data.frame and fills the cells with NA with each level in that column.
# object is the data.frame, marginals is a vector of columns (either column names or positions) that are to be filled.
# This version of fillna accepts column positions (as the previous version) and also column names in marginals.
fillna <- function (object, marginals) {
a <- dropall(object)
if(!is.numeric(marginals)) marginals <- match(marginals, colnames(object))
for (i in marginals) {
a[[i]] <- as.factor(a[[i]])
a1 <- a[!is.na(a[[i]]), ]
a2 <- a[is.na(a[[i]]), ][-i]
addition <- data.frame(A = levels(a[[i]]))
colnames(addition) <- colnames(a)[i]
a2 <- merge(addition, a2)
a <- rbind(a1, a2)
}
return(a)
}
is.na.ext <- function(x){
a <- is.na(x) | x == "NA" | nchar(gsub(" ", "", x)) == 0 | x == "*"
return(a)
}
# Fill NAs in matching columns in x with union of locations in x and y
# Compare to fill.na which replaces NA with own
fill.na.merge <- function(x, y) {
common <- intersect(colnames(x@output), colnames(y@output))
locs <- list()
# Loop through common columns
for (i in common) {
testx <- is.na.ext(x@output[i])
testy <- is.na.ext(y@output[i])
# For x
if (any(testx)) {
locs[[i]] <- union(levels(as.factor(x@output[[i]])), levels(as.factor(y@output[[i]])))
if (length(locs[[i]]) > 1) {
# Duplicate rows with wildcards
temp <- ifelse(testx, length(locs[[i]]), 1)
ind <- rep(1:length(temp), temp)
x@output <- x@output[ind,]
# Insert locations to duplicated rows
duplicates <- rep(testx, temp)
temp <- as.character(x@output[[i]])
# Since number of duplicates is fixed to length of locs this essentially repeats locs
# as many times as there are wildcards in this particular index at the moment.
temp[duplicates] <- locs[[i]]
x@output[[i]] <- factor(temp)
}
}
# For y
if (any(testy)) {
if (length(locs[[i]]) == 0) {
locs[[i]] <- union(levels(as.factor(x@output[[i]])), levels(as.factor(y@output[[i]])))
}
if (length(locs[[i]]) > 1) {
# Duplicate rows with wildcards
temp <- ifelse(testy, length(locs[[i]]), 1)
ind <- rep(1:length(temp), temp)
y@output <- y@output[ind,]
# Insert locations to duplicated rows
duplicates <- rep(testy, temp)
temp <- as.character(y@output[[i]])
temp[duplicates] <- locs[[i]]
y@output[[i]] <- factor(temp)
}
}
}
return(list(x, y))
}
#interpret("500(490-5000)", N = 2, dbug= TRUE)
#interpret("1;2;3;4", N = 20, dbug = TRUE)
#interpret("<9", N = 4, dbug = TRUE)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.