Nothing
# data is a list
# x: either a list or a matrix containing the data rowwise
# y: vector of labels/values
constructData = function(x, y) {
stopifnot(is.list(x) || is.vector(x) || is.matrix(x))
stopifnot(is.list(y) || is.vector(y) || is.factor(y))
data = list(x=x, y=y)
class(data) = "CVST.data"
return(data)
}
getN = function(data) {
stopifnot(inherits(data, "CVST.data"))
if (is.list(data$x) || is.vector(data$x)) {
N = length(data$x)
}
else {
N = nrow(data$x)
}
return(N)
}
shuffleData = function(data) {
stopifnot(inherits(data, "CVST.data"))
shuffle = sample.int(getN(data))
return(getSubset(data, shuffle))
}
getSubset = function(data, subset) {
stopifnot(inherits(data, "CVST.data"))
x = getX(data, subset)
y = data$y[subset]
ret = constructData(x=x, y=y)
return(ret)
}
getX = function(data, subset=NULL) {
stopifnot(inherits(data, "CVST.data"))
if (is.null(subset)) {
ret = data$x
}
else {
if (is.list(data$x) || is.vector(data$x)) {
ret = data$x[subset]
}
else {
ret = data$x[subset, ,drop=FALSE]
}
}
return(ret)
}
isClassification = function(data) {
stopifnot(inherits(data, "CVST.data"))
return(is.factor(data$y))
}
isRegression = function(data) {
stopifnot(inherits(data, "CVST.data"))
return(!isClassification(data))
}
constructLearner = function(learn, predict) {
stopifnot(is.function(learn) && is.function(predict))
learner = list(learn=learn, predict=predict)
class(learner) = "CVST.learner"
return(learner)
}
constructCVSTModel = function(steps=10, beta=.1, alpha=.01, similaritySignificance=.05, earlyStoppingSignificance=.05, earlyStoppingWindow=3, regressionSimilarityViaOutliers=FALSE) {
ret = list(steps=steps,
beta=beta,
alpha=alpha,
similaritySignificance=similaritySignificance,
earlyStoppingSignificance=earlyStoppingSignificance,
earlyStoppingWindow=earlyStoppingWindow,
regressionSimilarityViaOutliers=regressionSimilarityViaOutliers)
class(ret) = "CVST.setup"
return(ret)
}
constructParams = function(...) {
pn = names(substitute(c(...)))[-1]
ret = expand.grid(..., stringsAsFactors=FALSE, KEEP.OUT.ATTRS = FALSE)
params = lapply(1:nrow(ret), function(ind) as.list(ret[ind, ]))
paramNames = lapply(1:nrow(ret), function(ind) paste(pn, ret[ind, ], sep="=", collapse=" "))
names(params) = paramNames
class(params) = "CVST.params"
return(params)
}
.getResult = function(train, test, learner, param, squared=TRUE) {
stopifnot(inherits(learner, "CVST.learner") && inherits(train, "CVST.data") && inherits(test, "CVST.data"))
model = try(learner$learn(train, param))
if (inherits(model, "try-error")) {
pred = rep(NA, length(test$y))
}
else {
pred = try(learner$predict(model, test))
if (inherits(pred, "try-error")) {
pred = rep(NA, length(test$y))
}
}
if (isClassification(test)) {
res = (test$y != pred)
}
else {
if (squared) {
res = (pred - test$y)^2
}
else {
res = (pred - test$y)
}
}
return(res)
}
cochranq.test = function(mat) {
cochransQtest = list(statistic = 0, parameter = 0, p.value = 1,
method = "Cochran's Q Test",
data.name = deparse(substitute(mat)))
class(cochransQtest) = "htest"
if (is.vector(mat) || any(dim(mat) <= 1)) {
return(cochransQtest)
}
# we expect the individuals in the rows, repetitions/treatments in the columns
m = ncol(mat)
df = m - 1
L = apply(mat, 1, sum)
index = (L > 0 & L < m)
if (sum(index) <= 1) {
# all rows are either one or zero... no effect!
return(cochransQtest)
}
if (sum(index) * m <= 24) {
return(.perm.cochranq.test(mat[index, ]))
}
L = L[index]
T = apply(mat[index, ], 2, sum)
Q = ((m-1) * (m * sum(T^2) - sum(T)^2)) / (m * sum(L) - sum(L^2))
names(df) = "df"
names(Q) = "Cochran's Q"
if (is.nan(Q)) {
p.val = 1.0
}
else {
p.val = pchisq(Q, df, lower.tail=FALSE)
}
cochransQtest$statistic = Q
cochransQtest$parameter = df
cochransQtest$p.value = p.val
return(cochransQtest)
}
.perm.cochranq.test = function(mat, nperm=1000) {
if (is.vector(mat) || any(dim(mat) <= 1)) {
cochransQtest = list(statistic = 0, parameter = 0, p.value = 1,
method = "Cochran's Q Test",
data.name = deparse(substitute(mat)))
class(cochransQtest) = "htest"
return(cochransQtest)
}
# we expect no straight zero or one-rows in mat
m = ncol(mat)
df = m - 1
L = apply(mat, 1, sum)
T = apply(mat, 2, sum)
quot = (m * sum(L) - sum(L^2))
Q = ((m-1) * (m * sum(T^2) - sum(T)^2)) / quot
names(df) = "df"
names(Q) = "Cochran's Q"
permFun = function() {
newPerm = mat
for (i in 1:nrow(mat)) {
newPerm[i, ] = mat[i, sample(m)]
}
T = apply(newPerm, 2, sum)
Q = ((m-1) * (m * sum(T^2) - sum(T)^2)) / quot
return(Q)
}
QS = replicate(nperm, permFun())
p.value = mean(QS >= Q)
cochransQtest = list(statistic = Q, parameter = df, p.value = p.value,
method = "Cochran's Q Test (monte-carlo)",
data.name = deparse(substitute(mat)))
class(cochransQtest) = "htest"
return(cochransQtest)
}
constructSequentialTest = function(piH0=.5, piH1=.9, beta, alpha) {
a1 = log((1 - beta) / alpha) / (log(piH1 / piH0) + log((1 - piH0) / (1 - piH1)))
a0 = -log(beta / (1 - alpha)) / (log(piH1 / piH0) + log((1 - piH0) / (1 - piH1)))
b = log((1 - piH0) / (1 - piH1)) / (log(piH1 / piH0) + log((1 - piH0) / (1 - piH1)))
ret = list(a1=a1, a0=a0, b=b, piH0=piH0, piH1=piH1, alpha=alpha, beta=beta)
class(ret) = "CVST.sequentialTest"
return(ret)
}
plotSequence = function(st, s) {
y = cumsum(s)
if (!is.null(st$steps)) {
plot(y, xlim=c(1, st$steps), ylim=c(1, st$steps))
}
else {
plot(y)
}
abline(a=st$a1, b=st$b, col="red")
abline(a=-st$a0, b=st$b, col="red", lty=2)
abline(h=0)
abline(a=0, b=1)
title(sprintf("one-sided H0:%0.2f; H1:%0.2f", st$piH0, st$piH1))
}
testSequence = function(st, s) {
stopifnot(inherits(st, "CVST.sequentialTest"))
n = length(s)
y = cumsum(s)
ret = 0
if (y[n] >= st$b * n + st$a1) {
ret = 1
}
else if (y[n] <= st$b * n - st$a0) {
ret = -1
}
return(ret)
}
noisySinc = function(n, dim=2, sigma=0.1) {
if (length(n) > 1) {
x = n
}
else {
x = runif(n, -pi, pi)
}
sinc = function(d) sin(d) / (d)
y = sinc(4 * x) + 0.2 * sin(15 * x * dim) + sigma*rnorm(n)
y[is.nan(y)] = 1
return(constructData(x=as.matrix(x), y=y))
}
noisySine = function(n, dim=5, sigma=.25) {
x = runif(n, 0, 2 * pi * dim)
y = sin(x)
if (!is.null(sigma) && sigma > 0) {
y = y + rnorm(n, sd=sigma)
}
label = factor(y == abs(y))
return(constructData(x=as.matrix(x), y=label))
}
noisyDonoho = function(n, fun=doppler, sigma=1) {
x = matrix(runif(n, 0, 1), n, 1)
y = as.vector(fun(x)) + rnorm(n, sd=sigma)
return(constructData(x=x, y=y))
}
blocks = function(x, scale=3.656993) {
t = c(0.1, 0.13, 0.15, 0.23, 0.25, 0.40, 0.44, 0.65, 0.76, 0.78, 0.81)
h = c(4, -5, 3, -4, 5, -4.2, 2.1, 4.3, -3.1, 2.1, -4.2)
ret = t(sapply(x, function(xx) (1 + sign(xx - t)) / 2)) %*% h
ret = ret * scale
return(ret)
}
bumps = function(x, scale=10.52884) {
t = c(0.1, 0.13, 0.15, 0.23, 0.25, 0.40, 0.44, 0.65, 0.76, 0.78, 0.81)
h = c(4, 5, 3, 4, 5, 4.2, 2.1, 4.3, 3.1, 5.1, 4.2)
w = c(0.005, 0.005, 0.006, 0.01, 0.01, 0.03, 0.01, 0.01, 0.005, 0.008, 0.005)
ret = t(sapply(x, function(xx) (1 + abs((xx - t) / w))^-4 )) %*% h
ret = ret * scale
return(ret)
}
heavisine = function(x, scale=2.356934) {
ret = 4 * sin(4 * pi * x) - sign(x - 0.3) - sign(0.72 - x)
ret = ret * scale
return(ret)
}
doppler = function(x, scale=24.22172) {
ret = sqrt(x * (1 - x)) * sin((2.1 * pi) / (x + 0.05))
ret = ret * scale
return(ret)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.