knitr::opts_chunk$set(fig.width=12, fig.height=8, fig.path='Figs/',
                      warning=FALSE, message=FALSE)
knitr::opts_knit$set(root.dir="../")
library(MLlibrary)
library(ggplot2)
library(dplyr)
#library(doMC)
#registerDoMC(cores=3)
K = 5
gaussian <- function(mean, sd) {
  function(response) {response + rnorm(length(response), mean=mean, sd=sd)}
}

gaussian_x <- function(mean, sd) {
  function(response) {apply(response,2,gaussian(mean,sd))}
}

add_noise(d, noise_y=function(response) {response}, noise_x=function(response) {response}) {
  d$y <- noise_y(d$y)
  d$X <- noise_x(d$X)
  d
}

linear <- function(nvar=10, nrow=100) {
  variables <- as.character(seq_len(nvar))
  coefficients <- seq_along(variables)
  X <- matrix(rnorm(nvar * nrow), nrow=nrow, ncol=nvar)
  y <- (X %*% coefficients) + 10
  list(y=y, X=X)
}

exponential <- function(nvar=10, nrow=100) {
 d <- linear_(nvar, nrow)
 d$y <- exp(d$y / 20)
 d
}

cuts <- function(nvar=10, nrow=100) {
  X <- matrix(rnorm(nvar * nrow), nrow=nrow, ncol=nvar)
  cuts <- rnorm(nvar)
  cuts <- matrix(cuts, nrow=nrow, ncol=nvar, byrow = TRUE)
  X_transformed <- X<cuts
  variables <- as.character(seq_len(nvar))
  coefficients <- seq_along(variables)
  y <- as.vector((X_transformed %*% coefficients) + 10)
  list(y=y, X=X)
}

cuts_and_linear <- function(nvar=10, nrow=100, alpha=0.5) {
  X <- matrix(rnorm(nvar * nrow), nrow=nrow, ncol=nvar)
  cuts <- rnorm(nvar)
  cuts <- matrix(cuts, nrow=nrow, ncol=nvar, byrow = TRUE)
  X_transformed <- X<cuts
  variables <- as.character(seq_len(nvar))
  coefficients <- seq_along(variables)
  y <- (alpha*as.vector((X_transformed %*% coefficients) + (1-alpha)*(X %*% coefficients))) + 10
  list(y=y, X=X)
}
run_all <- function(dataset,NAME) {
  X <- dataset$X
  y <- dataset$y
  ksplit <- kfold_split(K, y, X, seed=1)
  ksplit_nmm <- kfold_split(K, y, data.frame(X), seed=1)
  run_all_models(NAME, dataset, "y", ksplit, ksplit_nmm)
}

plot_all <- function(NAME) {
  all_models <- load_models(NAME)
  plot_swf_(all_models)
  plot_reach_vs_waste_(all_models)
  plot_reach_vs_waste_(all_models, THRESHOLD=.4)
}

Linear Data

No noise

dataset <- linear(nvar=10, nrow=1000)
run_all(dataset, "linear")
plot_all("linear")

Linear + Zero Mean Gaussian

+10%

noise <- gaussian(0, 1)
dataset <- linear(nvar=10, nrow=1000, noise=noise)
run_all(dataset, linear_10)
plot_all(linear_10)

+50%

noise <- gaussian(0, 5)
dataset <- linear(nvar=10, nrow=1000, noise=noise)
run_all(dataset, linear_50)
plot_all(linear_50)

+90%

noise <- gaussian(0, 9)
dataset <- linear(nvar=10, nrow=1000, noise=noise)
run_all(dataset, linear_90)
plot_all(linear_90)

Heteroskedasticity

noise <- function(response) {
  rnorm(length(response), mean=response, sd=abs(response / 10))
}
dataset <- linear(nvar=10, nrow=1000, noise=noise)
res <- run_all(dataset)
plot_all(res)

Lognormal noise

Mean 0

noise <- function(response) {
  response + rlnorm(length(response), meanlog=0, sdlog=3)
}
dataset <- linear(nvar=10, nrow=1000, noise=noise)
res <- run_all(dataset)
plot_all(res)

Heteroskedastic lognormal

noise <- function(response) {
  response + rlnorm(length(response), meanlog=0, sdlog=abs(response / 10))
}
dataset <- linear(nvar=10, nrow=1000, noise=noise)
res <- run_all(dataset)
plot_all(res)

Wide

dataset <- linear(nvar=500, nrow=100)
res <- run_all(dataset, ix=FALSE)
plot_all(res)

Extraneous

dataset <- linear(nvar=10, nrow=500)
dataset$X <- cbind(matrix(rnorm(1000 * 500, mean=10, sd=5), nrow=500, ncol=1000), dataset$X)
res <- run_all(dataset, ix=FALSE)
plot_all(res)

Exponential data

dataset <- exponential(nvar=10, nrow=1000)
res <- run_all(dataset)
plot_all(res)

Cuts data

dataset <- cuts(nvar=10, nrow=1000)
res <- run_all(dataset)
plot_all(res)

Cuts data + Gaussian 50%

noise <- gaussian(0, 5)
dataset <- cuts(nvar=10, nrow=1000, noise=noise)
res <- run_all(dataset)
plot_all(res)

0.3 Cuts + 0.7 linear data

  dataset <- cuts_and_linear(nvar=10, nrow=1000, alpha=0.3)
  res <- run_all(dataset)
  plot_all(res)

0.7 Cuts + 0.3 linear data

dataset <- cuts_and_linear(nvar=10, nrow=1000, alpha=0.7)
res <- run_all(dataset)
plot_all(res)


ml-e/ML-library documentation built on May 23, 2019, 2:03 a.m.