knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
library(tidyverse) library(ggplot2) library(standardPrintOutput) #library(tidyinfostats) devtools::load_all("..") set.seed(101) theme_set(standardPrintOutput::defaultFigureLayout())
gaussian test params y0 = [ .4 .5 .8 ]; % the center of the gaussian sigma_y = [ .2 .3 .25 ]; % the gaussian decay constant p{2} = [ .2 1 0.5 ]; % the (normalized) amplitude p(x)
gaussians = ConditionalDistribution$new() gaussians$withDistribution(0.2,NormalDistribution$new(0.4,0.2)) gaussians$withDistribution(1,NormalDistribution$new(0.5,0.3)) gaussians$withDistribution(0.5,NormalDistribution$new(0.8,0.25))
square wave test params
a = [ 0 .1 .2 ]; % the left side of each square wave b_a = [ 1 1.1 1.1 ]; % the length in y of each square wave p{1} = [ .2 1 0.5 ]; % the (normalized) amplitude p(x)
squareWaves = ConditionalDistribution$new() squareWaves$withDistribution(0.2,UniformDistribution$new(min=0,max=1)) squareWaves$withDistribution(1,UniformDistribution$new(min=0.1,max=1.2)) squareWaves$withDistribution(0.5,UniformDistribution$new(min=0.2,max=1.3))
lognorm = ConditionalDistribution$new() lognorm$withDistribution(0.5,LogNormalDistribution$new(mode=0.5,var=0.25)) lognorm$withDistribution(1,LogNormalDistribution$new(mode=1.5,var=1)) lognorm$withDistribution(0.3,LogNormalDistribution$new(mode=2.5,var=0.5))
Objectives: * Look at sample size versus accuracy MI, Mean and SD with increasing sample size regression to predict SD of MI from est mean versus theoretical mean & est versus theoretical SD
#devtools::load_all("..") #TODO: Include Discretise by value -> Grassberger in here experiment3 = function(distribution, reps, meth = c("KWindow","KNN","SGolay","DiscretiseByRank","DiscretiseByValue","Compression")) { set.seed(101) result = NULL thMi = distribution$theoreticalMI() thMu = distribution$theoreticalMean() thSd = sqrt(distribution$theoreticalVariance()) j=0 for (samples in c(8,16,32,64,128,256,512,1024,2048,4096,8192,16384)) { for (i in c(1:reps)) { j=j+1 df = distribution$sample(samples) minGroupSize = min(df %>% group_by(y) %>% count() %>% pull(n)) estMi = sapply(meth, function(m) { # browser() calculateDiscreteContinuousMI(df, vars(y), x, method = m) %>% pull(I) }) estMean = mean(df$x) estSd = sd(df$x) result = result %>% bind_rows( tibble( id = j, minGroupSize = minGroupSize, sample = samples, param = c(meth,"Mean","Std deviation"), theoretical = c(rep(thMi,length(estMi)),thMu,thSd), estimated = c(estMi,estMean,estSd) ) ) } } return(result) } # setup error quantifyError = function(df) { return( df %>% mutate( absoluteError = estimated-theoretical, relativeError = ifelse(theoretical<0.05,NA,absoluteError/theoretical) ) %>% filter(!is.na(relativeError)) %>% group_by(sample,param) %>% summarise( minGroupSize =mean(minGroupSize), theoretical = mean(theoretical, na.rm=TRUE), rmse = sqrt(mean(absoluteError^2, na.rm=TRUE)), mae = mean(absoluteError, na.rm=TRUE), vae = var(absoluteError, na.rm=TRUE), nmae = mean(relativeError, na.rm=TRUE), nvae = var(relativeError, na.rm=TRUE), median_ae = quantile(absoluteError, probs=c(0.5), names=FALSE, na.rm=TRUE), upper_iqr_ae = quantile(absoluteError, probs=c(0.75), names=FALSE, na.rm=TRUE), lower_iqr_ae = quantile(absoluteError, probs=c(0.25), names=FALSE, na.rm=TRUE) ) ) } exp3aData = experiment3(lognorm,100) exp3bData = experiment3(gaussians,100) exp3cData = experiment3(squareWaves,100)
# todo summarise experimental data plotExperiment3 = function(df, components) { summary = df %>% group_by(sample,param) %>% quantifyError() return(ggplot(summary %>% filter(param %in% components),aes(x=sample))+ geom_line(aes(y=mae),colour="blue")+ geom_ribbon(aes(ymin=mae-1.96*vae,ymax=mae+1.96*vae),fill="blue",alpha=0.1)+ #geom_line(aes(y=estimatedMedian),colour="grey75")+ #geom_ribbon(aes(ymin=estimatedLower,ymax=estimatedUpper),fill="grey75",alpha=0.1)+ coord_cartesian(ylim = c(-0.25,0.25))+ geom_hline(yintercept=0, colour="red")+ ylab("absolute error")+ xlab("sample size")+ facet_wrap(vars(param))+scale_x_log10()) }
# devtools::load_all("..") plotExperiment3(exp3aData,c("Mean","Std deviation")) plotExperiment3(exp3aData,c("KWindow","KNN","SGolay","DiscretiseByRank","DiscretiseByValue","Compression")) standardPrintOutput::saveThirdPageFigure(filename="~/Dropbox/featureSelection/mutinfo/bootstrappingLogNorm")
#devtools::load_all("..") plotExperiment3(exp3bData,c("Mean","Std deviation")) plotExperiment3(exp3bData,c("KWindow","KNN","SGolay","DiscretiseByRank","DiscretiseByValue","Compression")) standardPrintOutput::saveThirdPageFigure(filename="~/Dropbox/featureSelection/mutinfo/bootstrappingGaussians")
# devtools::load_all("..") plotExperiment3(exp3cData,c("Mean","Std deviation")) plotExperiment3(exp3cData,c("KWindow","KNN","SGolay","DiscretiseByRank","DiscretiseByValue","Compression")) standardPrintOutput::saveThirdPageFigure(filename="~/Dropbox/featureSelection/mutinfo/bootstrappingUniform")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.