# Chunk default for the knitted document: echo = TRUE.
knitr::opts_chunk$set(echo = TRUE)

# Absolute, machine-specific path to the gzipped HIGGS CSV file that the
# data-loading step reads.
HiggsCSV <- "C:/Users/Naveed/Documents/HIGGS.csv.gz"

R Markdown

The plot for column 29 is shown below.

# Read the first 1,000,000 rows of the file. colClasses keeps file column 1
# and file columns 23-29 and skips the 21 columns in between ("NULL"), so the
# resulting data frame has 8 columns.
fulltrans <- read.csv(
  file = HiggsCSV,
  header = FALSE,
  nrows = 1000000,
  colClasses = c(NA, rep("NULL", times = 21), rep(NA, times = 7))
)
dim(fulltrans)

# Rows whose first column equals 0 are the "background noise" rows (X);
# every other row goes to Y.
noisedataind <- fulltrans[, 1] == 0
X <- fulltrans[noisedataind, ]
Y <- fulltrans[!noisedataind, ]

# Rows 1..1,000,000 by index, then the first 20,000 of those as the working
# sample used by the analyses further down.
fulltrans2 <- fulltrans[seq_len(1000000), ]
usedsamptrans <- fulltrans2[seq_len(20000), ]

# Same label-based split as above, applied to the working sample:
# X2 holds the "background noise" rows, Y2 the remaining rows.
noisedataind3 <- usedsamptrans[, 1] == 0
X2 <- usedsamptrans[noisedataind3, ]
Y2 <- usedsamptrans[!noisedataind3, ]

library(BSCRN)
library(matrixStats)
library(foreach)
library(rootSolve)
library(doRNG)


# Training-set sizes, paired element-wise inside the loop below:
# trainsizes[i] is passed as trainsize1 (for dataset1) and trainsizes2[i] as
# trainsize2 (for dataset2).
trainsizes <- seq(from = 100, to = 400, by = 60)
trainsizes2 <- seq(from = 1000, to = 5000, by = 1000)

# Number of random splits evaluated for each pair of training sizes.
numberofsplits <- 5

# NOTE(review): ncores is not read anywhere in the visible code; it is kept
# (assigned once instead of twice) in case code outside this view uses it.
ncores <- 20

set.seed(1000)

# One row per entry of trainsizes, one column per split. Cells that are never
# filled stay NA.
logBFmat3 <- matrix(nrow = length(trainsizes), ncol = numberofsplits)

# trainsizes has 6 entries but trainsizes2 has only 5. Indexing trainsizes2
# past its end yields NA, which would then be passed as trainsize2, so only
# iterate over the sizes that exist in both vectors.
npairedsizes <- min(length(trainsizes), length(trainsizes2))

for (i in seq_len(npairedsizes)) {
  for (j in seq_len(numberofsplits)) {
    # Column 8 of X2 / Y2 is the last column kept when the file was read.
    logBFmat3[i, j] <- BSCRN::CVBFtestrsplit(
      dataset1 = X2[, 8],
      dataset2 = Y2[, 8],
      trainsize1 = trainsizes[i],
      trainsize2 = trainsizes2[i]
    )$logBF
    # Progress indicator: index of the split that just finished.
    print(j)
  }
}

# Summarise the log Bayes factors per training size and plot them.
#
# Only the rows of logBFmat3 that have a matching entry in both trainsizes and
# trainsizes2 are summarised; deriving the counts from those vectors (instead
# of hard-coding 1:5) keeps this step in sync with the grid definition.
nsizes <- min(length(trainsizes), length(trainsizes2))
logBFused <- logBFmat3[seq_len(nsizes), seq_len(numberofsplits), drop = FALSE]

# Per-row mean, and per-row (min, max) as a two-column matrix.
logBFmeans3 <- rowMeans(logBFused)
logBFranges3 <- rowRanges(logBFused)

dfmat3 <- data.frame(
  trainsize = trainsizes[seq_len(nsizes)],
  min = logBFranges3[, 1],
  max = logBFranges3[, 2],
  mean = logBFmeans3
)

library(ggplot2)

# Dashed blue range per training size, red points at min and max, green point
# at the mean, and a dashed purple line joining the means.
ggplot(dfmat3, aes(x = trainsize)) +
  geom_linerange(aes(ymin = min, ymax = max), linetype = 2, color = "blue") +
  geom_point(aes(y = min), size = 3, color = "red") +
  geom_point(aes(y = max), size = 3, color = "red") +
  geom_point(aes(y = mean), size = 4, color = "green") +
  # `linewidth` replaces `size` for lines (ggplot2 >= 3.4.0); `size` on a line
  # geom now raises a deprecation warning.
  geom_line(aes(y = mean), linewidth = 3, color = "purple", linetype = 2)
# theme_bw()

# Polya tree test on column 2 of the working sample. Both groups are centred
# and scaled with the mean and sd of the pooled (X2 and Y2) values first.
# local() keeps the temporaries out of the global environment; the test result
# is still the visible value of the expression.
local({
  pooled <- c(X2[, 2], Y2[, 2])
  centre <- mean(pooled)
  spread <- sd(pooled)
  PolyaTreetest(
    (X2[, 2] - centre) / spread,
    (Y2[, 2] - centre) / spread,
    Ginv = qnorm, c = 1, leveltot = 13
  )
})

# Same test on column 8 of the working sample, standardised the same way.
local({
  pooled <- c(X2[, 8], Y2[, 8])
  centre <- mean(pooled)
  spread <- sd(pooled)
  PolyaTreetest(
    (X2[, 8] - centre) / spread,
    (Y2[, 8] - centre) / spread,
    Ginv = qnorm, c = 1, leveltot = 13
  )
})

The plot above shows the log CVBFs for column 29: the minimum, maximum, and mean over the random splits at each training size.



naveedmerchant/BayesScreening documentation built on June 13, 2024, 7:56 a.m.