BayesScreening: Classical and Bayesian Screening for Binary Classification

Documented in CVBFtestrsplit HallKernel KHall laplace.kernH2c logintegrand.Hall loglike.KHall logpriorused PredCVBFDens PredCVBFIndepMHbw PredCVBFMHbw

#' Evaluate the log prior on one of the data sets. Called by other functions.
#'
#' @param h A value to evaluate the prior on
#' @param hhat Tuning parameter for the prior
#'
#' @return Evaluates a particular type of prior placed on the bandwidth for CVBF.
#' @export
#'
#' @examples
#' logpriorused(.1,.1)
logpriorused <- function(h,hhat)
{
  beta = hhat
  Prior = log(2*beta) - .5*log(pi) - 2*log(h) - (beta^2 / h^2)
  return(Prior)
}


#' Evaluate a log likelihood using the Hall Kernel
#'
#' @param h A bandwidth or vector of bandwidths tot ry out
#' @param y A validation set to evaluate the KDE on
#' @param x A training set to build the Hall kernel density estimate on
#'
#' @return -log likelihood evaluation, where the likelihood is constructed using the training data
#' @export
#'
#' @examples
#' dataset1 = rnorm(100)
#' DT = dataset1[1:50]
#' DV = dataset1[51:100]
#' loglike.Khall(.01, DT, DV)
loglike.KHall=function(h, y, x){
  
  n = length(x)
  m = length(y)
  nh = length(h)
  M = t(matrix(y,m,n))
  llike=1:nh
  
  for(j in 1:nh){
    
    M1 = (x-M)/ h[j]
    M1 = KHall(M1) / h[j]
    fhat=as.vector(M1 %*% matrix(1,m,1))/m
    fhat[fhat<10^(-320)]=10^(-320)
    llike[j]=sum(log(fhat))
  }
  return(-llike)
}

#' Title Hall Kernel evaluation
#'
#' @param x The parameter to evaluate the Hall Kernel density on 
#'
#' @return Evaluation of K(X), where K is a heavy tailed kernel. For more information on K(x), see
#'  K_0 on Hall's paper on Kullback Leibler loss (Annals of Statistics 1957)
#' @export
#'
#' @examples
#' Khall(.1)
KHall=function(x){
  
  con = sqrt(8 * pi * exp(1)) * pnorm(1)
  K=exp(-0.5 * (log(1 + abs(x)))^2) / con
  return(K)
}

#' Compute log of the integrand of the Marginal likelihood for CVBF
#'
#' @param h Bandwidth parameter, this is the variable that is being integrated over
#' @param y Validation set to evaluate the likelihood over
#' @param x Training set to build the KDE
#' @param hhat Parameter that specifies where prior should be centered
#'
#' @return Evaluation of the integrand at a particular log likelihood value.
#' @export
#'
#' @examples
#' dataset1 = rnorm(100)
#' DT = dataset1[1:50]
#' DV = dataset1[51:100]
#' logintegrand.Hall(.01, DT, DV, .1)
logintegrand.Hall=function(h, y, x, hhat){
  
  n = length(x)
  beta = hhat
  Prior = log(2 * beta / sqrt(2 * pi)) - beta^2 / h^2 - 2 * log(h)
  f=loglike.KHall(h, y, x) - Prior
  return(f)
  
}


#' Compute Marginal Likelihoods for CVBF
#'
#' @param y Validation Set
#' @param x Training set
#' @param hhat Bandwidth parameter that maximizes the log likelihood
#' @param c A constant that is equal to the log likelihood + log prior evaluated at the maximum
#'
#' @return Evaluation of the CVBF marginal likelihood via Laplace Approximation. Also returns bandwidth that maximized log integrand, and hessian of log integrand at maximum.
#' @export
#'
#' @examples
#' dataset1 = rnorm(100)
#' DT = dataset1[1:50]
#' DV = dataset1[51:100]
#' likvec = function(h) {sum(log(HallKernel(h,datagen2 = DT, x = DT)))}
#' bwlikcy = optimize(f = function(h){  likvec(h)}, lower = 0, upper = 10, maximum = TRUE)
#' ExpectedKernML2 = laplace.kernH2c(y = DT, x = DV, hhat = bwlikcy$maximum, c= bwlikcy$objective + logpriorused(h = bwlikcy$maximum, hhat = bwlikcy$maximum))
#' ExpectedKernML2

laplace.kernH2c = function(y, x, hhat, c){
  
  n = length(x)
  out = hessian(f = function(h) {logintegrand.Hall(h, y=y, x=x, hhat = hhat)}, x = hhat, pert = 10^(-7))
  cons = c
  hess= abs(out)
  laplace = 0.5 * log(2 * pi) - 0.5 * log(hess) + cons
  c(laplace,hhat,hess)
  
}


#' Compute a CVBF, a Bayes factor that checks if two data sets share the same distribution.
#'
#' @param dataset1 One dataset that we want to check if it has the same distribution as another data set
#' @param dataset2 Another dataset that we want to check if it has the same distribution as another data set
#' @param trainsize1 The training set size of dataset 1
#' @param trainsize2 The training set size of dataset 2
#' @param train1_ids Indices for the training set of dataset 1
#' @param train2_ids Indices for the training set of dataset 2
#' @param seed The seed used to generate training set and validation sets for both of the data sets
#'
#' @return A list containing a log BF that tests whether two distributions are the same via CVBF, and the training sets used to generate the CVBF.
#' @export
#'
#' @examples
#' set.seed(100)
#' dataset1 = rnorm(200)
#' dataset2 = rnorm(200) 
#' CVBF1 = CVBFtestrsplit(dataset1, dataset2, trainsize1 = 100, trainsize2 = 100)
#' CVBF1$logBF #Gives back the log Bayes factor
#' CVBF1$train1_ids #Gives back the training set of the first data set
#' CVBF1$train2_ids #Gives back the training set of the second data set
#' 
CVBFtestrsplit = function(dataset1, dataset2, trainsize1, trainsize2, seed = NULL, train1_ids = NULL, train2_ids = NULL)
{
  if(is.null(trainsize1))
  {
    stop("Please enter a trainsize for the dataset of class1.")
  }
  if(is.null(trainsize2))
  {
    stop("Please enter a trainsize for the dataset of class2.")
  }
  if(!is.null(seed))
  {
    set.seed(seed)
  }
  if(!is.null(train1_ids))
  {
    train_ids = train1_ids
  } else{
    train_ids = sample(1:length(dataset1), size = trainsize1)
  }
  if(!is.null(train2_ids))
  {
    train_ids2 = train2_ids
  } else{
    train_ids2 = sample(1:length(dataset2), size = trainsize2)
  }
  
  XT1 = dataset1[train_ids]
  XV1 = dataset1[-train_ids]
  
  YT1 = dataset2[train_ids2]
  YV1 = dataset2[-train_ids2]
  
  
  likvec = function(h) {sum(log(HallKernel(h,datagen2 = XT1, x = XV1)))}
  bwlik2 = optimize(f = function(h){  likvec(h)}, lower = 0, upper = 10, maximum = TRUE)
  ExpectedKernML1 = laplace.kernH2c(y = XT1, x = XV1, hhat = bwlik2$maximum, c = bwlik2$objective + logpriorused(h = bwlik2$maximum, hhat = bwlik2$maximum))
  
  likvec = function(h) {sum(log(HallKernel(h,datagen2 = YT1, x = YV1)))}
  bwlikcy = optimize(f = function(h){  likvec(h)}, lower = 0, upper = 10, maximum = TRUE)
  ExpectedKernML2 = laplace.kernH2c(y = YT1, x = YV1, hhat = bwlikcy$maximum, c= bwlikcy$objective + logpriorused(h = bwlikcy$maximum, hhat = bwlikcy$maximum))
  
  likveccombc = function(h) {sum(log(HallKernel(h,datagen2 = c(XT1,YT1), x = c(XV1,YV1))))}
  bwlikcombc = optimize(f = function(h){  likveccombc(h)}, lower = 0, upper = 10, maximum = TRUE)
  ExpectedKernMLcomb = laplace.kernH2c(y = c(XT1,YT1), x = c(XV1,YV1), hhat = bwlikcombc$maximum, c= bwlikcombc$objective + logpriorused(h = bwlikcombc$maximum, hhat = bwlikcombc$maximum))
  
  return(list(logBF = ExpectedKernML1[1] + ExpectedKernML2[1] - ExpectedKernMLcomb[1], train1_ids = train_ids, train2_ids = train_ids2))
}


#' Compute Hall Kernel density estimate, given a training set and a set of values to evaluate on
#'
#' @param h Bandwidth parameter
#' @param datagen2 Training set for Hall KDE
#' @param x Value (or values) to evaluate KDE on. Can be a scalar, a matrix, or a vector.
#'
#' @return An object of the same dimension as x that evaluates a KDE trained on datagen2 on everyone of the points in x
#' @export
#'
#' @examples
#' 
HallKernel = function(h,datagen2,x)
{
  sum = 0
  for(i in 1:length(datagen2))
  {
    sum = sum + (((8*pi*exp(1))^.5)*pnorm(1))^(-1)*exp(-.5*(log(1+abs(x - datagen2[i])/h)^2))
  }
  return((1/(length(datagen2) * h)) * sum)
}



#' Draw bandwidths from CVBF predictive posterior by Metropolis Hasting sampling
#'
#' @param ndraw Number of unique draws desired for the bandwidth parameter from the posterior.
#' @param propsd A tuning parameter, corresponds to what proposal standard deviation should be for when using MH to traverse the posterior. Should be chosen with care to ensure good mixing.
#' @param maxIter The max number of MH iterations to try. Do not set to be too large. It will kick the code out if acceptance rates for MH are small.
#' @param XT1 Training set for a data set
#' @param XV1 Validation set for a data set
#' @param startingbw A value to start the MH chain at. If not provided, starts at posterior mode.
#'
#' @return A list of bandwidths that come from the posterior distribution. This will be larger than ndraw, as some draws will be repeats.
#' @export
#'
#' @examples
#' set.seed(500)
#' datasetsample1 = rnorm(600)
#' trainingindices1 = sample(1:600, size = 300)
#' XT1 = datasetsample1[trainingindices1]
#' XV1 = datasetsample1[-trainingindices1]
#' predbwvec1 = PredCVBFMHbw(ndraw = 500, maxIter = 5000, XT1 = XT1, XV1 = XV1)

PredCVBFMHbw = function(ndraw = 100, propsd = NULL, maxIter = 10000, XT1, XV1, startingbw = NULL)
{
  bwvec = c()
  if(is.null(startingbw))
  {
    likvec = function(h) {sum(log(HallKernel(h,datagen2 = XT1, x = XV1)))}
    bwlik = optimize(f = function(h){  likvec(h)}, lower = 0, upper = 10, maximum = TRUE)
    startingbw = bwvec[1] = bwlik$maximum
  }
  else{
    bwvec[1] = startingbw
  }
  if(is.null(propsd))
  {
    propsd = startingbw * length(XT1) ^ (-1 / 10)
  }
  beta = startingbw
  postcurr = sum(log(HallKernel(bwvec[1], datagen2 = XT1, x = XV1))) + log(2*beta) - .5*log(pi) - 2*log(bwvec[1]) - (beta^2 / bwvec[1]^2)
  
  acceptances = 0
  for(j in 1:maxIter)
  {
    bwprop = rnorm(1, mean = bwvec[j], sd = propsd)
    if(bwprop <= 0)
    {
      #bandwidths cannot be negative, ignore sample that was drawn.
      bwvec[j+1] = bwvec[j]
    }
    else
    {
      #MH procedure for drawing bandwidths
      postprop = sum(log(HallKernel(bwprop, datagen2 = XT1, x = XV1))) + log(2*beta) - .5*log(pi) - 2*log(bwprop) - (beta^2 / bwprop^2)
      logpostdif = postprop - postcurr
      if(exp(logpostdif) > runif(1))
      {
        bwvec[j+1] = bwprop
        postcurr = sum(log(HallKernel(bwvec[j+1], datagen2 = XT1, x = XV1))) + log(2*beta) - .5*log(pi) - 2*log(bwvec[j+1]) - (beta^2 / bwvec[j+1]^2)
        acceptances = acceptances + 1
      }
      else
      {
        bwvec[j+1] = bwvec[j]
      }
    }
    if(acceptances >= ndraw)
    {
      break
    }
  }
  if(acceptances < ndraw)
  {
    warning(paste("Terminated because max iterations reached. The number of acceptances is", acceptances, "consider changing max iter or propsd"))
  }
  return(list(predbwsamp = bwvec, acceptancetot = acceptances, drawtot = j))
}


#' Draw bandwidths from CVBF predictive posterior by independent Metropolis Hasting sampling
#'
#' @param ndraw Number of unique draws desired for the bandwidth parameter from the posterior.
#' @param propsd A tuning parameter, corresponds to what proposal standard deviation should be for when using MH to traverse the posterior. We give a decent theoretical default. May need to be altered if performance is bad.
#' @param maxIter The max number of MH iterations to try. Do not set to be too large. It will kick the code out if acceptance rates for MH are small.
#' @param XT1 Training set for a data set
#' @param XV1 Validation set for a data set
#' @param startingbw A value to start the MH chain at. If not provided, starts at posterior mode. All proposals will be drawn from a distribution whose center is startingbw. This is normally a bad idea, but the posterior is some type of unimodal distribution, so this is actually effective.
#'
#' @return A list of bandwidths that come from the posterior distribution. This will be larger than ndraw, as some draws will be repeats.
#' @export
#'
#' @examples
#' set.seed(500)
#' datasetsample1 = rnorm(600)
#' trainingindices1 = sample(1:600, size = 300)
#' XT1 = datasetsample1[trainingindices1]
#' XV1 = datasetsample1[-trainingindices1]
#' predbwvec1 = PredCVBFIndepMHbw(ndraw = 500, maxIter = 5000, XT1 = XT1, XV1 = XV1)
#'
 
PredCVBFIndepMHbw = function(ndraw = 100, propsd = NULL, maxIter = 10000, XT1, XV1, startingbw = NULL)
{
  bwvec = c()
  if(is.null(startingbw))
  {
    likvec = function(h) {sum(log(HallKernel(h,datagen2 = XT1, x = XV1)))}
    bwlik = optimize(f = function(h){  likvec(h)}, lower = 0, upper = 10, maximum = TRUE)
    startingbw = bwvec[1] = bwlik$maximum
  }
  else{
    bwvec[1] = startingbw
  }
  if(is.null(propsd))
  {
    propsd = startingbw * length(XT1) ^ (-1 / 10)
  }
  beta = startingbw
  postcurr = sum(log(HallKernel(bwvec[1], datagen2 = XT1, x = XV1))) + log(2*beta) - .5*log(pi) - 2*log(bwvec[1]) - (beta^2 / bwvec[1]^2)
  
  acceptances = 0
  for(j in 1:maxIter)
  {
    bwprop = rnorm(1, mean = startingbw, sd = propsd)
    if(bwprop <= 0)
    {
      #bandwidths cannot be negative, ignore sample that was drawn.
      bwvec[j+1] = bwvec[j]
    }
    else
    {
      #MH procedure for drawing bandwidths
      postprop = sum(log(HallKernel(bwprop, datagen2 = XT1, x = XV1))) + log(2*beta) - .5*log(pi) - 2*log(bwprop) - (beta^2 / bwprop^2)
      logpostdif = postprop - postcurr
      if(exp(logpostdif) > runif(1))
      {
        bwvec[j+1] = bwprop
        postcurr = sum(log(HallKernel(bwvec[j+1], datagen2 = XT1, x = XV1))) + log(2*beta) - .5*log(pi) - 2*log(bwvec[j+1]) - (beta^2 / bwvec[j+1]^2)
        acceptances = acceptances + 1
      }
      else
      {
        bwvec[j+1] = bwvec[j]
      }
    }
    if(acceptances >= ndraw)
    {
      break
    }
  }
  if(acceptances < ndraw)
  {
    warning(paste("Terminated because max iterations reached. The number of acceptances is", acceptances, "consider changing max iter or propsd"))
  }
  return(list(predbwsamp = bwvec, acceptancetot = acceptances, drawtot = j, propmean = startingbw, propsd = propsd))
}

#' Compute a Predictive Posterior function given a sequence of bandwidths from the predictive posterior
#'
#' @param bwvec A vector of bandwidths, can come from either of the methods that draw bandwidths from the posterior.
#' @param XT1 The training set
#'
#' @return A function that evaluates the predictive posterior at particular values
#' @export
#'
#' @examples
#' set.seed(500)
#' datasetsample1 = rnorm(600)
#' trainingindices1 = sample(1:600, size = 300)
#' XT1 = datasetsample1[trainingindices1]
#' XV1 = datasetsample1[-trainingindices1]
#' predbwvec1 = PredCVBFIndepMHbw(ndraw = 500, maxIter = 5000, XT1 = XT1, XV1 = XV1)
#' predpost = PredCVBFDens(predbwvec1, XT1)
#' plot(seq(from = -3, to = 3, by = .1), Predpost(predbwvec1, XT1)(seq(from = -3, to = 3, by = .1)))
#' 
PredCVBFDens = function(bwvec, XT1)
{
  PredictedDens = function(support)
  {
    predpostavg = 0
    for(j in 1:length(bwvec))
    {
      predpostavg = (1/length(bwvec))*HallKernel(bwvec[j], datagen = XT1, support) + predpostavg
    }
    return(predpostavg)
  }
  return(PredictedDens)
}

naveedmerchant/BayesScreening documentation built on June 13, 2024, 7:56 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

naveedmerchant/BayesScreening
Classical and Bayesian Screening for Binary Classification

R/CVBFFunctions.R
In naveedmerchant/BayesScreening: Classical and Bayesian Screening for Binary Classification

Defines functions PredCVBFDens PredCVBFIndepMHbw PredCVBFMHbw HallKernel CVBFtestrsplit laplace.kernH2c logintegrand.Hall KHall loglike.KHall logpriorused

Documented in CVBFtestrsplit HallKernel KHall laplace.kernH2c logintegrand.Hall loglike.KHall logpriorused PredCVBFDens PredCVBFIndepMHbw PredCVBFMHbw

R Package Documentation

Browse R Packages

We want your feedback!

naveedmerchant/BayesScreening Classical and Bayesian Screening for Binary Classification

R/CVBFFunctions.R In naveedmerchant/BayesScreening: Classical and Bayesian Screening for Binary Classification

Defines functions PredCVBFDens PredCVBFIndepMHbw PredCVBFMHbw HallKernel CVBFtestrsplit laplace.kernH2c logintegrand.Hall KHall loglike.KHall logpriorused

Documented in CVBFtestrsplit HallKernel KHall laplace.kernH2c logintegrand.Hall loglike.KHall logpriorused PredCVBFDens PredCVBFIndepMHbw PredCVBFMHbw

R Package Documentation

Browse R Packages

We want your feedback!

naveedmerchant/BayesScreening
Classical and Bayesian Screening for Binary Classification

R/CVBFFunctions.R
In naveedmerchant/BayesScreening: Classical and Bayesian Screening for Binary Classification