R/creating_distance_hists.R

Defines functions resample dist_hist

resample <- function(x, ...) x[sample.int(length(x), ...)] # avoid weird behaviour of sample if the vector has length 1


dist_hist = function(Dist,k = 10,n = 100,main = '',...)
{
  # this function creates a histogram of a specified distance from uniformity Dist in (0,2) with k bins. It takes n steps from a uniform histogram to get there. main and ... are passed on to hist.

  if(Dist >=2 ) stop('Distance too large.')


  if(n %% 2 != 0 ) n = n + 1

  bins = rep(0,k)

  signed_bins = 0
  counter = 0


  max_dist_per_neg_bar = abs( k *(ceiling(1-n/(k*Dist)) - 1)) / (k*n/Dist)  # The maximum distance that can be generated by 1 bar. This is 1/k if k*Dist | n, otherwise slightly smaller.
                                                                          # This is relevant, since the negative bars can never create more distance than this, and we need to reserve enough negative bars.

  min_n_neg_bars = ceiling(Dist/(2*max_dist_per_neg_bar))

  if(ceiling(min_n_neg_bars) >= k ) stop('The required distance is too large for the chosen number of bins. Try more bins or smaller Dist.')

  while(counter < n)
  {


    if(counter %% 2 == 0) # resembles positive: one bin get stacked higher than before
    {
      if(sum(which(bins > 0)) + 1 > k - min_n_neg_bars)
      {
        pos_bins = which(bins > 0)
      } else {
      pos_bins = which(bins >= 0)
      }
      bin = resample(pos_bins,1)
      bins[bin] = bins[bin] + 1
    }
    if(counter %% 2 != 0)
    {

      neg_bins = which(bins <= 0 & bins >= 1-n/(k* Dist)) # the latter restriction excludes the bins that already reached 0

      bin = resample(as.vector(neg_bins),1)
      bins[bin] = bins[bin] - 1

    }



    counter = counter + 1

  }


  bins_natural  = n/Dist + k * bins  #get natural numbers in each bin that will lead to a histogram with L1- distance of Dist to uniformity


  center_points = (1:k)/k - 1/(2*k) # center points for bins

  brks = seq(0,1,by = 1/k)

  hist_sam = c()
  for(i in 1:k)
  {
    hist_sam = c(hist_sam,rep(center_points[i],bins_natural[i]))
  }

  hist(hist_sam,breaks = brks,freq = FALSE,xlab = '',main = main,...)

}


par(mfrow = c(4,4))
for(d in seq(0.1,1.6,by = 0.1))
{
  dist_hist(d,main = d)
}
ClaudioHeinrich/RankHistBins documentation built on March 11, 2020, 12:53 a.m.