R/benford.R

# Generated by LaTeX DogWagger Version 4.0.5 from file <NCTLL_904.tex>
# Date: [2020-9-17 13:16:5] 
# Do NOT edit this file. Edit the LaTeX source!!

# - <Section 12> - 
#' Create various statistical distributions
#' 
#' Build a normal or log-normal distribution from simple components.  
#' Large numbers e.g. n=1e6 will take some time to run. 
#' 
#' @param n is the number of samples
#' @param method is either 'multiply' or 'add' 
#' @param pdf defaults to FALSE 
#' @param log take logarithm of values (for 'multiply') 
#' @param runs number of iterations (default 7)
#' @param xscale a scaling factor, can use values < 1.0 to magnify (x) e.g. 0.4
#' @param bins defaults to 64 
#' @keywords corona log-normal lognormal normal central limit theorem Benford law
#' @export 
#' @import ggplot2 
#' @importFrom stats dnorm
#' @importFrom stats sd
#' @importFrom graphics hist
#' @importFrom stats runif
#' @importFrom graphics frame 
#' @importFrom gridExtra tableGrob 
#' @examples
#' corona_converge( n=10000, method='multiply', xscale=0.4, bins=128, runs=5 )

# - <Section 13> - 
corona_converge <- function (n=100000, method='add', runs=7, pdf=FALSE, xscale=1.0, bins=64, log=FALSE) 
{ DAT <- runif(n, 1, 10);
  PdfFilename <- paste('Converge_', method, '.pdf', sep=''); 
  normfx <- function(x, mean, sd, n) { return( n*dnorm(x=x,mean=mean,sd=sd) ); }; 

if(method == 'multiply')
  { fx <- '*'; 
  } else if(method == 'add')
  { fx <- '+'; 
  } else
  { stop( paste('Unknown method:', method) ); 
  }; 

# - <Section 14> - 
for(i in 1:runs) 
  { DAT <- mapply(fx, DAT, runif(n,1,10)); 
  }; 
  FRQ <- corona_benford(DAT); 
  FRQ$Freq <- round(FRQ$Freq, digits=3); # no silliness

if(log)
  { DAT <- log(DAT); 
  }; 
  DF <- as.data.frame(DAT); 
  Lo <- floor(min(DF$DAT));
  Hi <- ceiling(max(DF$DAT)); 
  Mn <- mean(DF$DAT);
  Sd <- sd(DF$DAT); 
  brk <- seq( from=Lo, by=(Hi-Lo)/bins, to=Hi); 
  HiLim <- xscale*Hi; 

  mytheme <- gridExtra::ttheme_default(
    core = list(fg_params=list(cex = 1.5)),
    colhead = list(fg_params=list(cex = 1.5)),
    rowhead = list(fg_params=list(cex = 1.5)));    # enlarge inset 
  INTABLE <- gridExtra::tableGrob(FRQ[,1:2], theme=mytheme); 

  bar <- hist(DF$DAT, breaks=brk); 
  TopGraph <- max(bar$counts); 
  Titl <- paste('Distribution (', method, ')', sep=''); 
  Labx <- 'Value'; 
if( abs(xscale-1) > 0.01 )
  { Labx <- paste('Value, scale =', xscale); 
  }; 
  Adj <- dnorm(Mn,Mn,Sd);

# - <Section 15> - 
  corona_pdf(PdfFilename, 10, 6, pdf); 
  myplot <- ggplot( DF, aes(x=DAT) ) + geom_histogram(breaks=brk) + 
    labs( title=Titl, x='Value', y='Count' ) + xlim(Lo,HiLim) + 
   annotation_custom( INTABLE[,2:3], 
                      ymin=0.3*TopGraph, ymax=0.95*TopGraph,  # 0.9
                      xmax=HiLim, xmin=HiLim*0.80 ); 
if( (method == 'add') | log )
  { myplot <- myplot + 
      stat_function(fun=normfx, args = list(mean=Mn, sd=Sd, n=TopGraph/Adj), colour='red'); # hack
  }; 
  corona_print(myplot); 
  corona_pdf_off(PdfFilename, pdf); 
}
# -END OF FILE- 

Try the corona package in your browser

Any scripts or data that you put into this service are public.

corona documentation built on Oct. 23, 2020, 7:15 p.m.