R/poisGoF.R

#' calculates probability of data not being Poisson distributed usingR's inbuilt
#' random poisson generator
#'
#' \code{poisGoF(Classes,freq,perm=1000,Bestfitmean='TRUE')}
#'
#' @param Bestfitmean='TRUE' will use the mean value which produces the closest fit
#' to a Poisson distribution for the data, rather than simply the mean of the data.
#'
#' @details the chi-squared value from the original GoF is compared with those of
#' randomly generated poisson data of the same length. Can use either the mean value
#' of the original data, or the mean value that gives the best fit to a poisson
#' distribution.
#'
#' @examples
#' # London homicides per day 2004-2007
#' Homicides = 0:5
#' Freq = c(713, 299, 66, 16, 1, 0)
#' poisGoF(Homicides, Freq, Bestfitmean=FALSE)


poisGoF=function(Classes,freq,perm=1000,Bestfitmean=TRUE){
  if(length(Classes)<3)(cat("Warning: Unreliable results
                            with <3 classes","\n"))
  par(mar=c(5,5,2,2))
  MnR=mean(rep(Classes,freq))
  Mnseq=seq(MnR-(0.1*MnR),MnR+(0.1*MnR),length=400)
  options(warn=-1)
  Cstat=numeric(400)
  for(a in 1:400){
    props=Mnseq[a]^Classes/(factorial(Classes)*exp(Mnseq[a]))
    L=length(props)
    props[L]=1-sum(props[-L])
    Cstat[a]=chisq.test(freq,p=props)$stat
  }
  Mn=Mnseq[Cstat==min(Cstat)]
  Mnx=Mn
  plot(Mnseq,Cstat,typ='l',ann=F)
  title(xlab=expression(lambda),ylab=expression(chi^2),cex.lab=1.4)
  title(main=expression(paste("line shows amount of error (",chi^2,
                              ") for a range of means (",lambda,")"),cex.main=1))
  BFM=Mnseq[Cstat==min(Cstat)]
  if(Bestfitmean==FALSE){
    Mn=mean(rep(Classes,freq))
  }
  props=Mn^Classes/(factorial(Classes)*exp(Mn))	# calculate expected proportions
  L=length(props)					# number of classes
  props[L]=1-sum(props[-L])			# lump all subsequent classes with last
  EXP=props*sum(freq)				# convert to expected values
  S=sum(freq)					# calculate number of raw data items
  cat("\n","expected frequencies",EXP,"\n")	# print expected values
  CHI=numeric(eval(perm))
  Chiref=chisq.test(freq,p=props)$stat		# GoF chi-square value for real freq
  for(a in 1:perm){
    Rand=rpois(S,Mn)				# create a random poisson
    Rclass=min(Rand):max(Rand)			# the random classes
    RandX=ordered(Rand,levels=Rclass)		# convert to ordinal so table includes 0s
    Robs=table(RandX)[]				# the 'observed' random values
    Rmean=mean(rep(Rclass,Robs))			# mean of the random data
    Rprop=Rmean^Rclass/(factorial(Rclass)*exp(Rmean))# expected values based upon random sample
    LRC=length(Rprop)
    Rprop[LRC]=1-sum(Rprop[-LRC])			# lump all subsequent classes with last
    CHI[a]=chisq.test(Robs,p=Rprop)$stat		# calculate chi-squared for random sample
  }
  pval=length(CHI[CHI>Chiref])/perm
  options(warn=0)
  options(digits=3)
  cat("\n","G-o-F Randomization test","\n",
      "\n"," lambda =",MnR,"\n")
  cat(if(Bestfitmean=='TRUE')c("best fit lambda =",round(Mnx,4),"\n"))
  cat("\n"," original chi-squared =",Chiref,"\n","randomisation p-value =",pval,"\n",
      "\n","number of permutations =",perm,"\n","\n")	# Output original chi and P
}
helophilus/ColsTools documentation built on May 30, 2019, 4:03 p.m.