R/FDist.R
In FitUltD: Fit Univariate Mixed and Usual Distributions

Documented in FDist

#' Fit of univariate distributions with censored data ignored by default or can be inputed.
#'
#'
#' @param X A random sample to be fitted.
#' @param gen A positive integer, indicates the sample length to be generated by the fit, 1 by default.
#' @param Cont TRUE, by default the distribution is considered as continuos.
#' @param inputNA A number to replace censored values, if is missing, only non-censored values will be evaluated.
#' @param plot FALSE. If TRUE, a plot showing the data distribution will be given.
#' @param p.val_min 0.05, minimum p.value for Anderson Darling and KS Test to non-reject the null hypothesis and continue with the process.
#' @param crit A positive integer to define which test will use. If 1, show the distributions which were non-rejected by the Anderson Darling or Kolmogorov Smirnov tests, in other cases the criterion is that they mustn't be rejected by both tests.
#' @param DPQR TRUE, creates the distribution function, density and quantile function with the names dfit, pfit and qfit.
#'
#' @return Calculate the distribution name with parameters, a function to reproduce random values from that distribution, a numeric vector of random numbers from that function, Anderson Darling and KS p.values, a plot showing the distribution difference between the real sample and the generated values and a list with the random deviates genetator, the distribution function, density and quantile function
#' @export
#'
#' @importFrom purrr map
#' @importFrom purrr map_lgl
#' @importFrom assertthat is.error
#' @importFrom ADGofTest ad.test
#' @importFrom MASS fitdistr
#' @importFrom fitdistrplus fitdist
#' @importFrom methods formalArgs
#' @importFrom stats kmeans
#' @importFrom stats na.omit
#' @importFrom stats sd
#' @importFrom stats rnorm
#'
#' @examples
#' set.seed(31109)
#' FIT1<-FDist(rnorm(1000,10),p.val_min=.03,crit=1,plot=TRUE)
#'
#' #Random Variable
#' FIT1[[1]]
#'
#' #Random numbers generator
#' FIT1[[2]]()
#'
#' #Random sample
#' FIT1[[3]]
#'
#' #Goodness of fit tests results
#' FIT1[[4]]
#'
#' #Plot
#' FIT1[[5]]
#'
#' #Functions r, p, d, q
#' FIT1[[6]]
#'
#'
#'
FDist<-function(X,gen=1,Cont=TRUE,inputNA,plot=FALSE,p.val_min=.05,crit=2,DPQR=TRUE){
  if(missing(inputNA)){X<-na.omit(X)}
  else{X<-ifelse(is.na(X),inputNA,X)}
  if(length(X)==0){
    return(NULL)
  }
  X<-X[X!=(-Inf) & X!=Inf]
  if (length(unique(X))<2) {
    fun_g<-function(n=gen){return(rep(X[1],n))}
    return(list(paste0("norm(",X[1],",0)"),fun_g,rep(X[1],gen),data.frame(Dist="norm",AD_p.v=1,KS_p.v=1,estimate1=X[1],estimate2=0,estimateLL1=0,estimateLL2=1,PV_S=2),NULL))
  }
  if (length(unique(X))==2) {
    X<-sort(X)
    p<-length(X[X==unique(X)[2]])/length(X)
    gene<-stats::rbinom
    formals(gene)[1]<-length(X)
    formals(gene)[2]<-1
    formals(gene)[3]<-p
    distribu<-paste0("binom(",p,")")
    MA=gene(n = gen)
    if(plot){
      DF<-rbind(data.frame(A="Fit",DT=MA),
                data.frame(A="Real",DT=X))
      pl <- ggplot2::ggplot(DF,ggplot2::aes(x=DF$DT,fill=DF$A)) + ggplot2::geom_density(alpha=0.4) +ggplot2::ggtitle(distribu)
    }else{
      pl<-NULL
    }
    return(list(distribu,gene,MA[1:gen],data.frame(Dist="binom",AD_p.v=1,KS_p.v=1,estimate1=1,estimate2=p,estimateLL1=0,estimateLL2=1,PV_S=2),pl))
  }
  DIS<-list(Nombres=c("exp","pois","beta","gamma","lnorm","norm","weibull","nbinom","hyper","cauchy","binom"),
            p=c(stats::pexp,stats::ppois,stats::pbeta,stats::pgamma,stats::plnorm,stats::pnorm,stats::pweibull,stats::pnbinom,stats::phyper,stats::pcauchy,stats::pbinom),
            d=c(stats::dexp,stats::dpois,stats::dbeta,stats::dgamma,stats::dlnorm,stats::dnorm,stats::dweibull,stats::dnbinom,stats::dhyper,stats::dcauchy,stats::dbinom),
            q=c(stats::qexp,stats::qpois,stats::qbeta,stats::qgamma,stats::qlnorm,stats::qnorm,stats::qweibull,stats::qnbinom,stats::qhyper,stats::qcauchy,stats::qbinom),
            r=c(stats::rexp,stats::rpois,stats::rbeta,stats::rgamma,stats::rlnorm,stats::rnorm,stats::rweibull,stats::rnbinom,stats::rhyper,stats::rcauchy,stats::rbinom),
            d_c=c(1,0,1,1,1,1,1,0,0,1,0),
            indicadora=c("0","0","01","0","0","R","0","0","0","R","0")
  )
  DIS<-purrr::map(DIS,~subset(.x, DIS$d_c==as.numeric(Cont)))
  DIS_0<-purrr::map(DIS,~subset(.x, DIS$indicadora=="0"))
  DIS_R<-purrr::map(DIS,~subset(.x, DIS$indicadora=="R"))
  DIS_01<-purrr::map(DIS,~subset(.x, DIS$indicadora=="01"))
  if(sum(purrr::map_dbl(DIS_0,~length(.x)))==0){DIS_0<-NULL}
  if(sum(purrr::map_dbl(DIS_R,~length(.x)))==0){DIS_R<-NULL}
  if(sum(purrr::map_dbl(DIS_01,~length(.x)))==0){DIS_01<-NULL}
  bt<-X
  despl<-0
  escala<-1
  eps<-1E-15
  if (sum(X<0)>0){
    if (sum(X<0)/length(X)<0.03){
      bt<-ifelse(X<0,eps,X)
      b_0<-bt
    }else{
      b_0<-bt-min(bt)+eps
      despl<- min(bt)
    }
  }else{
    b_0<-bt
  }
  if(max(X)>1){
    escala<-max(bt)
    b_01<-(bt-despl)/(escala-despl)
  }else{
    b_01<-bt
  }
  fit_b<-function(bt,dist="",Cont.=Cont){
    if(is.null(dist)){return(NULL)}
    Disc<-!Cont
    aju<-list()
    if(!dist %in% DIS_01$Nombres){
      suppressWarnings(aju[[1]]<-try(fitdistrplus::fitdist(bt,dist,method = "mle",discrete = Disc),silent = TRUE))
    }
    suppressWarnings(aju[[2]]<-try(fitdistrplus::fitdist(bt,dist,method = "mme",discrete = Disc),silent = TRUE))
    suppressWarnings(aju[[3]]<-try(fitdistrplus::fitdist(bt,dist,method = c("mge"),discrete = Disc),silent = TRUE))
    suppressWarnings(aju[[4]]<-try(MASS::fitdistr(bt,dist),silent = TRUE))
    if(!assertthat::is.error(aju[[4]])){aju[[4]]$distname<-dist}
    if(assertthat::is.error(aju[[1]]) & assertthat::is.error(aju[[2]]) &
       assertthat::is.error(aju[[3]]) & assertthat::is.error(aju[[4]])){
      return(list())
    }
    funcionales<-!purrr::map_lgl(aju,~assertthat::is.error(.x))
    names(aju)<-c("mle","mme","mge","mlg2")
    aju<-aju[funcionales]
    return(aju)
  }
  suppressWarnings(try(aju_0<-purrr::map(DIS_0$Nombres,~fit_b(b_0,.x)),silent = TRUE))
  suppressWarnings(try(aju_R<-purrr::map(DIS_R$Nombres,~fit_b(bt,.x)),silent = TRUE))
  suppressWarnings(try(aju_01<-purrr::map(DIS_01$Nombres,~fit_b(b_01,.x)),silent = TRUE))
  AAA<-list(aju_0,aju_R,aju_01)
  descate<-purrr::map(AAA,~length(.x))!=0
  AAA<-AAA[descate]
  bts<-list(b_0,bt,b_01)[descate]
  num<-0
  Compe<-data.frame()
  for (aju_ls in 1:length(AAA)) {
    aju<-AAA[[aju_ls]]
    aju<-aju[purrr::map_lgl(aju,~length(.x)>0)]
    bs<-bts[[aju_ls]]
    for (comp in 1:length(aju)) {
      if(length(aju)==0 ||length(aju[[comp]])==0){next()}
      for (ress in 1:length(aju[[comp]])) {
        num<-num+1
        if(length(aju[[comp]])!=0){evaluar<-aju[[comp]][[ress]]
        }else{evaluar<-NULL}
        if (is.null(evaluar) | length(evaluar)==0 |
            c(NA) %in% evaluar$estimate | c(NaN) %in% evaluar$estimate) {next()}
        distname<-evaluar$distname
        method<-names(aju[[comp]])[[ress]]
        dist_pfun<-try(get(paste0("p",distname)),silent = TRUE)
        dist_rfun<-try(get(paste0("r",distname)),silent = TRUE)
        if(assertthat::is.error(dist_rfun)){next()}
        argumentos<-formalArgs(dist_pfun)
        argumentos<-argumentos[argumentos %in% names(evaluar$estimate)]
        num_param<-length(argumentos)
        evaluar$estimate<-evaluar$estimate[names(evaluar$estimate) %in% argumentos]
        if(num_param==1){
          EAD<-try(AD<-ADGofTest::ad.test(bs,dist_pfun,evaluar$estimate[1]),silent = TRUE)
          KS<-try(stats::ks.test(bs,dist_pfun,evaluar$estimate[1]),silent = TRUE)
          if(assertthat::is.error(KS)){KS<-data.frame(p.value=NA)}
          if(assertthat::is.error(EAD)){next()}
          if(is.na(KS$p.value)){next()}
          Chs<-data.frame(p.value=0)
        }
        if(num_param==2){
          suppressWarnings(
            Err_pl<-try(AD<-ADGofTest::ad.test(bs,dist_pfun,evaluar$estimate[1],evaluar$estimate[2]),silent = TRUE))

          if (assertthat::is.error(Err_pl)) {
            Err_pl<-try(AD<-ADGofTest::ad.test(bs,dist_pfun,evaluar$estimate[1],,evaluar$estimate[2]),silent = TRUE)
          }
          KS<-try(stats::ks.test(bs,dist_pfun,evaluar$estimate[1],evaluar$estimate[2]),silent = TRUE)
          if(assertthat::is.error(KS)){KS<-data.frame(p.value=NA)}
          if(assertthat::is.error(Err_pl)){next()}
          if(is.na(KS$p.value)){next()}
          suppressWarnings(
            EE_Chs<-try(dst_chsq<-dist_rfun(length(bs),evaluar$estimate[1],evaluar$estimate[2]))
          )
          if(assertthat::is.error(EE_Chs) | prod(is.na(EE_Chs))==1){
            dst_chsq<-dist_rfun(length(bs),evaluar$estimate[1],,evaluar$estimate[2])
          }
          Chs<-data.frame(p.value=0)
        }
        pvvv<-p.val_min
        if(all(is.na(KS$p.value))){
          crit<-AD$p.value>pvvv
        }else{
          if(crit==1){
            crit<-AD$p.value>pvvv | KS$p.value>pvvv
          }else{
            crit<-AD$p.value>(pvvv) & KS$p.value>(pvvv)
          }
        }
        if(crit){
          if(aju_ls %in% 3){
            estimate3=despl
            estimate4=escala
          }else if(aju_ls==1){
            estimate3=despl
            estimate4=1
          }else{
            estimate3=0
            estimate4=1
          }
          Compe<-rbind(Compe,data.frame(Dist=distname,AD_p.v=AD$p.value,KS_p.v=KS$p.value,
                                        Chs_p.v=Chs$p.value,
                                        estimate1=evaluar$estimate[1],estimate2=evaluar$estimate[2],
                                        estimateLL1=estimate3,estimateLL2=estimate4,method=method
          ))
          }else{
          next()
        }

      }
    }
  }
  if (nrow(Compe)==0) {
    warning("No fit")
    return(NULL)
  }
  Compe$PV_S<-rowSums(Compe[,2:4])
  WNR<-Compe[Compe$PV_S %in% max(Compe$PV_S),][1,]
  distW<-WNR$Dist
  paramsW<-WNR[1,names(Compe)[startsWith(names(Compe),"estim")]]
  paramsW<-paramsW[,!is.na(paramsW)]
  if(gen<=0){gen<-1}
  generadora_r<-function(n=gen,dist=distW,params=paramsW){
    fn<-get(paste0("r",dist))
    formals(fn)[1]<-n
    for (pr in 1:(length(params)-2)) {
      formals(fn)[pr+1]<-as.numeric(params[pr])
    }
    fn()*params[,length(params)]+params[,length(params)-1]
  }
  if(DPQR){
    generadoras<-function(x,tipo,dist=distW,params=paramsW){
      fn<-get(paste0(tipo,dist))
      formals(fn)[1]<-x
      for (pr in 1:(length(params)-2)) {
        formals(fn)[pr+1]<-as.numeric(params[pr])
      }
      class(fn)<-"gl_fun"
      fn
    }
    rfit<-generadora_r
    class(rfit)<-"gl_fun"
    pfit<-generadoras(1,"p")
    qfit<-generadoras(1,"q")
    dfit<-generadoras(1,"d")
  }
  MA<-generadora_r()
  paramsAUX<-c()
  paramsW2<-data.frame()
  for(cl in 1:nrow(paramsW)){
    paramsW2<-rbind(paramsW2,round(paramsW[1,],3))
  }
  if(paramsW2[,length(paramsW2)]!=1 | paramsW2[,length(paramsW2)-1]!=0){
    distribu<-paste0(WNR$Dist,"(",paste0(paramsW2[,1:(length(paramsW2)-2)],collapse = ", "),")*",paramsW2[,length(paramsW2)],"+",paramsW2[,length(paramsW2)-1])
  }else{
    distribu<-paste0(WNR$Dist,"(",paste0(paramsW2[,1:(length(paramsW2)-2)],collapse = ", "),")")
  }
  p<-c()
  if(plot){
    DF<-rbind(data.frame(A="Fit",DT=MA),
              data.frame(A="Real",DT=X))
    p <- ggplot2::ggplot(DF,ggplot2::aes(x=DF$DT,fill=DF$A)) + ggplot2::geom_density(alpha=0.4) +ggplot2::ggtitle(distribu)
  }
  return(list(distribu,generadora_r,MA,WNR[,-4],p,list(rfit,pfit,dfit,qfit),Compe))
}