R/fitFunc.R

fitFunc <-
function(ID, hb, bin_min, bin_max, obs_mean, ID_name, distribution="LOGNO",distName='LNO',links=c(muLink="identity", sigmaLink="log", nuLink=NULL, tauLink=NULL), qFunc=qLOGNO, quantiles=seq(0.006,0.996,length.out=1000), linksq=c(identity,exp,NULL,NULL), con=gamlss.control(c.crit = 0.1, n.cyc = 200, trace = FALSE), saveQuants=FALSE, muStart=NULL, sigmaStart=NULL, nuStart=NULL, tauStart=NULL, muFix=FALSE, sigmaFix=FALSE, nuFix=FALSE, tauFix=FALSE, freeParams=c(TRUE,TRUE,FALSE,FALSE), smartStart=FALSE, tstamp = as.numeric(Sys.time())){
  start<-Sys.time()
  
  dat<-data.frame(ID, hb, bin_min, bin_max, obs_mean)
  colnames(dat) <- c(ID_name, 'hb', 'bin_min', 'bin_max', 'obs_mean')
    
  dist<-unique(dat[,ID_name])
  
  #empty matrix to hold parameters
  parameters<-matrix(NA, ncol=4, nrow=length(dist))
  colnames(parameters)<-c('mu','sigma','nu','tau')
  rownames(parameters)<-dist
  
  if(saveQuants==TRUE){
    quants<-matrix(NA, ncol=length(quantiles), nrow=length(dist))
    colnames(quants)<-quantiles
    rownames(parameters)<-dist
  }#end if saveQuants
  
  medians<-c()
  sds<-c()
  est<-c()
  obs<-c()
  aics<-c()
  bics<-c()
  didCon<-c()
  nparams<-c()
  logLikes<-c()
  vars<-c()
  cvs<-c()
  cv2<-c()
  gini<-c()
  theil<-c()
  mld<-c()
  sdl<-c()
  
  for(i in 1:length(dist)){
    if(i %% 500 == 0){print(i)}
    NAgate<-'CLOSED'
    use.i<-which(dat[,ID_name]==dist[i])
    int.i<-dat[use.i,c('bin_min','bin_max')]
    hb.i<-dat[use.i,'hb']
    intW.i<-hb.i
    rmhb<-which(intW.i==0)
    if(length(rmhb)>0){
      int.i<-int.i[-rmhb,]
      hb.i<-hb.i[-rmhb]
      intW.i<-intW.i[-rmhb]
    }#end if length(rmhb)
    if(nrow(int.i)<1){
      NAgate<-'OPEN' 
    }else{
      intCensW.i<-makeIntWeight(int.i,hb.i)
      intCens.i<-intCensW.i$int
      
      cens.raw.i <- cens(distribution, type='interval', local = FALSE)
      
      if(sum(grep(pattern = "mu.link", x = deparse(body(fun = cens.raw.i)[2]))) == 1){
        body(fun = cens.raw.i)[[2]] <- parse(text = gsub(pattern = "substitute(mu.link)", replacement = paste0("'",links[1],"'"), x = deparse(body(fun = cens.raw.i)[[2]]), fixed = TRUE))[[1]]
      }
      if(sum(grep(pattern = "sigma.link", x = body(fun = cens.raw.i)[3])) == 1){
        body(fun = cens.raw.i)[[3]] <-  parse(text = gsub(pattern = "substitute(sigma.link)", replacement = paste0("'",links[2],"'"), x = deparse(body(fun = cens.raw.i)[[3]]), fixed = TRUE))[[1]]
      }
      if(length(body(fun = cens.raw.i)) > 3){
        if(sum(grep(pattern = "nu.link", x = body(fun = cens.raw.i)[[4]])) == 1){
          body(fun = cens.raw.i)[[4]] <-  parse(text = gsub(pattern = "substitute(nu.link)", replacement = paste0("'",links[4],"'"), x = deparse(body(fun = cens.raw.i)[[4]]), fixed = TRUE))[[1]]
        }
      }
      if(length(body(fun = cens.raw.i)) > 4){
        if(sum(grep(pattern = "tau.link", x = body(fun = cens.raw.i)[[5]])) == 1){
          body(fun = cens.raw.i)[[5]] <-  parse(text = gsub(pattern = "substitute(mu.link)", replacement = paste0("'",links[5],"'"), x = deparse(body(fun = cens.raw.i)[[5]]), fixed = TRUE))[[1]]
        }
      }
      
      if(smartStart==TRUE& distName == "GB2"){
        mu.start.i<-as.numeric(int.i[which(hb.i==max(hb.i,na.rm=TRUE))[1],])
        mu.start.i<-mean(c(mu.start.i[1],mu.start.i[2]))
        mu.start.i<-log(mu.start.i*3.76)
        if(length(sigmaStart)==0){
          sigma.start.i<-log(2.5)
        }else{
          sigma.start.i<-sigmaStart
        }
        fit.i<-try(gamlss(intCens.i~1, weights=intW.i, family = cens.raw.i(), censmu.start=mu.start.i, sigma.start=sigma.start.i, nu.start=nuStart,tau.start=tauStart,mu.fix=muFix, sigma.fix=sigmaFix,nu.fix=nuFix,tau.fix=tauFix,control=con),silent=TRUE)
      }else{
        fit.i<-try(gamlss(intCens.i~1,weights=intW.i,family=cens.raw.i(), mu.start=muStart,sigma.start=sigmaStart, nu.start=nuStart,tau.start=tauStart,mu.fix=muFix, sigma.fix=sigmaFix,nu.fix=nuFix,tau.fix=tauFix,control=con),silent=TRUE)
      }#end if/else smartStart
      
      testStateVar <- attr(fit.i, "class")
      testState <- FALSE
      if(length(testStateVar) > 0){
      	if(testStateVar[1] == "try-error"){
      		testState <- TRUE
      	}
      }
      if(testState==TRUE){
        NAgate<-'OPEN' 
      }else{
        #quantiles
        quanParam.i<-getQuantilesParams(fit.i, qFunc, quantiles, linksq, freeParams,c(muStart,sigmaStart, nuStart,tauStart))
        samps.i<-quanParam.i$samps
        params.i<-quanParam.i$params
        #parameters
        parameters[i,1:length(params.i)]<-params.i
        
        #quantiles
        if(saveQuants==TRUE){
          quants[i,]<-samps.i
        }
        
        #summary stats
        medians<-c(medians,median(samps.i))
        sds<-c(sds,sd(samps.i))
        est.i<-mean(samps.i)
        var.i<-var(samps.i)
        gin.i<-giniCoef(quantiles,samps.i)
        the.i<-theilInd(samps.i)
        ll.i<-logLik(fit.i)
        est<-c(est,est.i)
        obs.i.obs<-dat[use.i[1],'hin_mean']
        mld.i<-MLD(samps.i)
        sdl.i<-SDL(samps.i)
      	if(length(obs.i.obs)==0){
      		obs.i.obs<-NA
      	}
      	obs<-c(obs, obs.i.obs)
        aics<-c(aics,fit.i$aic)
        bics<-c(bics,fit.i$sbc)
        didCon<-c(didCon,fit.i$converged)
        nparams<-c(nparams,attr(ll.i,'df'))
        logLikes<-c(logLikes,as.numeric(ll.i))
        vars<-c(vars,var.i)
        cvs<-c(cvs, sd(samps.i)/est.i)
        cv2<-c(cv2,(sd(samps.i)/est.i)^2)
        gini<-c(gini,gin.i)
        theil<-c(theil,the.i)
        mld<-c(mld,mld.i)
        sdl<-c(sdl,sdl.i)
      }#end if/else testState!=TRUE
    }#end if/else nrow(int.i)<1
    if(NAgate=='OPEN'){
      medians<-c(medians,NA)
      sds<-c(sds,NA)
      ll.i<-NA
      est<-c(est,NA)
      obs.i.obs<-dat[use.i[1],'hin_mean']
      if(length(obs.i.obs)==0){
      	obs.i.obs<-NA
      }
      obs<-c(obs, obs.i.obs)
      aics<-c(aics,NA)
      bics<-c(bics,NA)
      didCon<-c(didCon,FALSE)
      nparams<-c(nparams,NA)
      logLikes<-c(logLikes,NA)
      vars<-c(vars,NA)
      cvs<-c(cvs,NA)
      cv2<-c(cv2,NA)
      gini<-c(gini,NA)
      theil<-c(theil, NA)
      mld<-c(mld,NA)
      sdl<-c(sdl,NA)
    }#end if NAgate='OPEN'
  }#end loop over dists. for i
  elasp<-Sys.time()-start
  print(elasp)
  cat('for', distName, 'fit across', length(dist), 'distributions','\n', '\n')
  distri<-rep(distName,length(est))
  datOut<-data.frame(dist,obs,distri,est,vars,cvs,cv2,gini,theil,mld,sdl,aics,bics,didCon,logLikes,nparams,medians,sds)
  colnames(datOut)<-c(ID_name,'obsMean','distribution','estMean','var','cv','cv_sqr','gini','theil','MLD','SDL','aic','bic','didConverge','logLikelihood','nparams','median','sd')
 
  
  #saving quantiles
  if(saveQuants==TRUE){
    datOut<-list('datOut' = datOut, 'timeStamp' = tstamp, 'parameters' = 'parameters', 'quantiles' = quants)
  }else{
  	datOut<-list('datOut' = datOut, 'timeStamp' = tstamp, 'parameters' = parameters, 'quantiles' = NULL)
  }
  
  return(datOut)
}

Try the binequality package in your browser

Any scripts or data that you put into this service are public.

binequality documentation built on May 2, 2019, 9:58 a.m.