R/2-estimation-functions.R

Defines functions bmopPar define_bmop bmop_fit bmop_fit.default bmop_fit.data.frame bmop_fit.bins search_bmop

Documented in bmop_fit bmop_fit.bins bmop_fit.data.frame bmop_fit.default bmopPar search_bmop

.parnames<-c("mle","N","order","alpha","knotsMethod","k","toll","repMax","MIN"
             ,"autoReduce","warnings")
.bmopenv<-new.env()
lockBinding(".bmopenv",environment())
lockBinding(".parnames",environment())
assign(x = ".bmopPars",value = list(mle = FALSE, 
                                    N=NA,
                                    order=3,
                                    alpha=3,
                                    knotsMethod="uniform",
                                    k=2,
                                    toll=10^(-10),
                                    repMax=100,
                                    MIN=10^(-10),
                                    autoReduce=200,
                                    warnings=TRUE),
       envir = .bmopenv)
lockEnvironment(env = .bmopenv,bindings = ".bmopPars")

 
#' Set Rbmop parameters
#' 
#' Set appropriate global parameters to be used by \code{Rbmop}
#' functions . This is the appropriate way to change those parameters.
#' The use is similar to \code{par()} in package \code{graphics}.
#' 
#'  @param ...  see details.
#'  
#'  @details This function set parameters used for estimation.The call \code{
#'           bmopPar()} just print the values of the paramters.
#'  
#'  
#'           \code{mle=FALSE}: logical. If use maximum-likelihood estimation of 
#'           bmop coefficient, setting \code{mle=TRUE} just force 
#'           \code{repMax=1}.
#'            
#'           \code{N=NA}: If present, the number of knots in every dimensions. 
#'           Vector of positive integer, if needed values will be recycled.
#'           
#'           \code{order=3}: The order of the B-spline in every dimensions, 
#'           vector of positive integer, if needed values will be recycled.
#'           
#'           \code{alpha=3}: The penalization 
#'                         exponent to compute the number of knots. This is the
#'                         default method to compute the number of knots, 
#'                         with the formula: \eqn{ floor(n^(1/alpha))}, where 
#'                         \eqn{n} is the number of observations in the 
#'                         dataset.
#'                         If \code{!is.na(N)} then the number of knots will be 
#'                         set to \eqn{N^d} where \eqn{d} is the number of
#'                          dimensions
#'                         in the dataset (num. of variables).
#'                         
#'          \code{knotsMethod="uniform"}:
#'           \code{"uniform"} or \code{"quantiles"} knots, 
#'           how knots are computed by \code{\link{generate_knots}}. 
#'           
#'           \code{k=2}: Coefficient of \code{AIC} (penalized likelihood), 
#'           positive integer or \code{"BIC"} string. This is used by 
#'           \code{\link{search_bmop}}.
#'           
#'            \code{toll=10^{-10}}: Tollerance for the increment of the likelihood in
#'                         the mle estimation of the coefficient.
#'                         
#'           \code{repMax=100}: maximum number of iteration in the mle 
#'                              estimation of the coefficient  
#'            
#'          \code{MIN=10^{-10}}: This is not a learning parameter but instead 
#'                               define the \code{MIN} parameter in the 
#'                               evaluation of bmop object. Observe that some 
#'                               functions like \code{\link{logLik}} or 
#'                               \code{plot},
#'                               set this parameter independently.
#'                               
#'          \code{autoReduce=200}: This value set the maximum dimension of an 
#'                                 accepted dataset as raw-data, for larger
#'                                 dataset, functions \code{\link{bmop_fit}}
#'                                  and 
#'                                 \code{\link{search_bmop}}
#'                                  will be applied over the 
#'                                 reduced bins (histogram). Setting it to 
#'                                 \code{Inf} disable this features.
#'                                 
#'          \code{warnings=TRUE}: boolean, setting this value to FALSE, 
#'          avoid the warnings generated by autoreduce. 
#'           @export
bmopPar<-function(...){
  
  l <- list(...)
  
  old <- get(".bmopPars",envir = .bmopenv)
  if (length(l) == 0){
    return(old)
  }
  unlockBinding(sym = ".bmopPars",env = .bmopenv)
  for (a in names(l)){
    if (a %in% .parnames){
    old[[a]] <- l[[a]]
    }
    else{
      warning(paste(a,"is not a Rbmop parameter"))
    }
  }
  if (is.list(l[[1]])){
    for (a in names(l[[1]])){
      old[[a]] <- l[[1]][[a]]  
    }
      
  }
  assign(".bmopPars",old,envir = .bmopenv)
  lockBinding(sym = ".bmopPars",env = .bmopenv)
}


define_bmop<-function(bmop=NULL,data=NULL,Max=NULL,Min=NULL,
                      N=get(".bmopPars",
                            envir = .bmopenv)$N,
                      order=get(".bmopPars",
                                envir = .bmopenv)$order,
                      alpha=get(".bmopPars",
                                envir = .bmopenv)$alpha,
                      method=get(".bmopPars",
                                 envir = .bmopenv)$knotsMethod,...){
  
  if (is.null(alpha)){alpha<-3}
  
  if (is.null(order[1])){order <- 3}
  if (is.null(method)){method <- "uniform"}
  
  if (is.bmop(bmop)){
    return(bmop)
  }
  if (is.null(data)){ 
    return(
      new_bmop(ctrpoints = 1,
               knots = generate_knots(data = data,N = max(1,N),
                                      Max = Max,Min = Min),
               order = order))}
  if (inherits(data,what = "histogram")|inherits(data,what = "bins")){
    counts <- data$counts
    data <- data$mids
    data <- as.data.frame(data)
    alpha <- 1/log(sum(counts) ^ ( 1/alpha ),base = dim(data)[1])
  }
  else if (inherits(data,"values")){
    data <- data$mids
    data <- as.data.frame(data)
  }
  else{
  data<-as.data.frame(data)
  }
  if (is.na(N[1])){
    N<-max(1,floor(dim(data)[1] ^ (1 / (dim(data)[2]*alpha))))
    if (length(order)!=dim(data)[2]){ 
      order<-rep(order,dim(data)[2])[1:(dim(data)[2])]}
  }
  return(
  new_bmop(ctrpoints = 1,knots = 
            generate_knots(data = data,N =N,Max = Max,Min=Min,method = method )
            ,order = order))
  }


#'Estimation of bmop density or conditional density
#'
#'@param data data.frame, matrix, vector or object of class "histogram" or
#' "bins"
#' @param conditional logic, if \code{TRUE} a conditional density is learned
#' @param Min vector of lower bounds
#' @param Max vector of upper bounds
#' @param bmop  a bmop object
#' @param ... see \code{bmopPar}
#' @return a bmop object, a density if \code{conditional=FALSE} or a 
#' conditional density if \code{conditional=TRUE}. In the latter case the first 
#' variable of the dataset is considered as the conditioned one and the rest of 
#' the variables as the conditioning ones. If the dataset has only one variable 
#' a normal density is generated discarding the value of \code{conditional}. 
#' @export
#' @examples
#' plot(bmop_fit(rnorm(100)))
#' plot(bmop_fit(hist(rnorm(100000))))
#' #############################
#' Data<-data.frame(rnorm(100),rexp(100))
#' bmop<-bmop_fit(Data)
#' plot(bmop)
#' #############################
#' X<-rnorm(100)
#' Y<-rnorm(100,mean=X)
#' Data<-data.frame(X,Y)
#' bmopPar(mle=TRUE)
#' bmopC<-bmop_fit(Data,conditional=TRUE)
#' bmopPar(mle=FALSE)
#' plot(bmopC)
bmop_fit<-function(data,conditional=FALSE,Min=NULL,Max=NULL,bmop=NULL,...){
  UseMethod("bmop_fit",object = data)
}

#'Estimation of bmop density or conditional density
#'
#'@param data data.frame, matrix, vector or object of class "histogram" or
#' "bins"
#' @param conditional logic, if \code{TRUE} a conditional density is learned
#' @param Min vector of lower bounds
#' @param Max vector of upper bounds
#' @param bmop  a bmop object
#' @param ... see \code{bmopPar}
#' @return a bmop object
#' @export
 bmop_fit.default<-function(data,conditional=FALSE,Min=NULL,
                            Max=NULL,bmop=NULL,...){
 return(bmop_fit.data.frame(as.data.frame(data),
                            conditional=conditional,
                     Min=Min,Max=Max,bmop=bmop,...) )
}


  
  
#'Estimation of bmop density or conditional density
#'
#'@param data data.frame, matrix or vector
#'the variables must be in the right order (the columns of data)
#' @param conditional logic, if \code{TRUE} a conditional density is learned
#' @param Min vector of lower bounds
#' @param Max vector of upper bounds
 #' @param bmop  a bmop object
#' @param ... see \code{bmopPar}
#' @return a bmop object
#'@export
bmop_fit.data.frame<-function(data, conditional=F,
                              Min=NULL,
                              Max=NULL,
                              bmop=NULL,
                              ... ){
  data<-as.data.frame(data)
  data<-removeNA(data)
  if (dim(data)[1]>bmopPar()$autoReduce){
    if (bmopPar()$warnings){
    warning("Data dimension exceded bmopPar()$autoreduce parameter. 
            The data is grouped into bins. Modifying bmopPar()autoReduce 
            to a greater value (or Inf) prevent that behaviour.
            If you want to use the autoreduce capability and avoid those 
            warnings set bmopPar(warnings=FALSE).
            See the help of bmopPar for more
            informations.")
    }
    if (conditional){
      breaks<-max(1,floor(nclass.FD(data[,1])^{1/(dim(data)[2])}) )
    }
    else{
      breaks<-max(1, floor(max(sapply(data,nclass.FD))^{1/(dim(data)[2])} ))
    }
    return(bmop_fit.bins(data = as.bins(data = data,breaks = breaks),
                                        conditional=conditional,...))
  }
  m<-define_bmop(bmop = bmop,data = data,Max = Max,Min = Min,...)
  m<-normalize.bmop(m)
  d<-length(m$order) 
  C<-integration_constants(m)
  N<-dim(data)[1]
  D<-apply(data,MARGIN = 1,FUN = delta,bmop=m,MIN=10^(-10)) 
  dim(D)<-c(dim(m$ctrpoints),dim(data)[1])
  E<-slice.index(D,MARGIN = length(dim(D)))
  m$logLik<-logLik.bmop(object=m,data=data)
  
  if (!bmopPar()$mle){ repmax <- 1}
  else{
    repmax<-bmopPar()$repMax
  }
  
  
  for (s in 1:repmax){
    K<-array(dim=dim(m$ctrpoints),0)
    
      for (i in 1:(dim(data)[1])){
        K<-K+D[E==i]/sum(D[E==i]*m$ctrpoints)
      }
    
    
    mnew<-m
    mnew$ctrpoints<-m$ctrpoints*K
    KK<-N
    if (conditional){
      C<-integration_constants(new_bmop(knots = m$knots[1],
                                        order = m$order[1]))
      ix<-slice.index(mnew$ctrpoints,MARGIN=1)
      KK <- mnew$ctrpoints
      for (i in 1:(dim(mnew$ctrpoints)[1])){
        KK [ix==i] <- (C[i]*apply(mnew$ctrpoints,MARGIN = -1,sum)) 
      }
    }
    else{
      KK<- N*C
    }
    mnew$ctrpoints<- mnew$ctrpoints/ KK
    mnew$logLik<-logLik.bmop(object = mnew,data = data)
    if ((mnew$logLik-m$logLik)<bmopPar()$toll){
      break 
    }
    m<-mnew
  }
  
  return(mnew)
}


#'Estimation of bmop density or conditional density
#'
#'@param data \code{histogram} or \code{bins} object
#'the variables must be in the right order
#' @param conditional logic, if \code{TRUE} a conditional density is learned
#' @param Min vector of lower bounds
#' @param Max vector of upper bounds
#' @param bmop  a bmop object
#' @param ... see \code{bmopPar}
#' @return a bmop object
#'@export
bmop_fit.bins<-function(data, conditional=F,
                              Min=NULL,
                              Max=NULL,
                              bmop=NULL,
                              ... ){
  m<-define_bmop(bmop = bmop,data = data,Max = Max,Min = Min,...)
  m<-normalize.bmop(m)
  d<-length(m$order) 
  C<-integration_constants(m)
  counts<-data$counts
  DD<-data
  data<-as.data.frame(data$mids)
  N<-sum(counts)
  D<-apply(data,MARGIN = 1,FUN = delta,bmop=m,MIN=10^(-10)) 
  dim(D)<-c(dim(m$ctrpoints),dim(data)[1])
  E<-slice.index(D,MARGIN = length(dim(D)))
  m$logLik<-logLik.bmop(object=m,data=data)
  
  if (!bmopPar()$mle){ repmax <- 1}
  else{
    repmax<-bmopPar()$repMax
  }
  for (s in 1:repmax){
    K<-array(dim=dim(m$ctrpoints),0)
    for (i in 1:(dim(data)[1])){
     K<-K+D[E==i]*counts[i]/sum(D[E==i]*m$ctrpoints)
     # K<-K+D[E==i]*counts[i]
    }
    mnew<-m
    mnew$ctrpoints<-m$ctrpoints*K
    KK<-dim(data)[1]*C
    if (conditional){
      C<-integration_constants(new_bmop(knots = m$knots[1],
                                        order = m$order[1]))
      ix<-slice.index(mnew$ctrpoints,MARGIN=1)
      KK <- mnew$ctrpoints
      for (i in 1:(dim(mnew$ctrpoints)[1])){
        KK [ix==i] <- (C[i]*apply(mnew$ctrpoints,MARGIN = -1,sum)) 
      }
    }
    else{
      KK<- N*C
    }
    mnew$ctrpoints<- mnew$ctrpoints/ KK
    mnew$logLik<-logLik.bmop(object = mnew,data = DD)
    if ((mnew$logLik-m$logLik)<bmopPar()$toll){
      break 
    }
    m<-mnew
  }
  mnew$info$density<- !conditional
  mnew$info$conditional <- conditional
  return(mnew)
}

#'Estimation of bmop density or conditional density
#'
#'@param data \code{histogram} or \code{bins} object
#'the variables must be in the right order
#' @param conditional logic, if \code{TRUE} a conditional density is learned
#' @param Min vector of lower bounds
#' @param Max vector of upper bounds
#' @param bmop  a bmop object
#' @param ... see \code{bmopPar}
#' @return a bmop object
#'@export
bmop_fit.histogram <- bmop_fit.bins


#' Greedy penalized log-likelihood search
#'
#'Aproximation of a density \eqn{f(x_1,\ldots,x_n)} or conditional density
#'@param data data.frame, matrix or vector, the variables must be in
#' the right order (the columns of data)
#' @param conditional logical 
#'@param k positive number or \code{"BIC"}
#'@param corrected logical
#'@param knotsMethod the method to use in knots generation
#'@param ... additional parameters 
#'@return A bmop object, the aproximations of \eqn{f}.
#'@export
#'@examples
#' data<-rnorm(100)
#' bmopS<-search_bmop(data=data)
#' plot(bmopS)
search_bmop<-function(data,conditional=F,k=Rbmop::bmopPar()$k,corrected=FALSE,
                      knotsMethod=Rbmop::bmopPar()$knotsMethod,...){
  
  oldpar<-bmopPar()
  if (inherits(data,"histogram")|inherits(data,"bins")){
    N<-sum(data$counts)
    d<-dim(as.data.frame(data$mids))[2]
    Ddata<-as.data.frame(data$mids)
  }
  else {
    data<-as.data.frame(data)
    Ddata<-data
    N<-dim(data)[1]
    d<-dim(data)[2]
  }
  
  N<-rep(2,d)
  order=rep(3,d)
  bmopPar(N=N,order=order,mle=T)
  mop<-bmop_fit(data = data,conditional = conditional,...)
  score<-AIC.bmop(object = mop,k = k,data = data,corrected = corrected)
  finish<-F
  while (!finish){
    Mlist<-list()
    finish<-T
    for (i in 1:d){
      norder<-order
      norder[i]<-norder[i]+1
      bmopPar(order=norder,N=N)
      Mlist[[i]]<-bmop_fit(data = data,
                           conditional = conditional,...)
      Nnew<-N
      Nnew[i]<-Nnew[i]+1
      bmopPar(order=order,N=Nnew)
      Mlist[[d+i]]<-bmop_fit(data=data,
                             conditional=conditional,...)
    }
    Mlist<-Mlist[!sapply(Mlist,is.null)]
    nScores<-sort(x = sapply(X =Mlist ,FUN = AIC.bmop,data=data,k=k,
                             corrected=corrected),index.return=T)
    if (nScores$x[1]<score){
      mop<-Mlist[[nScores$ix[1]]]
      score<-nScores$x[1]
      N<-sapply(mop$knots,function(x){length(unique(x))})
      order<-mop$order
      finish<-F
    }
  }
  mop$extra$AIC$value<-score
  mop$extra$AIC$k<-k
  bmopPar(oldpar)
  return(mop)
}
gherardovarando/Rbmop documentation built on May 17, 2019, 4:17 a.m.