R/features.R

Defines functions .getFeatureFrame .getFeatureGenerator .getFeatureType .getFeatHingeThreshold .getInteraction .getFeatFactor .dummy .getFeatFunc .getFeatLQCP .getFeature.product .getPCAfunction .getScaleFunction .getThreshold .getHinge .thresh .invhinge .hinge .getFeature.poly.Raster .getFeature.poly.SpatRaster .getFeature.poly .getFeature.cubic .getFeature.quad .getFeature.linear

# Author: Babak Naimi, naimi.b@gmail.com
# Date (last update):  March 2024
# Version 1.2
# Licence GPL v3
#-------

.getFeature.linear <- function(x) {
  x
}


.getFeature.quad <- function(x) {
  x * x
}

.getFeature.cubic <- function(x) {
  x * x * x
}

.getFeature.poly <- function(x,degree=3,raw=TRUE) {
  d <- as.data.frame(poly(x,degree=degree,raw=raw))
  colnames(d) <- paste0('poly',1:degree)
  d
}
#-----
.getFeature.poly.SpatRaster <- function(x,degree=3,raw=TRUE) {
  .xx <- values(x)
  .xf <- rast(x[[1]])
  n <- names(x)
  for (i in 1:ncol(.xx)) {
    .tmp <- .getFeature.poly(.xx[,i,drop=TRUE],degree = degree, raw = raw)
    colnames(.tmp) <- paste0(n[i],'_',colnames(.tmp))
    .p <- rast(x,nlyrs=ncol(.tmp),names=colnames(.tmp))
    .p <- setValues(.p,.tmp)
    if (hasValues(.xf)) .xf <- c(.xf,.p)
    else .xf <- .p
  }
  .xf
}
#----

.getFeature.poly.Raster <- function(x,degree=3,raw=TRUE) {
  x <- rast(x)
  .xx <- values(x)
  .xf <- rast(x[[1]])
  n <- names(x)
  for (i in 1:ncol(.xx)) {
    .tmp <- .getFeature.poly(.xx[,i,drop=TRUE],degree = degree, raw = raw)
    colnames(.tmp) <- paste0(n[i],'_',colnames(.tmp))
    .p <- rast(x,nlyrs=ncol(.tmp),names=colnames(.tmp))
    .p <- setValues(.p,.tmp)
    if (hasValues(.xf)) .xf <- c(.xf,.p)
    else .xf <- .p
  }
  as(.xf,'Raster')
}

#----
.hinge <- function(x,th) {
  ifelse(x <= th,0,(x - th) / (max(x,na.rm=TRUE) - th))
}

.invhinge <- function(x,th) {
  ifelse(x >= th,0,1 - ((x - min(x,na.rm=TRUE)) / (th - min(x,na.rm=TRUE))))
}
#---------

.thresh <- function(x,th) {
  ifelse(x <= th,0,1)
}

#-------
.getHinge <- function(x,k) {
  # k is the sequence of knots (breaks)
  h1 <- as.data.frame(lapply(k[-length(k)],function(th,x,...) {
    .hinge(x,th)
  },x=x))
  colnames(h1) <- paste0('hi_',k[-length(k)])
  #----
  h2 <- as.data.frame(lapply(k[-1],function(th,x,...) {
    .invhinge(x,th)
  },x=x))
  colnames(h2) <- paste0('hd_',k[-1])
  cbind(h1,h2)
}
#-------
.getThreshold <- function(x,k) {
  # k is the sequence of knots breaks!
  t1 <- as.data.frame(lapply(k,function(th,x,...) {
    .thresh(x,th)
  },x=x))
  colnames(t1) <- paste0('thr_',k)
  
  t1
}
############
# scale of data as function factory form (works for data.frame and raster)
.getScaleFunction <- function(v,scl='minmax') {
  
  force(v)
  switch(scl,
         minmax=function(x) {
           n <- v$names
           if (inherits(x,'data.frame')) {
             
             if (!all(n %in% colnames(x))) stop('The variables required by the scale function are not available in data!')
             
             if (!any(n %in% colnames(x))) {
               warning('Some of the variables required by the scale function are not available in data (they are ignored)!')
               n <- n[n %in% colnames(x)]
             }
             
             for (nn in n) {
               x[,nn] <- (x[,nn] - v[nn,'min']) / (v[nn,'max'] - v[nn,'min'])
             }
             x 
           } else {
             
             if (!all(n %in% names(x))) stop('The variables required by the scale function are not available in data!')
             
             if (!any(n %in% names(x))) {
               warning('Some of the variables required by the scale function are not available in data (they are ignored)!')
               n <- n[n %in% names(x)]
             }
             
             for (nn in n) {
               x[[nn]] <- (x[[nn]] - v[nn,'min']) / (v[nn,'max'] - v[nn,'min'])
             }
             x
           }
           
         },
         center=function(x) {
           n <- v$names
           if (inherits(x,'data.frame')) {
             if (!all(n %in% colnames(x))) stop('The variables required by the scale function are not available in data!')
             
             if (!any(n %in% colnames(x))) {
               warning('Some of the variables required by the scale function are not available in data (they are ignored)!')
               n <- n[n %in% colnames(x)]
             }
             for (nn in n) {
               x[,nn] <- (x[,nn] - v[nn,'mean']) / v[nn,'sd']
             }
             x 
           } else {
             if (!all(n %in% names(x))) stop('The variables required by the scale function are not available in data!')
             
             if (!any(n %in% names(x))) {
               warning('Some of the variables required by the scale function are not available in data (they are ignored)!')
               n <- n[n %in% names(x)]
             }
             for (nn in n) {
               x[[nn]] <- (x[[nn]] - v[nn,'mean']) / v[nn,'sd']
             }
             x
           }
         }
  )
}
#--------
# .scale can be either NULL, or TRUE (to use cor=T in princomp), or a function (generated by scaleGenerator):
.getPCAfunction <- function(x,v,n='auto',.scale=NULL) {
  if (missing(n) || is.null(n)) n <- 'auto'
  if (!is.null(.scale)) {
    if (is.function(.scale)) {
      x <- .scale(x)
    }
  }
  
  x <- x[,v$names,drop=FALSE]
  
  if (ncol(x > 1)) {
    if (is.logical(.scale) && .scale) .pr <- princomp(x,cor=TRUE)
    else .pr <- princomp(x)
    #----
    
    if (is.numeric(n)) {
      if (n > 1 && n > ncol(x)) n <- 0.85
      else if (n <= 0) n <- 0.85
      
    } else if (is.character(n)) {
      if (n == 'auto') n <- 0.85
      else if (grepl('%',n)) {
        n <- as.numeric(strsplit(n,'%')[[1]])
        if (!is.numeric(n)) {
          n <- 0.85
          warning('n argument in pca transformer is not identified, so, default is used: n="90%"!')
        } else n <- n / 100
      } else if (is.null(n)) n <- 0.85
    }
    #--
    if (n < 1) {
      .vi <- .pr$sdev * .pr$sdev
      .vi <- .vi / sum(.vi)
      .vi <- cumsum(.vi)
      n <- max(c(1,which(.vi >= n)[1]))
      rm(.vi)
    }
    
  } else stop('PCA cannot be done for a dataset with a single numeric variable!')
  rm(x); gc()
  if (is.function(.scale)) {
    function(x) {
      if (inherits(x,'data.frame')) {
        .p <- predict(.pr,.scale(x))[,1:n,drop=FALSE]
        colnames(.p) <- paste0('PC',1:n)
        if (any(!colnames(x) %in% v$names)) {
          .p <- cbind(.p,x[,!colnames(x) %in% v$names,drop=FALSE])
          .p
        } else .p
      } else {
        .p <- predict(.scale(x),.pr)[[1:n]]
        names(.p) <- paste0('PC',1:n)
        if (any(!names(x) %in% v$names)) {
          .p <- c(.p,x[[!colnames(x) %in% v$names]])
          .p
        } else .p
      }
    }
  } else {
    function(x) {
      if (inherits(x,'data.frame')) {
        .p <- predict(.pr,x)[,1:n,drop=FALSE]
        colnames(.p) <- paste0('PC',1:n)
        if (any(!colnames(x) %in% v$names)) {
          .p <- cbind(.p,x[,!colnames(x) %in% v$names,drop=FALSE])
          .p
        } else .p
      } else {
        .p <- predict(x,.pr)[[1:n]]
        names(.p) <- paste0('PC',1:n)
        if (any(!names(x) %in% v$names)) {
          .p <- c(.p,x[[!colnames(x) %in% v$names]])
          .p
        } else .p
      }
    }
  }
  
  
}
#------
.getFeature.product <- function(data) {
  x <- data[,1]
  for (i in 2:ncol(data)) {
    x <- x * data[,i]
  }
  x
}


#---- Linear, Quandratic, Cubic, and Polynomial:
.getFeatLQCP <- function(nv,model.terms) {
  .mcls <- sapply(model.terms, class)
  #----
  .featList <- list()
  
  w <- which(.mcls == '.var')
  if (length(w) > 0) {
    .v <- sapply(model.terms[w],function(x) x@name)
    .featList[['linear']] <- .v[.v %in% nv]
  } else .featList[['linear']] <- NULL
  #----
  w <- which(.mcls == '.quad')
  if (length(w) > 0) {
    .v <- sapply(model.terms[w],function(x) x@x)
    if ('.' %in% .v) .featList[['quad']] <- nv
    else {
      .v <- .v[.v %in% nv]
      if (length(.v) > 0) .featList[['quad']] <- .v
    }
  }
  #---
  w <- which(.mcls == '.cubic')
  if (length(w) > 0) {
    .v <- sapply(model.terms[w],function(x) x@x)
    if ('.' %in% .v) .featList[['cubic']] <- nv
    else {
      .v <- .v[.v %in% nv]
      if (length(.v) > 0) .featList[['cubic']] <- .v
    }
  }
  #---
  w <- which(.mcls == '.poly')
  if (length(w) > 0) {
    .v <- sapply(model.terms[w],function(x) x@x)
    if ('.' %in% .v) {
      .o <- model.terms[[w[which(.v == '.')[1]]]]
      if (.o@degree >= 3) {
        .featList[['linear']] <- NULL
        if (!is.null(.featList[['quad']])) .featList <- .featList[-which(names(.featList) == 'quad')]
        if (!is.null(.featList[['cubic']])) .featList <- .featList[-which(names(.featList) == 'cubic')]
      } else if (.o@degree == 2) {
        .featList[['linear']] <- NULL
        if (!is.null(.featList[['quad']])) .featList <- .featList[-which(names(.featList) == 'quad')]
      } else if (.o@degree == 1) {
        .featList[['linear']] <- nv
        warning('The order of a polynomial function in the formula cannot be 1; linear feature(s) is considered instead!')
      }
      #----
      .featList[['poly']][['.']] <- list(x=nv,degree=.o@degree,raw=.o@raw)
    } else {
      for (n in .v) {
        if (n %in% nv) {
          .o <- model.terms[[w[which(.v == n)]]]
          
          if (.o@degree >= 3) {
            if (.o@x %in% .featList[['linear']]) .featList[['linear']] <- .excludeVector(.featList[['linear']],.o@x)
            if (.o@x %in% .featList[['quad']]) .featList[['quad']] <- .excludeVector(.featList[['quad']],.o@x)
            if (.o@x %in% .featList[['cubic']]) .featList[['cubic']] <- .excludeVector(.featList[['cubic']],.o@x)
          } else if (.o@degree == 2) {
            if (.o@x %in% .featList[['linear']]) .featList[['linear']] <- .excludeVector(.featList[['linear']],.o@x)
            if (.o@x %in% .featList[['quad']]) .featList[['quad']] <- .excludeVector(.featList[['quad']],.o@x)
          } else if (.o@degree == 1) {
            if (!.o@x %in% .featList[['linear']]) .featList[['linear']] <- c(.featList[['linear']],.o@x)
          }
          .featList[['poly']][[n]] <- list(x=.o@x,degree=.o@degree,raw=.o@raw)
        }
      }
      #---
      # && '.' %in% all.vars(sF@formula)
      if (any(!nv %in% c(.featList$linear,names(.featList$poly)))) {
        .featList$linear <- c(.featList$linear, nv[!nv %in% c(.featList$linear,names(.featList$poly))])
      }
    }
  }
  #----
  .ls <- ls(all.names=TRUE)
  .ls <- .ls[.ls != '.featList']
  rm(list=.ls);rm(.ls); gc()
  ##########
  function(x) {
    ft <- names(.featList)
    .xf <- x[,0]
    for (.type in ft) {
      if (.type == 'linear') {
        n <- .featList[[.type]]
        if (inherits(x,'tbl')) .xf <- .eval('tibble::add_column(.xf,x[,n])',env=environment())
        else .xf <- cbind(.xf,x[,n,drop=FALSE])
      } else if (.type == 'quad') {
        n <- .featList[[.type]]
        .tmp <- .getFeature.quad(x[,n,drop=FALSE])
        colnames(.tmp) <- paste0('q_',n)
        if (inherits(x,'tbl')) .xf <- .eval('tibble::add_column(.xf,.tmp)',env=environment())
        else .xf <- cbind(.xf,.tmp)
      } else if (.type == 'cubic') {
        n <- .featList[[.type]]
        .tmp <- .getFeature.cubic(x[,n,drop=FALSE])
        colnames(.tmp) <- paste0('c_',n)
        if (inherits(x,'tbl')) .xf <- .eval('tibble::add_column(.xf,.tmp)',env=environment())
        else .xf <- cbind(.xf,.tmp)
      } else if (.type == 'poly') {
        .n <- names(.featList[[.type]])
        for (n in .n) {
          if (n == '.') {
            nn <- .featList[[.type]][[n]]$x
            for (j in nn) {
              .tmp <- .getFeature.poly(x[,j,drop=TRUE],.featList[[.type]][[n]]$degree,.featList[[.type]][[n]]$raw)
              colnames(.tmp) <- paste0(j,'_',colnames(.tmp))
              
              if (inherits(x,'tbl')) .xf <- .eval('tibble::add_column(.xf,.tmp)',env=environment())
              else .xf <- cbind(.xf,.tmp)
              
            }
          } else {
            .tmp <- .getFeature.poly(x[,n,drop=TRUE],.featList[[.type]][[n]]$degree,.featList[[.type]][[n]]$raw)
            colnames(.tmp) <- paste0(n,'_',colnames(.tmp))
            
            if (inherits(x,'tbl')) .xf <- .eval('tibble::add_column(.xf,.tmp)',env=environment())
            else .xf <- cbind(.xf,.tmp)
          }
        }
      }
    }
    .xf
  }
}
##########
#---- Func (exp, log, log10, simp.func):
.getFeatFunc <- function(nv, model.terms) {
  .mcls <- sapply(model.terms, class)
  
  .featList <- list()
  
  w <- which(.mcls == '.log')
  
  if (length(w) > 0) {
    .v <- sapply(model.terms[w],function(x) x@x)
    if ('.' %in% .v) .featList[['log']] <- nv
    else {
      .v <- .v[.v %in% nv]
      if (length(.v) > 0) .featList[['log']] <- .v
    }
  }
  #---
  w <- which(.mcls == '.log10')
  
  if (length(w) > 0) {
    .v <- sapply(model.terms[w],function(x) x@x)
    if ('.' %in% .v) .featList[['log10']] <- nv
    else {
      .v <- .v[.v %in% nv]
      if (length(.v) > 0) .featList[['log10']] <- .v
    }
  }
  #---
  w <- which(.mcls == '.exp')
  
  if (length(w) > 0) {
    .v <- sapply(model.terms[w],function(x) x@x)
    if ('.' %in% .v) .featList[['exp']] <- nv
    else {
      .v <- .v[.v %in% nv]
      if (length(.v) > 0) .featList[['exp']] <- .v
    }
  }
  #---
  w <- which(.mcls %in% c('.func','.simple.func'))
  
  if (length(w) > 0) {
    .featList[['func']] <- lapply(model.terms[w],function(x) x@term)
  }
  #---
  #----
  .ls <- ls(all.names=TRUE)
  .ls <- .ls[.ls != '.featList']
  rm(list=.ls);rm(.ls); gc()
  ##########
  function(x) {
    ft <- names(.featList)
    .xf <- x[,0]
    for (.type in ft) {
      if (.type == 'log') {
        n <- .featList[[.type]]
        .tmp <- log(x[,n,drop=FALSE])
        colnames(.tmp) <- paste0('log_',n)
        if (inherits(x,'tbl')) .xf <- .eval('tibble::add_column(.xf,.tmp)',env=environment())
        else .xf <- cbind(.xf,.tmp)
      } else if (.type == 'exp') {
        n <- .featList[[.type]]
        .tmp <- exp(x[,n,drop=FALSE])
        colnames(.tmp) <- paste0('exp_',n)
        if (inherits(x,'tbl')) .xf <- .eval('tibble::add_column(.xf,.tmp)',env=environment())
        else .xf <- cbind(.xf,.tmp)
      } else if (.type == 'log10') {
        n <- .featList[[.type]]
        .tmp <- log10(x[,n,drop=FALSE])
        colnames(.tmp) <- paste0('log10_',n)
        if (inherits(x,'tbl')) .xf <- .eval('tibble::add_column(.xf,.tmp)',env=environment())
        else .xf <- cbind(.xf,.tmp)
      } else if (.type == 'func') {
        n <- .featList[[.type]]
        for (j in 1:length(n)) {
          .tmp <- model.frame(as.formula(paste('~',deparse(n[[j]]))),data=x)
          if (inherits(x,'tbl')) .xf <- .eval('tibble::add_column(.xf,.tmp)',env=environment())
          else .xf <- cbind(.xf,.tmp)
        }
      }
      
    }
    .xf
  }
}



#---- Factor:
.dummy <- function(x,levels) {
  .xf <- data.frame(matrix(0,nrow=length(x),ncol=length(levels)))
  colnames(.xf) <- levels
  for (.l in levels) {
    w <- which(x == .l)
    if (length(w) > 0)
      .xf[w,.l] <- 1
  }
  .xf
}

.getFeatFactor <- function(fv,model.terms) {
  .mcls <- sapply(model.terms, class)
  w <- which(.mcls == '.factor')
  
  .featList <- list()
  
  if (length(w) > 0) {
    for (i in w) {
      .tmp <- model.terms[[i]]
      .v <- .tmp@x
      .v <- .v[.v %in% names(fv)]
      if (length(.v) > 0) {
        .featList[['factor']][[.v]] <- fv[[.v]]
      }
    }
  }
  #------
  # if (any(!names(fv) %in% names(.featList[['factor']]))) {
  #   
  # }
  .ls <- ls(all.names=TRUE)
  .ls <- .ls[.ls != '.featList']
  rm(list=.ls);rm(.ls); gc()
  ########
  function(x) {
    .w <- names(.featList$factor) %in% colnames(x)
    if (!any(.w)) {
      .w <- names(.featList$factor)[!.w]
      stop(paste0('The required categorical (factor) variables are not available: ',paste(.w,collapse = ', ')))
    }
    #--------
    .xf <- x[,0]
    for (n in names(.featList$factor)) {
      .tmp <- .dummy(x[[n]],levels =.featList$factor[[n]]$levels)
      colnames(.tmp) <- paste0(n,'__',colnames(.tmp))
      if (inherits(x,'tbl')) .xf <- .eval('tibble::add_column(.xf,.tmp)',env=environment())
      else .xf <- cbind(.xf,.tmp)
    }
    .xf
  }
}
#---------
############
#----- Interaction and product:
.getInteraction <- function(nv,model.terms) {
  .mcls <- sapply(model.terms, class)
  #----
  nnv <- nv
  .featList <- list()
  
  w <- which(.mcls == '.interaction')
  if (length(w) > 0) {
    # in .featList, based on interaction depth:
    .dp <- sapply(model.terms[w],function(x) x@depth)
    
    if (length(unique(.dp)) > 1) {
      w <- w[order(.dp,decreasing = TRUE)]
      for (i in w) {
        .tmp <- model.terms[[i]]
        if ('.' %in% .tmp@x) {
          .featList[['int']][[as.character(.tmp@depth)]] <- list(vars=nv)
          break
        } else {
          .n <- .tmp@x
          .n <- .n[.n %in% nv]
          if (length(.n) > 1) {
            .featList[['int']][[as.character(.tmp@depth)]] <- list(vars=.n)
            nv <- .excludeVector(nv,.n)
            if (length(nv) < 2 || is.null(nv)) break
          }
        }
      }
    } else {
      .v <- unlist(sapply(model.terms[w],function(x) x@x))
      if ('.' %in% .v) {
        .featList[['int']][[as.character(.dp[1])]] <- list(vars=nv)
        nv <- .excludeVector(nv,nv)
      } else {
        .v <- .v[.v %in% nv]
        if (length(.v) > 0) {
          .featList[['int']][[as.character(.dp[1])]] <- list(vars=.v)
          nv <- .excludeVector(nv,.v)
        }
      }
    }
  }
  #-------
  w <- which(.mcls == '.product')
  if (length(w) > 0) {
    for (i in w) {
      .tmp <- model.terms[[i]]
      .v <- .tmp@x
      if ('.' %in% .v) {
        if (!is.null(nv) && length(nv) > 0) {
          .featList[['int']][[as.character(1)]][['vars']] <- unique(c(.featList[['int']][[as.character(1)]][['vars']],nv))
          nv <- NULL
        }
      } else {
        if (as.character(length(.v) - 1) %in% names(.featList$int)) {
          .v <- .v[.v %in% nnv]
          if (!all(.v %in% .featList[['int']][[as.character(length(.v)-1)]][['vars']])) {
            .featList[['product']] <- c(.featList[['product']],list(.v))
          }
        } else if (any(as.numeric(names(.featList$int)) > (length(.v)-1))) {
          .v <- .v[.v %in% nnv]
          if (length(.v) > 1) {
            if (!is.null(.featList[['int']][[as.character(length(.v)-1)]][['vars']]) && !all(.v %in% .featList[['int']][[as.character(length(.v)-1)]][['vars']])) {
              .featList[['product']] <- c(.featList[['product']],list(.v))
            }
          }
        } else {
          .v <- .v[.v %in% nnv]
          if (length(.v) > 1) .featList[['product']] <- c(.featList[['product']],list(.v))
        }
        
      }
    }
  }
  #-------
  .dp <- as.numeric(names(.featList$int))
  for (i in .dp) {
    .v <- .featList$int[[as.character(i)]][['vars']]
    .featList$int[[as.character(i)]][['interactions']] <- list()
    k <- min(c(i + 1,length(.v)))
    if (k > 1) {
      for (.c in seq(2,k,1)) {
        .co <- data.frame(combn(.v, .c))
        colnames(.co) <- sapply(.co,function(x) paste(x,collapse='__'))
        .featList$int[[as.character(i)]][['interactions']][[as.character(.c)]] <- .co
      }  
    }
    #---
    if (i == 1 && !is.null(.featList[['product']])) {
      for (j in seq_along(.featList[['product']])) {
        .n <- .featList$product[[j]]
        .co <- data.frame(combn(.n, length(.n)))
        colnames(.co) <- sapply(.co,function(x) paste(x,collapse='__'))
        if (is.null(.featList[['int']][[as.character(length(.v)-1)]][['interactions']][[as.character(length(.n))]])) {
          .featList[['int']][[as.character(length(.v)-1)]][['interactions']][[as.character(length(.n))]] <- .co
        } else {
          .featList[['int']][[as.character(length(.v)-1)]][['interactions']][[as.character(length(.n))]] <- cbind(.featList[['int']][[as.character(length(.v)-1)]][['interactions']][[as.character(length(.n))]],.co)
        }
      }
    }
  }
  #------
  .ls <- ls(all.names=TRUE)
  .ls <- .ls[.ls != '.featList']
  rm(list=.ls);rm(.ls); gc()
  
  
  #-------
  # depth (interaction.depth): 1 refers to combination of 2 variables, 2 for 2 & 3 variables and so on
  
  function(x) {
    .xf <- x[,0]
    .dp <- names(.featList$int)
    for (i in .dp) {
      .v <- .featList$int[[i]]$interactions
      if (!is.null(.v)) {
        for (k in names(.v)) {
          .co <- .v[[k]]
          for (j in 1:ncol(.co)) {
            .tmp <- data.frame(a=.getFeature.product(x[,.co[,j]]))
            colnames(.tmp) <- colnames(.co)[j]
            if (inherits(x,'tbl')) .xf <- .eval('tibble::add_column(.xf,.tmp)',env=environment())
            else .xf <- cbind(.xf,.tmp)
          }
        }
      }
    }
    .xf
  }
}
#---------
.getFeatHingeThreshold <- function(nT,model.terms) {
  .mcls <- sapply(model.terms, class)
  
  .featList <- list()
  
  w <- which(.mcls == '.hinge')
  
  if (length(w) > 0) {
    .v <- sapply(model.terms[w],function(x) x@x)
    if ('.' %in% .v) {
      .o <- model.terms[[w[which(.v == '.')[1]]]]
      n <- nT$names
      
      for (.n in n) {
        j <- which(nT$names == .n)
        .featList[['hinge']][[.n]] <- seq(nT$min[j],nT$max[j],length.out = .o@k)
      }
    } else {
      n <- .v[.v %in% nT$names]
      if (length(n) > 0) {
        for (.n in n) {
          .o <- model.terms[[w[which(.v == .n)[1]]]]
          j <- which(nT$names == .n)
          .featList[['hinge']][[.n]] <- seq(nT$min[j],nT$max[j],length.out = .o@k)
        }
      }
    }
  }
  #==========
  w <- which(.mcls == '.threshold')
  
  if (length(w) > 0) {
    .v <- sapply(model.terms[w],function(x) x@x)
    if ('.' %in% .v) {
      .o <- model.terms[[w[which(.v == '.')[1]]]]
      n <- nT$names
      
      for (.n in n) {
        j <- which(nT$names == .n)
        .featList[['threshold']][[.n]] <- seq(nT$min[j],nT$max[j],length.out = .o@k)[-c(1,.o@k)]
      }
    } else {
      n <- .v[.v %in% nT$names]
      if (length(n) > 0) {
        for (.n in n) {
          .o <- model.terms[[w[which(.v == .n)[1]]]]
          j <- which(nT$names == .n)
          .featList[['threshold']][[.n]] <- seq(nT$min[j],nT$max[j],length.out = .o@k)[-c(1,.o@k)]
        }
      }
    }
  }
  ############
  .ls <- ls(all.names=TRUE)
  .ls <- .ls[.ls != '.featList']
  rm(list=.ls);rm(.ls); gc()
  #############
  function(x) {
    ft <- names(.featList)
    .xf <- x[,0]
    for (.type in ft) {
      if (.type == 'hinge') {
        for (n in names(.featList$hinge)) {
          .tmp <- .getHinge(x[[n]],k = .featList$hinge[[n]])
          colnames(.tmp) <- paste0(n,'_hinge_',1:ncol(.tmp))
          if (inherits(x,'tbl')) .xf <- .eval('tibble::add_column(.xf,.tmp)',env=environment())
          else .xf <- cbind(.xf,.tmp)
        }
      } else if (.type == 'threshold') {
        for (n in names(.featList$threshold)) {
          .tmp <- .getThreshold(x[[n]],k = .featList$threshold[[n]])
          colnames(.tmp) <- paste0(n,'_thr_',1:ncol(.tmp))
          if (inherits(x,'tbl')) .xf <- .eval('tibble::add_column(.xf,.tmp)',env=environment())
          else .xf <- cbind(.xf,.tmp)
        }
      }
    }
    .xf
  }
}
########

.getFeatureType <- function(.mcls) {
  ft <- c()
  if (any(c('.var','.quad','.cubic','.poly') %in% .mcls)) ft <- 'L'
  if (c('.factor') %in% .mcls) ft <- c(ft,'F')
  if (c('.hinge') %in% .mcls) ft <- c(ft,'H')
  if (any(c('.interaction','.product') %in% .mcls)) ft <- c(ft,'P')
  if (c('.threshold') %in% .mcls) ft <- c(ft,'T')
  if (any(c('.func','.exp','.log','.log10','.simple.func') %in% .mcls)) c(ft,'Fu')
  if (c('.nestedModel') %in% .mcls) ft <- c(ft,'nM')
  
  ft
}
#----
.getFeatureGenerator <- function(ff,mT) {
  .mcls <- sapply(mT, class)
  .tr <- ff@transformers
  nv <- ff@numeric
  ft <- .getFeatureType(.mcls)
  
  .f <- list()
  if ('L' %in% ft) {
    .f[['L']] <- .getFeatLQCP(nv$names,mT)
  }
  
  if (any(c('H','T') %in% ft)) {
    .f[['HT']] <- .getFeatHingeThreshold(nv,mT)
  }
  
  if ('F' %in% ft || !is.null(ff@categorical)) {
    .f[['F']] <- .getFeatFactor(ff@categorical,mT)
  }
  
  if ('P' %in% ft) {
    .f[['P']] <- .getInteraction(nv$names,mT)
  }
  
  if ('Fu' %in% ft) {
    .f[['Fu']] <- .getFeatFunc(nv$names,mT)
  }
  #----
  .ls <- ls(all.names=TRUE)
  .ls <- .ls[!.ls %in% c('.f','.tr')]
  rm(list=.ls);rm(.ls); gc()
  #----
  
  function(x) {
    if (!is.null(.tr)) x <- .tr(x)
    #----
    if (length(.f) > 0) {
      .xf <- x[,0]
      for (f in .f) {
        .tmp <- f(x)
        if (ncol(.tmp) > 0) {
          if (inherits(x,'tbl')) .xf <- .eval('tibble::add_column(.xf,.tmp)',env=environment())
          else .xf <- cbind(.xf,.tmp)
        }
      }
    }
    
    .xf
  }
}
#--------

.getFeatureFrame <- function(sF, data) {
  ff <- new('.featureFrame',responses=sF@vars@species,predictors=c(sF@vars@numeric$names,names(sF@vars@categorical)),numeric=sF@vars@numeric,categorical=sF@vars@categorical)
  .dcls <- sapply(sF@data.terms, class)
  .mcls <- sapply(sF@model.terms, class)
  
  if (length(.dcls) > 0) {
    if (".scaleSetting" %in% .dcls) {
      .sc <- sF@data.terms[.dcls == '.scaleSetting'][[1]]
      
      if ('.' %in% .sc@vars) {
        .v <- ff@numeric
      } else {
        if (!any(.sc@vars %in% ff@numeric$names)) {
          warning('The variables specified in the scale function in the formula are not available in dataset, so the default (all numeric variables) are considered for scale! ')
          .v <- ff@numeric
        } else {
          if (!all(.sc@vars %in% ff@numeric$names)) warning(paste0('The variables: ',paste(.sc@vars[!.sc@vars %in% ff@numeric$names],collapse = ', '),' specified in the scale function in the formula is not available in the data, and so ignored!'))
          .v <- ff@numeric[ff@numeric$names %in% .sc@vars,]
        }
      }
      #--------
      
      ff@transformers <- .getScaleFunction(.v,scl = .sc@method)
      
    }
    #-----
    if (".pcaSetting" %in% .dcls) {
      # both scale and pca:
      .pc <- sF@data.terms[.dcls == '.pcaSetting'][[1]]
      #--
      if ('.' %in% .pc@vars) {
        .v <- ff@numeric
      } else {
        if (!any(.pc@vars %in% ff@numeric$names)) {
          warning('The variables specified in the pca function in the formula are not available in dataset, so the default (all numeric variables) are considered for scale! ')
          .v <- ff@numeric
        } else {
          if (!all(.pc@vars %in% ff@numeric$names)) warning(paste0('The variables: ',paste(.pc@vars[!.pc@vars %in% ff@numeric$names],collapse = ', '),' specified in the pca function in the formula is not available in the data and ignored!'))
          .v <- ff@numeric[ff@numeric$names %in% .pc@vars,]
        }
      }
      #--
      
      if (is.null(ff@transformers)) ff@transformers <- .getPCAfunction(data,.v,n = .pc@n,.scale = TRUE)
      else ff@transformers <- .getPCAfunction(data,.v,n = .pc@n,.scale = ff@transformers)
      
      .tmp <- ff@transformers(data[,.v$names])

      if (all(ff@numeric$names %in% .v$names)) {
        ff@numeric <- .getDataParams(.tmp)
      } else {
        ff@numeric <- rbind(ff@numeric[-which(ff@numeric$names %in% .v$names),],.getDataParams(.tmp))
      }
    }
  }
  #######################
  #--- Features:
  if (length(.mcls) > 0) {
    ff@featureGenerator <- .getFeatureGenerator(ff,sF@model.terms)
  }
  ff
}
babaknaimi/sdm documentation built on May 6, 2024, 1:52 a.m.