R/FLLDA.R

NULL
##https://www.quora.com/Mathematical-Modeling-How-are-posterior-probabilities-calculated-in-linear-discriminant-analysis
##http://stats.stackexchange.com/questions/134282/relationship-between-svd-and-pca-how-to-use-svd-to-perform-pca
##https://www.analyticsvidhya.com/blog/2015/09/naive-bayes-explained/

#' An S4 class to represent output from Discriminant Analysis on in-database Objects
#'
#' @slot offset column name used as offset
#' @method coefficients FLLDA.
#' @method residuals FLLDA. 
#' @method influence FLLDA. 
#' @method lm.influence FLLDA. 
#' @method plot FLLDA. 
#' @method predict FLLDA. 
#' @export
setClass(
    "FLLDA",
    contains="FLRegr",
    slots=list(offset="character",
               vfcalls="character"))

#' Flexible Discriminant Analysis
#' 
#' \code{fda} performs Flexible Discriminant Analysis on FLTable objects.
#' library(mda)
#'
#' The DB Lytix function called is FLFlexDiscriminant. Performs Flexible Discriminant Analysis and stores the results in predefined tables.
#' @seealso \code{\link[stats]{fda}} for R reference implementation.
#' @param formula A symbolic description of model to be fitted
#' @param data An object of class FLTable or FLTableMD.
#' @param MaxMARSMaximum number of basis functions to be used for multivariate nonparametric
#' regression step where multivariate adaptive regression splines is used.
#' @param MinRsq Terminating condition for multivariate regression step based on change in Rsq upon addition of new basis functions.
#' @return \code{fda} returns an object of class \code{FLLDA}
#' @examples
#' deeptbl <- FLTable("tblIrisDeep", "ObsID", "VarID", "Num_Val")
#' flmod <- fda(a~., data = deeptbl)
#' vpred <- predict(flmod);coefficients(flmod);confusion(flmod)
#' Performs flexible discriminant analysis and stores the results in predefined tables. It
#' involves the use of multivariate adaptive regression splines for obtaining a basis
#' transformation of the independent variables and performing the multivariate
#' non-parametric regression step in the Flexible Discriminant Analysis procedure.
#'
#' couldnt be implemented: plot, values, precent.explained
#' (lack of data of discriminant space).
#' @export
fda <- function (formula,data=list(),...) {
    UseMethod("fda", data)
}

#' @export
fda.default <- function (formula,data=list(),...) {
    if (!requireNamespace("mda", quietly = TRUE)){
        stop("mda package needed for mda. Please install it.",
             call. = FALSE)
    }
    else return(mda::fda(formula=formula,data=data,...))
}

#' @export
fda.FLpreparedData <- function(formula, data,MaxMARS = 11, MinRsq = .001 ,...)
{
    vcallObject <- match.call()
    data <- setAlias(data,"")
    return(ldaGeneric(formula=formula,
                      data=data,
                      callObject=vcallObject,
                      MaxMARS = MaxMARS,
                      MinRsq = MinRsq,
                      familytype="Flex",
                      ...))
}

#' @export
fda.FLTable <- fda.FLpreparedData


#' @export
fda.FLTableDeep <- fda.FLpreparedData

#' @export
fda.FLTableMD <- fda.FLpreparedData


NULL
##https://www.quora.com/Mathematical-Modeling-How-are-posterior-probabilities-calculated-in-linear-discriminant-analysis
##http://stats.stackexchange.com/questions/134282/relationship-between-svd-and-pca-how-to-use-svd-to-perform-pca
##https://www.analyticsvidhya.com/blog/2015/09/naive-bayes-explained/

#' An S4 class to represent output from Discriminant Analysis on in-database Objects
#'
#' @slot offset column name used as offset
#' @method coefficients FLLDA.
#' @method residuals FLLDA. 
#' @method influence FLLDA. 
#' @method lm.influence FLLDA. 
#' @method plot FLLDA. 
#' @method predict FLLDA. 
#' @export
setClass(
    "FLLDA",
    contains="FLRegr",
    slots=list(offset="character",
               vfcalls="character"))

#' Linear discriminant analysis.
#' 
#' \code{lda} performs Linear discriminant analysis on FLTable objects.
#' The DB Lytix function called is FLLDA. Performs Linear discriminant analysis and 
#' stores the results in predefined tables.
#'
#' @seealso \code{\link[stats]{lda}} for R reference implementation.
#' @param formula A formula of the form groups ~ x1 + x2 + ... That is, the response is the grouping factor and the right hand side specifies the (non-factor) discriminators.
#' @param data An object of class FLTable or FLTableMD.
#' @return \code{LDA} returns an object of class \code{FLLDA}
#' @examples
#' tbl <- FLTable("tblLDA", "OBSID", "VARID", "NUM_VAL")
#' flmod <- lda(a~. , data = tbl)
#' flmod$scaling, flmod$means;cof <-coefficients(flmod)
#' predict(flmod): Not implemented yet.
#' plot(flmod)
#' @export
lda <- function (formula,data=list(),...) {
    UseMethod("lda", data)
}

#' @export
lda.default <- MASS::lda

#' @export
lda.FLpreparedData <- function(formula, data,...)
{
    vcallObject <- match.call()
    data <- setAlias(data,"")
    return(ldaGeneric(formula=formula,
                      data=data,
                      callObject=vcallObject,
                      familytype="lda",
                      ...))
}

#' @export
lda.FLTable <- lda.FLpreparedData

#' @export
lda.FLTableMD <- lda.FLpreparedData


## MDA function
#' Mixture Discriminant Analysis.
#' 
#'\code{mda} performs Mixture Discriminant Analysis on FLTable objects.
#' The DB Lytix function called is FLMDA. Performs Mixture Discriminant Analysis and 
#' stores the results in predefined tables.
#'
#' limitations: no dollar operator access to means, precent.explained, values, plot((lack of data of discriminant space).
#' @seealso \code{\link[stats]{mda}} for R reference implementation.
#' @param formula A symbolic description of model to be fitted
#' @param data An object of class FLTable or FLTableMD.
#' @param subclasses Number of subclasses.
#' @param iter Maximum number of iterations for expectation maximization.
#' @param init Initialization method for each obs' latent variable Prob(x is in
#' the subclass of class) 1 = Assign weight of 1 to a
#' random subclass of its class; 0 otherwise.
#' @param hypothesis Number of hypotheses to run simultaneously
#' @return \code{mda} returns an object of class \code{FLLDA}
#' @examples
#' deeptbl <- FLTable("tblMDA","ObsID", "VarID", "Num_Val")
#' flmod <- mda(formula = a~.,data = deeptbl)
#' vpred <- predict(flmod); flmod$N
#' FLMDA performs mixed discriminant analysis. For the training data, MDA divides each
#' class into a number of artificial subclasses. It calibrates the mixture of Gaussians
#' and the mixing probability by maximizing the log-likelihood with expectation maximization.
#' @export
mda <- function (formula,data=list(),...) {
    UseMethod("mda", data)
}

#' @export
mda.default <- function (formula,data=list(),...) {
    if (!requireNamespace("mda", quietly = TRUE)){
        stop("nortest package needed for mda. Please install it.",
             call. = FALSE)
    }
    else return(mda::mda(formula=formula,data=data,...))
}

#' @export
mda.FLpreparedData <- function(formula, data,subclasses = 3, iter = 5, init = 1,hypothesis = 5, ...)
{
    vcallObject <- match.call()
    data <- setAlias(data,"")
    return(ldaGeneric(formula=formula,
                      data=data,
                      subclasses = subclasses,
                      iter = iter,
                      init = init,
                      hypothesis = hypothesis, 
                      callObject=vcallObject,
                      familytype="Mixed",
                      ...))
}

#' @export
mda.FLTable <- mda.FLpreparedData

#' @export
mda.FLTableMD <- mda.FLpreparedData



## Generic function for DA.
ldaGeneric <- function(formula,data,
                       callObject=NULL,
                       familytype = "",
                       MaxMARS = MaxMARS,
                       MinRsq = MinRsq,
                       subclasses = subclasses,
                       iter = iter,
                       init = init,
                       hypothesis = hypothesis,
                       matrixtype = "COVAR",
                       ...)
    
{
    prepData <- prepareData.lmGeneric(formula,data,
                                      callObject=callObject,
                                      familytype=familytype,
                                      performNorm=1,
                                      cost = 1,
                                      ...)
    vclass <- "FLLDA"
    for(i in names(prepData))
   	assign(i,prepData[[i]])
    deeptable <- getTableNameSlot(deepx)
    functionName <- "FLLDA"
    var <- getVariables(deepx)
    vinputcols <- list()
    vinputcols <- c(vinputcols,
                    TableName = deeptable,
                    ObsIDCol = var[[1]],
                    VarIDCol = var[[2]],
                    ValueCol = var[[3]])
    extra <- NULL
    if(familytype == "Flex")
    {
        functionName = "FLFlexDiscriminant"
        vinputcols = c(vinputcols, MaxBasisMARS = MaxMARS, MinDeltaRsqMARS =MinRsq)     
        
    }
    
    if (familytype == "Mixed")
    {
        functionName = "FLMDA"
        vinputcols = c(vinputcols, WhereClause = " ",Subclasses = subclasses,Iterations = iter,Initilization = init,Hypothesis = hypothesis)
       extra  <- subclasses
    }

    if (familytype == "pca"){
        functionName = "FLPCA"
        vinputcols = c(vinputcols, WhereClause = " ", GroupBy = 'NULL', MatrixType = matrixtype, TableOutput = 1)
        vclass <- "FLPCA"
    }

    if(familytype %in% c("lda", "Mixed", "Flex"))
        vinputcols <- c(vinputcols, NOTE = "")

    ret <- sqlStoredProc(connection,
                         functionName,
                         pInputParams = vinputcols,
                         outputParameter = c(OutTable = 'a')
                         )
    return(new(vclass,
               formula=formula,
               AnalysisID = as.character(ret[1,1]),
               table=data,
               results=list(call=callObject,
                            familytype = familytype,
                            extra = extra),
               ##AnalysisID = as.character(ret[1,1])),
               deeptable=deepx,
               mapTable=mapTable,
               scoreTable="",
               offset=as.character(offset)))
}



#' @export
`$.FLLDA`<-function(object,property){

                                        #parentObject <- deparse(substitute(object))
    parentObject <- unlist(strsplit(unlist(strsplit(as.character(sys.call()),"(",fixed=T))[2],",",fixed=T))[1]
    var <- getVariables(object@deeptable)

    if(property=="scaling"){
        if(object@results$familytype == "lda"){
            
        str <- paste0("SELECT VarID, CanID, Num_Val FROM fzzlLDACanCoeff WHERE AnalysisID = ",fquote(object@AnalysisID)," AND CanType = 'Within-Class Can Struct' ORDER BY VarID, CanID ")
        df <- sqlQuery(connection ,str)
        colnames(df) <- tolower(colnames(df))
        nvar <- length(unique(df$canid))
        dtf <- as.data.frame(lapply(1:nvar, function(x){
            df$num_val[df$canid == x]
        }), ncol = nvar)
        dtf <- as.matrix.data.frame(dtf)
        colnames(dtf) <- paste0("LD",1:nvar)
        return(dtf)}
        else
        {
            vstr <- paste0("scaling is only supported by LDA as of now")
            return(vstr)}
    }
    else if (property=="coefficients"){
        cof <- coefficients(object)
        return(cof)
        
    }
    else if (property == "N"){
        return(length(object@deeptable@Dimnames[[1]]))
    }
    else if (property == "call"){
        return(object@results$call)}
    else if (property == "counts"){
        vcount <- sqlQuery(connection, paste0("SELECT ",var[[3]]," AS id, count(*) AS val FROM ",getTableNameSlot(object@deeptable)," WHERE ",var[[2]]," = -1 GROUP BY ",var[[3]]," ORDER BY ",var[[3]]," "))
        vect <- vcount$val
        names(vect) <- vcount$id
        return(vect)       
    }
    else if (property == "means")
    {
        if(object@results$familytype %in% c("lda", "Mixed"))
        {
            str <- paste0("SELECT FLMean(d.",var[[3]],") as means, d.",var[[2]]," AS varid, c.val as val  FROM ",getTableNameSlot(object@deeptable)," d, (SELECT ",var[[1]]," AS ObsID, ",var[[3]]," AS val FROM tbllda b WHERE b.",var[[2]]," = -1) AS
c WHERE d.",var[[1]]," = c.ObsID AND d.",var[[2]]," <> -1 GROUP BY c.val, d.",var[[2]]," ORDER BY d.",var[[2]],", c.val ")
            df <- sqlQuery(connection, str)
            var <- unique(df$val)
                                        #browser()
            dtf <- t(sapply(var, function(x){df$means[df$val == x]}))
            rownames(dtf) <- as.character(var)
            colnames(dtf) <- as.character(object@deeptable@Dimnames[[2]][-1])
            return(dtf)}
        else {
            
            str <- paste0("SELECT * FROM fzzlFDAThetaMeans WHERE AnalysisID = '",object@AnalysisID,"' ORDER BY Col_ID, Row_ID")
            dtf <- sqlQuery(connection, str)
            colnames(dtf) <- tolower(colnames(dtf))
            var <- unique(dtf$col_id)
            df <- sapply(var, function(x){
                dtf$num_val[dtf$col_id == x]})
            rownames(df) <- object$lev
            return(df)    
        }
        
    }
    
    else if (property == "lev"){
        level <- sqlQuery(connection, paste0("SELECT DISTINCT ",var[[3]]," AS val FROM ",getTableNameSlot(object@deeptable)," WHERE ",var[[2]]," = -1 ORDER BY ",var[[3]]," "))
        return(as.character(level$val))
    }
    else if (property == "xlevels")
        return(NULL)
    else if (property == "prior")
    {
        return(object$counts/object$N)
    }
    else if (property == "confusion")
    {
        if(object@results$familytype %in% "Flex")
        {
            str <- paste0("SELECT * FROM fzzlFDAConfusionMtx WHERE AnalysisID = '",object@AnalysisID,"'ORDER BY PredictedClass, ActualClass ")
            dtf <- sqlQuery(connection, str)
            colnames(dtf) <- tolower(colnames(dtf))
            var <- max(as.integer(object$lev))
            df <- as.data.frame(lapply(1:var, function(x){
                dtf$num_val[dtf$actualclass == x]
            }), ncol = var)
            rownames(df) <- 1:var
            colnames(df) <- 1:var
            return(df)
        }
        else return(warning("confusion method supported for fda only "))
    }
    else if(property == "weights")
    {

        if(object@results$familytype == "Mixed"){
            dl <- list()
            str <- paste0("SELECT ClassID, COUNT(DISTINCT(ObsID)) FROM fzzlMDAWeight WHERE AnalysisID = '",object@AnalysisID,"' GROUP BY ClassID")
            vcount <- sqlQuery(connection, str)
            vcol <- object@deeptable@dims[2]-1
            vrow <- vcount[,2]
            vclass <- 0
            dl <- lapply(object$lev, function(x){wtfun(object, nrow = vrow[vcount[,1] == x] ,ncol = vcol, nclass = x)})
            names(dl) <- object$lev

            return(dl)

        }
    }

}

#' @export
setMethod("names", signature("FLLDA"), function(x) c("scaling", "coefficients", "N",
                                                     "call", "counts", "means",
                                                     "lev","xlevels","prior",
                                                     "confusion","weights" ))




coefficients.FLLDA <- function(object)
{
    if( object@results$familytype %in% "lda"){
        str <- paste0("SELECT VarID, CanID, Num_Val FROM fzzlLDACanCoeff WHERE AnalysisID = ",fquote(object@AnalysisID)," AND CanType = 'Raw Canonical Coefficients' ORDER BY VarID, CanID ")
        df <- sqlQuery(connection ,str)
        colnames(df) <- tolower(colnames(df))

        nvar <- length(unique(df$canid))
        dtf <- data.frame(lapply(1:nvar, function(x){
            df$num_val[df$canid == x]
        }))
        dtf <- as.matrix.data.frame(dtf)
        colnames(dtf) <- paste0("LD",1:nvar)  
        return(dtf)}
    else
        if (object@results$familytype %in% "Flex")
        {
            vID <- object@AnalysisID
            str <- paste0("SELECT * FROM fzzlFDARegrCoeffs WHERE AnalysisID = '",vID,"' ORDER BY 2,3 ")
            df <- sqlQuery(connection, str)
            colnames(df) <- tolower(colnames(df))
            nvar <- 2
            dtf <- data.frame(lapply(1:nvar, function(x){
                df$coeffest[df$depvarid == x]
            }))
            dtf <- as.matrix.data.frame(dtf)
            colnames(dtf) <- 1:nvar
            rownames(dtf) <- 1:length(dtf[,1])
            return(dtf)   
        }
    else
    {vstr <- paste0("only computes for LDA or mixed as of now")
        return(vstr)}
    
}
##posterior probablity:https://www.quora.com/Mathematical-Modeling-How-are-posterior-probabilities-calculated-in-linear-discriminant-analysis
## http://sites.stat.psu.edu/~jiali/course/stat597e/notes2/lda.pdf
#' @export
predict.FLLDA <- function(object){
    var <- getVariables(object@deeptable)
    if(object@results$familytype %in% "Flex")
    {
        vinputcols <- list()
        tblname <- gen_score_table_name("flexscore")
        
        ret <- sqlStoredProc(connection,"FLFlexDiscriminantScore",
                             TableName = getTableNameSlot(object@deeptable),
                             ObsIDCol = var[[1]],
                             VarIDCol = var[[2]],
                             ValueCol = var[[3]],
                             WhereClause = "WHERE VarID >= 0",
                             InAnalysisID = object@AnalysisID,
                             ScoreTable = tblname,
                             Note = "",
                             outputParameter = c(OutTable = 'a')                            
                             )
        str <- paste0("SELECT PredictedClass AS predicted FROM ",tblname," ORDER BY 1 ")
        df <- sqlQuery(connection,str)
        rownames(df) <- object@deeptable@Dimnames[[1]]
        return(df)}
    else if (object@results$familytype %in% "lda"){
        
        str <- paste0("SELECT a.",var[[1]]," , a.",var[[3]]," AS LD1, b.",var[[3]]," AS LD2  FROM fzzlLDACanVariate a, (SELECT * FROM fzzlLDACanVariate) b WHERE a.",var[[1]]," = b.",var[[1]],"   AND a.",var[[2]]," = 1 AND b.",var[[2]]," = 2 AND a.",var[[1]]," < 3000  AND a.AnalysisID = b.AnalysisID AND a.AnalysisID = '",object@AnalysisID,"' ORDER BY a.",var[[1]]," ")
        ##
        ##str <- paste0("SELECT * FROM fzzlLDACanVariate WHERE AnalysisID = '",object@AnalysisID,"' AND ",var[[1]]," < 100 ORDER BY ",var[[1]]," ")
        x <- sqlQuery(connection, str)
        post <- NULL
        cl <- NULL
        list(x = x, class = cl, posterior = post)    }

    else if (object@results$familytype %in% "Mixed"){
        str <- paste0("SELECT ClassID as classid FROM fzzlMDAClassify WHERE AnalysisID = '",object@AnalysisID,"' AND HypothesisID = 1 ORDER BY ClassID")
        dtf <- sqlQuery(connection , str)
        dtf <- as.integer(dtf$classid)
        names(dtf) <- object@deeptable@Dimnames[[1]]
        return(dtf)      
    }
}




#' @export
confusion.FLLDA <- function(object){
    if(object@results$familytype %in% "Flex")
        return(object$confusion)
    else
        warning("confusion method only supported for fda")
}


#' @export
plot.FLLDA <- function(object){
    if(object@results$familytype %in% "lda")
        val <- predict(object)$x
    else warning("plot for lda method is only supported for now ")
    val <- val[, 2:3]
    plot(val)
}




wtfun <- function(object, nrow, ncol, nclass){
    tblfunqueryobj <- new("FLTableFunctionQuery",
                          connectionName = getFLConnectionName(),
                          variables=list(MATRIX_ID="MATRIX_ID",
                                         rowIdColumn="rowIdColumn",
                                         colIdColumn="colIdColumn",
                                         valueColumn="valueColumn"),
                          whereconditions="",
                          order = "",
                          SQLquery=paste0("SELECT hypothesisid as matrix_id,row_number()over(partition by Subclassid order by obsid ) as rowIdColumn,Subclassid as colIdColumn,weight as valueColumn from fzzlMDAWeight WHERE AnalysisID = '",object@AnalysisID,"' AND ClassID = ",nclass," AND HypothesisID = 5"))

    flm <- newFLMatrix(
        select= tblfunqueryobj,
        dims=c(nrow, ncol),
        Dimnames=list(1:nrow, 1:ncol))
    return(flm)

}
Fuzzy-Logix/AdapteR documentation built on May 6, 2019, 5:07 p.m.