NULL
##https://www.quora.com/Mathematical-Modeling-How-are-posterior-probabilities-calculated-in-linear-discriminant-analysis
##http://stats.stackexchange.com/questions/134282/relationship-between-svd-and-pca-how-to-use-svd-to-perform-pca
##https://www.analyticsvidhya.com/blog/2015/09/naive-bayes-explained/
#' An S4 class to represent output from Discriminant Analysis on in-database Objects
#'
#' @slot offset column name used as offset
#' @method coefficients FLLDA.
#' @method residuals FLLDA.
#' @method influence FLLDA.
#' @method lm.influence FLLDA.
#' @method plot FLLDA.
#' @method predict FLLDA.
#' @export
setClass(
"FLLDA",
contains="FLRegr",
slots=list(offset="character",
vfcalls="character"))
#' Flexible Discriminant Analysis
#'
#' \code{fda} performs Flexible Discriminant Analysis on FLTable objects.
#' library(mda)
#'
#' The DB Lytix function called is FLFlexDiscriminant. Performs Flexible Discriminant Analysis and stores the results in predefined tables.
#' @seealso \code{\link[stats]{fda}} for R reference implementation.
#' @param formula A symbolic description of model to be fitted
#' @param data An object of class FLTable or FLTableMD.
#' @param MaxMARSMaximum number of basis functions to be used for multivariate nonparametric
#' regression step where multivariate adaptive regression splines is used.
#' @param MinRsq Terminating condition for multivariate regression step based on change in Rsq upon addition of new basis functions.
#' @return \code{fda} returns an object of class \code{FLLDA}
#' @examples
#' deeptbl <- FLTable("tblIrisDeep", "ObsID", "VarID", "Num_Val")
#' flmod <- fda(a~., data = deeptbl)
#' vpred <- predict(flmod);coefficients(flmod);confusion(flmod)
#' Performs flexible discriminant analysis and stores the results in predefined tables. It
#' involves the use of multivariate adaptive regression splines for obtaining a basis
#' transformation of the independent variables and performing the multivariate
#' non-parametric regression step in the Flexible Discriminant Analysis procedure.
#'
#' couldnt be implemented: plot, values, precent.explained
#' (lack of data of discriminant space).
#' @export
fda <- function (formula,data=list(),...) {
UseMethod("fda", data)
}
#' @export
fda.default <- function (formula,data=list(),...) {
if (!requireNamespace("mda", quietly = TRUE)){
stop("mda package needed for mda. Please install it.",
call. = FALSE)
}
else return(mda::fda(formula=formula,data=data,...))
}
#' @export
fda.FLpreparedData <- function(formula, data,MaxMARS = 11, MinRsq = .001 ,...)
{
vcallObject <- match.call()
data <- setAlias(data,"")
return(ldaGeneric(formula=formula,
data=data,
callObject=vcallObject,
MaxMARS = MaxMARS,
MinRsq = MinRsq,
familytype="Flex",
...))
}
#' @export
fda.FLTable <- fda.FLpreparedData
#' @export
fda.FLTableDeep <- fda.FLpreparedData
#' @export
fda.FLTableMD <- fda.FLpreparedData
NULL
##https://www.quora.com/Mathematical-Modeling-How-are-posterior-probabilities-calculated-in-linear-discriminant-analysis
##http://stats.stackexchange.com/questions/134282/relationship-between-svd-and-pca-how-to-use-svd-to-perform-pca
##https://www.analyticsvidhya.com/blog/2015/09/naive-bayes-explained/
#' An S4 class to represent output from Discriminant Analysis on in-database Objects
#'
#' @slot offset column name used as offset
#' @method coefficients FLLDA.
#' @method residuals FLLDA.
#' @method influence FLLDA.
#' @method lm.influence FLLDA.
#' @method plot FLLDA.
#' @method predict FLLDA.
#' @export
setClass(
"FLLDA",
contains="FLRegr",
slots=list(offset="character",
vfcalls="character"))
#' Linear discriminant analysis.
#'
#' \code{lda} performs Linear discriminant analysis on FLTable objects.
#' The DB Lytix function called is FLLDA. Performs Linear discriminant analysis and
#' stores the results in predefined tables.
#'
#' @seealso \code{\link[stats]{lda}} for R reference implementation.
#' @param formula A formula of the form groups ~ x1 + x2 + ... That is, the response is the grouping factor and the right hand side specifies the (non-factor) discriminators.
#' @param data An object of class FLTable or FLTableMD.
#' @return \code{LDA} returns an object of class \code{FLLDA}
#' @examples
#' tbl <- FLTable("tblLDA", "OBSID", "VARID", "NUM_VAL")
#' flmod <- lda(a~. , data = tbl)
#' flmod$scaling, flmod$means;cof <-coefficients(flmod)
#' predict(flmod): Not implemented yet.
#' plot(flmod)
#' @export
lda <- function (formula,data=list(),...) {
UseMethod("lda", data)
}
#' @export
lda.default <- MASS::lda
#' @export
lda.FLpreparedData <- function(formula, data,...)
{
vcallObject <- match.call()
data <- setAlias(data,"")
return(ldaGeneric(formula=formula,
data=data,
callObject=vcallObject,
familytype="lda",
...))
}
#' @export
lda.FLTable <- lda.FLpreparedData
#' @export
lda.FLTableMD <- lda.FLpreparedData
## MDA function
#' Mixture Discriminant Analysis.
#'
#'\code{mda} performs Mixture Discriminant Analysis on FLTable objects.
#' The DB Lytix function called is FLMDA. Performs Mixture Discriminant Analysis and
#' stores the results in predefined tables.
#'
#' limitations: no dollar operator access to means, precent.explained, values, plot((lack of data of discriminant space).
#' @seealso \code{\link[stats]{mda}} for R reference implementation.
#' @param formula A symbolic description of model to be fitted
#' @param data An object of class FLTable or FLTableMD.
#' @param subclasses Number of subclasses.
#' @param iter Maximum number of iterations for expectation maximization.
#' @param init Initialization method for each obs' latent variable Prob(x is in
#' the subclass of class) 1 = Assign weight of 1 to a
#' random subclass of its class; 0 otherwise.
#' @param hypothesis Number of hypotheses to run simultaneously
#' @return \code{mda} returns an object of class \code{FLLDA}
#' @examples
#' deeptbl <- FLTable("tblMDA","ObsID", "VarID", "Num_Val")
#' flmod <- mda(formula = a~.,data = deeptbl)
#' vpred <- predict(flmod); flmod$N
#' FLMDA performs mixed discriminant analysis. For the training data, MDA divides each
#' class into a number of artificial subclasses. It calibrates the mixture of Gaussians
#' and the mixing probability by maximizing the log-likelihood with expectation maximization.
#' @export
mda <- function (formula,data=list(),...) {
UseMethod("mda", data)
}
#' @export
mda.default <- function (formula,data=list(),...) {
if (!requireNamespace("mda", quietly = TRUE)){
stop("nortest package needed for mda. Please install it.",
call. = FALSE)
}
else return(mda::mda(formula=formula,data=data,...))
}
#' @export
mda.FLpreparedData <- function(formula, data,subclasses = 3, iter = 5, init = 1,hypothesis = 5, ...)
{
vcallObject <- match.call()
data <- setAlias(data,"")
return(ldaGeneric(formula=formula,
data=data,
subclasses = subclasses,
iter = iter,
init = init,
hypothesis = hypothesis,
callObject=vcallObject,
familytype="Mixed",
...))
}
#' @export
mda.FLTable <- mda.FLpreparedData
#' @export
mda.FLTableMD <- mda.FLpreparedData
## Generic function for DA.
ldaGeneric <- function(formula,data,
callObject=NULL,
familytype = "",
MaxMARS = MaxMARS,
MinRsq = MinRsq,
subclasses = subclasses,
iter = iter,
init = init,
hypothesis = hypothesis,
matrixtype = "COVAR",
...)
{
prepData <- prepareData.lmGeneric(formula,data,
callObject=callObject,
familytype=familytype,
performNorm=1,
cost = 1,
...)
vclass <- "FLLDA"
for(i in names(prepData))
assign(i,prepData[[i]])
deeptable <- getTableNameSlot(deepx)
functionName <- "FLLDA"
var <- getVariables(deepx)
vinputcols <- list()
vinputcols <- c(vinputcols,
TableName = deeptable,
ObsIDCol = var[[1]],
VarIDCol = var[[2]],
ValueCol = var[[3]])
extra <- NULL
if(familytype == "Flex")
{
functionName = "FLFlexDiscriminant"
vinputcols = c(vinputcols, MaxBasisMARS = MaxMARS, MinDeltaRsqMARS =MinRsq)
}
if (familytype == "Mixed")
{
functionName = "FLMDA"
vinputcols = c(vinputcols, WhereClause = " ",Subclasses = subclasses,Iterations = iter,Initilization = init,Hypothesis = hypothesis)
extra <- subclasses
}
if (familytype == "pca"){
functionName = "FLPCA"
vinputcols = c(vinputcols, WhereClause = " ", GroupBy = 'NULL', MatrixType = matrixtype, TableOutput = 1)
vclass <- "FLPCA"
}
if(familytype %in% c("lda", "Mixed", "Flex"))
vinputcols <- c(vinputcols, NOTE = "")
ret <- sqlStoredProc(connection,
functionName,
pInputParams = vinputcols,
outputParameter = c(OutTable = 'a')
)
return(new(vclass,
formula=formula,
AnalysisID = as.character(ret[1,1]),
table=data,
results=list(call=callObject,
familytype = familytype,
extra = extra),
##AnalysisID = as.character(ret[1,1])),
deeptable=deepx,
mapTable=mapTable,
scoreTable="",
offset=as.character(offset)))
}
#' @export
`$.FLLDA`<-function(object,property){
#parentObject <- deparse(substitute(object))
parentObject <- unlist(strsplit(unlist(strsplit(as.character(sys.call()),"(",fixed=T))[2],",",fixed=T))[1]
var <- getVariables(object@deeptable)
if(property=="scaling"){
if(object@results$familytype == "lda"){
str <- paste0("SELECT VarID, CanID, Num_Val FROM fzzlLDACanCoeff WHERE AnalysisID = ",fquote(object@AnalysisID)," AND CanType = 'Within-Class Can Struct' ORDER BY VarID, CanID ")
df <- sqlQuery(connection ,str)
colnames(df) <- tolower(colnames(df))
nvar <- length(unique(df$canid))
dtf <- as.data.frame(lapply(1:nvar, function(x){
df$num_val[df$canid == x]
}), ncol = nvar)
dtf <- as.matrix.data.frame(dtf)
colnames(dtf) <- paste0("LD",1:nvar)
return(dtf)}
else
{
vstr <- paste0("scaling is only supported by LDA as of now")
return(vstr)}
}
else if (property=="coefficients"){
cof <- coefficients(object)
return(cof)
}
else if (property == "N"){
return(length(object@deeptable@Dimnames[[1]]))
}
else if (property == "call"){
return(object@results$call)}
else if (property == "counts"){
vcount <- sqlQuery(connection, paste0("SELECT ",var[[3]]," AS id, count(*) AS val FROM ",getTableNameSlot(object@deeptable)," WHERE ",var[[2]]," = -1 GROUP BY ",var[[3]]," ORDER BY ",var[[3]]," "))
vect <- vcount$val
names(vect) <- vcount$id
return(vect)
}
else if (property == "means")
{
if(object@results$familytype %in% c("lda", "Mixed"))
{
str <- paste0("SELECT FLMean(d.",var[[3]],") as means, d.",var[[2]]," AS varid, c.val as val FROM ",getTableNameSlot(object@deeptable)," d, (SELECT ",var[[1]]," AS ObsID, ",var[[3]]," AS val FROM tbllda b WHERE b.",var[[2]]," = -1) AS
c WHERE d.",var[[1]]," = c.ObsID AND d.",var[[2]]," <> -1 GROUP BY c.val, d.",var[[2]]," ORDER BY d.",var[[2]],", c.val ")
df <- sqlQuery(connection, str)
var <- unique(df$val)
#browser()
dtf <- t(sapply(var, function(x){df$means[df$val == x]}))
rownames(dtf) <- as.character(var)
colnames(dtf) <- as.character(object@deeptable@Dimnames[[2]][-1])
return(dtf)}
else {
str <- paste0("SELECT * FROM fzzlFDAThetaMeans WHERE AnalysisID = '",object@AnalysisID,"' ORDER BY Col_ID, Row_ID")
dtf <- sqlQuery(connection, str)
colnames(dtf) <- tolower(colnames(dtf))
var <- unique(dtf$col_id)
df <- sapply(var, function(x){
dtf$num_val[dtf$col_id == x]})
rownames(df) <- object$lev
return(df)
}
}
else if (property == "lev"){
level <- sqlQuery(connection, paste0("SELECT DISTINCT ",var[[3]]," AS val FROM ",getTableNameSlot(object@deeptable)," WHERE ",var[[2]]," = -1 ORDER BY ",var[[3]]," "))
return(as.character(level$val))
}
else if (property == "xlevels")
return(NULL)
else if (property == "prior")
{
return(object$counts/object$N)
}
else if (property == "confusion")
{
if(object@results$familytype %in% "Flex")
{
str <- paste0("SELECT * FROM fzzlFDAConfusionMtx WHERE AnalysisID = '",object@AnalysisID,"'ORDER BY PredictedClass, ActualClass ")
dtf <- sqlQuery(connection, str)
colnames(dtf) <- tolower(colnames(dtf))
var <- max(as.integer(object$lev))
df <- as.data.frame(lapply(1:var, function(x){
dtf$num_val[dtf$actualclass == x]
}), ncol = var)
rownames(df) <- 1:var
colnames(df) <- 1:var
return(df)
}
else return(warning("confusion method supported for fda only "))
}
else if(property == "weights")
{
if(object@results$familytype == "Mixed"){
dl <- list()
str <- paste0("SELECT ClassID, COUNT(DISTINCT(ObsID)) FROM fzzlMDAWeight WHERE AnalysisID = '",object@AnalysisID,"' GROUP BY ClassID")
vcount <- sqlQuery(connection, str)
vcol <- object@deeptable@dims[2]-1
vrow <- vcount[,2]
vclass <- 0
dl <- lapply(object$lev, function(x){wtfun(object, nrow = vrow[vcount[,1] == x] ,ncol = vcol, nclass = x)})
names(dl) <- object$lev
return(dl)
}
}
}
#' @export
setMethod("names", signature("FLLDA"), function(x) c("scaling", "coefficients", "N",
"call", "counts", "means",
"lev","xlevels","prior",
"confusion","weights" ))
coefficients.FLLDA <- function(object)
{
if( object@results$familytype %in% "lda"){
str <- paste0("SELECT VarID, CanID, Num_Val FROM fzzlLDACanCoeff WHERE AnalysisID = ",fquote(object@AnalysisID)," AND CanType = 'Raw Canonical Coefficients' ORDER BY VarID, CanID ")
df <- sqlQuery(connection ,str)
colnames(df) <- tolower(colnames(df))
nvar <- length(unique(df$canid))
dtf <- data.frame(lapply(1:nvar, function(x){
df$num_val[df$canid == x]
}))
dtf <- as.matrix.data.frame(dtf)
colnames(dtf) <- paste0("LD",1:nvar)
return(dtf)}
else
if (object@results$familytype %in% "Flex")
{
vID <- object@AnalysisID
str <- paste0("SELECT * FROM fzzlFDARegrCoeffs WHERE AnalysisID = '",vID,"' ORDER BY 2,3 ")
df <- sqlQuery(connection, str)
colnames(df) <- tolower(colnames(df))
nvar <- 2
dtf <- data.frame(lapply(1:nvar, function(x){
df$coeffest[df$depvarid == x]
}))
dtf <- as.matrix.data.frame(dtf)
colnames(dtf) <- 1:nvar
rownames(dtf) <- 1:length(dtf[,1])
return(dtf)
}
else
{vstr <- paste0("only computes for LDA or mixed as of now")
return(vstr)}
}
##posterior probablity:https://www.quora.com/Mathematical-Modeling-How-are-posterior-probabilities-calculated-in-linear-discriminant-analysis
## http://sites.stat.psu.edu/~jiali/course/stat597e/notes2/lda.pdf
#' @export
predict.FLLDA <- function(object){
var <- getVariables(object@deeptable)
if(object@results$familytype %in% "Flex")
{
vinputcols <- list()
tblname <- gen_score_table_name("flexscore")
ret <- sqlStoredProc(connection,"FLFlexDiscriminantScore",
TableName = getTableNameSlot(object@deeptable),
ObsIDCol = var[[1]],
VarIDCol = var[[2]],
ValueCol = var[[3]],
WhereClause = "WHERE VarID >= 0",
InAnalysisID = object@AnalysisID,
ScoreTable = tblname,
Note = "",
outputParameter = c(OutTable = 'a')
)
str <- paste0("SELECT PredictedClass AS predicted FROM ",tblname," ORDER BY 1 ")
df <- sqlQuery(connection,str)
rownames(df) <- object@deeptable@Dimnames[[1]]
return(df)}
else if (object@results$familytype %in% "lda"){
str <- paste0("SELECT a.",var[[1]]," , a.",var[[3]]," AS LD1, b.",var[[3]]," AS LD2 FROM fzzlLDACanVariate a, (SELECT * FROM fzzlLDACanVariate) b WHERE a.",var[[1]]," = b.",var[[1]]," AND a.",var[[2]]," = 1 AND b.",var[[2]]," = 2 AND a.",var[[1]]," < 3000 AND a.AnalysisID = b.AnalysisID AND a.AnalysisID = '",object@AnalysisID,"' ORDER BY a.",var[[1]]," ")
##
##str <- paste0("SELECT * FROM fzzlLDACanVariate WHERE AnalysisID = '",object@AnalysisID,"' AND ",var[[1]]," < 100 ORDER BY ",var[[1]]," ")
x <- sqlQuery(connection, str)
post <- NULL
cl <- NULL
list(x = x, class = cl, posterior = post) }
else if (object@results$familytype %in% "Mixed"){
str <- paste0("SELECT ClassID as classid FROM fzzlMDAClassify WHERE AnalysisID = '",object@AnalysisID,"' AND HypothesisID = 1 ORDER BY ClassID")
dtf <- sqlQuery(connection , str)
dtf <- as.integer(dtf$classid)
names(dtf) <- object@deeptable@Dimnames[[1]]
return(dtf)
}
}
#' @export
confusion.FLLDA <- function(object){
if(object@results$familytype %in% "Flex")
return(object$confusion)
else
warning("confusion method only supported for fda")
}
#' @export
plot.FLLDA <- function(object){
if(object@results$familytype %in% "lda")
val <- predict(object)$x
else warning("plot for lda method is only supported for now ")
val <- val[, 2:3]
plot(val)
}
wtfun <- function(object, nrow, ncol, nclass){
tblfunqueryobj <- new("FLTableFunctionQuery",
connectionName = getFLConnectionName(),
variables=list(MATRIX_ID="MATRIX_ID",
rowIdColumn="rowIdColumn",
colIdColumn="colIdColumn",
valueColumn="valueColumn"),
whereconditions="",
order = "",
SQLquery=paste0("SELECT hypothesisid as matrix_id,row_number()over(partition by Subclassid order by obsid ) as rowIdColumn,Subclassid as colIdColumn,weight as valueColumn from fzzlMDAWeight WHERE AnalysisID = '",object@AnalysisID,"' AND ClassID = ",nclass," AND HypothesisID = 5"))
flm <- newFLMatrix(
select= tblfunqueryobj,
dims=c(nrow, ncol),
Dimnames=list(1:nrow, 1:ncol))
return(flm)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.