Nothing
################################################################################
##
## R package sgee by Gregory Vaughan, Kun Chen, and Jun Yan
## Copyright (C) 2016-2018
##
## This file is part of the R package sgee.
##
## The R package sgee is free software: You can redistribute it and/or
## modify it under the terms of the GNU General Public License as published
## by the Free Software Foundation, either version 3 of the License, or
## any later version (at your option). See the GNU General Public License
## at <http://www.gnu.org/licenses/> for details.
##
## The R package sgee is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
##
#################################################################################' Coefficient Path summary
#'
#' Function to analyze and summarize a path of coefficent values by
#' comparing them using
#' prediction error on a \"new\" data set (or fold in CV), or the
#' original data set if no comparison data is provided. The best point along
#' the path in terms of the prediction error is identified. All of the
#' prediction errors for each point along the path, the minimum prediction
#' error, and the index of the minimum are returned.
#'
#' The prediction error used is dependent on the input. If the true Beta is
#' not given, then the sum squared error (or MSE; see parameter averaged)
#' in the response is used for gaussian (or non-poisson); for poisson
#' if the scale (or an estimate) is also given, then the sum squared Pearson
#' residuals are used, otherwise the deviance is used. If the true Beta is
#' provided then the sum squared error in the linear predictor is used instead.
#'
#' Furthermore, when true Beta is supplied, additional model selection metrics
#' are produced, including: False Positive Rate, False Discovery Rate,
#' False Negative Rate.
#'
#' The function is provided to allow for model selection; given
#' a path generated by a sgee function, the path can be fed into this function
#' with a testing data set to identify an optimal point along the path.
#' Cross validation can be performed by dividing the original data set
#' into k folds before hand and generating multiple coefficient paths and
#' applying this function to each path generated.
#'
#' @param object Object of class \code{sgee}, from which various path
#' information is pulled.
#' @param newX Design matrix to be used for model testing. It is assumed
#' that \code{newX} does not contain an intercept column. An intercept column
#' is appended by\code{sgee.summary} if an intercept was used to make
#' \code{object}.
#' @param newY Response vector to be used for model testing.
#' @param newOffset Vector of offsets to be used for model testing. Must be
#' same length as newY.
#' @param trueBeta For simulation use; true coefficient values can be provided
#' to get certain metrics.
#' @param trueIntercept For simulation use; true intercept value to be used in
#' conjunction with trueBeta.
#' @param scale Scale value can be passed to allow for standardized error
#' measurements (poisson case only).
#' @param classification A numeric parameter from 0 to 1 indicating
#' cutoff to be used to determine classification rate in Binomial
#' setting. Default is 0.5. Values below 0 indicate that the
#' squared error, in either the observation
#' or the true linear predictor is the trueBeta is given, is to be used
#' instead of the classification rate.
#' @param averaged Logical parameter indicating whether the mean of the
#' total error is to be used; assumed TRUE.
#' @param ... Currently not used.
#'
#' @return A list containing 1) a vector of prediction errors with
#' testing data set, 2) the smallest prediction error found along path,
#' 3) the index of the smallest error, and if the trueBeta parameter is
#' provided the False Positive, False Discovery, and false negative
#' rates, and True positive and False Positive counts
#' at the index of the smallest error, along with the minimum
#' mis-classification and corresponding index, where the mis-classification
#' is the total of the coefficients incorrectly marked as important/unimportant.
#'
#' @author Gregory Vaughan
#' @examples
#'
#' ## Initialize covariate values
#' p <- 50
#' beta <- c(rep(2.4,5),
#' c(1.3, 0, 1.7, 0, .5),
#' rep(0.5,5),
#' rep(0,p-15))
#' groupSize <- 1
#' numGroups <- length(beta)/groupSize
#'
#'
#'
#' trainingData <- genData(numClusters = 50,
#' clusterSize = 4,
#' clusterRho = 0.6,
#' clusterCorstr = "exchangeable",
#' yVariance = 1,
#' xVariance = 1,
#' numGroups = numGroups,
#' groupSize = groupSize,
#' groupRho = 0.3,
#' beta = beta,
#' family = gaussian(),
#' intercept = 1)
#'
#' testingData <- genData(numClusters = 50,
#' clusterSize = 4,
#' clusterRho = 0.6,
#' clusterCorstr = "exchangeable",
#' yVariance = 1,
#' xVariance = 1,
#' numGroups = numGroups,
#' groupSize = groupSize,
#' groupRho = 0.3,
#' beta = beta,
#' family = gaussian(),
#' intercept = 1)
#'
#' coefMat <- see(y = trainingData$y,
#' x = trainingData$x,
#' family = gaussian(),
#' clusterID = trainingData$clusterID,
#' corstr="exchangeable",
#' maxIt = 200,
#' epsilon = .1)
#'
#' analysisResults <- summary(coefMat,
#' newX = testingData$x,
#' newY = testingData$y)
#'
#'
#' @export
summary.sgee <- function(object,
newX = NULL,
newY = NULL,
newOffset = NULL,
trueBeta = NULL,
trueIntercept = NULL,
scale = NULL,
classification = 0.5,
averaged = TRUE,
...){
path <- object$path
interceptPath <- rep(0, nrow(path))
modelingFamily <- object$family
it<- 0
maxIt <- object$maxIt
if(is.null(newX) | is.null(newY)){
cat("\nnewX and/or newY missing; original data used\n")
newX <- object$x
newY <- object$y
offset <- object$offset
} else{
if(!is.null(newOffset)){
if(length(newOffset) != length(newY) & length(newOffset) != 1){
stop("length of newY and newOffset must match")
} else{
offset <- newOffset
}
} else{
offset <- 0
}
}
addIntercept <- object$intercept
if(addIntercept){
interceptPath <- path[,1]
path <- path[,-1]
#newX <- cbind(rep(1,nrow(newX)),
# newX)
}
if(!is.null(trueBeta)){
if(is.null(trueIntercept)){
warning("trueIntercept Missing; assuming intercept of zero")
##cat("\n trueIntercept Missing; assuming intercept of zero \n")
trueIntercept <- 0
}
}
## prediction measure used for comparison
minMeasure <- Inf
bestIndex <- 0
allMeasures <- rep(0, maxIt)
if(!is.null(trueBeta)){
FP <- 0
FD <- 0
FN <- 0
TPCount <- 0
FPCount <- 0
minMisClass <- Inf
bestMisClassIndex <- 0
allMisClass <- rep(0, maxIt)
}
if(!is.null(trueBeta)){
##if(addIntercept){
## trueEta <- newX %*% c(trueIntercept, trueBeta) + offset
##} else{
trueEta <- trueIntercept + newX %*% trueBeta + offset
##}
}
while(it < maxIt){
it <- it + 1
## score measurement of path
## supposed to generate new X and Y to see how each
## step along path does in terms of some score equation
## or when doing cross validation, instead of using
## new X and Y, the remaining fold can be supplied
eta <- interceptPath[it] + newX%*%path[it,] + offset
Yhat <- modelingFamily$linkinv(t(eta))
if(is.null(trueBeta)){
if(modelingFamily$family == "poisson"){
if(is.null(scale)){
## If no scale is provided, then the deviance will be used
modelLogLikelihood <- sum(stats::dpois(x = newY,
lambda = Yhat,
log = TRUE))
saturatedLogLikelihood <- sum(stats::dpois(x = newY,
lambda = newY,
log = TRUE))
measure <- -2*(modelLogLikelihood - saturatedLogLikelihood)
} else{
if(!((length(scale[it,]) == 1) | (length(scale[it,]) == length(Yhat)))){
stop("scale parameter provided is not appropriately sized; must have a either a length of 1 or length(newY)")
}
standardErrorSquared <- (newY - Yhat)^2/(modelingFamily$variance(Yhat)*scale[it,])
measure <- sum(standardErrorSquared)
}
}else if(classification >=0 & modelingFamily$family == "binomial"){
error <- newY - (Yhat > classification)
measure <- sum(abs(error))
}else{ ## non-poisson/non-classification case
error <- newY - Yhat
measure <- sum(error^2)
}
} else{
if(classification >=0 & modelingFamily$family == "binomial"){
error <- newY - (Yhat > classification)
measure <- sum(abs(error))
} else{
## If true beta is given, then theMSe in terms of the linear
## predictor is given
error <- trueEta - eta
measure <- sum(error^2)
}
}
allMeasures[it] <- measure
if(measure < minMeasure){
bestIndex <- it
minMeasure <- measure
if(!is.null(trueBeta)){
TPCount <- sum((trueBeta != 0) & (path[bestIndex,] != 0))
FPCount <- sum((trueBeta == 0) & (path[bestIndex,] != 0))
FP <- FPCount/sum(trueBeta == 0)
FD <- sum((trueBeta == 0) & (path[bestIndex,] != 0))/sum(path[bestIndex,] != 0)
FN <- sum((trueBeta != 0) & (path[bestIndex,] == 0))/sum(trueBeta != 0)
}
}
if(!is.null(trueBeta)){
misClass <- sum((trueBeta!=0) != (path[it,] !=0))
allMisClass[it] <- misClass
if(measure < minMeasure){
bestMisClassIndex <- it
minMisClass <- misClass
}
}
}
## if(modelingFamily$family == "binomial"){
## minMeasure <- 1-minMeasure
## }
if(averaged){
allMeasures <- allMeasures/nrow(newX)
minMeasure <- minMeasure/nrow(newX)
}
if(!is.null(trueBeta)){
summaryResults <-list(sgee = object,
intercept = addIntercept,
allMeasures = allMeasures,
minMeasure = minMeasure,
bestIndex = bestIndex,
trueBeta = trueBeta,
FP = FP,
FD = FD,
FN = FN,
TPCount = TPCount,
FPCount = FPCount,
allMisClass = allMisClass,
minMisClass = minMisClass,
bestMisClassIndex = bestMisClassIndex)
} else{
summaryResults <- list(sgee = object,
intercept = addIntercept,
allMeasures = allMeasures,
minMeasure = minMeasure,
bestIndex = bestIndex,
trueBeta = NULL)
}
class(summaryResults) <- "sgeeSummary"
summaryResults$call <- object$call
summaryResults
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.