# Copyright (C) 2016-2017 Ren-Huai Huang <huangrenhuai@gmail.com>
#
# This file is part of relvm.
#
# relvm is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# relvm is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with relvm. If not, see <http://www.gnu.org/licenses/>.
#' Random Effect Latent Variable Model By Non Adaptive Quadrature Approximation
#'
#' Estimate multiple groups of the random effect latent variable model by gauss
#' hermite quadrature.
#'
#'
#' @param object A mstbl object.
#' @param groups A vector of measure group names. The default is NULL, in which
#' case a vector of all groups will be generated accordingly.
#' @param fit A list of fitting parameters. \itemize{ \item qpoints: The number
#' of the quadrature points. \item init: Initial values for mu, fl, and err
#' term in a list. fl is the factor loading. They will be initialized
#' generally if it is null (the code initializes mu, fl, and err to 0.5
#' each). \item predict: The default is TRUE. \item adaptive: noad, no
#' adaptive or ad, use adaptive.}
#'
#' @return A list of S3 objects of class "relvm" with estimated parameters.
#' @seealso \code{\link{mstbl}}
#' @importFrom pracma hessian
#'
#' @export
#'
# Fit the random-effect latent variable model for every measure group by
# non-adaptive Gauss-Hermite quadrature, then assemble per-group predictions,
# parameter estimates, and a hospital summary score. If `file` names an
# existing RDS cache, the cached result is returned instead of refitting.
relvm_noad <- function(object,groups=NULL,fit=list(qpoints=30,init=NULL,predict=TRUE),file = NULL) {
if (!is.null(file) && file.exists(file)) {
# A cached fit exists on disk: load it and skip all computation below.
allout <- readRDS(file)
} else {
# -------------------------------------------------------
# Merge the standardized measure-score table and the weight table on the
# hospital id column "ccnid" (full outer join keeps all hospitals).
alldf <- merge(x=object$mstbl_std, y=object$wtbl, by="ccnid", all=TRUE)
# Check & update "groups": keep only requested group names that actually
# occur in the measure table; default to fitting every group.
mtbl <- create_measure_tbl(alldf)
all_groups <- unique(mtbl$group)
if (is.null(groups)) {
groups <- all_groups
} else if (any(groups %in% all_groups)) {
# Silently drop any requested names that do not match a known group.
groups <- groups[groups %in% all_groups]
} else stop("The group name do not match.")
# Fit control: fill in any fitting parameters the caller omitted with the
# defaults below, without overwriting values the caller did supply.
fit_default <- list(qpoints = 30,init=NULL,predict=TRUE,adaptive=c("noad","ad"))
extra_default <- fit_default[!(names(fit_default) %in% names(fit))]
fit[names(extra_default)] <- extra_default
qpoints = fit[["qpoints"]]
init = fit[["init"]]
predict = fit[["predict"]]
adaptive= fit[["adaptive"]][1]   # first entry wins; "noad" unless overridden
# ------------------------------------------------------------------#
start_time = Sys.time()
cat(sprintf("Fitting started at: %-15s\n",start_time))
# Fit each group independently via relvm_single_noad. simplify=FALSE keeps
# the result as a named list (one "relvm" fit per group).
# snowfall::sfInit(parallel=TRUE,cpus=2);snowfall::sfExportAll()
# snowfall::sfExport(create_measure_tbl)
allout <- sapply(groups, relvm_single_noad, df=alldf, qpoints=qpoints,
init = init, predict = predict, adaptive=adaptive,simplify = FALSE)
# snowfall::sfRemoveAll()
# snowfall::sfStop()
cat("\n","Total time: ", as.character.Date(Sys.time() - start_time),"\n")
# ------------------------------------------------------------------#
# After Relvm:
# Merge the predicted group scores across groups into one wide table,
# one row per hospital, one column per group.
preds <- alldf[,1,drop=FALSE] # take the column "ccnid"
for (group in allout) {preds <- merge(x=preds,y=group$pred,all=TRUE)}
# Strip the "pred_" prefix so columns are named after their group.
colnames(preds) <- gsub("pred_","",colnames(preds))
# Calculate the weighted summary score and attach the report indicator.
hospital_score <- rstarating::sum_score(preds)
hospital_score <- merge.data.frame(x=hospital_score,y=object$report_indicator,
by='ccnid',all.x=TRUE)
# ************
# Oct. 2016 used all hospitals to run the clustering
# Dec. 2017 used only the valid hospitals (report_indicator == 1) to run the clustering.
# hospital_score <- subset(hospital_score, report_indicator == 1)
# Stack factor loadings and other parameter estimates from every group.
pars <- data.frame()
for (group in allout) {pars = rbind(pars,group$par)}
# Collect per-group optim() diagnostics: convergence code, objective value,
# optimizer message, and function/gradient evaluation counts.
convergence<- data.frame(convergence=vapply(allout,function(x) {x$convergence},c(0)))
value <- data.frame(value=vapply(allout, function(x) x$value,c(0)))
message <- data.frame(message=vapply(allout,function(x) x$message,"0"),stringsAsFactors = FALSE)
counts <- t(as.data.frame(vapply(allout, function(x) x$counts,c(0L,0L))))
# Append the combined summary under the reserved element name "groups".
# NOTE(review): a measure group literally named "groups" would be clobbered
# here — confirm that name can never occur upstream.
allout$groups <- structure(list(preds=preds,pars=pars,
summary_score=hospital_score,
counts= as.matrix(counts),
value = as.matrix(value),
message=as.matrix(message),
convergence = as.matrix(convergence)),class="relvm")
# Cache the full result to disk when a file path was supplied.
if (!is.null(file)) {
# check if the file directory exists. Create one if not.
file_dir <- dirname(file)
if (!dir.exists(file_dir)) file_dir_created <- dir.create(file_dir,recursive =TRUE)
saveRDS(allout,file=file)
}
}
# Return the per-group fits plus the "groups" summary, classed "relvms".
structure(allout, class='relvms')
}
# Log-density of the normal distribution — equivalent to
# dnorm(x, mean, sd, log = TRUE) without the overhead of the stats function.
dnorm2 <- function(x, mean = 0, sd = 1) {
    z <- (x - mean) / sd
    -0.5 * log(2 * pi) - log(sd) - 0.5 * z^2
}
#
#' Estimation Of The Random Effect Latent Variable Model Parameters
#'
#' Estimate the random effect latent variable model
#'
#' @param mstbl_std The standardized measure score table.
#' @param wts_tbl The measure score weight table.
#' @param qpoints The number of the quadrature points.
#' @param init Initial values for mu, fl, and err term in a list. fl is the
#' factor loading. They will be initialized generally if it is null (the
#' code initializes mu, fl, and err to 0.5 each).
#' @param predict The default is TRUE.
#'
#' @return An object of S3 class "relvm" with estimated parameters.
#'
# Fit the random-effect latent variable model for ONE measure group.
# Extracts the group's score/weight columns, builds the Gauss-Hermite rule,
# minimizes the negative log-likelihood (venll11m) with L-BFGS-B, and
# returns the optim() result augmented with formatted parameters,
# predictions, initial values, and the input tables, classed "relvm".
relvm_single_noad <- function(group, df, qpoints,init,predict,adaptive) {
# -------------------------------------------------------#
# Prepare to fit
# start of the cycle — timestamp for the elapsed-time report at the end.
start_time = Sys.time()
cat(adaptive,"-",qpoints,"qpts",": ")
cat(sprintf("Fitting: %-15s =>",group))
# Subset the merged table down to this group's score and weight columns.
subdat <- sub1group(group,df)
mstbl_std <- as.matrix(subdat$mstbl_std)
wts_tbl <- as.matrix(subdat$wtbl)
# Setup and initialize the parameters: one mu, fl (factor loading), and
# err per measure column, all starting at 0.5 unless the caller supplied init.
nc <- ncol(mstbl_std);
if (is.null(init)) {
init <- unlist(list(mu = rep(0.5, nc),
fl = rep(0.5, nc),
err = rep(0.5, nc)))}
# Gauss-Hermite nodes (cc$x) and weights (cc$w); drop nodes with weights
# too small to contribute (< 1e-36) to avoid wasted work and underflow.
cc <- pracma::gaussHermite(qpoints);
ccidx <- cc$w>1e-36;
cc$w = cc$w[ccidx];
cc$x = cc$x[ccidx];
cc_len=length(cc$x);
#--------------------------------------------------------#
# Fit the function: minimize the negative log-likelihood. Extra arguments
# after `hessian` are passed through optim's `...` to venll11m.
fit <- optim(par = init, # Model parameter
fn = venll11m, # venll11m, # Estimation function
gr = NULL,
method = "L-BFGS-B",
control = list(maxit=1000), # set factr=1e-8
hessian = FALSE,
score = mstbl_std,
wts = wts_tbl,
cc = cc,
qpoints = cc_len)
#--------------------------------------------------------#
# Output the fitting
#
# Format fit$par: reshape the flat named parameter vector into a data frame
# with one row per measure (columns: name, fl, mu, err).
theta_names <- names(fit$par);
fit$par <- data.frame(name = names(subdat$mstbl_std),
fl = fit$par[grepl("fl", theta_names)],
mu = fit$par[grepl("mu", theta_names)],
err= fit$par[grepl("err",theta_names)],
row.names=NULL)
# Prediction: latent group scores (and their standard errors) per hospital,
# with column names suffixed by the group name.
if (identical(predict, TRUE)) {
pred_out <- relvm:::pred(mstbl_std,wts_tbl,pms=fit$par);
colnames(pred_out)<- paste(colnames(pred_out),group,sep="_")
pred_out <- cbind(subdat$ccnid,pred_out)
fit$pred <- pred_out[,1:2]    # ccnid + predicted score
#
fit$stderr <- pred_out[,c(1,3)]  # ccnid + standard error
}
# Add three fields to the output: the initial values (as a data frame) and
# the score/weight tables used for the fit, each keyed by ccnid.
init_names <- names(init);
fit$init <- data.frame(name = names(subdat$mstbl_std),
fl = init[grepl("fl", init_names)],
mu = init[grepl("mu", init_names)],
err= init[grepl("err",init_names)], row.names=NULL)
fit$mstbl_std = cbind(subdat$ccnid,mstbl_std)
fit$wtbl = cbind(subdat$ccnid,wts_tbl)
# Report elapsed time and the optimizer's message, then return.
cat(" : ", as.character.Date(Sys.time() - start_time))
cat(", ",fit$message, "\n")
structure(fit,class="relvm")
}
# Objective function: negative weighted log-likelihood of the latent variable
# model, with the latent-factor integral approximated by Gauss-Hermite
# quadrature. Vectorized over hospitals/quadrature points via 3-D arrays.
# NOTE(review): unlike venll11m, this variant does not take abs(err) before
# using it as a standard deviation — confirm whether the optimizer can reach
# negative err values through this function.
venll12 <- function(par,score,wts,cc,qpoints) {
# Reconstruction of the parameters from the flat named vector.
nr <- nrow(score); nc <- ncol(score)
mu <- par[grepl("mu", names(par))] #
fl <- par[grepl("fl", names(par))] # factor loading
err <- par[grepl("err", names(par))]
# fv matrix: quadrature nodes rescaled by sqrt(2) (change of variables for
# the standard-normal latent factor).
fv <- 1.4142135623730951 * cc$x
# Weighted log likelyhood
# 3D array: dimensions (measure, quadrature point, hospital); aperm moves
# the hospital dimension last so colSums reduces over measures.
wts_arr <- aperm(array(wts, dim=c(nr,nc,qpoints)),c(2,3,1))
score_arr <- aperm(array(score,dim=c(nr,nc,qpoints)),c(2,3,1))
means_arr <-array(mu+c(fl %o% fv), dim=c(nc,qpoints,nr))
wll_mtx <- colSums(wts_arr * dnorm2(score_arr, mean=means_arr, sd = err),na.rm=TRUE)
# Gaussian quadrature integral approximation, per hospital, in log space
# (colLogSumExps avoids underflow of exp()).
# gqi <- log(sum(exp(joint_mtx + log(cc$w) +(cc$x)^2),na.rm=TRUE))
# log(2*pi)/2 = 0.91893853320467267=dnorm_cpp(cc$x * sqrt(2), mean=0,sd=1) +(cc$x)^2;
# log(sqrt(2))=0.3465735902799727
gqi <- matrixStats::colLogSumExps(log(cc$w) + wll_mtx - 0.91893853320467267,na.rm=TRUE)
# Return the NEGATIVE total log-likelihood (optim minimizes).
-sum(gqi +0.3465735902799727, na.rm=TRUE)
}
# Objective function actually used by relvm_single_noad: negative weighted
# log-likelihood with the latent-factor integral approximated by (non-adaptive)
# Gauss-Hermite quadrature. Same model as venll12, but written with an
# explicit joint density and a per-hospital scaling vector `coefs` that the
# adaptive variant would replace (see the commented-out line below).
venll11m <- function(par,score,wts,cc,qpoints) {
# Reconstruction of the parameters from the flat named vector.
nr <- nrow(score); nc <- ncol(score)
mu <- par[grepl("mu", names(par))] #
fl <- par[grepl("fl", names(par))] # factor loading
err <- par[grepl("err", names(par))]
# Guard against the optimizer proposing negative standard deviations.
err <- abs(err)
# 2nd derivative — adaptive scaling (disabled in this non-adaptive variant):
# coefs <- sqrt(2/(rowSums((fl^2 * wts/err^2), na.rm = TRUE) + 1))
coefs <- rep(sqrt(2),nr)   # fixed sqrt(2): plain change of variables
# fv matrix: quadrature nodes scaled per hospital (qpoints x nr).
fv_mtx <- cc$x %o% coefs
# 3D array: dimensions (measure, quadrature point, hospital); aperm moves
# the hospital dimension last so colSums reduces over measures.
wts_arr <- aperm(array(wts, dim=c(nr,nc,qpoints)),c(2,3,1))
score_arr <- aperm(array(score,dim=c(nr,nc,qpoints)),c(2,3,1))
means_arr <- array(mu, dim=c(nc,qpoints,nr)) + fl %o% fv_mtx
# Weighted log likelyhood, summed over measures per (qpoint, hospital).
wll_mtx <- colSums(wts_arr * dnorm2(score_arr, mean=means_arr, sd = err),na.rm=TRUE)
# Joint probability: add the log-density of the standard-normal latent factor.
joint_mtx <- wll_mtx + dnorm2(fv_mtx, mean=0,sd=1)
# Gaussian quadrature integral approximation per hospital, in log space;
# the +(cc$x)^2 term cancels the exp(-x^2) factor baked into the GH weights.
gqi <- matrixStats::colLogSumExps(joint_mtx + log(cc$w) +(cc$x)^2,na.rm=TRUE)
# Return the NEGATIVE total log-likelihood (optim minimizes).
-sum(log(coefs)+gqi, na.rm=TRUE)
}
# NOTE(review): stray website-embed boilerplate ("Add the following code to
# your website...") removed — it was not R code and broke parsing.