Nothing
#' @title GEOmetric Density Estimation.
#
#' @description It selects the principal directions of the data and
#' performs inference. Moreover GEODE is also able to handle missing
#' data.
#'
#' @details
#' GEOmetric Density Estimation (rgeode) is a fast algorithm performing
#' inference on normally distributed data. It is essentially
#' divided in two principal steps:
#' \itemize{
#' \item Selection of the principal axes of the data.
#' \item Adaptive Gibbs sampler with the creation of a set of samples from
#' the full conditional posteriors of the parameters of interest,
#' which enable us to perform inference.
#' }
#' It takes in inputs several quantities. A rectangular
#' \eqn{(N,D)} matrix \eqn{Y}, on which we will run a Fast rank
#' \eqn{d} SVD. The conservative upper bound of the true dimension
#' of our data \eqn{d}. A set of tuning parameters. We remark that
#' the choice of the conservative upper bound \eqn{d} must be such
#' that \eqn{d>p}, with \eqn{p} real dimension, and \eqn{d << D}.
#'
#'
#' @param Y array_like \cr
#' a real input matrix (or data frame), with dimensions
#' \eqn{(n, D)}. It is the real matrix of data.
#'
#' @param d int, optional \cr
#' it is the conservative upper bound for the dimension
#' D. We are confident that the real dimension is smaller
#' then it.
#'
#' @param burn int, optional \cr
#' number of burn-in to perform in our Gibbs sampler. It
#' represents also the stopping time that stop the choice
#' of the principal axes.
#'
#' @param its int, optional \cr
#' number of iterations that must be performed after
#' the burn-in.
#'
#' @param tol double, optional \cr
#' threshold for adaptively removing redundant
#' dimensions. It is used compared with the ratio:
#' \eqn{\frac{\alpha_j^2(t)}{\max \alpha_i^2(t)}}.
#'
#' @param atau double, optional \cr
#' The parameter \eqn{a_\tau} of the truncated
#' Exponential (the prior for \eqn{\tau_j}).
#'
#' @param asigma double, optional \cr
#' The shape parameter \eqn{a_\sigma} of the
#' truncated Gamma (the prior for \eqn{\sigma^2}).
#'
#' @param bsigma double, optional \cr
#' The rate parameter \eqn{b_\sigma} of the
#' truncated Gamma (the prior for \eqn{\sigma^2}).
#'
#'
#' @param starttime int, optional \cr
#' starting time for adaptive pruning. It must be less
#' then the number of burn-in.
#'
#'
#' @param stoptime int, optional \cr
#' stop time for adaptive pruning. It must be less
#' then the number of burn-in.
#'
#' @param fast bool, optional \cr
#' If \eqn{TRUE} it is run using fast d-rank SVD.
#' Otherwise it uses the classical SVD.
#'
#' @param c0 double, optional \cr
#' Additive constant for the exponent of the pruning step.
#'
#' @param c1 double, optional \cr
#' Multiplicative constant for the exponent of the pruning step.
#'
#' @return \code{rgeode} returns a list containing the following
#' components:
#' \item{InD}{ array_like \cr
#' The chose principal axes.
#'
#' }
#'
#' \item{u}{ matrix \cr
#' Containing the sample from the full conditional
#' posterior of \eqn{u_j}s. We store each
#' iteration on the columns.
#' }
#'
#' \item{tau}{ matrix \cr
#' Containing the sample from the full conditional
#' posterior of \eqn{tau_j}s.
#'
#' }
#'
#' \item{sigmaS}{ array_like \cr
#' Containing the sample from the full conditional
#' posterior of \eqn{sigma}.
#'
#' }
#'
#' \item{W}{ matrix \cr
#' Containing the principal singular vectors.
#'
#' }
#'
#' \item{Miss}{ list \cr
#' Containing all the informations about missing
#' data. If there are not missing data this output
#' is not provide.
#' \itemize{
#' \item{id_m} array \cr
#' It contains the set of rows with missing data.
#' \item{pos_m} list \cr
#' It contains the set of missing data positions for each
#' row with missing values.
#' \item{yms} list \cr
#' The list contained the pseudo-observation substituting our
#' missing data. Each element of the list represents the simulated
#' data for that time.
#' }
#'
#' }
#'
#'
#'
#'
#'
#' @note The part related to the missing data is filled only
#' in the case in which we have missing data.
#'
#'
#' @references
#' \itemize{
#'
#' \item [1] Y. Wang, A. Canale, D. Dunson.
#' "Scalable Geometric Density Estimation" (2016).\cr
#' }
#'
#' @author L. Rimella, \email{lorenzo.rimella@hotmail.it}
#'
#'
#'
#' @examples
#'
#' library(MASS)
#' library(RGeode)
#'
#' ####################################################################
#' # WITHOUT MISSING DATA
#' ####################################################################
#' # Define the dataset
#' D= 200
#' n= 500
#' d= 10
#' d_true= 3
#'
#' set.seed(321)
#'
#' mu_true= runif(d_true, -3, 10)
#'
#' Sigma_true= matrix(0,d_true,d_true)
#' diag(Sigma_true)= c(runif(d_true, 10, 100))
#'
#' W_true = svd(matrix(rnorm(D*d_true, 0, 1), d_true, D))$v
#'
#' sigma_true = abs(runif(1,0,1))
#'
#' mu= W_true%*%mu_true
#' C= W_true %*% Sigma_true %*% t(W_true)+ sigma_true* diag(D)
#'
#' y= mvrnorm(n, mu, C)
#'
#' ################################
#' # GEODE: Without missing data
#' ################################
#'
#' start.time <- Sys.time()
#' GEODE= rgeode(Y= y, d)
#' Sys.time()- start.time
#'
#' # SIGMAS
#' #plot(seq(110,3000,by=1),GEODE$sigmaS[110:3000],ty='l',col=2,
#' # xlab= 'Iteration', ylab= 'sigma^2', main= 'Simulation of sigma^2')
#' #abline(v=800,lwd= 2, col= 'blue')
#' #legend('bottomright',c('Posterior of sigma^2', 'Stopping time'),
#' # lwd=c(1,2),col=c(2,4),cex=0.55, border='black', box.lwd=3)
#'
#'
#' ####################################################################
#' # WITH MISSING DATA
#' ####################################################################
#'
#' ###########################
#' #Insert NaN
#' n_m = 5 #number of data vectors containing missing features
#' d_m = 1 #number of missing features
#'
#' data_miss= sample(seq(1,n),n_m)
#'
#' features= sample(seq(1,D), d_m)
#' for(i in 2:n_m)
#' {
#' features= rbind(features, sample(seq(1,D), d_m))
#' }
#'
#' for(i in 1:length(data_miss))
#' {
#'
#' if(i==length(data_miss))
#' {
#' y[data_miss[i],features[i,][-1]]= NaN
#' }
#' else
#' {
#' y[data_miss[i],features[i,]]= NaN
#' }
#'
#' }
#'
#' ################################
#' # GEODE: With missing data
#' ################################
#' set.seed(321)
#' start.time <- Sys.time()
#' GEODE= rgeode(Y= y, d)
#' Sys.time()- start.time
#'
#' # SIGMAS
#' #plot(seq(110,3000,by=1),GEODE$sigmaS[110:3000],ty='l',col=2,
#' # xlab= 'Iteration', ylab= 'sigma^2', main= 'Simulation of sigma^2')
#' #abline(v=800,lwd= 2, col= 'blue')
#' #legend('bottomright',c('Posterior of sigma^2', 'Stopping time'),
#' # lwd=c(1,2),col=c(2,4),cex=0.55, border='black', box.lwd=3)
#'
#'
#'
#' ####################################################################
#' ####################################################################
#' @useDynLib RGeode
#' @importFrom Rcpp sourceCpp
#' @importFrom Rcpp evalCpp
#' @import stats MASS
#'
#' @export
rgeode <- function(Y, d= 6, burn= 1000, its= 2000, tol= 0.01,
atau= 1/20, asigma= 1/2, bsigma= 1/2,
starttime= NULL, stoptime= NULL, fast= TRUE,
c0= -1, c1= -0.005)
{
#*************************************************************************
#*** Author: L. Rimella <lorenzo.rimella@hotmail.it> ***
#*** ***
#*** Supervisors: M. Beccuti ***
#*** A. Canale ***
#*** ***
#*************************************************************************
if(nargs()==0) stop("Insert at least a matrix.")
if(nargs()> 13) stop("Too many arguments.")
if(is.null(starttime) | is.null(stoptime))
{
if(!is.null(starttime)){warning("starttime will be modified.")}
if(!is.null(stoptime)){warning("stoptime will be modified.")}
starttime= round(0.1* burn)
stoptime= round(0.8* burn)
}
if(c1>0) warning("c1 is positive.")
if(d<6) stop("d is too small.")
if(is.null(d)) warning("d is not specified.")
if(tol>1) warning("The tolerance is so large.")
if(fast!= TRUE & fast!= FALSE) stop("fast is a boolean.")
if(d<0 | burn<0 | its<0 | tol<0 | atau<0 | asigma<0 |
bsigma<0 | starttime<0 | stoptime<0) stop("d, burn, its, tol, atau,
asigma, bsigma, step,
starttime, stoptime must be
positive.")
InD= c()
u= c()
tau= c()
sigmaS= c()
W= c()
mu= c()
if(NA %in% Y | NaN %in% Y)
{
Y[is.na(Y)]= NaN
#print("There are missing values.")
id_m= c()
pos_m= list()
yms= list()
Miss= list("id_m"= id_m, "pos_m"= pos_m, "yms"= yms)
output= list("InD"= InD, "u"= u, "tau"= tau, "sigmaS"= sigmaS,
"W"= W, "mu"= mu, "Miss"= Miss)
outputAlg= rgeode_root_m(Y, d, burn, its, tol, atau, asigma,
bsigma, starttime, stoptime,
fast, c0, c1)
#fill it
output$InD= outputAlg[[1]]
output$u= outputAlg[[2]]
output$tau= outputAlg[[3]]
output$sigmaS= outputAlg[[4]]
output$W= outputAlg[[5]]
output$mu= outputAlg[[6]]
output$Miss$id_m = outputAlg[[7]]
output$Miss$pos_m = outputAlg[[8]]
output$Miss$yms = outputAlg[[9]]
}
else
{
#print("There are not missing values.")
output= list("InD"= InD, "u"= u, "tau"= tau, "sigmaS"= sigmaS,
"W"= W, "mu"= mu)
outputAlg= rgeode_root(Y, d, burn, its, tol, atau, asigma,
bsigma, starttime, stoptime,
fast, c0, c1)
#fill it
output$InD= outputAlg[[1]]
output$u= outputAlg[[2]]
output$tau= outputAlg[[3]]
output$sigmaS= outputAlg[[4]]
output$W= outputAlg[[5]]
output$mu= outputAlg[[6]]
}
return(output)
} # End rgeode
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.