R/simul_data_UniYX_beta.R

Defines functions simul_data_UniYX_beta

Documented in simul_data_UniYX_beta

#' Data generating function for univariate beta plsR models
#' 
#' This function generates a single univariate rate response value \eqn{Y} and
#' a vector of explanatory variables \eqn{(X_1,\ldots,X_{totdim})} drawn from a
#' model with a given number of latent components.
#' 
#' This function should be combined with the replicate function to give rise to
#' a larger dataset. The algorithm used is a modification of a port of the one
#' described in the article of Li which is a multivariate generalization of the
#' algorithm of Naes and Martens.
#' 
#' @param totdim Number of columns of the X vector (from \code{ncomp} to
#' hardware limits)
#' @param ncomp Number of latent components in the model (from 2 to 6)
#' @param disp Tune the shape of the beta distribution (defaults to 1)
#' @param link Character specification of the link function in the mean model
#' (mu). Currently, "\code{logit}", "\code{probit}", "\code{cloglog}",
#' "\code{cauchit}", "\code{log}", "\code{loglog}" are supported.
#' Alternatively, an object of class "link-glm" can be supplied.
#' @param type Simulation scheme
#' @param phi0 Simulation scheme "a" parameter
#' @return \item{vector}{\eqn{(Y,X_1,\ldots,X_{totdim})}}
#' @author Frédéric Bertrand\cr
#' \email{frederic.bertrand@@utt.fr}\cr
#' \url{https://fbertran.github.io/homepage/}
#' @seealso \code{\link[plsRglm]{simul_data_UniYX}}
#' @references Frédéric Bertrand, Nicolas Meyer,
#' Michèle Beau-Faller, Karim El Bayed, Izzie-Jacques Namer,
#' Myriam Maumy-Bertrand (2013). Régression Bêta
#' PLS. \emph{Journal de la Société Française de Statistique},
#' \bold{154}(3):143-159.
#' \url{http://publications-sfds.math.cnrs.fr/index.php/J-SFdS/article/view/215}
#' 
#' T. Naes, H. Martens (1985). Comparison of prediction methods for
#' multicollinear data.  \emph{Commun. Stat., Simul.}, \bold{14}:545-576.
#' <doi:10.1080/03610918508812458>
#' 
#' Baibing Li, Julian Morris, Elaine B. Martin (2002). Model selection for
#' partial least squares regression, \emph{Chemometrics and Intelligent
#' Laboratory Systems}, \bold{64}:79-89. <doi:110.1016/S0169-7439(02)00051-5>
#' @keywords datagen utilities
#' @examples
#' 
#' \donttest{
#' # logit link
#' layout(matrix(1:4,nrow=2))
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4)))[,1])
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4,disp=3)))[,1])
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4,disp=5)))[,1])
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4,disp=15)))[,1])
#' layout(1)
#' 
#' # probit link
#' layout(matrix(1:4,nrow=2))
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4,link="probit")))[,1])
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4,disp=3,link="probit")))[,1])
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4,disp=5,link="probit")))[,1])
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4,disp=15,link="probit")))[,1])
#' layout(1)
#' 
#' # cloglog link
#' layout(matrix(1:4,nrow=2))
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4,link="cloglog")))[,1])
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4,disp=3,link="cloglog")))[,1])
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4,disp=5,link="cloglog")))[,1])
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4,disp=15,link="cloglog")))[,1])
#' layout(1)
#' 
#' # cauchit link
#' layout(matrix(1:4,nrow=2))
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4,link="cauchit")))[,1])
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4,disp=3,link="cauchit")))[,1])
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4,disp=5,link="cauchit")))[,1])
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4,disp=15,link="cauchit")))[,1])
#' layout(1)
#' 
#' # loglog link
#' layout(matrix(1:4,nrow=2))
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4,link="loglog")))[,1])
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4,disp=3,link="loglog")))[,1])
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4,disp=5,link="loglog")))[,1])
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4,disp=15,link="loglog")))[,1])
#' layout(1)
#' 
#' # log link
#' layout(matrix(1:4,nrow=2))
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4,link="log")))[,1])
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4,disp=3,link="log")))[,1])
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4,disp=5,link="log")))[,1])
#' hist(t(replicate(100,simul_data_UniYX_beta(4,4,disp=15,link="log")))[,1])
#' layout(1)
#' }
#' 
simul_data_UniYX_beta <- function(totdim,ncomp,disp=1,link="logit",type="a",phi0=20) {
                    
if (is.character(link)) {
    linkstr <- link
    if (linkstr != "loglog") {
        linkobj <- make.link(linkstr)
    }
    else {
         linkobj <- structure(list(linkfun = function(mu) -log(-log(mu)), 
                linkinv = function(eta) pmax(pmin(exp(-exp(-eta)), 
                  1 - .Machine$double.eps), .Machine$double.eps), 
                mu.eta = function(eta) {
                  eta <- pmin(eta, 700)
                  pmax(exp(-eta - exp(-eta)), .Machine$double.eps)
                }, valideta = function(eta) TRUE, name = "loglog"), 
                class = "link-glm")
        }
    }
    else {
        linkobj <- link
        linkstr <- link$name
    }

dimok <- FALSE
varsR <- c(10,8,6,4,2,1/2) 
varepsilon <- .01 
varsF <- c(.25,.125,.05,.0125,.005,.00125) 

if(totdim==1){stop("'totdim' must be > 1")}
if(ncomp==1){stop("'ncomp' must be > 1")}

if(totdim==2){
dimok <- TRUE
if(!(ncomp %in% 1:totdim)){
cat(paste("ncomp must be <= ",totdim,"\n"))
cat(paste("ncomp was set to ",totdim,"\n"))
ncomp <- totdim
}
ksi1 <- c(1,1)/sqrt(2)
ksi2 <- c(1,-1)/sqrt(2)
ksi <- cbind(ksi1,ksi2)[1:totdim,1:ncomp]
}

if(totdim==3){
dimok <- TRUE
if(!(ncomp %in% 1:totdim)){
cat(paste("ncomp must be <= ",totdim,"\n"))
cat(paste("ncomp was set to ",totdim,"\n"))
ncomp <- totdim
}
ksi1 <- c(1,1,1)/sqrt(3)
ksi2 <- c(-1/2,-1/2,1)/sqrt(3/2)
ksi3 <- c(-1,1,0)/sqrt(2)
ksi <- cbind(ksi1,ksi2,ksi3)[1:totdim,1:ncomp]
}

if(totdim==4){
dimok <- TRUE
if(!(ncomp %in% 1:totdim)){
cat(paste("ncomp must be <= ",totdim,"\n"))
cat(paste("ncomp was set to ",totdim,"\n"))
ncomp <- totdim
}
ksi1 <- c(1,1,1,1)/2
ksi2 <- c(1,-1,1,-1)/2
ksi3 <- c(1,1,-1,-1)/2
ksi4 <- c(1,-1,-1,1)/2
ksi <- cbind(ksi1,ksi2,ksi3,ksi4)[1:totdim,1:ncomp]
}

if(totdim==5){
dimok <- TRUE
if(!(ncomp %in% 1:totdim)){
cat(paste("ncomp must be <= ",totdim,"\n"))
cat(paste("ncomp was set to ",totdim,"\n"))
ncomp <- totdim
}
ksi1 <- c(1,1,1,1,1)/sqrt(5)
ksi2 <- c(1,-1,1,-1,0)/2
ksi3 <- c(1,1,-1,-1,0)/2
ksi4 <- c(1,-1,-1,1,0)/2
ksi5 <- c(1/4,1/4,1/4,1/4,-1)/sqrt(5)*2
ksi <- cbind(ksi1,ksi2,ksi3,ksi4,ksi5)[1:totdim,1:ncomp]
}

if((totdim %% 6 == 0)&(totdim>=6)){
dimok <- TRUE
if(!(ncomp %in% 1:6)){
cat(paste("ncomp must be <= ",6,"\n"))
cat(paste("ncomp was set to ",6,"\n"))
ncomp <- 6
}
ksi1 <- rep(c(1,1,1,1,1,1),totdim/6)/sqrt(totdim)
ksi2 <- rep(c(-1/2,-1/2,1,-1/2,-1/2,1),totdim/6)/sqrt(totdim/2)
ksi3 <- rep(c(-1,1,0,-1,1,0),totdim/6)/sqrt(2*totdim/3)
ksi4 <- rep(c(1,1,1,-1,-1,-1),totdim/6)/sqrt(totdim)
ksi5 <- rep(c(-1/2,-1/2,1,1/2,1/2,-1),totdim/6)/sqrt(totdim/2)
ksi6 <- rep(c(-1,1,0,1,-1,0),totdim/6)/sqrt(2*totdim/3)
ksi <- cbind(ksi1,ksi2,ksi3,ksi4,ksi5,ksi6)[1:totdim,1:ncomp]
}

if((totdim %% 6 == 1)&(totdim>=6)){
dimok <- TRUE
if(!(ncomp %in% 1:6)){
cat(paste("ncomp must be <= ",6,"\n"))
cat(paste("ncomp was set to ",6,"\n"))
ncomp <- 6
}
ksi1 <- c(1,1,1,1,1,1,1,rep(c(1,1,1,1,1,1),(totdim-1)/6-1))/sqrt(totdim)
ksi2 <- c(-1/2,-1/2,1,1,-1,1,-1,rep(c(-1/2,-1/2,1,-1/2,-1/2,1),(totdim-1)/6-1))/sqrt(3*((totdim-1)/6-1)+11/2)
ksi3 <- c(-1,1,0,1,1,-1,-1,rep(c(-1,1,0,-1,1,0),(totdim-1)/6-1))/sqrt(4*((totdim-1)/6-1)+6)
ksi4 <- c(-8/11,-8/11,16/11,-6/11,6/11,-6/11,6/11,rep(c(1,1,1,-1,-1,-1),(totdim-1)/6-1))/sqrt(6*((totdim-1)/6-1)+4*sqrt(33)/11)
ksi5 <- c(2/3,-2/3,0,1/3,1/3,-1/3,-1/3,rep(c(-1/2,-1/2,1,1/2,1/2,-1),(totdim-1)/6-1))/sqrt(3*((totdim-1)/6-1)+2*sqrt(3)/3)
ksi6 <- c(0,0,0,1,-1,-1,1,rep(c(-1,1,0,1,-1,0),(totdim-1)/6-1))/sqrt(4*((totdim-1)/6-1)+4)
ksi <- cbind(ksi1,ksi2,ksi3,ksi4,ksi5,ksi6)[1:totdim,1:ncomp]
}

if((totdim %% 6 == 2)&(totdim>=6)){
dimok <- TRUE
if(!(ncomp %in% 1:6)){
cat(paste("ncomp must be <= ",6,"\n"))
cat(paste("ncomp was set to ",6,"\n"))
ncomp <- 6
}
ksi1 <- c(1,1,1,1,1,1,1,1,rep(c(1,1,1,1,1,1),(totdim-2)/6-1))/sqrt(totdim)
ksi2 <- c(1,-1,1,-1,1,-1,1,-1,rep(c(-1/2,-1/2,1,-1/2,-1/2,1),(totdim-2)/6-1))/sqrt(3*((totdim-2)/6-1)+8)
ksi3 <- c(1,1,-1,-1,1,1,-1,-1,rep(c(-1,1,0,-1,1,0),(totdim-2)/6-1))/sqrt(4*((totdim-2)/6-1)+8)
ksi4 <- c(1,-1,-1,1,1,-1,-1,1,rep(c(1,1,1,-1,-1,-1),(totdim-2)/6-1))/sqrt(6*((totdim-2)/6-1)+8)
ksi5 <- c(1,1,1,1,-1,-1,-1,-1,rep(c(-1/2,-1/2,1,1/2,1/2,-1),(totdim-2)/6-1))/sqrt(3*((totdim-2)/6-1)+8)
ksi6 <- c(1,-1,1,-1,-1,1,-1,1,rep(c(-1,1,0,1,-1,0),(totdim-2)/6-1))/sqrt(4*((totdim-2)/6-1)+8)
ksi <- cbind(ksi1,ksi2,ksi3,ksi4,ksi5,ksi6)[1:totdim,1:ncomp]
}

if((totdim %% 6 == 3)&(totdim>=6)){
dimok <- TRUE
if(!(ncomp %in% 1:6)){
cat(paste("ncomp must be <= ",6,"\n"))
cat(paste("ncomp was set to ",6,"\n"))
ncomp <- 6
}
ksi1 <- c(1,1,1,1,1,1,1,1,1,rep(c(1,1,1,1,1,1),(totdim-3)/6-1))/sqrt(totdim)
ksi2 <- c(-1/2,-1/2,1,-1/2,-1/2,1,-1/2,-1/2,1,rep(c(-1/2,-1/2,1,-1/2,-1/2,1),(totdim-3)/6-1))/sqrt(3*((totdim-3)/6-1)+9/2)
ksi3 <- c(-1,1,0,-1,1,0,-1,1,0,rep(c(-1,1,0,-1,1,0),(totdim-3)/6-1))/sqrt(4*((totdim-3)/6-1)+6)
ksi4 <- c(-1/2,-1/2,-1/2,-1/2,-1/2,-1/2,1,1,1,rep(c(1,1,1,-1,-1,-1),(totdim-3)/6-1))/sqrt(6*((totdim-3)/6-1)+9/2)
ksi5 <- c(1/4,1/4,-1/2,1/4,1/4,-1/2,-1/2,-1/2,1,rep(c(-1/2,-1/2,1,1/2,1/2,-1),(totdim-3)/6-1))/sqrt(3*((totdim-3)/6-1)+9/4)
ksi6 <- c(1/2,-1/2,0,1/2,-1/2,0,-1,1,0,rep(c(-1,1,0,1,-1,0),(totdim-3)/6-1))/sqrt(4*((totdim-3)/6-1)+3)
ksi <- cbind(ksi1,ksi2,ksi3,ksi4,ksi5,ksi6)[1:totdim,1:ncomp]
}

if((totdim %% 6 == 4)&(totdim>=6)){
dimok <- TRUE
if(!(ncomp %in% 1:6)){
cat(paste("ncomp must be <= ",6,"\n"))
cat(paste("ncomp was set to ",6,"\n"))
ncomp <- 6
}
ksi1 <- c(1,1,1,1,1,1,1,1,1,1,rep(c(1,1,1,1,1,1),(totdim-4)/6-1))/sqrt(totdim)
ksi2 <- c(1,-1,1,-1,0,1,-1,1,-1,0,rep(c(-1/2,-1/2,1,-1/2,-1/2,1),(totdim-4)/6-1))/sqrt(3*((totdim-4)/6-1)+8)
ksi3 <- c(1,1,-1,-1,0,1,1,-1,-1,0,rep(c(-1,1,0,-1,1,0),(totdim-4)/6-1))/sqrt(4*((totdim-4)/6-1)+8)
ksi4 <- c(1,-1,-1,1,0,1,-1,-1,1,0,rep(c(1,1,1,-1,-1,-1),(totdim-4)/6-1))/sqrt(6*((totdim-4)/6-1)+8)
ksi5 <- c(1/4,1/4,1/4,1/4,-1,1/4,1/4,1/4,1/4,-1,rep(c(-1/2,-1/2,1,1/2,1/2,-1),(totdim-4)/6-1))/sqrt(3*((totdim-4)/6-1)+5/2)
ksi6 <- c(1,1,1,1,1,-1,-1,-1,-1,-1,rep(c(-1,1,0,1,-1,0),(totdim-4)/6-1))/sqrt(4*((totdim-4)/6-1)+10)
ksi <- cbind(ksi1,ksi2,ksi3,ksi4,ksi5,ksi6)[1:totdim,1:ncomp]
}

if((totdim %% 6 == 5)&(totdim>=6)){
dimok <- TRUE
if(!(ncomp %in% 1:6)){
cat(paste("ncomp must be <= ",6,"\n"))
cat(paste("ncomp was set to ",6,"\n"))
ncomp <- 6
}
ksi1 <- c(1,1,1,1,1,1,1,1,1,1,1,rep(c(1,1,1,1,1,1),(totdim-5)/6-1))/sqrt(totdim)
ksi2 <- c(1,-1,1,-1,0,1,-1,1,-1,0,0,rep(c(-1/2,-1/2,1,-1/2,-1/2,1),(totdim-5)/6-1))/sqrt(3*((totdim-5)/6-1)+8)
ksi3 <- c(1,1,-1,-1,0,1,1,-1,-1,0,0,rep(c(-1,1,0,-1,1,0),(totdim-5)/6-1))/sqrt(4*((totdim-5)/6-1)+8)
ksi4 <- c(10/11,-12/11,-12/11,10/11,-1/11,10/11,-12/11,-12/11,10/11,-1/11,10/11,rep(c(1,1,1,-1,-1,-1),(totdim-5)/6-1))/sqrt(6*((totdim-5)/6-1)+98/11)
ksi5 <- c(13/196,53/196,53/196,13/196,-53/49,13/196,53/196,53/196,13/196,-53/49,40/49,rep(c(-1/2,-1/2,1,1/2,1/2,-1),(totdim-5)/6-1))/sqrt(3*((totdim-5)/6-1)+325/98)
ksi6 <- c(4/5,62/65,62/65,4/5,77/65,-6/5,-68/65,-68/65,-6/5,-53/65,8/13,rep(c(-1,1,0,1,-1,0),(totdim-5)/6-1))/sqrt(4*((totdim-5)/6-1)+138/13)
ksi <- cbind(ksi1,ksi2,ksi3,ksi4,ksi5,ksi6)[1:totdim,1:ncomp]
}

if(!dimok) {stop("Incorrect value for 'totdim'. 'totdim' must be > 1")}

epsilon <- stats::rnorm(totdim,mean=rep(0,totdim),sd=varepsilon)

r <- stats::rnorm(ncomp,mean=rep(0,ncomp),sd=varsR[1:ncomp])

simX <- r%*%t(ksi)+epsilon


if(ncomp==2) {
HH <- 3
eta21 <- c(1,1,1)/sqrt(3)
eta22 <- c(1,1,1)/sqrt(3)
eta <- cbind(eta21,eta22)
}

if(ncomp==3) {
HH <- 3
eta31 <- c(1,1,1)/sqrt(3)
eta32 <- c(1,1,1)/sqrt(3)
eta33 <- c(1,1,1)/sqrt(3)
eta <- cbind(eta31,eta32,eta33)
}

if(ncomp==4) {
HH <- 4
eta41 <- c(1,1,1,1)/2
eta42 <- c(1,1,1,1)/2
eta43 <- c(1,1,1,1)/2
eta44 <- c(1,1,1,1)/2
eta <- cbind(eta41,eta42,eta43,eta44)
}

if(ncomp==5) {
HH <- 4
eta51 <- c(1,1,1,1)/2
eta52 <- c(1,1,1,1)/2
eta53 <- c(1,1,1,1)/2
eta54 <- c(1,1,1,1)/2
eta55 <- c(1,1,1,1)/2
eta <- cbind(eta51,eta52,eta53,eta54,eta55)
}

if(ncomp==6) {
HH <- 4
eta61 <- c(1,1,1,1)/2
eta62 <- c(1,1,1,1)/2
eta63 <- c(1,1,1,1)/2
eta64 <- c(1,1,1,1)/2
eta65 <- c(1,1,1,1)/2
eta66 <- c(1,1,1,1)/2
eta <- cbind(eta61,eta62,eta63,eta64,eta65,eta66)
}

f <- stats::rnorm(ncomp,mean=rep(0,ncomp),sd=varsF[1:ncomp])
z <- f+r


sigmaScarre <- .001
lambda <- .6

sigmaPsi <- sigmaScarre*((1-lambda)*diag(rep(1,HH))+lambda*rep(1,HH)%*%t(rep(1,HH)))

if(!(type %in% c("a","b"))){stop}
if(type=="a"){
linearYbeta <- ((z%*%t(eta))[1]+sigmaPsi[1,1])/disp
muYbeta <- linkobj$linkinv(linearYbeta)
phiYbeta <- phi0
}
if(type=="b"){
linearYbeta <- (z%*%t(eta))[1]/disp
muYbeta <- linkobj$linkinv(linearYbeta)
phiYbeta <- 1/sigmaPsi[1,1]
}
pYbeta <- muYbeta*phiYbeta
qYbeta <- (1-muYbeta)*phiYbeta
Ybeta <- rbeta(1,pYbeta,qYbeta)

res <- c(Ybeta,simX)
names(res) <- c("Ybeta",paste("X",1:totdim,sep=""))

return(res)
}



  

Try the plsRbeta package in your browser

Any scripts or data that you put into this service are public.

plsRbeta documentation built on March 31, 2023, 11:01 p.m.