R/EM_functions.R

Defines functions R.MLE.memory R.MLE.check R.MLE Copula.Hconfig_gaussian_density gaussian_copula_density EM_calibration_gaussian_memory EM_calibration_gaussian EM_calibration_indep_memory EM_calibration_indep

Documented in Copula.Hconfig_gaussian_density EM_calibration_gaussian EM_calibration_gaussian_memory EM_calibration_indep EM_calibration_indep_memory gaussian_copula_density R.MLE R.MLE.check R.MLE.memory

#' @import utils
## quiets concerns of R CMD check re: the .'s that appear in pipelines
#if(getRversion() >= "2.15.1")  utils::globalVariables(c(".",">"))
if(getRversion() >= "2.15.1")  utils::globalVariables(c(".",":=",".x",">"))

###################################################################
#' EM calibration in the case of conditional independence 
#'
#' @param fHconfig a matrix containing config densities evaluated at each items, each column corresponding to a configurations.
#' @param Prior.init  the initialization of prior probabilities for each of the H-configurations.  
#' @param Precision Precision for the stop criterion. (Default is 1e-6) 
#'
#' @return a vector of estimated prior probabilities for each of the H-configurations.


EM_calibration_indep <- function(fHconfig, Prior.init,Precision=1e-6){
  
  n <- nrow(fHconfig)
  LogfHconfig <- log(fHconfig)
  
  NotOK <- TRUE
  NoLowerThan <- 1e-7
  NewPrior <- Prior.init
  PriorsAt0 <- which(NewPrior==0)
  while(NotOK){
    
    ## E step
    Tau <- exp( LogfHconfig + log((tcrossprod(rep(1:n),NewPrior))))
    Tau <- Tau/rowSums(Tau)
    
    ## M step
    OldPrior <- NewPrior
    NewPrior <- colMeans(Tau)
    if(length(PriorsAt0)==0){
      NewPrior[NewPrior<NoLowerThan] <- NoLowerThan
    } else {
      NoLowerCoord <- setdiff(which(NewPrior<NoLowerThan),PriorsAt0)
      if(length(NoLowerCoord)>0){
        NewPrior[NoLowerCoord] <- NoLowerThan
      }
    }
    NewPrior <- NewPrior/sum(NewPrior)
    NotOK <- max((OldPrior-NewPrior)^2) > Precision
    
  }
  return(list(priorHconfig = NewPrior, Rcopula = NULL))
  
}

###################################################################
#' EM calibration in the case of conditional independence with memory management (unsigned)
#'
#' @param Logf0Mat a matrix containing the log(f0(xi_q)) 
#' @param Logf1Mat a matrix containing the log(f1(xi_q))
#' @param Prior.init  the initialization of prior probabilities for each of the H-configurations. 
#' @param Hconfig A list of all possible combination of H0 and H1 hypotheses generated by the [GetHconfig()] function.
#' @param Precision Precision for the stop criterion. (Default is 1e-6) 
#' @param threads_nb The number of threads to use.
#'
#' @return a vector of estimated prior probabilities for each of the H-configurations.


EM_calibration_indep_memory <- function(Logf0Mat,Logf1Mat,Prior.init,Hconfig,Precision=1e-6,threads_nb){
  
  n <- nrow(Logf0Mat)
  
  NotOK <- TRUE
  NoLowerThan <- 1e-7
  NewPrior <- Prior.init
  PriorsAt0 <- which(NewPrior==0)
  while(NotOK){
    
    ## E step
    fHconfig_sum <- fHconfig_sum_update_ptr_parallel(Hconfig,
                                                     NewPrior,
                                                     Logf0Mat,
                                                     Logf1Mat,
                                                     threads_nb = threads_nb)
    
    ## M step
    OldPrior <- NewPrior
    NewPrior <- prior_update_arma_ptr_parallel(Hconfig,
                                               fHconfig_sum,
                                               OldPrior,
                                               Logf0Mat,
                                               Logf1Mat,
                                               threads_nb = threads_nb)
    
    if(length(PriorsAt0)==0){
      NewPrior[NewPrior<NoLowerThan] <- NoLowerThan
    } else {
      NoLowerCoord <- setdiff(which(NewPrior<NoLowerThan),PriorsAt0)
      if(length(NoLowerCoord)>0){
        NewPrior[NoLowerCoord] <- NoLowerThan
      }
    }
    NewPrior <- NewPrior/sum(NewPrior)
    NotOK <- max((OldPrior-NewPrior)^2) > Precision
    
  }
  return(list(priorHconfig = NewPrior, Rcopula = NULL))
  
}

###################################################################
#' EM calibration in the case of the gaussian copula (unsigned)
#'
#' @param Hconfig A list of all possible combination of H0 and H1 hypotheses generated by the [GetHconfig()] function.
#' @param F0Mat a matrix containing the evaluation of the marginal cdf under H0 at each items, each column corresponding to a p-value serie.
#' @param F1Mat a matrix containing the evaluation of the marginal cdf under H1 at each items, each column corresponding to a p-value serie.
#' @param fHconfig a matrix containing config densities evaluated at each items, each column corresponding to a configurations.
#' @param R.init  the initialization of the correlation matrix of the gaussian copula parameter.  
#' @param Prior.init  the initialization of prior probabilities for each of the H-configurations.  
#' @param Precision Precision for the stop criterion. (Default is 1e-6) 
#'
#' @return A list of 2 objects 'priorHconfig' and 'Rcopula'.
#' Object 'priorHconfig' is a vector of estimated prior probabilities for each of the H-configurations.
#' Object 'Rcopula' is the estimated correlation matrix of the gaussian copula.
#'

EM_calibration_gaussian <- function(Hconfig, F0Mat, F1Mat, fHconfig, R.init, Prior.init,Precision=1e-6){
  
  n <- nrow(fHconfig)
  LogfHconfig <- log(fHconfig)
  zeta0 <- qnorm(p = F0Mat,mean = 0,sd = 1)
  zeta1 <- qnorm(p = F1Mat,mean = 0,sd = 1)
  
  NotOK <- TRUE
  NoLowerThan <- 1e-7
  NewPrior <- Prior.init
  PriorsAt0 <- which(NewPrior==0)
  NewR <- R.init
  while(NotOK){
    
    ## E step
    Logfcopula.Hconfig <- log(Copula.Hconfig_gaussian_density(Hconfig, F0Mat, F1Mat, NewR)) + LogfHconfig
    
    Tau <- exp( Logfcopula.Hconfig + log((tcrossprod(rep(1:n),NewPrior))))
    Tau <- Tau/rowSums(Tau)
    
    ## M step
    OldPrior <- NewPrior
    NewPrior <- colMeans(Tau)
    if(length(PriorsAt0)==0){
      NewPrior[NewPrior<NoLowerThan] <- NoLowerThan
    } else {
      NoLowerCoord <- setdiff(which(NewPrior<NoLowerThan),PriorsAt0)
      if(length(NoLowerCoord)>0){
        NewPrior[NoLowerCoord] <- NoLowerThan
      }
    }
    
    NewPrior <- NewPrior/sum(NewPrior)
    
    OldR<- NewR
    NewR <- R.MLE(Hconfig, zeta0, zeta1,Tau)
    NewR <- R.MLE.check(NewR)
    NotOK <- max(c((OldR-NewR)^2,(OldPrior-NewPrior)^2)) > Precision
  }
  
  return(list(priorHconfig = NewPrior, Rcopula = NewR))
  
}


###################################################################
#' EM calibration in the case of the gaussian copula (unsigned) with memory management
#'
#' @param Logf0Mat a matrix containing the log(f0(xi_q))
#' @param Logf1Mat a matrix containing the log(f1(xi_q))
#' @param F0Mat a matrix containing the evaluation of the marginal cdf under H0 at each items, each column corresponding to a p-value serie.
#' @param F1Mat a matrix containing the evaluation of the marginal cdf under H1 at each items, each column corresponding to a p-value serie.
#' @param Prior.init  the initialization of prior probabilities for each of the H-configurations.
#' @param R.init  the initialization of the correlation matrix of the gaussian copula parameter.  
#' @param Hconfig A list of all possible combination of H0 and H1 hypotheses generated by the [GetHconfig()] function.
#' @param Precision Precision for the stop criterion. (Default is 1e-6) 
#' @param threads_nb The number of threads to use.
#'
#' @return A list of 2 objects 'priorHconfig' and 'Rcopula'.
#' Object 'priorHconfig' is a vector of estimated prior probabilities for each of the H-configurations.
#' Object 'Rcopula' is the estimated correlation matrix of the gaussian copula.
#'
## 

EM_calibration_gaussian_memory <- function(Logf0Mat,Logf1Mat,F0Mat,F1Mat,Prior.init,R.init,Hconfig,Precision=1e-6,threads_nb){
  
  n <- nrow(Logf0Mat)
  
  zeta0 <- qnorm(p = F0Mat,mean = 0,sd = 1)
  zeta1 <- qnorm(p = F1Mat,mean = 0,sd = 1)
  
  RhoIndex <- which(lower.tri(R.init,diag=TRUE),arr.ind=TRUE)
  
  NotOK <- TRUE
  NoLowerThan <- 1e-7
  NewPrior <- Prior.init
  PriorsAt0 <- which(NewPrior==0)
  NewR <- R.init
  while(NotOK){
    
    ## Implicit E step
    NewRinv <- solve(NewR)
    fHconfig_sum <- fHconfig_sum_update_gaussian_copula_ptr_parallel(Hconfig,
                                                                     NewPrior,
                                                                     Logf0Mat,
                                                                     Logf1Mat,
                                                                     zeta0,
                                                                     zeta1,
                                                                     NewR,NewRinv,
                                                                     threads_nb = threads_nb)
    
    
    ## M step
    OldPrior <- NewPrior
    OldR <- NewR
    OldRinv <- NewRinv
    
    NewPrior <- prior_update_gaussian_copula_ptr_parallel(Hconfig,
                                                          fHconfig_sum,
                                                          OldPrior,
                                                          Logf0Mat,
                                                          Logf1Mat,
                                                          zeta0,
                                                          zeta1,
                                                          OldR,OldRinv,
                                                          threads_nb = threads_nb)
    if(length(PriorsAt0)==0){
      NewPrior[NewPrior<NoLowerThan] <- NoLowerThan
    } else {
      NoLowerCoord <- setdiff(which(NewPrior<NoLowerThan),PriorsAt0)
      if(length(NoLowerCoord)>0){
        NewPrior[NoLowerCoord] <- NoLowerThan
      }
    }
    NewPrior <- NewPrior/sum(NewPrior)
    
    NewR <- R_MLE_update_gaussian_copula_ptr_parallel(Hconfig,
                                                      fHconfig_sum,
                                                      OldPrior,
                                                      Logf0Mat,
                                                      Logf1Mat,
                                                      zeta0,
                                                      zeta1,
                                                      OldR,OldRinv,
                                                      RhoIndex,
                                                      threads_nb = threads_nb)
    NewR <- R.MLE.check(NewR)
    NotOK <- max(c((OldR-NewR)^2,(OldPrior-NewPrior)^2)) > Precision
  }
  
  return(list(priorHconfig = NewPrior, Rcopula = NewR))
  
}

###################################################################
#' Gaussian copula density 
#'
#' @param zeta the matrix of probit-transformed observations.
#' @param R the correlation matrix.
#' @param Rinv the inverse correlation matrix.
#' 
#' @return A numeric vector, each coordinate i corresponding to the evaluation of the Gaussian copula density function at observation zeta_i.
## 
gaussian_copula_density<- function(zeta,R,Rinv){
  A <- Rinv-diag(x = 1,nrow = nrow(Rinv))
  res <- (1/sqrt(det(R))) * exp(-0.5*rowSums(zeta*(tcrossprod(zeta,A))))
  return(res)
}


###################################################################
#' Gaussian copula density for each Hconfiguration.
#' 
#' @param Hconfig A list of all possible combination of H0 and H1 hypotheses generated by the [GetHconfig()] function.
#' @param F0Mat a matrix containing the evaluation of the marginal cdf under H0 at each items, each column corresponding to a p-value serie.
#' @param F1Mat a matrix containing the evaluation of the marginal cdf under H1 at each items, each column corresponding to a p-value serie.
#' @param R the correlation matrix.
#' 
#' @return A matrix containing the evaluation of the Gaussian density function for each Hconfiguration in columns.
## 

Copula.Hconfig_gaussian_density <- function(Hconfig, F0Mat, F1Mat, R){
  zeta0 <- qnorm(p = F0Mat,mean = 0,sd = 1)
  zeta1 <- qnorm(p = F1Mat,mean = 0,sd = 1)
  
  Rinv <- solve(R)
  
  Copula.Hconfig <- purrr::map_dfc(Hconfig, function(h){
    zeta <- matrix(0,nrow = nrow(F0Mat),ncol = ncol(F0Mat))
    if (length(which(h==1)) > 0){zeta[, which(h==1)] <- zeta1[, which(h==1)]}
    if (length(which(h==0)) > 0){zeta[, which(h==0)] <- zeta0[, which(h==0)]}
    
    return(gaussian_copula_density(zeta = zeta,R = R,Rinv = Rinv))
  })
  
  return(as.matrix(Copula.Hconfig))
}



###################################################################
#' Gaussian copula correlation matrix Maximum Likelihood estimator.
#' 
#' @param Hconfig A list of all possible combination of H0 and H1 hypotheses generated by the [GetHconfig()] function.
#' @param zeta0 a matrix containing the Phi(F_0(Z_iq)), each column corresponding to a p-value serie.
#' @param zeta1 a matrix containing the Phi(F_1(Z_iq)), each column corresponding to a p-value serie.
#' @param Tau a matrix providing for each item (in row) its posterior probability to belong to each of the H-configurations (in columns).
#' 
#' @return Estimate of the correlation matrix. 
## 
R.MLE <- function(Hconfig, zeta0, zeta1,Tau){
  
  n <- nrow(zeta0)
  Q <- ncol(zeta0)
  
  R <- matrix(0,nrow = Q, ncol= Q)
  RhoIndex <- which(lower.tri(R,diag=TRUE),arr.ind=TRUE)
  Rho <- rep(0,nrow(RhoIndex))
  zeta <- matrix(0,nrow = n,ncol = Q) 
  for(c in 1:length(Hconfig)){
    h <- Hconfig[[c]]
    if (length(which(h==1)) > 0){zeta[, which(h==1)] <- zeta1[, which(h==1)]}
    if (length(which(h==0)) > 0){zeta[, which(h==0)] <- zeta0[, which(h==0)]}
    for(index in 1:nrow(RhoIndex)){
      Rho[index] <- Rho[index]+ sum(Tau[,c]*zeta[,RhoIndex[index,1]]*zeta[,RhoIndex[index,2]])
    } 
  }
  R[lower.tri(R,diag = TRUE)] <- Rho/n
  for(i in 1:Q){
    R[i,] <- R[,i]
  }
  return(R)
}


###################################################################
#' Check the Gaussian copula correlation matrix Maximum Likelihood estimator 
#' 
#' @param R Estimate of the correlation matrix. 
#' @return Estimate of the correlation matrix. 
## 
R.MLE.check <- function(R){
  R <- cov2cor(R)
  # R <- Matrix::nearPD(R,corr = TRUE)
  return(R)
}


###################################################################
#' Gaussian copula correlation matrix Maximum Likelihood estimator (memory handling)
#' 
#' @param Hconfig A list of all possible combination of H0 and H1 hypotheses generated by the [GetHconfig()] function.
#' @param fHconfig_sum a vector containing sum_c(w_c*psi_c) for each items.
#' @param OldPrior  a vector containing the prior probabilities for each of the H-configurations.
#' @param Logf0Mat a matrix containing log(f0Mat), each column corresponding to a p-value serie.
#' @param Logf1Mat a matrix containing log(f1Mat), each column corresponding to a p-value serie.
#' @param zeta0 a matrix containing qnorm(F0Mat), each column corresponding to a p-value serie.
#' @param zeta1 a matrix containing qnorm(F1Mat), each column corresponding to a p-value serie.
#' @param OldR the copula correlation matrix.
#' @param OldRinv the inverse of copula correlation matrix.
#' 
#' 

#' 
#' @return Estimate of the correlation matrix. 
## 

R.MLE.memory <- function(Hconfig,fHconfig_sum,OldPrior, Logf0Mat, Logf1Mat, zeta0, zeta1, OldR, OldRinv){
  
  n <- nrow(zeta0)
  Q <- ncol(zeta0)
  
  R <- matrix(0,nrow = Q, ncol= Q)
  RhoIndex <- which(lower.tri(R,diag=FALSE),arr.ind=TRUE)
  Rho <- rep(0,nrow(RhoIndex))
  zeta <- matrix(0,nrow = n,ncol = Q) 
  
  for(c in 1:length(Hconfig)){
    h <- Hconfig[[c]]
    
    f_c <- rep(0,n)
    if (length(which(h==1)) > 0){f_c <- f_c + rowSums(Logf1Mat[, which(h==1), drop=FALSE])}
    if (length(which(h==0)) > 0){f_c <- f_c + rowSums(Logf0Mat[, which(h==0), drop=FALSE])}
    f_c <- exp(f_c)
    
    zeta <- matrix(0,nrow = n,ncol = Q) 
    if (length(which(h==1)) > 0){zeta[, which(h==1)] <- zeta1[, which(h==1)]}
    if (length(which(h==0)) > 0){zeta[, which(h==0)] <- zeta0[, which(h==0)]}
    copula_c <- gaussian_copula_density(zeta = zeta,R = OldR,Rinv = OldRinv)
    
    Tau_c <- OldPrior[c]*f_c*copula_c/fHconfig_sum
    
    for(index in 1:nrow(RhoIndex)){
      Rho[index] <- Rho[index]+ sum(Tau_c*zeta[,RhoIndex[index,1]]*zeta[,RhoIndex[index,2]])
    }
  }
  
  R <- copula::p2P(param = Rho/n,d = Q)
  return(R)
}

Try the qch package in your browser

Any scripts or data that you put into this service are public.

qch documentation built on May 29, 2024, 8:11 a.m.