R/svd_olsgasp.R

Defines functions svd_olsgasp

Documented in svd_olsgasp

#' @title Singular Value Decomposition of X and Y
#' @description
#' Make the SVD of the covariates matrix X and the incomplete matrix Y
#' @param Y_obs a incomplete matrix, data frame or data table to impute. The missing values are NA.
#' @param sites a numeric vector of size equal to the number of col of Y_obs with the sites location.
#' @param X a matrix, data frame or data table with the covariables same number of row as Y_obs.
#' @param tol_eig the threshold for the eigen values of the wo SVD
#' @return a list with all the object necessary to the estimation and prediction.
#'
#' @author Melina Ribaud
#' @references Melina Ribaud, Aurélie Labbe and Karim Oualkacha.
#' Imputation in genetic methylation studies: A linear model of coregionalization (LMC) with informative covariates.
#' 2022. hal-00000000
#'
#' @examples
#'
#'library(FastGP)
#'N = 100
#'K = 10
#'D = 2
#'sites = sort(runif(N))
#'beta = c(runif(D,2,5),runif(K-D,10,1000))
#'nugget = c(rep(0,D),runif(K-D,0.001,0.05))
#'A = matrix(runif(K*(K-D),0,0.1),nrow = K, ncol = K-D )
#'X = matrix(runif(D*K),ncol = D, nrow= K)
#'Hx = matrix(solve(t(X)%*%X,t(X)),nrow = length(X)/K)
#'A = A-X%*%Hx%*%A
#'A = cbind(X,A)
#'V = matrix(NA, nrow = K, ncol = N)
#'R00 = abs(outer(sites, sites, '-'))
#'for (d in 1:K) {
#'  R = matern_5_2_kernel(R00, beta = beta[d])
#'  R_tilde = R + nugget[d] * diag(N)
#'  V[d, ] = rcpp_rmvnorm_stable(1, R, rep(0, N))
#'}
#'Y_obs = A %*% V
#'obj_olsgasp = svd_olsgasp(Y_obs,sites,X,tol_eig = 1e-6)
#' @export
#'



svd_olsgasp = function(Y_obs,sites,X,tol_eig = 0){

  if(sum(sites != sort(sites))>0){
    stop("sites must be sorted")
  }

  if(length(ncol(X))<1){
    nrow_X = length(X)
  }else{
    nrow_X = nrow(X)
  }


  K = nrow(Y_obs)
  D = K - (length(X)/K)
  n_index = apply(is.na(Y_obs),2,sum)==0
  scale_sites = scale_01(sites)
  input = scale_sites[n_index]

  if (length(input) < K) {
    warning(
      "ncol(Y_obs) without NA is lower than K, the SVD of I-Px(Y) will not be of rank K"
    )
  }

  if (K != nrow_X) {
    stop(
      "nrow(X) must be equal to nrow(Y_obs)"
    )
  }

  output = Y_obs[,n_index]
  C=(max(input)-min(input))/ncol(output)
  rowMeans_t_output=rep(0,K)

  output_mean = output - rowMeans_t_output



  svd_X = svd(X)
  eig_X = svd_X$d
  select_eig = eig_X >= tol_eig
  eig_X[eig_X<=0] = 1e-30
  D_svd = diag(eig_X[select_eig],sum(select_eig),sum(select_eig))
  X_svd = svd_X$u[,1:sum(select_eig)]%*%D_svd

  inv_D_svd_2 = diag(1/(eig_X[select_eig]^2),sum(select_eig),sum(select_eig))
  B_svd = inv_D_svd_2%*%t(X_svd)%*%output_mean




  Y_k = X_svd%*%B_svd
  X = X_svd
  B = B_svd




  svd_A = svd(output_mean-Y_k)
  eig_A = svd_A$d
  select_eig = eig_A > tol_eig
  eig_A[eig_A<=0] = 1e-30
  D_svd = diag(eig_A[select_eig],sum(select_eig),sum(select_eig))
  A = -(svd_A$u[,1:sum(select_eig)]%*%D_svd/sqrt(ncol(output)))[,1:sum(select_eig)]
  A_output_t = t(-svd_A$v[,1:sum(select_eig)]*sqrt(ncol(output)))


  A = cbind(X,A)
  A_output_t = rbind(B,A_output_t)

  obj = list(
    Y_obs = Y_obs,
    input = input,
    output = output,
    C = C,
    rowMeans_t_output = rowMeans_t_output,
    scale_sites = scale_sites,
    A = A,
    A_output_t = A_output_t,
    X = X
  )

  return(obj)
}
melinaR/olsgasp documentation built on March 11, 2023, 12:10 a.m.