R/SSI.R

Defines functions SSI

Documented in SSI

#X = Z = indexK = subset = saveAt = name = NULL; lambda=NULL
#alpha = 1; nLambda = 100; method = "CD1"; K=G; b=NULL;
#mc.cores = 1; tol = 1E-4; maxIter = 300; verbose = TRUE

SSI <- function(y, X = NULL, b = NULL, Z = NULL, K, indexK = NULL,
         h2 = NULL, trn = seq_along(y), tst = seq_along(y),
         subset = NULL, alpha = 1, lambda = NULL, nLambda = 100,
         method = c("CD1","CD2"), tol = 1E-4, maxIter = 500,
         saveAt = NULL, name = NULL, mc.cores = 1, verbose = TRUE)
{
  method <- match.arg(method)

  n <- length(y)
  if(is.logical(trn)){
     if(n != length(trn)) stop("Object 'trn' must be of the same length of 'y'\n")
     trn <- which(trn)
  }
  if(is.logical(tst)){
     if(n != length(tst)) stop("Object 'tst' must be of the same length of 'y'\n")
     tst <- which(tst)
  }
  nTRN <- length(trn);  nTST <- length(tst)

  if(is.character(K)){
      K <- readBinary(K,indexRow=indexK,indexCol=indexK)
  }
  if(!is.double(K)) K <- apply(K,2,as.double)

  if(is.null(X))   # Design matrix for fixed effects including the intercept
  {
    X <- model.matrix(~1,data=data.frame(rep(1,n)))
  }else{
    if(is.vector(X)){
      X <- stats::model.matrix(~X)
      if(ncol(X)>2)  colnames(X)[-1] <- substr(colnames(X)[-1],2,nchar(colnames(X)[-1]))
    }
  }

  if(!is.null(Z)){
    if(!is.matrix(Z)) stop("Object 'Z' must be a matrix with nrow(Z)=n and ncol(Z)=nrow(K)\n")
    K <- Z%*%K%*%t(Z)
  }

  if(!is.matrix(K) | (length(K) != n^2))
    stop("Product Z %*% K %*% t(Z) must be a squared matrix with number of rows",
     "\n(and columns) equal to the number of elements in 'y'")

  RHS <- K[trn,tst,drop=FALSE]
  K <- K[trn,trn]

  if(is.null(h2))
  {
    # Fit LMM to get variance components and estimate fixed effects as GLS
    fm <- fitBLUP(y[trn],X=X[trn,,drop=FALSE],K=K)
    if(fm$convergence){
      varU <- fm$varU
      varE <- fm$varE
      h2 <- varU/(varU + varE)
      b <- fm$b
    }else stop("Convergence was not reached in the 'EMMA' algorithm. \n\t",
           "Please provide a heritability estimate in 'h2' parameter")
  }else{   # Only estimate fixed effects as GLS
    varU <- varE <- NULL
    if(is.null(b)){
      b <- fitBLUP(y[trn],X=X[trn,,drop=FALSE],K=K,BLUP=FALSE,h2=h2)$b
    }else{
      if(length(b) != ncol(X)) stop("The length of 'b' must be the same as the number of columns of 'X'\n")
    }
  }
  if(h2 < 0.001) warning("The 'heritability' is too small. Results may be affected",immediate.=TRUE)
  Xb <- drop(X%*%b)

  # Standardizing
  for(i in 1:nTRN)  K[i,i] <- K[i,i] + (1-h2)/h2
  sdx <- sqrt(diag(K))
  scale_cov(K,void=TRUE)   # Equal to K=cov2cor(K) but faster
  RHS <- apply(RHS,2,function(x)x/sdx)

  if(is.null(lambda)){
    if(method == "CD1"){
        Cmax <- ifelse(alpha > .Machine$double.eps,max(abs(RHS)/alpha),5)
        lambda <- exp(seq(log(Cmax),log(.Machine$double.eps^0.5),length=nLambda))
    }
  }else{
    if(is.matrix(lambda)){
        if(nrow(lambda) != nTST) stop("Object 'lambda' must be a vector or a matrix with nTST rows")
    }
  }

  name <- ifelse(is.null(name),method,name)

  # Split the testing set into subsets. Only the subset provided will be fitted
  if(!is.null(subset)){
     if(!is.numeric(subset) & length(subset) != 2)
      stop("Object 'subset' must contain at least a 2-elements vector")
     sets <- sort(rep(1:subset[2],ceiling(nTST/subset[2]))[1:nTST])
     index <- which(sets == subset[1])
     tmp <- paste0(" of ",length(tst))
     tst <- tst[index]
     RHS <- RHS[,index,drop=FALSE]
     subset_size <- as.vector(table(sets)[as.character(1:subset[2])])
  }else{
    subset_size <- NULL
    tmp <- ""
  }

  if(verbose)
    cat(" Fitting SSI model for nTST=",length(tst),tmp," and nTRN=",nTRN," individuals\n",sep="")

  compApply <- function(ind)
  {
    rhs <- drop(RHS[,ind])
    if(is.matrix(lambda)){
      lambda0 <- lambda[ind,]
    }else lambda0 <- lambda

    fm <- solveEN(K,rhs,scale=FALSE,lambda=lambda0,nLambda=nLambda,
                  alpha=alpha,tol=tol,maxIter=maxIter)

    # Return betas to the original scale by dividing by sdx
    # B <- Matrix::Matrix(scale(fm$beta,FALSE,sdx), sparse=TRUE)
    saveBinary(scale(fm$beta,FALSE,sdx),filename=paste0(tmpdir,"/",file_beta,ind,".bin"),size=8,verbose=FALSE)

    if(verbose){
      cat(1,file=con,append=TRUE)
      utils::setTxtProgressBar(pb, nchar(scan(con,what="character",quiet=TRUE))/length(tst))
    }
    #return(list(beta=filename,lambda=fm$lambda,tst=tst[ind],df=fm$df))
    return(list(lambda=fm$lambda,tst=tst[ind],df=fm$df))
  }

  tmpdir <- tempdir()
  file_beta <- paste0(tempfile(tmpdir=""),"_beta_ind_")
  if(verbose){
     pb = utils::txtProgressBar(style=3)
     con <- tempfile(tmpdir=tmpdir)
  }
  if(mc.cores == 1L) {
    out = lapply(X=seq_along(tst),FUN=compApply)
  }else{
    out = parallel::mclapply(X=seq_along(tst),FUN=compApply,mc.cores=mc.cores)
    # registerDoParallel(cores=mc.cores)
    # out = foreach(ind=seq_along(tst),.options.snow=list(preschedule=TRUE)) %dopar% compApply(ind)
  }
  if(verbose) {
    close(pb); unlink(con)
  }

  if(sum(tst != unlist(lapply(out,function(x)x$tst)))>0){
    stop("Some sub-processes failed. Something went wrong during the analysis.")
  }

  out <- list(method=method, name=name, y=y, Xb=Xb, b=b, varU=varU,
              varE=varE, h2=h2, trn=trn, tst=tst, alpha=alpha,
              df = do.call("rbind",lapply(out,function(x)x$df)),
              lambda = do.call("rbind",lapply(out,function(x)x$lambda)),
              file_beta=paste0(tmpdir,"/",file_beta))
  class(out) <- "SSI"

  # Save outputs if 'saveAt' is not NULL
  if(!is.null(saveAt)){
    if(!is.null(subset)){
       filenames <- paste0(saveAt,c("_B_","_"),subset[1],"_of_",subset[2],c(".bin",".RData"))
    }else  filenames <- paste0(saveAt,c("_B.bin",".RData"))

    if(!file.exists(dirname(filenames[1])))    dir.create(dirname(filenames[1]),recursive = TRUE)

    saveBinary(as.matrix(do.call(rbind,coef.SSI(out))),filename=filenames[1],size=4,verbose=FALSE)
    out$file_beta <- filenames[1]
    out$subset <- subset
    out$subset_size <- subset_size
    save(out,file=filenames[2])
  }
  return(out)
}
MarcooLopez/SFSI_data documentation built on April 15, 2021, 10:53 a.m.