R/generate.normal.statistics.R

Defines functions generate.normal.statistics

generate.normal.statistics <- function(resp.var, null, raw.geno, pathway, family, lambda, options){
  
  pathway <- pathway[pathway$SNP %in% colnames(raw.geno), ]
  
  chr <- sort(unique(pathway$Chr))
  G <- list()
  for(i in 1:length(chr)){
    snps <- pathway$SNP[pathway$Chr == chr[i]]
    id <- which(colnames(raw.geno) %in% snps)
    G[[i]] <- as.matrix(raw.geno[, id, drop = FALSE])
    setDT(raw.geno)
    rs <- colnames(raw.geno)[id]
    raw.geno[, (rs) := NULL]
    ## with=FALSE together with := was deprecated in data.table v1.9.4
    ## a solution is to wrap the LHS of := with parentheses; 
    ## e.g., DT[,(myVar):=sum(b),by=a] to assign to column name(s) held in variable myVar
    ## this following loop is thereby replaced by raw.geno[, (rs) := NULL]
    # for(r in rs){
    #   raw.geno[, r := NULL, with = FALSE]
    # }
    class(raw.geno) <- "data.frame"
    ## the following line make hard copy in memory so it is inefficient
    #raw.geno <- raw.geno[, -id, drop = FALSE]
    gc()
  }
  rm(raw.geno)
  gc()
  
  formula <- paste(resp.var, "~ . -1")
  
  mdl.null <- glm(formula, data = null, family = family)
  nchr <- length(G)
  
  X <- as.matrix(null[, -1, drop = FALSE])
  V <- list()
  score0 <- list()
  
  for(i in 1:nchr){
    gs <- gen.stat.miss(resp.var, null, family, G[[i]], X, lambda, options, chr[i])
    V[[i]] <- gs$V
    score0[[i]] <- gs$score0
    G[[i]] <- NA
    rm(gs)
    gc()
  }
  
  debug <- FALSE
  if(debug){
  if(family == "binomial"){
    y.hat <- mdl.null$fitted.values
    r <- null[, 1] - y.hat
    A <- y.hat * (1 - y.hat)
    for(i in 1:nchr){
      tmp <- try(V[[i]] <- t(G[[i]])%*% (A * G[[i]]) - t(G[[i]])%*% (A * X) %*% solve(t(X) %*% (A * X)) %*% t(X) %*% (A * G[[i]]), silent = TRUE)
      
      if(error.try(tmp)){
        msg <- "Potential existence of multicollinearity detected and ARTP2 cannot automatically deal with it right now. Please check your covariates specified in formula"
        stop(msg)
      }
      
      score0[[i]] <- as.vector(t(G[[i]]) %*% r)
      G[[i]] <- NA
      gc()
      V[[i]] <- V[[i]] / nrow(X)
      score0[[i]] <- score0[[i]] / sqrt(nrow(X)) / sqrt(lambda)
      names(score0[[i]]) <- colnames(V[[i]])
    }
  }else{
    r <- mdl.null$residuals
    s2 <- sum(r^2)/(length(r)-ncol(X))
    for(i in 1:nchr){
      tmp <- try(V[[i]] <- (t(G[[i]]) %*% G[[i]] - t(G[[i]]) %*% X %*% solve(t(X) %*% X) %*% t(X) %*% G[[i]]) / s2, silent = TRUE)
      
      if(error.try(tmp)){
        msg <- "Potential existence of multicollinearity detected and ARTP2 cannot automatically deal with it right now. Please check your covariates specified in formula"
        stop(msg)
      }
      
      score0[[i]] <- as.vector(t(G[[i]]) %*% r / s2) / sqrt(lambda)
      G[[i]] <- NA
      gc()
      V[[i]] <- V[[i]] / nrow(X)
      score0[[i]] <- score0[[i]] / sqrt(nrow(X))
      names(score0[[i]]) <- colnames(V[[i]])
    }
  }
  
  for(i in 1:length(V)){
    rs <- sort(names(score0[[i]]))
    score0[[i]] <- score0[[i]][rs]
    V[[i]] <- V[[i]][rs, rs, drop = FALSE]
  }
  }
  
  names(V) <- as.character(chr)
  names(score0) <- as.character(chr)
  
  list(V = V, score0 = score0)
  
}
zhangh12/ARTP2 documentation built on Aug. 16, 2019, 7:27 p.m.