R/load.summary.files.R

Defines functions load.summary.files

load.summary.files <- function(summary.files, lambda, sel.snps, options){
  
  msg <- paste("Loading summary files:", date())
  if(options$print) message(msg)
  
  ambigFlag <- options$ambig.by.AF
  header <- c('SNP', 'RefAllele', 'EffectAllele', 'BETA') # columns that must be provided by users
  opt.header <- c('P', 'SE')
  if (ambigFlag) {
    opt.header3 <- c("RAF", "EAF")
  } else {
    opt.header3 <- NULL
  } 
  

  complete.header <- c(header, opt.header, 'Chr', 'Pos', 'Direction', opt.header3)
  
  nfiles <- length(summary.files)
  stat <- list()
  lam <- NULL
  
  fid <- 0
  for(i in 1:nfiles){
    st <- load.file(summary.files[i], header = TRUE, nrows = 1e4)
    header.map <- colnames(st)
    colnames(st) <- convert.header(colnames(st), complete.header)
    tmp <- (header %in% colnames(st))
    if(!all(tmp)){
      msg <- paste0("Columns below were not found in ", summary.files[i], ":\n", paste(header[!tmp], collapse = " "))
      stop(msg)
    }
    names(header.map) <- colnames(st)
    
    col.class <- sapply(st, class)
    col.id <- which(colnames(st) %in% complete.header)
    col.class[-col.id] <- "NULL"
    col.class[c('SNP', 'RefAllele', 'EffectAllele')] <- 'character'
    names(col.class) <- header.map[names(col.class)]
    try(st <- load.file(summary.files[i], header = TRUE, select = col.class), silent = TRUE)
    colnames(st) <- convert.header(colnames(st), complete.header)
    if(!is.null(sel.snps)){
      st <- st[st$SNP %in% sel.snps, ]
    }
    
    if(nrow(st) == 0){
      next
    }
    
    if(!any(opt.header %in% colnames(st))){
      msg <- paste0("Neither SE nor P is not provided in ", summary.files[i])
      stop(msg)
    }
    
    
    if(!('P' %in% colnames(st))){
      st$P <- NA
    }
    
    if(!('SE' %in% colnames(st))){
      st$SE <- NA
    }
    
    if(!('Chr' %in% colnames(st))){
      st$P <- NA
    }
    
    if(!('Pos' %in% colnames(st))){
      st$P <- NA
    }
    
    if(!('Direction' %in% colnames(st))){
      msg <- paste0('Direction is absent in ', summary.files[i], '. Function meta() assumed equal sample sizes for all SNPs in that study. Violation of this assumption can lead to false positive if summary data of this study is used in pathway analysis')
      warning(msg)
      st$Direction <- ifelse(st$BETA == 0, '0', ifelse(st$BETA > 0, '+', '-'))
    }
    
    if (ambigFlag) st <- ambig.check.data(st, summary.files[i], vars=opt.header3)

    nc <- unique(nchar(st$Direction))
    if(length(nc) != 1){
      msg <- paste0('String lengths of Direction are unequal in ', summary.files[i])
      stop(msg)
    }
    
    st <- st[, which(toupper(colnames(st)) %in% toupper(complete.header))]
    
    dup <- duplicated(st$SNP)
    if(any(dup)){
      dup.snps <- unique(st$SNP[dup])
      msg <- paste("SNPs duplicated in ", summary.files[i], " are discarded: ", paste(dup.snps, collapse = " "))
      warning(msg)
      st <- subset(st, !(st$SNP %in% dup.snps))
      if(nrow(st) == 0){
        next
      }
    }
    
    id.no.SE.P <- which(is.na(st$SE) & is.na(st$P))
    if(length(id.no.SE.P) > 0){
      msg <- paste("For SNPs below, neither SE nor P is not provided in", summary.files[i], ":\n", paste(st$SNP[id.no.SE.P], collapse = " "))
      stop(msg)
    }
    
    st$RefAllele <- toupper(st$RefAllele)
    st$EffectAllele <- toupper(st$EffectAllele)
    
    id.no.SE <- which(is.na(st$SE))
    id.no.P <- which(is.na(st$P))
    
    if(length(id.no.SE) > 0){
      z2 <- qchisq(st$P[id.no.SE], df = 1, lower.tail = FALSE)
      st$SE[id.no.SE] <- abs(st$BETA[id.no.SE]/sqrt(z2))
    }
    
    if(length(id.no.P) > 0){
      st$P[id.no.P] <- pchisq((st$BETA[id.no.P]/st$SE[id.no.P])^2, df = 1, lower.tail = FALSE)
    }
    
    fid <- fid + 1
    lam <- c(lam, lambda[i])
    rownames(st) <- st$SNP
    st <- st[complete.cases(st), ]
    
    stat[[fid]] <- st
    rm(st)
    gc()
    
  }
  
  if(length(stat) == 0){
    msg <- "No SNPs to be included in analysis"
    stop(msg)
  }
  
  lambda <- lam
  
  list(stat = stat, lambda = lambda)
  
}
zhangh12/ARTP2 documentation built on Aug. 16, 2019, 7:27 p.m.