R/wga_stream.GxE.R

Defines functions GxE.scan.jobsFile GxE.scan.genfile scan.check.transform scan.include.snps scan.change.tab get.nsnps.vec nsnps.genoFile GxE.scan.combine GxE.scan.partition GxE.scan.4 GxE.setup.4 GxE.scan.1_2 GxE.setup.1_2 GxE.scan.3a GxE.setup.3a GxE.scan.3 GxE.setup.3 GxE.scan.2 GxE.setup.2 GxE.scan.1 GxE.call.1 GxE.setup.1 GxE.scan

Documented in GxE.scan GxE.scan.combine GxE.scan.partition

# Main scan function for CGEN package
GxE.scan <- function(snp.list, pheno.list, op=NULL) {

  # op
  #   model  1=snp.logistic, 2=additive.test, 3=score.test, 4=snp.matched
  #          0=user-defined
  
  if (!is.list(snp.list)) stop("snp.list must be a list")  
  op <- default.list(op, 
          c("model"), 
          list(1), 
          error=c(0), 
          checkList=list(0:4))
  scan.func.op <- op[["scan.func.op", exact=TRUE]] 
  SOPFLAG      <- !is.null(scan.func.op)
  which        <- op$model

  if (which == 3) {
    method <- "wald"
    if (SOPFLAG) {
      temp <- scan.func.op[["method", exact=TRUE]]
      if (is.null(temp)) temp <- 0
      if (temp %in% 1) method <- "max"
    }
  }

  # Output file
  out.file <- op[["out.file", exact=TRUE]]
  if (is.null(out.file)) out.file <- paste(getwd(), "/GxE.scan.output.txt", sep="")
  op$out.file <- out.file

  if (is.null(snp.list[["temp.dir", exact=TRUE]])) {
    snp.list$temp.dir <- dirname(out.file)
  }
  if (is.null(snp.list[["id.str", exact=TRUE]])) {
    snp.list$id.str <- basename(out.file)
  }
  snp.list <- check.snp.list(snp.list)

  format <- snp.list$format
  
  
  scan.func       <- op[["scan.func", exact=TRUE]]
  if ((!which) && (is.null(scan.func))) {
    stop("ERROR with options: scan.func must be specified with model=0")
  }
 
  impute.flag       <- format %in% "impute"
  op$scan.GxE.model <- NULL
  UML_CML           <- 0
  if (which != 1) UML_CML <- 0
  str2 <- ""
  if (UML_CML) str2 <- "_2"
  if ((which) && (impute.flag) && (which != 3)) stop("ERROR: Only the score test can be used with imputed genotype data")
  if ((which == 3) && (method == "wald")) str2 <- "a"

  if (which) {
  
    op$scan.setup.func     <- paste("GxE.setup.", which, str2, sep="")
    op$scan.func           <- paste("GxE.scan.", which, str2, sep="") 
    pheno.list$remove.miss <- 1

    # Set keep.vars
    vlist <- list(id.var=pheno.list[["id.var", exact=TRUE]],
                  response.var=pheno.list[["response.var", exact=TRUE]], 
                  main.vars=pheno.list[["main.vars", exact=TRUE]],
                  int.vars=pheno.list[["int.vars", exact=TRUE]], 
                  strata.var=pheno.list[["strata.var", exact=TRUE]], 
                  ProbG1.var=pheno.list[["ProbG1.var", exact=TRUE]],
                  cc.var=pheno.list[["cc.var", exact=TRUE]],
                  nn.var=pheno.list[["nn.var", exact=TRUE]])
    temp <- getAllVars(vlist, names=names(vlist))
    pheno.list$keep.vars <- unique(temp)

    if (which %in% 2:4) {
      snp.list$impute.method <- 2
      if (snp.list$impute.cutoff < 0) snp.list$impute.cutoff <- 0.999 
    }  
  } 

  impute.method <- snp.list$impute.method
  if (which) {
    gmodel       <- NULL
    if (!is.null(scan.func.op)) gmodel <- scan.func.op[["genetic.model", exact=TRUE]]
    if (is.null(gmodel)) gmodel <- 0
    if ((UML_CML) && (gmodel == 3)) stop("ERROR1: genetic.model = 3 is not valid")
    if ((impute.flag) && (impute.method == 1) && (gmodel == 3)) {
      stop("ERROR2: genetic.model = 3 is not valid")
    }
  }

  snp.list$genetic.model <- 0  

  scan.stream(snp.list, pheno.list, op=op)

  out.file

} # END: scan.GxE

# Setup function for snp.logistic
GxE.setup.1 <- function(data, opList) {

  # data   data frame
  # op     List of sublists

  pheno.list <- opList$pheno.list
  op         <- opList[["scan.func.op", exact=TRUE]]
  if (is.null(op)) op <- list()
  op <- default.list(op, c("output.parms", "output.methods"), 
                     list(0, c("UML", "CML", "EB")))  
  parmFlag <- op$output.parms
  methods  <- unique(toupper(removeWhiteSpace(op$output.methods)))
  temp     <- methods %in% c("UML", "CML", "EB") 
  methods  <- methods[temp]
  if (!length(methods)) {
    cat("\nERROR: output.methods must be any of UML, CML or EB\n")
    stop()
  }

  op$imputed <- opList$impute.flag
  if (op$imputed) {
    op$ProbG0.name <- opList$ProbG0.name
    op$ProbG1.name <- opList$ProbG1.name
    op$ProbG2.name <- opList$ProbG2.name
  }
  response.var <- pheno.list[["response.var", exact=TRUE]]
  snp.var      <- pheno.list[["snp.var", exact=TRUE]]

  # Check for errors
  if (length(response.var) != 1) scan.error("pheno.list$response.var must be a single variable", "GxE.setup.1")
  if (length(snp.var) != 1) scan.error("pheno.list$snp.var must be a single variable", "GxE.setup.1")
  if (!is.data.frame(data)) scan.error("data must be a data frame", "GxE.setup.1")

  main.vars   <- pheno.list[["main.vars", exact=TRUE]]
  int.vars    <- pheno.list[["int.vars", exact=TRUE]]
  strata.var  <- pheno.list[["strata.var", exact=TRUE]]
  main.call   <- main.vars
  int.call    <- int.vars
  strata.call <- strata.var  
  int.flag    <- !is.null(int.vars)

  op <- default.list(op, c("snpName", "fit.null", "genetic.model"), list("SNP", 0, 0))
  if (!is.numeric(snp.var)) op$snpName <- snp.var
  zeroFlag  <- 0
  zero.vars <- NULL
  main.form <- ("formula" %in% class(main.vars))
  int.form  <- ("formula" %in% class(int.vars))
  s.form    <- ("formula" %in% class(strata.var))

  # Check variable names
  vlist <- list(response.var=response.var, main.vars=main.vars,
               int.vars=int.vars, strata.var=strata.var)
  vars <- getAllVars(vlist, names=names(vlist))
  temp <- !(vars %in% colnames(data))
  if (any(temp)) {
    print(vars[temp])
    scan.error("The above variables were not found in the input data", "GxE.setup.1")
  }

  # Check if snp.var is in main.vars or int.vars
  if (snp.var %in% getAllVars(main.vars)) scan.error("ERROR: main.vars must not contain snp.var", "GxE.setup.1")
  if (snp.var %in% getAllVars(int.vars)) scan.error("ERROR: int.vars must not contain snp.var", "GxE.setup.1")

  # Remove missing values
  temp <- getFormulas(vlist)
  miss <- c(NA, NaN, Inf, -Inf)
  if (length(temp)) data <- applyFormulas(data, temp, remove=miss)
  data <- removeMiss.vars(data, vars=vars, miss=miss)

  rm(vlist, vars)
  temp <- gc(verbose=FALSE)

  # Get the response variable 
  D    <- unfactor(data[, response.var])
  nobs <- length(D)

  # Get the snp variable
  #snp <- unfactor(data[, snp.var])

  facVars  <- NULL
  sflag    <- !is.null(strata.var)
 
  # Check for constant strata variable
  if ((sflag) && (!s.form) && (length(strata.var) == 1)) {
    temp <- unique(makeVector(data[, strata.var]))
    if (length(temp) == 1) sflag <- FALSE
  }

  # Determine if any strata vars are character
  if ((sflag) && (!s.form)) {
    for (v in strata.var) {
      if (is.character(data[, v])) data[, v] <- factor(data[, v])
    }
  }

  # Get the variables that are factors
  for (temp in colnames(data)) {
    if (is.factor(data[, temp])) facVars <- c(facVars, temp)
  }

  # Get the V design matrix
  design.V0 <- logistic.dsgnMat(data, int.vars, facVars)$designMatrix
  int.vars  <- colnames(design.V0)

  # For fitting a null model
  if (op$fit.null) {
    # Remove interaction vars, snp
    temp         <- getAllVars(int.call)
    temp         <- unique(temp, int.vars) 
    zero.vars    <- temp
    int.vars     <- NULL
    design.V0    <- NULL
    zeroFlag     <- 1
    op$fixParms  <- list(parms=snp.var, values=0)
    op$zero.vars <- NULL
  }

  # Get the stratafication matrix
  temp         <- sMatrix.logistic(data, strata.var, facVars)
  design.S0    <- temp$design.S0
  op$s.1catVar <- temp$s.1catVar

  # Get the X design matrix
  X_ <- logistic.dsgnMat(data, main.vars, facVars, remove.vars=zero.vars)$designMatrix

  # For the zero.vars option
  zero.vars <- op[["zero.vars", exact=TRUE]]
  if (!is.null(zero.vars)) {
    temp <- list(main.vars=X_, int.vars=design.V0, strata.var=design.S0)
    temp <- apply_zero.vars(zero.vars, temp, snp.var, facVars, data)
    op$fixParms <- temp[["fixParms", exact=TRUE]]
    temp        <- temp$mat.list
    design.V0   <- temp[["int.vars", exact=TRUE]]
    int.vars    <- colnames(design.V0)
    X_          <- temp[["main.vars", exact=TRUE]]
    design.S0   <- temp[["strata.var", exact=TRUE]]
  }

  # Set up return vector
  int.names <- NULL
  if (op$genetic.model != 3) {
    retvars <- snp.var
    snpvars <- snp.var
    if (int.flag) {
      int.names <- paste(snp.var, ":", int.vars, sep="")
      retvars   <- c(retvars, int.names)
    }
  } else {
    retvars   <- paste(snp.var, 1:2, sep="")
    snpvars   <- retvars
    if (int.flag) {
      int.names <- paste(snp.var, "1:", int.vars, sep="")
      int.names <- c(int.names, paste(snp.var, "2:", int.vars, sep=""))
      retvars   <- c(retvars, int.names)
    }
  }
  
  pheno.list$return.vars      <- retvars
  pheno.list$omnibus.vars     <- retvars
  pheno.list$interaction.vars <- int.names

  #methods <- c("UML", "CML", "EB")
  vnames  <- paste(methods, ".Omnibus.Pvalue", sep="")
  if (int.flag) vnames <- c(vnames, paste(methods, ".Inter.Pvalue", sep=""))
  if (parmFlag) {
    # Betas
    v2 <- "Beta"
    for (method in methods) {
      vnames <- c(vnames, paste(method, ".", retvars, ".", v2, sep=""))
    }

    # Cov
    v2 <- "Cov"
    nv <- length(retvars)
    for (method in methods) {
      for (i in 1:nv) {
        vnames <- c(vnames, paste(method, ".", retvars[i], "_", retvars[i:nv], ".", v2, sep=""))
      }
    }

  }
  retvec                      <- rep(NA, length(vnames))
  names(retvec)               <- vnames
  pheno.list$return.vec       <- retvec
  pheno.list$omnibus.flag     <- !is.null(pheno.list[["omnibus.vars", exact=TRUE]])
  pheno.list$interaction.flag <- !is.null(pheno.list[["interaction.vars", exact=TRUE]])
  pheno.list$parmFlag         <- parmFlag
  pheno.list$methods          <- methods

  # Base model
  if (!is.null(X_)) {
    fit <- try(glm(D ~ X_, family=binomial()), silent=TRUE)
  } else {
    fit <- try(glm(D ~ 1, family=binomial()), silent=TRUE)
  }
  checkBaseModel(fit, opList)

  retdata <- list(D=D, design.X0=X_, design.V0=design.V0, design.S0=design.S0)
  pheno.list$subject.ids   <- makeVector(data[, pheno.list$id.var])  
  pheno.list$response.name <- "D"

  list(data=retdata, pheno.list=pheno.list, scan.func.op=op)

} # END: GxE.setup.1

# Scan function for snp.logistic
GxE.call.1 <- function(data, op) {

  # data     List of data objects
  # op       snp.logistic options
  
  #imputed <- op$imputed
  imputed <- 0
  if (imputed) {
    snp  <- cbind(data[[op$ProbG0.name]], data[[op$ProbG1.name]],
                  data[[op$ProbG2.name]])
    temp <- is.na(snp[, 1])
  } else {
    snp  <- data[[op$snpName]]
    temp <- is.na(snp)
  }

  D         <- data$D 
  design.X0 <- data$design.X0
  design.V0 <- data$design.V0
  design.S0 <- data$design.S0
  
  nmiss     <- sum(temp)
  n         <- length(D)
  if (n - nmiss < 5) return(NULL)
  if (nmiss) {
    temp      <- !temp
    if (imputed) {
      snp     <- snp[temp, ]
    } else {
      snp     <- snp[temp]
    }
    D         <- D[temp]
    design.X0 <- design.X0[temp, , drop=FALSE]
    design.V0 <- design.V0[temp, , drop=FALSE]
    design.S0 <- design.S0[temp, , drop=FALSE]
  }

  ret <- snp.main(D, snp, X.main=design.X0, X.int=design.V0,
                      X.strata=design.S0, ProbG1=NULL, op=op) 

  ret

} # END: GxE.call.1

# Scan function for snp.logistic
GxE.scan.1 <- function(data, op) {

  # data   List of data objects
  # op     List of sublists

  ret <- GxE.call.1(data, op$scan.func.op)
  if (is.null(ret)) return(NULL)

  pheno.list <- op$pheno.list 
  retvars    <- pheno.list$return.vars
  retvec     <- pheno.list$return.vec
  omnivars   <- pheno.list$omnibus.vars
  omniFlag   <- pheno.list$omnibus.flag
  intervars  <- pheno.list$interaction.vars
  interFlag  <- pheno.list$interaction.flag
  parmFlag   <- pheno.list$parmFlag
  methods    <- pheno.list$methods
  outvars    <- pheno.list$OUTVARS

  retvec[]   <- NA
  for (method in methods) {
    obj <- ret[[method, exact=TRUE]]
    if (!is.null(obj)) {
      parms <- obj$parms
      cov   <- obj$cov
      temp  <- retvars %in% colnames(cov)
      vars  <- retvars[temp]
      nvars <- length(vars)
      if ((parmFlag) && (nvars)) {
        # Betas
        names <- paste(method, ".", vars, ".Beta", sep="")
        retvec[names] <- parms[vars]

        # Cov
        for (i in 1:nvars) {
          v1    <- vars[i]
          v2    <- vars[i:nvars]
          names <- paste(method, ".", v1, "_", v2, ".Cov", sep="")
          retvec[names] <- cov[v1, v2]
        }
      }
      if (omniFlag) {
        v <- paste(method, ".Omnibus.Pvalue", sep="")
        retvec[v] <- waldTest.main(parms, cov, omnivars)$pvalue
      }

      if (interFlag) {
        v <- paste(method, ".Inter.Pvalue", sep="")
        retvec[v] <- waldTest.main(parms, cov, intervars)$pvalue
      }
    }
  }

  retvec

} # END: GxE.scan.1

# Setup function for the additive test
GxE.setup.2 <- function(data, opList=NULL) {

  pheno.list   <- opList$pheno.list
  op           <- opList[["scan.func.op", exact=TRUE]]
  if (is.null(op)) op <- list()

  
  response.var <- pheno.list[["response.var", exact=TRUE]]
  snp.var      <- pheno.list[["snp.var", exact=TRUE]]
  exposure.var <- pheno.list[["int.vars", exact=TRUE]]
  strata.var   <- pheno.list[["strata.var", exact=TRUE]]
  main.vars    <- pheno.list[["main.vars", exact=TRUE]]

  # Check for errors
  if (length(response.var) != 1) scan.error("response.var must be a single variable", "GxE.setup.2")
  if (length(snp.var) != 1) scan.error("snp.var must be a single variable", "GxE.setup.2")
  if (!is.data.frame(data)) scan.error("data must be a data frame")
  if (length(exposure.var) != 1) scan.error("pheno.list$int.vars must be a single variable", "GxE.setup.2")
  if (!(length(strata.var) %in% 0:1)) scan.error("strata.var must be NULL or a single variable", "GxE.setup.2")

  op <- default.list(op, 
        c("indep", "maxit", "reltol", "optim.method", "use.C.code", "genetic.model"), 
        list(FALSE, 500, 1e-7, "BFGS", 1, 3))
  if (!(op$genetic.model %in% 1:3)) scan.error("ERROR: genetic.model must be 1, 2, or 3", "GxE.setup.2")
  op$snp.name <- opList$snp.name

  # Check variable names
  vlist <- list(response.var=response.var, main.vars=main.vars,
                strata.var=strata.var, exposure.var=exposure.var)

  vars <- getAllVars(vlist, names=names(vlist))
  temp <- !(vars %in% colnames(data))
  if (any(temp)) {
    print(vars[temp])
    scan.error("The above variables were not found in the input data", "GxE.setup.2")
  }

  # Check if snp.var is in main.vars 
  temp <- getAllVars(main.vars) 
  if (snp.var %in% temp) scan.error("ERROR: main.vars must not contain snp.var", "GxE.setup.2")
  if (exposure.var %in% temp) scan.error("ERROR: main.vars must not contain the exposure var", "GxE.setup.2")

  # Remove missing values
  temp <- getFormulas(vlist)
  miss <- c(NA, NaN, Inf, -Inf)
  if (length(temp)) data <- applyFormulas(data, temp, remove=miss)
  data <- removeMiss.vars(data, vars=vars, miss=miss)
  if (!nrow(data)) scan.error("ERROR: Zero rows in the input data after removing rows with missing values", "GxE.setup.2")

  # Get the response variable 
  Y <- as.numeric(unfactor(makeVector(data[, response.var])))
  if (!(all(Y %in% 0:1))) scan.error("ERROR: response.var must be coded 0-1", "GxE.setup.2")

  # Get the exposure variable
  exv  <- unfactor(makeVector(data[, exposure.var]))
  uexv <- unique(exv)
  nexv <- length(uexv)
  if (nexv < 2) scan.error("After missing values are removed, the interaction variable has less than 2 levels", "GxE.setup.2")
  if (!(all(uexv %in% 0:2))) {
    # Recode to 0-1-2
    tlist <- list()
    for (i in 1:nexv) tlist[[i]]      <- exv %in% uexv[i]
    for (i in 1:nexv) exv[tlist[[i]]] <- i - 1 
  }
  exv <- as.numeric(exv)

  # Get the stratification vector
  if (!is.null(strata.var)) {
    strataVec <- factor(makeVector(data[, strata.var]))
  } else {
    strataVec <- NULL
  }

  # Get the variables that are factors
  facVars <- NULL
  for (temp in vars) {
    if (is.factor(data[, temp])) facVars <- c(facVars, temp)
  }

  # Get the design matrix
  if (is.null(main.vars)) {
    X_ <- NULL
  } else {
    X_ <- logistic.dsgnMat(data, main.vars, facVars, removeInt=1)$designMatrix
  }

  pheno.list$RERI.flag <- 0
  pheno.list$AP.flag   <- 0
  pheno.list$SYN.flag  <- 0
  indep    <- op$indep
  cnames   <- c("Add.LRT.P", "Mult.LRT.P", "Mult.Wald.P")
  if (nexv == 2) {
    cnames   <- c(cnames, "RERI.P")
    pheno.list$RERI.flag <- 1
    if (!indep) {
      cnames <- c(cnames, "Synergy.P", "Attrib.P")
      pheno.list$AP.flag  <- 1
      pheno.list$SYN.flag <- 1
    }
  }
  retvec <- rep(NA, length(cnames))
  names(retvec) <- cnames
  pheno.list$return.vec    <- retvec
  pheno.list$response.name <- "Y"

  # Base model
  if (!is.null(X_)) {
    form <- Y ~ X_ + exv
  } else {
    form <- Y ~ exv
  }
  fit <- try(glm(form, family=binomial()), silent=TRUE)
  checkBaseModel(fit, opList)


  retdata <- list(Y=Y, exv=exv, design.X0=X_, strataVec=strataVec)

  list(data=retdata, pheno.list=pheno.list, scan.func.op=op)

} # END: GxE.setup.2

# Scan function for additive test
GxE.scan.2 <- function(data, opList) {

  pheno.list <- opList$pheno.list
  op         <- opList$scan.func.op

  # Get the snp variable
  snp    <- data[[opList$snp.name]]
  subset <- is.na(snp)
  miss   <- any(subset)
  if (miss) {
    subset <- !subset
    snp    <- snp[subset]
  }
  usnp <- unique(snp)
  if (!(all(usnp %in% 0:2))) scan.error("ERROR: snp.var must be coded 0-1-2", "GxE.scan.2")
  n1 <- length(usnp)
  if (n1 < 2) scan.error("After missing values the SNP has less than 2 levels", "GxE.scan.2")
  if (n1 == 2) {
    if (!(all(usnp %in% 0:1))) scan.error("ERROR: snp.var must be coded 0-1 for this analysis", "GxE.scan.2")
    if (op$genetic.model != 0) cat("NOTE: SNP only has 2 levels. Changing genetic.model.\n")
    op$genetic.model <- 0
  }

  exv  <- data$exv
  if (miss) exv  <- exv[subset]
  uexv <- unique(exv)
  n2   <- length(uexv)

  if (n2 < 2) scan.error("After missing values are removed, the interaction variable has less than 2 levels", "GxE.scan.2")
  if (n2 == 2) {
    if (!(all(uexv %in% 0:1))) {
      # Recode to 0-1
      temp1      <- exv %in% uexv[1]
      temp2      <- exv %in% uexv[2]
      exv[temp1] <- 0
      exv[temp2] <- 1
    }
  }

  # Get the method
  if (op$genetic.model %in% 0:2) {
    method <- paste("2x", n2, sep="")
  } else {
    method <- paste(n1, "x", n2, sep="")
  }

  Y         <- data$Y
  design.X0 <- data[["design.X0", exact=TRUE]]
  strataVec <- data[["strataVec", exact=TRUE]]
  if (miss) { 
    Y       <- Y[subset]
    if (!is.null(design.X0)) design.X0 <- design.X0[subset, , drop=FALSE]
    if (!is.null(strataVec)) strataVec <- strataVec[subset]
  }

  ret <- try(additiveTest(Y, snp, exv, design.X0, method, indep=op$indep, X.st=strataVec,
          control=list(maxit=op$maxit, reltol=op$reltol), optim.method=op$optim.method,
          use.C.code=op$use.C.code, genetic.model=op$genetic.model), silent=TRUE)
  if (checkTryError(ret, conv=0)) scan.error(ret, "additiveTest")
  retvec <- pheno.list$return.vec  
  nn     <- names(ret)

  if ("pval.add" %in% nn)                         retvec[1]           <- ret$pval.add
  if ("pval.mult" %in% nn)                        retvec[2]           <- ret$pval.mult
  if ("pval.wald.mult" %in% nn)                   retvec[3]           <- ret$pval.wald.mult
  if ((pheno.list$RERI.flag) && ("RERI" %in% nn)) retvec["RERI.P"]    <- ret$RERI[1, "pval"]
  if ((pheno.list$SYN.flag) && ("S" %in% nn))     retvec["Synergy.P"] <- ret$S[1, "pval"]
  if ((pheno.list$AP.flag) && ("AP" %in% nn))     retvec["Attrib.P"]  <- ret$AP[1, "pval"]

  retvec

} # END: GxE.scan.2

# Setup function for score test
GxE.setup.3 <- function(data, opList) {

  pheno.list   <- opList$pheno.list
  op           <- opList[["scan.func.op", exact=TRUE]]
  if (is.null(op)) op <- list()
 
  response.var <- pheno.list$response.var
  main.vars    <- pheno.list[["main.vars", exact=TRUE]]
  strata.var   <- pheno.list[["strata.var", exact=TRUE]] 
  exposure.var <- pheno.list[["int.vars", exact=TRUE]]

  if (is.null(exposure.var)) {
    scan.error("pheno.list$int.vars must be specified", "GxE.setup.3")
  }
  if (length(response.var) != 1) scan.error("pheno.list$response.var must be a single variable", "GxE.setup.3")
  if (!(length(strata.var) %in% 0:1)) scan.error("strata.var must be NULL or a single variable", "GxE.setup.3")

  # Check the options list
  if (is.null(op)) op <- list()
  op <- default.list(op, 
          c("indep", "doGLM", "p.mvn", "do.joint", "df2", "thetas"),
        list(FALSE, FALSE, TRUE, FALSE, TRUE, seq(-3,3,by=0.1)))
  op$p.mvn <- FALSE
  
  indep    <- op$indep
  doGLM    <- op$doGLM
  p.mvn    <- op$p.mvn
  do.joint <- op$do.joint
  df2      <- op$df2
  thetas   <- op$thetas
  if (!(indep %in% 0:1)) scan.error("The option indep must be TRUE or FALSE", "GxE.setup.3") 
  if (!(doGLM %in% 0:1)) scan.error("The option doGLM must be TRUE or FALSE", "GxE.setup.3") 
  if (!(p.mvn %in% 0:1)) scan.error("The option p.mvn must be TRUE or FALSE", "GxE.setup.3") 
  if (!(do.joint %in% 0:1)) scan.error("The option do.joint must be TRUE or FALSE", "GxE.setup.3") 
  if (!(df2 %in% 0:1)) scan.error("The option df2 must be TRUE or FALSE", "GxE.setup.3") 
  if (!is.vector(thetas)) scan.error("The option thetas must be a numeric vector", "GxE.setup.3")
  if (!is.numeric(thetas)) scan.error("The option thetas must be a numeric vector", "GxE.setup.3")
  if ((!indep) && (!is.null(strata.var))) {
    warning("strata.var is ignored since it can only be used with indep=TRUE")
    strata.var            <- NULL
    pheno.list$strata.var <- NULL
  }

  vlist <- list(response.var=response.var, main.vars=main.vars,
                strata.var=strata.var, exposure.var=exposure.var)
  vars <- getAllVars(vlist, names=names(vlist))
  temp <- !(vars %in% colnames(data))
  if (any(temp)) {
    print(vars[temp])
    scan.error("The above variables were not found in the input data", "GxE.setup.3")
  }

  # Check variable names
  mvars <- getAllVars(main.vars) 
  evars <- getAllVars(exposure.var) 

  if (response.var %in% mvars) scan.error("ERROR: main.vars must not contain response.var", "GxE.setup.3")
  if (response.var %in% evars) scan.error("ERROR: exposure.var must not contain response.var", "GxE.setup.3")
  temp  <- !(mvars %in% c(evars))
  mvars <- mvars[temp]

  # Get the response variable 
  Y <- as.numeric(unfactor(data[, response.var]))
  if (!(all(Y %in% 0:1))) scan.error("ERROR: response.var must be coded 0-1", "GxE.setup.3")

  # Get the stratification vector
  if (!is.null(strata.var)) {
    strataVec <- as.numeric(factor(makeVector(data[, strata.var])))
  } else {
    strataVec <- NULL
  }

  # Get the variables that are factors
  facVars <- NULL
  for (temp in vars) {
    if (is.factor(data[, temp])) facVars <- c(facVars, temp)
  }

  # Design matrix for exposure
  X2 <- logistic.dsgnMat(data, evars, facVars, removeInt=1)$designMatrix
  #for (i in 1:ncol(X2)) {
  #  if (!(all(X2[, i] %in% 0:1))) scan.error("ERROR: pheno.list$int.vars must be binary variables", "GxE.setup.3")
  #}

  # Get the design matrix for main effects
  if (!length(mvars)) {
    X_ <- NULL
  } else {
    X_ <- logistic.dsgnMat(data, mvars, facVars, removeInt=1)$designMatrix
  }

  cnames                   <- c("maxTheta", "maxScore", "pval")
  vnames                   <- c("Max.Theta", "Max.Score", "Pvalue")
  if (doGLM) {
    cnames <- c(cnames, "pval.logit")
    vnames <- c(vnames, "Pvalue.logit")
  }
  if (do.joint) {
    cnames <- c(cnames, "pval.joint")
    vnames <- c(vnames, "Pvalue.joint")
  }
  retvec                   <- rep(NA, length(cnames))
  names(retvec)            <- vnames
  pheno.list$return.vec    <- retvec
  pheno.list$return.names  <- cnames
  pheno.list$response.name <- "Y"

  # Base model
  if (!is.null(X_)) {
    form <- Y ~ X_ + X2
  } else {
    form <- Y ~ X2
  }
  fit <- try(glm(form, family=binomial()), silent=TRUE)
  checkBaseModel(fit, opList)


  retdata <- list(Y=Y, X2=X2, COVS=X_, strataVec=strataVec)

  list(data=retdata, pheno.list=pheno.list, scan.func.op=op)

} # END: GxE.setup.3

# Scan function for score test
GxE.scan.3 <- function(data, opList) {

  snp.var <- opList$snp.name
  
  # Get the snp variable
  snp  <- as.numeric(makeVector(data[[snp.var]]))
  temp <- is.na(snp)
  miss <- any(temp)
  if (miss) {
    temp <- !temp
    snp  <- snp[temp]
  } 

  usnp <- unique(snp)
  #if (!(all(usnp %in% 0:2))) scan.error("ERROR: snp.var must be coded 0-1-2", "GxE.setup.3")
  n1   <- length(usnp)
  if (n1 < 2) scan.error("After removing rows with missing values, the SNP only has 1 level", "GxE.scan.3")

  Y         <- data$Y
  X2        <- data$X2
  COVS      <- data[["COVS", exact=TRUE]]
  strataVec <- data[["strataVec", exact=TRUE]]
  if (miss) {
    Y  <- Y[temp]
    X2 <- X2[temp, , drop=FALSE]
    if (!is.null(COVS)) COVS <- COVS[temp, , drop=FALSE]
    if (!is.null(strataVec)) strataVec <- strataVec[temp]
  }

  op  <- opList$scan.func.op
  ret <- scoreTest.general9(Y, snp, X2, COVS, op$thetas, op$df2, op$indep, strataVec,
               op$doGLM, op$p.mvn, do.joint=op$do.joint)

  pheno.list <- opList$pheno.list
  retnames   <- pheno.list$return.names
  retvec     <- pheno.list$return.vec
  rnames     <- names(ret)
  for (i in 1:length(retnames)) {
    nn <- retnames[i]
    if (nn %in% rnames) retvec[i] <- ret[[nn]]
  }

  retvec

} # END: GxE.scan.3

# Setup function for Minsun's score test
GxE.setup.3a <- function(data, opList) {

  pheno.list   <- opList$pheno.list
  op           <- opList[["scan.func.op", exact=TRUE]]
  if (is.null(op)) op <- list()
  op           <- default.list(op, c("output.sandwich", "output.parms", "output.methods"),  
                                   list(0, 0, c("UML", "CML", "EB")))
  sandFlag <- op$output.sandwich
  parmFlag <- op$output.parms
  methods  <- op$methods
  methods  <- unique(toupper(removeWhiteSpace(op$output.methods)))
  temp     <- methods %in% c("UML", "CML", "EB") 
  methods  <- methods[temp]
  if (!length(methods)) {
    cat("\nERROR: output.methods must be any of UML, CML or EB\n")
    stop()
  }

  response.var <- pheno.list$response.var
  main.vars    <- pheno.list[["main.vars", exact=TRUE]]
  strata.var   <- pheno.list[["strata.var", exact=TRUE]] 
  exposure.var <- pheno.list[["int.vars", exact=TRUE]]

  if (is.null(exposure.var)) {
    scan.error("pheno.list$int.vars must be specified", "GxE.setup.3a")
  }
  if (length(response.var) != 1) scan.error("pheno.list$response.var must be a single variable", "GxE.setup.3a")
  if (!(length(strata.var) %in% 0:1)) scan.error("strata.var must be NULL or a single variable", "GxE.setup.3a")

  vlist <- list(response.var=response.var, main.vars=main.vars,
                strata.var=strata.var, exposure.var=exposure.var)
  vars <- getAllVars(vlist, names=names(vlist))
  temp <- !(vars %in% colnames(data))
  if (any(temp)) {
    print(vars[temp])
    scan.error("The above variables were not found in the input data", "GxE.setup.3a")
  }

  # Check variable names
  mvars <- getAllVars(main.vars) 
  if (!length(mvars)) scan.error("pheno.list$main.vars cannot be NULL")
  evars <- getAllVars(exposure.var) 

  if (response.var %in% mvars) scan.error("ERROR: main.vars must not contain response.var", "GxE.setup.3a")
  if (response.var %in% evars) scan.error("ERROR: exposure.var must not contain response.var", "GxE.setup.3a")

  # Get the response variable 
  Y <- as.numeric(unfactor(data[, response.var]))
  if (!(all(Y %in% 0:1))) scan.error("ERROR: response.var must be coded 0-1", "GxE.setup.3a")

  # Get the stratification vector
  if (!is.null(strata.var)) {
    strataVec <- as.numeric(factor(makeVector(data[, strata.var])))
  } else {
    strataVec <- rep(1, nrow(data))
  }

  # Get the variables that are factors
  facVars <- NULL
  for (temp in vars) {
    if (is.factor(data[, temp])) facVars <- c(facVars, temp)
  }

  # Design matrix for exposure
  X2 <- logistic.dsgnMat(data, evars, facVars, removeInt=1)$designMatrix
  ev <- colnames(X2)
  
  # Get the design matrix for main effects
  if (!length(mvars)) {
    COVS <- NULL
  } else {
    COVS <- logistic.dsgnMat(data, mvars, facVars, removeInt=1)$designMatrix
  }

  # Compute the fitted values
  if (is.null(COVS)) {
    fit <- glm(Y ~ 1, family=binomial())
  } else {
    fit <- glm(Y ~ COVS, family=binomial())
  }
  checkBaseModel(fit, opList)
  fitted_value <- fitted(fit) 
  rvars        <- NULL
  names        <- NULL
  for (meth in methods) {
    rvars <- c(rvars, paste(meth, c("_joint_pval", "_interaction_pval"), sep=""))
    names <- c(names, paste(meth, c(".Omnibus.Pvalue", ".Inter.Pvalue"), sep=""))
  }
  
  if (sandFlag) {
    for (meth in methods) {
      rvars <- c(rvars, paste(meth, c("_joint_pval_sandwich", "_interaction_pval_sandwich"), sep="")) 
      names <- c(names, paste(meth, c(".Omnibus.Pvalue", ".Inter.Pvalue"), ".Sand", sep=""))
    }
  }
  pheno.list$return.vars <- rvars

  n.ex     <- ncol(X2)
  m        <- n.ex + 1  # For SNP + exposure terms
  meth     <- methods
  nmeth    <- length(meth)
  retnames <- names
  pheno.list$n.ret.parms <- m
  VARS     <- c("SNP", paste("SNP", ev, sep=":"))

  # Parameter estimates
  if (parmFlag) {
    for (i in 1:nmeth) {
      temp     <- paste(meth[i], VARS, "Beta", sep=".")
      retnames <- c(retnames, temp) 
    }
  }  

  # Cov parameters
  cov.ij <- NULL
  cp     <- list()
  for (i in 1:m) {
    cp[[i]] <- paste(VARS[i], VARS[i:m], sep="_")
    cov.ij  <- c(cov.ij, i:m + m*(i - 1)) 
  }
  if (parmFlag) {
    for (i in 1:nmeth) {
      for (j in 1:m) {
        temp     <- paste(meth[i], cp[[j]], "Cov", sep=".")
        retnames <- c(retnames, temp)
      } 
    }
  }

  # Sandwich cov
  if (sandFlag) {
    for (i in 1:nmeth) {
      for (j in 1:m) {
        temp     <- paste(meth[i], cp[[j]], "Cov.Sand", sep=".")
        retnames <- c(retnames, temp)
      } 
    }
  }

  len                   <- length(retnames)
  vec                   <- rep("", len)
  names(vec)            <- retnames
  pheno.list$return.vec <- vec
  pheno.list$cov.ij     <- cov.ij
  pheno.list$parmFlag   <- parmFlag
  pheno.list$sandFlag   <- sandFlag
  pheno.list$output.methods <- methods

  retdata <- list(Y=Y, X2=X2, COVS=COVS, strataVec=strataVec, fitted_value=fitted_value)
  pheno.list$response.name <- "Y"

  list(data=retdata, pheno.list=pheno.list, scan.func.op=op)

} # END: GxE.setup.3a

# Scan function for score test
GxE.scan.3a <- function(data, opList) {

  snp.var <- opList$snp.name

  # Get the snp variable
  snp  <- as.numeric(makeVector(data[[snp.var]]))
  temp <- is.na(snp)
  miss <- any(temp)
  
  Y         <- data$Y
  X2        <- data$X2
  COVS      <- data[["COVS", exact=TRUE]]
  strataVec <- data$strataVec
  fitted    <- data$fitted
  if (miss) {
    temp <- !temp
    Y    <- Y[temp]
    X2   <- X2[temp, , drop=FALSE]
    if (!is.null(COVS)) COVS <- COVS[temp, , drop=FALSE]
    strataVec <- strataVec[temp]
    fitted    <- fitted[temp]
    snp       <- snp[temp]
  }
  pheno.list   <- opList$pheno.list
  sandFlag     <- pheno.list$sandFlag

  rlist <- Modified_Wald_Test(Y, snp, X2, COVS, strataVec, fitted, sandwich=sandFlag)
  rvec  <- unlist(rlist)
  
  retvec       <- pheno.list$return.vec
  retvec[]     <- ""
  pvars        <- pheno.list$return.vars
  np           <- length(pvars)
  retvec[1:np] <- rvec[pvars]
  parmFlag     <- pheno.list$parmFlag
  
    
  meth     <- pheno.list$output.methods
  nmeth    <- length(meth)
  m        <- pheno.list$n.ret.parms
  b        <- np  

  # Parameter estimates
  if (parmFlag) {
    parms <- paste(meth, "_parm", sep="")
    for (i in 1:nmeth) {
      a    <- b + 1
      b    <- a + m - 1
      temp <- rlist[[parms[i], exact=TRUE]]
      if (is.null(temp)) next
      retvec[a:b] <- temp
    }
  }

  # Covariance
  cov.ij <- pheno.list$cov.ij 
  n      <- m*(m+1)/2
  if (parmFlag) {
    parms  <- paste(meth, "_var", sep="")
    for (i in 1:nmeth) {
      a    <- b + 1
      b    <- a + n - 1
      p2   <- paste(parms[i], cov.ij, sep="")
      retvec[a:b] <- rvec[p2]
    }
  }

  # Sandwich covariance
  if (sandFlag) {
    parms  <- paste(meth, "_var_sandwich", sep="")
    for (i in 1:nmeth) {
      a    <- b + 1
      b    <- a + n - 1
      p2   <- paste(parms[i], cov.ij, sep="")
      retvec[a:b] <- rvec[p2]
    }
  }
  
  retvec

} # END: GxE.scan.3a

# Setup function for snp.logistic returning all UML-CML estimates
GxE.setup.1_2 <- function(data, opList) {

  rlist      <- GxE.setup.1(data, opList)
  dlist      <- rlist$data
  pheno.list <- rlist$pheno.list
  op         <- rlist$scan.func.op

  # Run a base model with simulated SNP
  nobs   <- length(dlist$D)
  snp    <- rbinom(nobs, 1, 0.5) 
  ProbG1 <- NULL
  if (op$imputed) ProbG1 <- rep(0.5, nobs) 
  ret <- try(snp.main(dlist$D, snp, X.main=dlist[["design.X0", exact=TRUE]], 
                  X.int=dlist$design.V0, X.strata=dlist[["design.S0", exact=TRUE]], 
                  ProbG1=ProbG1, op=op), silent=TRUE)
  if (checkTryError(ret)) {
    scan.error("Model failed", "GxE.setup.1_2")
  }

  out.base <- opList[["base.outfile", exact=TRUE]]
  outFlag  <- !is.null(out.base)

  interaction.variables <- colnames(dlist$design.V0)
  main.variables        <- op[["E.parm.names", exact=TRUE]]
  if (is.null(main.variables)) main.variables <- interaction.variables
  pheno.list$main.variables <- main.variables
  pheno.list$interaction.variables <- interaction.variables

  if (outFlag) {
    sink(out.base, append=TRUE)
    cat("Exposure variables:\n")
    print(main.variables)
    cat("Interaction variables:\n")
    print(interaction.variables)
    print(summary(ret))
    sink()
  }
  if (opList$print) {
    cat("Exposure variables:\n")
    print(main.variables)
    cat("Interaction variables:\n")
    print(interaction.variables)
  }

  # Make sure all interaction variables are also main effects
  temp <- ret$UML$parms
  temp <- names(temp)
  if (!(all(main.variables %in% temp))) {
    scan.error("All interaction variables must also be main effects", "GxE.setup.1_2")
  }

  # Output variable names for the UML main effects:
  # UML.G.BETA UML.E.BETA UML.GE.BETA 
  
  # Output variable names for the upper triangle of the UML covariance matrix:
  # UML.G.G.COV, UML.G.E.COV UML.G.GE.COV
  #              UML.E.E.COV UML.E.GE.COV
  #                          UML.GE.GE.COV

  # Output variable names for the UML-CML covariance matrix:
  # UML.G.CML.G.COV, UML.G.CML.E.COV UML.G.CML.GE.COV
  # UML.E.CML.G.COV, UML.E.CML.E.COV UML.E.CML.GE.COV         
  # UML.GE.CML.G.COV, UML.GE.CML.E.COV UML.GE.CML.GE.COV                   

  n.main.vars     <- length(main.variables)
  n.int.vars      <- length(interaction.variables)
  UML.ME.out      <- outNames.me("UML", n.main.vars, n.int.vars)
  CML.ME.out      <- outNames.me("CML", n.main.vars, n.int.vars)
  UML.COV.out     <- outNames.cov("UML", n.main.vars, n.int.vars)
  CML.COV.out     <- outNames.cov("CML", n.main.vars, n.int.vars)
  UML.CML.COV.out <- outNames.cov2(n.main.vars, n.int.vars)

  pheno.list$UML.ME.out      <- UML.ME.out
  pheno.list$CML.ME.out      <- CML.ME.out
  pheno.list$UML.COV.out     <- UML.COV.out
  pheno.list$CML.COV.out     <- CML.COV.out
  pheno.list$UML.CML.COV.out <- UML.CML.COV.out

  retnames <- c(UML.ME.out, UML.COV.out, CML.ME.out, CML.COV.out, UML.CML.COV.out)
  pheno.list$return.names <- retnames
  retvec                  <- rep(NA, length(retnames))
  names(retvec)           <- retnames
  pheno.list$return.vec   <- retvec
  
  rlist$pheno.list <- pheno.list

  rlist

} # END: GxE.setup.1_2

# Scan function for snp.logistic returning all UML-CML estimates
GxE.scan.1_2 <- function(data, opList) {

  pheno.list <- opList$pheno.list
  outvec     <- pheno.list$return.vec

  fit <- GxE.call.1(data, opList$scan.func.op)
  if (is.null(fit)) return(outvec)

  main.variables        <- pheno.list$main.variables
  interaction.variables <- pheno.list$interaction.variables 
  snp                   <- opList$snp.name

  # Extract UML and CML estimates
  temp <- try(extractEst(fit, "UML", outvec, snp, main.variables, interaction.variables, 
                           pheno.list$UML.ME.out, pheno.list$UML.COV.out), silent=TRUE)
  if (!("try-error" %in% class(temp))) outvec <- temp
  temp <- try(extractEst(fit, "CML", outvec, snp, main.variables, interaction.variables, 
                           pheno.list$CML.ME.out, pheno.list$CML.COV.out), silent=TRUE)
  if (!("try-error" %in% class(temp))) outvec <- temp
  temp <- try(extract_UML.CML(fit, outvec, snp, main.variables, interaction.variables, 
                                pheno.list$UML.CML.COV.out), silent=TRUE)
  if (!("try-error" %in% class(temp))) outvec <- temp

  outvec

} # END: GxE.scan.1_2

# Setup function for snp.matched
GxE.setup.4 <- function(data, opList) {

  pheno.list   <- opList$pheno.list
  op           <- opList[["scan.func.op", exact=TRUE]]

  op           <- default.list(op, 
                  c("maxiter", "reltol", "genetic.model"), 
                  list(100, 1e-5, 0))

  response.var <- pheno.list[["response.var", exact=TRUE]]
  main.vars    <- pheno.list[["main.vars", exact=TRUE]]
  int.vars     <- pheno.list[["int.vars", exact=TRUE]]
  cc.var       <- pheno.list[["cc.var", exact=TRUE]]
  nn.var       <- pheno.list[["nn.var", exact=TRUE]]
  int.flag     <- !is.null(int.vars)

  # Check for errors
  if (length(response.var) != 1) scan.error("response.var must be a single variable", "GxE.setup.4")
  if (!is.data.frame(data)) scan.error("data must be a data frame", "GxE.setup.4")

    ccFlag <- !is.null(cc.var)
	nnFlag <- !is.null(nn.var)
	if(!ccFlag && !nnFlag) scan.error("At least one of cc.var and nn.var must be provided", "GxE.setup.4")
    if (ccFlag && length(cc.var) > 1) scan.error("cc.var must be a single variable", "GxE.setup.4")
    if (nnFlag && length(nn.var) > 1) scan.error("nn.var must be a single variable", "GxE.setup.4")
		
	main.form <- ("formula" %in% class(main.vars))
	int.form  <- ("formula" %in% class(int.vars))
	
	# Check variable names
	vlist <- list(response.var=response.var, main.vars=main.vars,
				  int.vars=int.vars, cc.var=cc.var, nn.var=nn.var)
	vars <- getAllVars(vlist, names=names(vlist))
	temp <- !(vars %in% colnames(data))
	if (any(temp)) {
	  print(vars[temp])
	  scan.error("The above variables were not found in the input data", "GxE.setup.4")
	}
	
	# Remove missing values
	temp <- getFormulas(vlist)
	miss <- c(NA, NaN, Inf, -Inf)
	if (length(temp)) data <- applyFormulas(data, temp, remove=miss)
	data <- removeMiss.vars(data, vars=vars, miss=miss)

	# Get the response variable
	D <- unfactor(data[, response.var])

	facVars <- NULL
	
	# Get the strata vars
    cc.strat <- NULL
    nn.strat <- NULL
	if (ccFlag) cc.strat     <- as.integer(data[, cc.var])
	if (nnFlag) nn.strat     <- as.integer(data[, nn.var])
    pheno.list$ccFlag        <- ccFlag
    pheno.list$nnFlag        <- nnFlag
    pheno.list$response.name <- "D"
	
	# Get the variables that are factors
	for (temp in colnames(data)) {
	  if (is.factor(data[, temp])) facVars <- c(facVars, temp)
	}

	# Get the X design matrix
	design.X0 <- logistic.dsgnMat(data, main.vars, facVars)$designMatrix
	
	# Get the V design matrix
	design.V0 <- logistic.dsgnMat(data, int.vars, facVars)$designMatrix

    # Set up return vector
    snp.var   <- pheno.list$snp.var
    int.names <- NULL
    if (op$genetic.model != 3) {
      retvars   <- snp.var
      if (int.flag) {
        int.names <- paste(snp.var, ":", int.vars, sep="")
        retvars   <- c(retvars, int.names)
      }
    } else {
      retvars   <- paste(snp.var, 1:2, sep="")
      if (int.flag) {
        int.names <- paste(snp.var, "1:", int.vars, sep="")
        int.names <- c(int.names, paste(snp.var, "2:", int.vars, sep=""))
        retvars   <- c(retvars, int.names)
      }
    }
  
    pheno.list$return.vars      <- retvars
    pheno.list$omnibus.vars     <- retvars
    pheno.list$interaction.vars <- NULL
    pheno.list$interaction.vars <- int.names

    methods <- NULL
    if (ccFlag) methods <- c("CLR", "CCL")
    if (nnFlag) methods <- c(methods, "HCL")
    pheno.list$return.methods <- methods
    vnames  <- paste(methods, ".Omnibus.Pvalue", sep="")
    if (int.flag) vnames <- c(vnames, paste(methods, ".Inter.Pvalue", sep=""))
    v2 <- c("Beta", "SE")
    for (method in methods) {
      for (var in retvars) { 
        vnames <- c(vnames, paste(method, ".", var, ".", v2, sep=""))
      }
    }
    retvec                      <- rep(NA, length(vnames))
    names(retvec)               <- vnames

    pheno.list$return.vec       <- retvec
    pheno.list$omnibus.flag     <- !is.null(pheno.list[["omnibus.vars", exact=TRUE]])
    pheno.list$interaction.flag <- !is.null(pheno.list[["interaction.vars", exact=TRUE]])

    # Test with a simulated SNP
    design.S0 <- as.matrix(rbinom(length(D), 2, 0.4))
    colnames(design.S0) <- opList$snp.name
    if (ccFlag) {
	  clg <- try(snp.ccl.main(D, X.snp=design.S0, X.main=design.X0, X.int=design.V0,
                 cc.strat=cc.strat, op=op))
	  if (checkTryError(clg, conv=0)) {
        print(clg)
        scan.error("ERROR with snp.matched", "GxE.setup.4")
      }
	}
	if (nnFlag) {
	  hcl <- try(snp.hcl.main(D, X.snp=design.S0, X.main=design.X0, X.int=design.V0,
                   nn.strat=nn.strat, op=op))
	  if (checkTryError(hcl, conv=0)) {
        print(hcl)
        scan.error("ERROR with snp.matched", "GxE.setup.4")
      }
	}


    retdata <- list(D=D, cc.strat=cc.strat, nn.strat=nn.strat, 
                 design.X0=design.X0, design.V0=design.V0)

    list(data=retdata, pheno.list=pheno.list, scan.func.op=op)

} # END: GxE.setup.4

# Scan function for snp.matched
GxE.scan.4 <- function(data, opList) {

    op         <- opList$scan.func.op
    pheno.list <- opList$pheno.list  
    retvars    <- pheno.list$return.vars
    retvec     <- pheno.list$return.vec
    omnivars   <- pheno.list$omnibus.vars
    omniFlag   <- pheno.list$omnibus.flag
    intervars  <- pheno.list$interaction.vars
    interFlag  <- pheno.list$interaction.flag

    snp        <- data[[opList$snp.name]]
    D          <- data$D
    design.X0  <- data[["design.X0", exact=TRUE]]
    design.V0  <- data[["design.V0", exact=TRUE]]
    cc.strat   <- data[["cc.strat", exact=TRUE]]
    nn.strat   <- data[["nn.strat", exact=TRUE]]

    temp <- is.na(snp)
    miss <- any(temp) 
    if (miss) {
      temp <- !temp
      snp  <- snp[temp]
      D    <- D[temp]
      if (!is.null(design.X0)) design.X0 <- design.X0[temp, , drop=FALSE]
      if (!is.null(design.V0)) design.V0 <- design.V0[temp, , drop=FALSE]
      if (!is.null(cc.strat))  cc.strat  <- cc.strat[temp]
      if (!is.null(nn.strat))  nn.strat  <- nn.strat[temp]
    }

    gmodel <- op$genetic.model
    cnames <- opList$snp.name
    if (gmodel == 1) {
      snp[snp %in% 2] <- 1
    } else if (gmodel == 2) {
      temp2      <- snp %in% 2
      temp1      <- snp %in% 1
      snp[temp2] <- 1
      snp[temp1] <- 0
    } else if (gmodel == 3) {
      snp <- cbind(as.numeric(snp %in% 1), as.numeric(snp %in% 2))
      cnames <- paste(cnames, 1:2, sep="")
    }
    design.S0 <- as.matrix(snp)
    colnames(design.S0) <- cnames

	ret <- list()
	if (pheno.list$ccFlag) {
	  clg <- try(snp.ccl.main(D, X.snp=design.S0, X.main=design.X0, X.int=design.V0,
                 cc.strat=cc.strat, op=op))
	  if(!checkTryError(clg, conv=0)) { 
	    ret$CLR <- clg$CLR
		ret$CCL <- clg$CCL
	  }
	}
	if (pheno.list$nnFlag) {
	  hcl <- try(snp.hcl.main(D, X.snp=design.S0, X.main=design.X0, X.int=design.V0,
                   nn.strat=nn.strat, op=op))
	  if (!checkTryError(hcl, conv=0)) ret$HCL <- hcl
	}

    for (method in pheno.list$return.methods) {
      obj <- ret[[method, exact=TRUE]]
      if (!is.null(obj)) {
        parms <- obj$parms
        cov   <- obj$cov
        temp  <- retvars %in% colnames(cov)
        vars  <- retvars[temp]
        nvars <- length(vars)
        if (nvars) {
          names <- paste(method, ".", vars, ".Beta", sep="")
          retvec[names] <- parms[vars]
          names <- paste(method, ".", vars, ".SE", sep="")
          if (nvars > 1) {
            retvec[names] <- sqrt(diag(cov[vars, vars]))
          } else {
            retvec[names] <- sqrt(cov[vars, vars])
          }
        }
        if (omniFlag) {
          v <- paste(method, ".Omnibus.Pvalue", sep="")
          retvec[v] <- waldTest.main(parms, cov, omnivars)$pvalue
        }

        if (interFlag) {
          v <- paste(method, ".Inter.Pvalue", sep="")
          retvec[v] <- waldTest.main(parms, cov, intervars)$pvalue
        }
      }
    }

    retvec

} # END: GxE.scan.4

# Function to partition a scan into smaller jobs to be run on a cluster
GxE.scan.partition <- function(snp.list, pheno.list, op=NULL) {

  op <- default.list(op, c("n.jobs", "out.dir", "R.cmd", "qsub.cmd", "id.str",
                     "begin.commands.R"), 
                     list(100, getwd(), "R --vanilla", "qsub", "",
                     "library(CGEN)"))

  if (length(op$R.cmd) > 1) stop("ERROR: with R.cmd")
  if (length(op$qsub.cmd) > 1) stop("ERROR: with qsub.cmd")

  snp.list <- check.snp.list(snp.list)
  if (op$n.jobs < length(snp.list$file)) stop("ERROR: n.jobs < number of genotype files")
  pheno.list <- check.pheno.list(pheno.list)

  # If plink format, check for 2 id variables
  if (snp.list$plink.format) {
    if (length(pheno.list$id.var) != 2) stop("ERROR: pheno.list$id.var must be length 2 for PLINK genotype files")
    temp <- snp.list[["subject.list", exact=TRUE]]
    if (!is.null(temp)) {
      if (length(temp$id.var) != 2) stop("ERROR: subject.list$id.var must be length 2 for PLINK genotype files")
    }
  }

  snp.list           <- check.GLU(snp.list)
  snp.list           <- check.PLINK(snp.list)
  ret                <- NULL
  out.dir            <- op$out.dir
  snp.list$temp.dir  <- out.dir
  snp.list$id.str    <- op$id.str
  snp.list$start.vec <- 1
  snp.list$stop.vec  <- -1
  in.str             <- paste("job_", op$id.str, sep="")
  out.str            <- paste("GxEout_", op$id.str, sep="")
  format             <- snp.list$format
  inc.snps           <- snp.list[["include.snps", exact=TRUE]]

  if ((!(format %in% c("impute", "tped", "ldat"))) && (is.null(inc.snps))) {
    cat("\n*****************************************\n")
    cat("Consider specifying snp.list$include.snps\n")
    cat("*****************************************\n\n")
  }

  gen.op <- list(out.out=out.dir, out.log=out.dir, out.call=out.dir,
                 source.list=list(), nFiles=op$n.jobs, which.scan=0,
                 scan.func="GxE.scan", qsub=1, qsub.op="", R.op="",
                 R.cmd=op$R.cmd, basefile=0, qsub.cmd=op$qsub.cmd,
                 op.list=op[["GxE.scan.op", exact=TRUE]],
                 begin.commands.qsub=op[["begin.commands.qsub", exact=TRUE]],
                 begin.commands.R=op$begin.commands.R,
                 filePrefix=in.str, outString=out.str)

  ret <- GxE.scan.genfile(snp.list, pheno.list, gen.op)
  f   <- ret$Rjobs.file

  f

} # END: GxE.scan.partition

# Function to combine output files
GxE.scan.combine <- function(out.file, dir.or.files, pattern="GxEout_") {

  if (length(dir.or.files) < 2) {
    ff <- list.files(dir.or.files, pattern=pattern, full.names=TRUE)
  } else {
    ff <- dir.or.files
  }
  nf <- length(ff)
  if (!nf) stop("ERROR: no files to combine")

  fid  <- file(out.file, "w")
  flag <- 0
  for (f in ff) {
    if (!file.exists(f)) next
    x <- scan(f, what="character", sep="\n", quiet=TRUE)
    if (length(x)) {
      # Remove header
      if (flag) x <- x[-1]
      if (length(x)) {
        write(x, file=fid, ncolumns=1)
        flag <- 1
      }
    }
  }
  close(fid)

  ff

} # END: GxE.scan.combine

# Function to get the snps to run a scan on
nsnps.genoFile <- function(snp.list) {

  nsnps <- -1

  if (nchar(snp.list$GLU)) {
    if (!snp.list$glu.checked) snp.list <- check.GLU(snp.list)
    if (nchar(snp.list$GLU)) {
      ret   <- ginfo.GLU(snp.list, out.snps=NULL, out.subs=NULL)
      nsnps <- ret$n.snps
      if (!length(nsnps)) nsnps <- -1
      if (!is.finite(nsnps)) nsnps <- -1
    }
  } 

  if (nsnps < 0) {
    cat("\n********************************************************\n")
    cat("Number of SNPs in genotype file could not be determined.\n")
    cat("Consider specifying snp.list$nsnps.vec.\n")
    cat("********************************************************\n\n")
  }
  
  nsnps

} # END: nsnps.genoFile

# Function to set nsnps.vec
get.nsnps.vec <- function(snp.list, op) {

  nsnps <- snp.list[["nsnps.vec", exact=TRUE]]
  if (is.null(nsnps)) nsnps <- op[["nsnps.vec", exact=TRUE]]
  len   <- length(nsnps)
  if (len) {
    if (len != length(snp.list$file)) stop("ERROR length(nsnps.vec) != length(file)")
 
    return(nsnps) 
  }

  ff      <- snp.list$file
  tlist   <- snp.list
  nsnps   <- NULL
  for (f in ff) {
    tlist$file <- f
    temp       <- nsnps.genoFile(tlist)
    if (temp < 0) return(NULL)
    nsnps      <- c(nsnps, temp) 
  }

  nsnps

} # END: get.nsnps.vec

# Function to change \t to \\t
scan.change.tab <- function(inlist) {

  if (is.null(inlist)) return(inlist)
  if (!is.list(inlist)) return(inlist)

  nam <- names(inlist)
  len <- length(nam)
  for (i in 1:len) {
    name <- nam[i]
    obj  <- inlist[[name]]
    if (grepl("delimiter", name, fixed=TRUE)) {
      if (obj == "\t") inlist[[name]] <- "\\t"
      if (obj == "\n") inlist[[name]] <- "\\n"
    } else if (is.list(obj)) {
      if ("delimiter" %in% names(obj)) {
        del <- obj$delimiter
        if (del == "\t") {
          obj$delimiter <- "\\t"
          inlist[[name]] <- obj
        } else if (del == "\n") {
          obj$delimiter <- "\\n"
          inlist[[name]] <- obj
        }
      }
    }
  }

  inlist

} # END: scan.change.tab

# Function to create a file of all snps for a plink format
scan.include.snps <- function(snp.list, op) {

  if (!snp.list$plink.format) return(NULL)
  if (length(snp.list$file) != 1) return(NULL)
  if (!is.null(snp.list[["include.snps", exact=TRUE]])) return(NULL)

  bim <- snp.list[["PLINK.bim.file", exact=TRUE]]
  map <- snp.list[["PLINK.map.file", exact=TRUE]]
  out <- paste(snp.list$temp.dir, "inc_", op$filePrefix, sep="") 
  if (!is.null(bim)) {
    f <- bim
  } else if (!is.null(map)) {
    f <- map
  } else {
    return(NULL)
  }
  x <- scan.file(f)
  x <- makeVector(x[, 2])
  write(x, file=out, ncolumns=1)
  temp <- list(file=out, id.var=-1, header=0)
  snp.list$include.snps <- temp
  op[["nsnps.vec"]] <- length(x)

  list(snp.list=snp.list, op=op)

} # END: scan.include.snps

# Function to determine if data should be transformed
scan.check.transform <- function(snp.list, op) {

  snp.list   <- check.GLU(snp.list)
  snp.list   <- check.PLINK(snp.list)
  nsnps.vec  <- op[["nsnps.vec", exact=TRUE]]
  nsnps.flag <- !is.null(nsnps.vec)
  GLU        <- nchar(snp.list$GLU)
  PLINK      <- nchar(snp.list$PLINK)
  transform  <- snp.list[["transform", exact=TRUE]]
  njobs      <- op$nFiles
  nfiles     <- length(snp.list$file)

  if (is.null(op[["include.snps.files", exact=TRUE]])) {
    op$include.snps.files <- snp.list[["include.snps", exact=TRUE]] 
    snp.list$include.snps <- NULL
  }

  if (njobs < nfiles) stop("ERROR: njobs < nfiles")
  if (njobs < 2) return(list(snp.list=snp.list, op=op))

  if (is.null(transform)) transform <- -1

  if (snp.list$format %in% "impute") transform <- 0
  if ((!GLU) && (!PLINK)) transform <- 0
  if ((!GLU) && (!snp.list$plink.format)) transform <- 0

  if (transform) {
    temp <- scan.include.snps(snp.list, op)
    if (!is.null(temp)) return(temp)
  } 

  # If the data can be transformed, decide to break up include.snps.
  # Only do this if nfiles = 1.
  if ((nfiles == 1) && (transform %in% c(-1, 1))) {
    snp.list$start.vec <- 1
    snp.list$stop.vec  <- -1
    inc.snps  <- getSNPsToInclude(snp.list)
    N.inc     <- length(inc.snps)
    if (!nsnps.flag) nsnps.vec <- -1
    if (N.inc) {
      if (N.inc < njobs) njobs <- N.inc
      transform <- 1
      nperfile  <- ceiling(N.inc/njobs)
      
      # Create seperate include.snps files
      include.snps.files <- NULL
      outdir <- snp.list$temp.dir
      str    <- paste(outdir, "inc_", op$filePrefix, sep="") 
      ii     <- 1 
      b      <- 0
      while (1) {
        a   <- b + 1
        b   <- a + nperfile - 1
        if (b > N.inc) b <- N.inc
        out <- paste(str, "_", ii, ".txt", sep="") 
        write(inc.snps[a:b], file=out, ncolumns=1)
        include.snps.files <- c(include.snps.files, out)

        if (b >= N.inc) break
        ii <- ii + 1 
      }
      op$include.snps.files <- include.snps.files
      
      # Repeat snp file
      snp.list$file <- rep(snp.list$file, length(include.snps.files))
      op$nsnps.vec  <- NULL
    }
  }

  if (transform == -1) transform <- NULL
  snp.list$transform <- transform
  op$nFiles <- njobs

  list(snp.list=snp.list, op=op)

} # END: scan.check.transform

# Function to create job files
GxE.scan.genfile <- function(snp.list, pheno.list, op) {

  # This function returns the (updated) options list op.

  #################################################################
  # op              List of options with the following names
  #  out.out        Folder for output results
  #                 No default   
  #  out.log        Folder for output log files
  #                 No default
  #  out.call       Folder for the "call" files
  #                 No default
  #  nsnps.vec      Vector for the number of snps in each file of snp.list$file
  #                 The default is NULL
  #  source.list    List of source files
  #                 No default
  #  nFiles         The maximum number of jobs to submit 
  #                 The default is 1000
  #  filePrefix     Character string to prefix each call file
  #                 The default is "call_".
  #  outString      Character string to prefix each output file in out.out
  #                 The default is "out".
  #  qcString       Character string to prefix each qc output file. Use NULL
  #                 for no qc output files.
  #                 The default is "qc_info".
  #  test           0 or 1 for testing purposes.
  #                 The default is 0.
  #  lib.list       List of sublist with names "library" and "lib.loc" for
  #                 including libraries with the library() function.
  #                 The default is NULL.
  #  op.list        List of options for the snp.scan() function.
  #                 The default is NULL.
  #  fileNames      File to store the names of each call file and out file
  #                 The default is NULL
  #  SHLIB
  #  qsub.cmd       qsub command. The default is "qsub"
  #  begin.commands.qsub  Commands at the beginning of each appcall file.
  #                       The default is "#!/bin/bash".
  #  begin.commands.R     Commands at the beginning of each R file.
  #                       The default is "#!/bin/bash".

  op <- default.list(op,
        c("out.out", "out.log", "out.call", "source.list", 
          "nFiles", "filePrefix", "outString",
          "test", "qsub.op", "biowulf", "qcString", "which.scan", "qsub",
          "qsub.cmd", "begin.commands.qsub", "basefile", "begin.commands.R",
          "R.cmd", "R.op"), 
        list("ERROR", "ERROR", "ERROR", "ERROR", 1000,
             "call_", "out", 0, "-l nodes=1", 1, "qc_info", c(1, 2), 1,
             "qsub", "#!/bin/bash", 1, NULL,
             "/usr/local/bin/R", "--vanilla"), 
        error=c(1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))

  snp.list     <- check.snp.list(snp.list)
  op$nsnps.vec <- get.nsnps.vec(snp.list, op)
  snp.list$nsnps.vec <- NULL

  # Determine if data can and/or should be transformed
  temp         <- scan.check.transform(snp.list, op)
  snp.list     <- temp$snp.list
  op           <- temp$op  
  rm(temp)
  gc()

  nsnps.vec    <- op[["nsnps.vec", exact=TRUE]]
  source       <- op$source.list
  lib.list     <- op[["lib.list", exact=TRUE]]
  out.out      <- checkForSep(op$out.out)
  out.log      <- checkForSep(op$out.log)
  out.call     <- checkForSep(op$out.call)
  nFiles       <- op$nFiles
  filePrefix   <- op$filePrefix
  outString    <- op$outString
  op.list      <- op[["op.list", exact=TRUE]] # For the model
  test         <- op$test
  basefile     <- op$basefile
  rcomm0       <- op[["begin.commands.R", exact=TRUE]]
  n.source     <- length(source)

  scanFlag    <- 1 %in% op$which.scan
  scan0Flag   <- 0 %in% op$which.scan
  if ((scanFlag) && (scan0Flag)) stop("ERROR: op$which.scan cannot contain both 0 and 1")
  qcString    <- op[["qcString", exact=TRUE]]
  qcFlag      <- 2 %in% op$which.scan
  scan.func   <- op[["scan.func", exact=TRUE]]
  if ((scan0Flag) && (is.null(scan.func))) stop("ERROR: op$scan.func must be specified")
  noHeader    <- !(snp.list$format %in% c("ldat", "lbat", "sdat", "sbat"))
  include.snps.files <- op[["include.snps.files", exact=TRUE]]
  include.flag <- !is.null(include.snps.files)

  # Get the file names
  snp.file      <- snp.list$file
  nsnp.file     <- length(snp.file)
  snp.list$file <- NULL
  stopEarly     <- 0

  # ndata will also be used as a flag is nsnps.vec = NULL
  if (is.null(nsnps.vec)) {
    ndata  <- 0
    nFiles <- length(snp.file)
  } else {
    ndata <- sum(nsnps.vec)
    if (ndata < nFiles) nFiles <- ndata
  }
  runsPerFile <- ceiling(ndata/nFiles)

  # Initialize
  index     <- 1
  start.row <- 1
  stop.row  <- runsPerFile 
  outIndex  <- 1
  fnFlag    <- !is.null(op[["fileNames", exact=TRUE]])
  
  # For testing purposes
  if (test) {
    nFiles   <- 1
    stop.row <- max(test, 10)
  }

  # Change tab
  pheno.list <- scan.change.tab(pheno.list)
  snp.list   <- scan.change.tab(snp.list)
  snp.list$subject.list <- scan.change.tab(snp.list[["subject.list", exact=TRUE]])

  qsub <- op$qsub
  if (fnFlag) {
    fn_index <- 1
    fnMat <- matrix(data="", nrow=2*nFiles, ncol=3)
    colnames(fnMat) <- c("callFile", "outFile", "nlines")
    if (qsub) {
      fnStr <- ""
    } else {
      fnStr <- ".R"
    }    
  }
  jjindex <- 1  

  if (noHeader) {
    begin.row <- 1
  } else {
    begin.row <- 2
  }
  if (runsPerFile == 1) stop.row <- begin.row

  # Check include.snps.files
  if (include.flag) {
    if (length(include.snps.files) == 1) include.snps.files <- rep(include.snps.files, nFiles)
    if (length(include.snps.files) != nFiles) stop("ERROR: with include.snps.files") 
  }
  snp.list$alreadyChecked <- 0

  # Create files
  for (i in 1:nFiles) {
    if ((i == 1) && (basefile)) {
      temp <- paste(out.out, "baseModel", sep="")
      op.list$base.outfile <- temp
      op$base.outfile <- temp
    } else {
      op.list$base.outfile <- NULL
    }

    # The files will be named <filePrefix>i.R
    temp <- paste(out.call, filePrefix, i, ".R", sep="")
    fid  <- file(temp, "w")

    #cat("rm(list = ls(all=TRUE)) \n", file=fid)
    #cat("gc() \n \n", file=fid)

    if (!is.null(rcomm0)) write(rcomm0, file=fid, ncolumns=1)

    # libraries   
    if (length(lib.list)) {
      for (j in 1:length(lib.list)) {
        libs <- lib.list[[j]]
        lb   <- libs$library
        loc  <- libs[["lib.loc", exact=TRUE]]
        if (is.null(loc)) {
          temp <- paste("library(", lb, ") \n", sep="")
        } else {
          temp <- paste('library(', lb, ', lib.loc="', loc, '") \n', sep='')
        }
        cat(temp, file=fid)
      }
    }

    # source files
    if (n.source) {
      for(k in 1:n.source) {
        temp <- paste('source("', source[[k]], '") \n', sep='')
        cat(temp, file=fid)
      }
    }

    # snp.list
    genfile.list(snp.list, "snp.list", fid)

    # Pheno list
    genfile.list(pheno.list, "pheno.list", fid)

    # Options list
    genfile.list(op.list, "op.list", fid)

    # Shared libraries
    temp <- op[["SHLIB", exact=TRUE]] 
    if (!is.null(temp)) {
      for (j in 1:length(temp)) {
        str <- paste('dyn.load("', temp[j], '") \n', sep='')
        cat(str, file=fid)
      }  
    }

    # Output the info that depends on each file
    stop        <- 0
    newFileFlag <- 0
    nThisRun    <- NULL
    nThisFile   <- 0
    if (qcFlag) {
      qc.list <- list(freqByCC=1)
      genfile.list(qc.list, "qc.list", fid)
    } 
    

    while (!stop) { 

      if ((!ndata) && (!test)) {
        start.row <- 1
        stop.row  <- -1 
      }

      # snp file
      BNAME <- basename(snp.file[index])
      temp  <- paste('snp.list$file <- "', snp.file[index], '" \n', sep="")
      cat(temp, file=fid) 

      # include.snps
      if (include.flag) {
        temp <- paste('snp.list$include.snps <- "', include.snps.files[index], '" \n', sep="")
        cat(temp, file=fid) 
      }

      # start row
      temp <- paste("snp.list$start.vec <- ", start.row, " \n", sep="")
      cat(temp, file=fid)

      nCarryOver <- NULL

      # Get the stop row
      if (ndata) { 
        if (runsPerFile > 1) {
          check.N <- nsnps.vec[index] - 1
        } else {
          check.N <- nsnps.vec[index] + begin.row - 1
        }       

        if (stop.row >= check.N) {
          # Get the number of runs for this file
          nThisRun <- nsnps.vec[index] - start.row + 1

          # Get the total number of runs for this file
          nThisFile <- nThisFile + nThisRun
   
          # Get the number of runs for the next file
          nCarryOver <- max(runsPerFile - nThisFile, 0)

          if (nCarryOver == 1) nCarryOver <- 0

          # Set stop row to -1, so that the rest of the current file will be read
          stop.row <- -1

          newFileFlag <- 1
        } else {
          nThisFile <- nThisFile + runsPerFile
        }

        # print(c(start.row, stop.row, nThisFile, nThisRun, runsPerFile, nCarryOver))      
      } 

      # Stop row
      temp <- paste("snp.list$stop.vec <- ", stop.row, " \n", sep="")
      cat(temp, file=fid)

      # Output file 
      out.b0 <- paste(outString, BNAME, "_job", i, "_", outIndex, "_", jjindex, ".txt", sep="")
      temp   <- paste('op.list$out.file <- "', out.out, out.b0, '" \n \n', sep="")
      cat(temp, file=fid)

      if (scanFlag) {   
        cat("ret <- snp.scan(snp.list, pheno.list, op=op.list) \n", file=fid)
      }
      if (scan0Flag) {  
        str <- paste("ret <- ", scan.func, "(snp.list, pheno.list, op=op.list) \n", sep="") 
        cat(str, file=fid)
      }
      if (qcFlag) {
        temp <- paste('qc.list$outfile <- "', out.out, qcString, 
                 "_id", i, "_", outIndex, "_", jjindex, '" \n \n', sep="")
        cat(temp, file=fid)
        cat("ret <- getGenoStats.file(snp.list, pheno.list, op=qc.list) \n", file=fid)
      }
      
      # Save file names
      if (fnFlag) {
        fnMat[fn_index, "callFile"] <- paste(filePrefix, i, fnStr, sep="")
        fnMat[fn_index, "outFile"]  <- out.b0
        if (runsPerFile > 1) {
          if (stop.row <= 0) {
            nlines <- nsnps.vec[index] - start.row + 2
          } else {
            nlines <- stop.row - start.row + 2
          }
          if (start.row == 1) nlines <- nlines - 1
          if (stop.row == -1) nlines <- nlines + 1

          if (noHeader) {
            if (start.row == 1) nlines <- nlines + 1
            if (stop.row == -1) nlines <- nlines - 1
          }
          fnMat[fn_index, "nlines"]  <- nlines
        } else {
          fnMat[fn_index, "nlines"]  <- 2
        }
        fn_index <- fn_index + 1
      }

      # update 
      jjindex <- jjindex + 1
      if (!is.null(nCarryOver)) {
        # Go to the next file
        index    <- index + 1   
        outIndex <- 1

        # Update start and stop rows
        start.row <- 1
        if (nCarryOver == 0) {
          stop.row  <- runsPerFile 
          # Exit the loop
          stop <- 1
        } else {
          stop.row <- nCarryOver 
        }
        if (runsPerFile == 1) stop.row <- begin.row
      } else {
        # Exit loop
        stop <- 1

        # Update start and stop rows
        start.row <- stop.row + 1
        stop.row  <- stop.row + runsPerFile 
        if (runsPerFile == 1) stop.row <- start.row
        outIndex  <- outIndex + 1
 
        # For the case when 1 file per job
        if (!ndata) {
          index    <- index + 1   
          outIndex <- 1
        }
      }

      # Check index
      if (index > nsnp.file) {
        stop      <- 1
        save      <- i
        stopEarly <- 1
        break
      }

    } # END: while (!stop)

    # close the file
    close(fid)

    if (stopEarly) {
      nFiles <- save
      break
    }

  } # END: for (i in 1:nFiles)

  op$Rjobs.file <- GxE.scan.jobsFile(nFiles, out.call, out.log, filePrefix=filePrefix, 
                   qsub.op=op$qsub.op, qsub=op$qsub, Rcmd=op$R.cmd, 
                   LD_LIB=op[["LD_LIB", exact=TRUE]], qsub.cmd=op$qsub.cmd,
                   begin.commands=op[["begin.commands.qsub", exact=TRUE]],
                   R.op=op$R.op)

  op$out.out  <- out.out
  op$out.call <- out.call
  op$out.log  <- out.log
  op$nFiles   <- nFiles

  # Write out file names
  if (fnFlag) {
    fnMat <- removeOrKeepRows(fnMat, 1:(fn_index-1))
    writeTable(fnMat, op$fileNames)
  }

  op
 
} # END: GxE.scan.genfile

# Function for generating the R jobs files
GxE.scan.jobsFile <- function(nFiles, out.call, out.log, filePrefix="call_", 
                  qsub.op="-l nodes=1", qsub=1, Rcmd="/usr/local/bin/R",
                  LD_LIB=NULL, begin.commands="#!/bin/bash", qsub.cmd="qsub",
                  R.op="--vanilla") {

  # export LD_LIBRARY_PATH=/spin1/users/wheelerwi/kai/ERneg_PBCS/source/jul16_2013:${LD_LIBRARY_PATH}
  LD.flag <- !is.null(LD_LIB)
  if (LD.flag) {
    LD.str <- paste("export LD_LIBRARY_PATH=", LD_LIB, ":${LD_LIBRARY_PATH}\n", sep="")
  } else {
    LD.str <- NULL
  }
  
  str0 <- c(begin.commands, LD.str)

  if ((qsub) || (LD.flag)) {
    # Create the appcall files
    for (i in 1:nFiles) {
      ff   <- paste(out.call, filePrefix, i, sep="")
      temp <- removeWhiteSpace(paste(Rcmd, " ", R.op, sep=""))
      temp <- paste(temp, " < ", out.call, 
                    filePrefix, i, ".R > ", out.log, 
                    filePrefix, i, ".log", sep="")
      str  <- c(str0, temp)
      write(str, file=ff, ncolumns=1)
    }

    # Create R jobs file
    Rjobs.file <- paste(out.call, "Rjobs_", filePrefix, sep="")
    if (qsub) {
      cfiles <- removeWhiteSpace(paste(qsub.cmd, " ", qsub.op, sep=""))
      cfiles <- paste(cfiles, " ", out.call, filePrefix, 1:nFiles, sep="")
      cfiles <- removeWhiteSpace(cfiles)
    } else {
      cfiles <- paste("bash ", out.call, filePrefix, 1:nFiles, " > ", out.log,  
                      filePrefix, 1:nFiles, ".log", sep="")
    }
    #cfiles <- c("#!/bin/bash", cfiles)
    write(cfiles, file=Rjobs.file)
  } else {
    Rjobs.file <- paste(out.call, "Rjobs_", filePrefix, sep="")
    cfiles <- removeWhiteSpace(paste(Rcmd, " ", R.op, sep=""))
    cfiles <- paste(cfiles, " < ", out.call, 
            filePrefix, 1:nFiles, ".R > ", out.log, 
            filePrefix, 1:nFiles, ".log", sep="")
    #cfiles <- c("#!/bin/bash", cfiles)
    write(cfiles, file=Rjobs.file)
  }

  Rjobs.file

} # END: GxE.scan.jobsFile

Try the CGEN package in your browser

Any scripts or data that you put into this service are public.

CGEN documentation built on April 28, 2020, 8:08 p.m.