R/summary_permuted_quantitative.R
In GxEprs: Genotype-by-Environment Interaction in Polygenic Score Models

Documented in summary_permuted_quantitative

#' summary_permuted_quantitative function
#' This function outputs the p value of permuted model in the target dataset, using pre-generated Polygenic Risk Scores (PRSs) of all the individuals. Note that the input used in this function can be generated by using PRS_quantitative functions. It is recommended to run this function, if you choose to fit 'PRS_gxe x E' interaction component (i.e. novel proposed model, Model 4) when generating risk scores. If the 'PRS_gxe x E' term is significant in Model 4, and insignificant in Model 4* (permuted p value), consider that the 'PRS_gxe x E' interaction component is actually insignificant (always give priority to the p value obtained from the permuted model).
#' @param Qphe_target Phenotype file containing family ID, individual ID and phenotype of the target dataset as columns, without heading
#' @param Qcov_target Covariate file containing family ID, individual ID, standardized covariate, square of standardized covariate, and/or confounders of the target dataset as columns, without heading
#' @param iterations Number of iterations used in permutation
#' @param add_score PRSs generated using additive SNP effects of GWEIS summary statistics
#' @param gxe_score PRSs generated using interaction SNP effects of GWEIS summary statistics
#' @keywords permuted model pvalue
#' @export 
#' @importFrom stats binomial fitted.values glm lm
#' @importFrom utils read.table write.table
#' @return This function will output
#' \item{Q_permuted_p}{the p value of the permuted model}
#' @examples \dontrun{ 
#' a <- GWEIS_quantitative(plink_path, DummyData, Qphe_discovery, Qcov_discovery)
#' add <- a[c("ID", "A1", "ADD_BETA")]
#' gxe <- a[c("ID", "A1", "INTERACTION_BETA")]
#' p <- PRS_quantitative(plink_path, DummyData, summary_input = add)
#' q <- PRS_quantitative(plink_path, DummyData, summary_input = gxe)
#' x <- summary_permuted_quantitative(Qphe_target, Qcov_target, iterations = 1000, 
#' add_score = p, gxe_score = q)
#' x 
#' }
summary_permuted_quantitative <- function(Qphe_target, Qcov_target, iterations = 1000, add_score, gxe_score){
  os_name <- Sys.info()["sysname"]
   if (startsWith(os_name, "Win")) {
     slash <- paste0("\\")
   } else {
     slash <- paste0("/")
   }  
  cov_file <- read.table(Qcov_target)
  n_confounders = ncol(cov_file) - 4
  fam=read.table(Qphe_target, header=F) 
  colnames(fam) <- c("FID", "IID", "PHENOTYPE")
  dat=read.table(Qcov_target, header=F)
  colnames(dat)[1] <- "FID"
  colnames(dat)[2] <- "IID"
  sink(paste0(tempdir(), slash, "add_score"))
  write.table(add_score, sep = " ", row.names = FALSE, quote = FALSE)
  sink()
  prs1_all=read.table(paste0(tempdir(), slash, "add_score"), header=T)
  colnames(prs1_all)[1] <- "FID"
  colnames(prs1_all)[2] <- "IID"
  prs1=merge(fam, prs1_all, by = "FID", sort=F)
  sink(paste0(tempdir(), slash, "gxe_score"))
  write.table(gxe_score, sep = " ", row.names = FALSE, quote = FALSE)
  sink()
  prs2_all=read.table(paste0(tempdir(), slash, "gxe_score"), header=T)
  colnames(prs2_all)[1] <- "FID"
  colnames(prs2_all)[2] <- "IID"
  prs2=merge(fam, prs2_all, by = "FID", sort=F)
  m1 <- match(dat$IID, prs1$IID.x)
  out = fam$PHENOTYPE[m1]
  cov=scale(dat$V3[m1])
  ps1=scale(prs1$PRS)
  ps2=scale(prs2$PRS)
  xv1=scale(prs1$PRS*cov)
  xv2=scale(prs2$PRS*cov)
  cov2=scale(cov^2)
  if(n_confounders == 0){
    pn=iterations; pp_gxe_x_E=0
    df_regular_new <- as.data.frame(cbind(out, cov, ps1, ps2, xv2))
    colnames(df_regular_new)[1] <- "out"
    colnames(df_regular_new)[2] <- "E"
    colnames(df_regular_new)[3] <- "PRS_add"
    colnames(df_regular_new)[4] <- "PRS_gxe"
    colnames(df_regular_new)[5] <- "PRS_gxe x E"
    regular_m = lm(out ~., data = df_regular_new)     
    regular_p = summary(regular_m)$coefficients[5,4]
    for(i in 1:pn){
    percentage <- (i / iterations) * 100
    percentage <- (i / iterations) * 100
    cat(sprintf("\rProgress: %3.0f%%", percentage))
      sv2=sample(seq(1, length(out)))
      xv2=scale(prs2$PRS[sv2]*cov)
    df_new <- as.data.frame(cbind(out, cov, ps1, ps2, xv2))
    colnames(df_new)[1] <- "out"
    colnames(df_new)[2] <- "E"
    colnames(df_new)[3] <- "PRS_add"
    colnames(df_new)[4] <- "PRS_gxe"
    colnames(df_new)[5] <- "PRS_gxe x E"
    m = lm(out ~., data = df_new)
    if (regular_p > summary(m)$coefficients[5,4]) pp_gxe_x_E=pp_gxe_x_E+1
    }
    }else{
    pn=iterations; pp_gxe_x_E=0
    conf_var <- matrix(ncol = n_confounders, nrow = nrow(dat))
    for (k in 1:n_confounders) {
      conf_var[, k] <- as.numeric(dat[, k+4])
    }
    conf_var <- conf_var[m1,]
    df_regular_new <- as.data.frame(cbind(out, cov, ps1, ps2, xv2, conf_var))
    colnames(df_regular_new)[1] <- "out"
    colnames(df_regular_new)[2] <- "E"
    colnames(df_regular_new)[3] <- "PRS_add"
    colnames(df_regular_new)[4] <- "PRS_gxe"
    colnames(df_regular_new)[5] <- "PRS_gxe x E"
    regular_m = lm(out ~., data = df_regular_new)     
    regular_p = summary(regular_m)$coefficients[5,4]
    for(i in 1:pn){
    percentage <- (i / iterations) * 100
    cat(sprintf("\rProgress: %3.0f%%", percentage))
      sv2=sample(seq(1, length(out)))
      xv2=scale(prs2$PRS[sv2]*cov)
      df_new <- as.data.frame(cbind(out, cov, ps1, ps2, xv2, conf_var))
      colnames(df_new)[1] <- "out"
      colnames(df_new)[2] <- "E"
      colnames(df_new)[3] <- "PRS_add"
      colnames(df_new)[4] <- "PRS_gxe"
      colnames(df_new)[5] <- "PRS_gxe x E"
      m = lm(out ~., data = df_new)
      if (regular_p > summary(m)$coefficients[5,4]) pp_gxe_x_E=pp_gxe_x_E+1
    }
  }
  cat("\n")
  return(pp_gxe_x_E/pn)
}