R/predict_XPASS.R

Defines functions evalR2_XPASS predict_XPASS

Documented in evalR2_XPASS predict_XPASS

# construct PRS with new genotype file
#'@title  Construct PRS from the XPASS output.
#'@description This function use the XPASS posterior means to construct PRS for the prediction samples.
#'
#' @param pm The output `mu` obtained from the `XPASS` function, which stores the posterior means computed by XPASS.
#' @param file_predgeno  Prefix of the prediction samples' genotypes in plink format.
#'
#'
#' @return A data frame storing the PRS generated by using mu1, mu2, mu_XPASS1, and mu_XPASS2, respectively.
#'
#' @examples
#' See vignette.
#' @export
predict_XPASS <- function(pm,file_predgeno){
  psnp <- nrow(pm)

  geno <- read_data(file_predgeno,"zero")
  X <- geno$X
  rm(geno)
  ref <- fread(paste0(file_predgeno,".bim"),data.table=F)
  fam <- fread(paste0(file_predgeno,".fam"),data.table=F)

  # find common SNPs in test and training data
  snps <- intersect(pm$SNP,ref$V2)
  if(length(snps)!=psnp){
    warning("Test data and training data have different SNPs. This may reduce prediction accuracy. To avoid this problem, pecify the test genotype file in the `file_predgeno` argument in XPASS function to pre-mathc SNPs.")
  }

  # match SNPs in both datasets
  idx_snps <- match(snps,ref$V2)
  X <- X[,idx_snps]
  ref <- ref[idx_snps,]

  idx_snps <- match(snps,pm$SNP)
  pm <- pm[idx_snps,]

  # flip alleles
  idx_flip1 <- which(ref$V5%in%c("A","T")&pm$A1%in%c("C","G"))
  idx_flip2 <- which(ref$V5%in%c("C","G")&pm$A1%in%c("A","T"))
  idx_flip <- c(idx_flip1,idx_flip2)

  X[,idx_flip] <- 2-X[idx_flip]

  # compute PRS
  PRS <- X %*% data.matrix(pm[,c("mu1","mu2","mu_XPASS1","mu_XPASS2")])
  colnames(PRS) <- c("PRS1","PRS2","PRS_XPASS1","PRS_XPASS2")
  PRS <- data.frame(FID=fam$V1,IID=fam$V2,PRS)

}


# evalueate R2 using external summary statistics
#'@title  Evaluate the approximated predictive R2 using an independent GWAS summary statistics.
#'@description This function approximates the predictive R2 of XPASS using an independent GWAS summary statistics.
#'
#' @param pm The output `mu` obtained from the `XPASS` function, which stores the posterior means computed by XPASS.
#' @param file_z_pred  Summary statistics file of the test data. The format is the same as XPASS input.
#' @param file_predgeno  Prefix of the reference genotypes in plink format.
#'
#' @return Approximated predictive R^2.
#'
#' @examples
#' See vignette.
#' @export
evalR2_XPASS <- function(pm,file_z_pred,file_predgeno){
  psnp <- nrow(pm)

  geno <- read_data(file_predgeno,"zero")
  X <- geno$X
  rm(geno)
  ref <- fread(paste0(file_predgeno,".bim"),data.table=F)

  zf <- fread(file_z_pred,data.table=F)

  # find common SNPs in pm, test sumstats and test geno
  snps <- intersect(pm$SNP,ref$V2)
  snps <- intersect(snps,zf$SNP)
  if(length(snps)!=psnp){
    warning("Test data and training data have different SNPs. This may reduce prediction accuracy. To avoid this problem, pecify the test genotype file in the `file_predgeno` argument in XPASS function to pre-match SNPs.")
  }

  # match SNPs in pm, test sumstats and test geno
  idx_snps <- match(snps,ref$V2)
  X <- X[,idx_snps]
  ref <- ref[idx_snps,]

  idx_snps <- match(snps,pm$SNP)
  pm <- pm[idx_snps,]

  idx_snps <- match(snps,zf$SNP)
  zf <- zf[idx_snps,]

  # flip alleles in test geno
  idx_flip1 <- which(ref$V5%in%c("A","T")&pm$A1%in%c("C","G"))
  idx_flip2 <- which(ref$V5%in%c("C","G")&pm$A1%in%c("A","T"))
  idx_flip <- c(idx_flip1,idx_flip2)

  X[,idx_flip] <- 2-X[idx_flip]

  # flip alleles in test sumstats
  zs <- zf$Z
  idx_flip1 <- which(zf$A1%in%c("A","T")&pm$A1%in%c("C","G"))
  idx_flip2 <- which(zf$A1%in%c("C","G")&pm$A1%in%c("A","T"))
  idx_flip <- c(idx_flip1,idx_flip2)

  zs[idx_flip] <- -zs[idx_flip]

  # compute R2
  betas <- data.matrix(pm[,c("mu1","mu2","mu_XPASS1","mu_XPASS2")])
  denom <- sqrt(colMeans((X%*%betas)^2))

  # Xsd <- c(scaleC(X)$Xs)
  xsd <- apply(X,2,sd)
  R2 <- (colSums(zs*betas*xsd/sqrt(median(zf$N)))/denom)^2
  names(R2) <- c("PRS1","PRS2","PRS_XPASS1","PRS_XPASS2")

  return(R2)
}
YangLabHKUST/XPASS documentation built on June 29, 2024, 11:16 a.m.