# construct PRS with new genotype file
#'@title Construct PRS from the XPASS output.
#'@description This function use the XPASS posterior means to construct PRS for the prediction samples.
#'
#' @param pm The output `mu` obtained from the `XPASS` function, which stores the posterior means computed by XPASS.
#' @param file_predgeno Prefix of the prediction samples' genotypes in plink format.
#'
#'
#' @return A data frame storing the PRS generated by using mu1, mu2, mu_XPASS1, and mu_XPASS2, respectively.
#'
#' @examples
#' See vignette.
#' @export
predict_XPASS <- function(pm,file_predgeno){
psnp <- nrow(pm)
geno <- read_data(file_predgeno,"zero")
X <- geno$X
rm(geno)
ref <- fread(paste0(file_predgeno,".bim"),data.table=F)
fam <- fread(paste0(file_predgeno,".fam"),data.table=F)
# find common SNPs in test and training data
snps <- intersect(pm$SNP,ref$V2)
if(length(snps)!=psnp){
warning("Test data and training data have different SNPs. This may reduce prediction accuracy. To avoid this problem, pecify the test genotype file in the `file_predgeno` argument in XPASS function to pre-mathc SNPs.")
}
# match SNPs in both datasets
idx_snps <- match(snps,ref$V2)
X <- X[,idx_snps]
ref <- ref[idx_snps,]
idx_snps <- match(snps,pm$SNP)
pm <- pm[idx_snps,]
# flip alleles
idx_flip1 <- which(ref$V5%in%c("A","T")&pm$A1%in%c("C","G"))
idx_flip2 <- which(ref$V5%in%c("C","G")&pm$A1%in%c("A","T"))
idx_flip <- c(idx_flip1,idx_flip2)
X[,idx_flip] <- 2-X[idx_flip]
# compute PRS
PRS <- X %*% data.matrix(pm[,c("mu1","mu2","mu_XPASS1","mu_XPASS2")])
colnames(PRS) <- c("PRS1","PRS2","PRS_XPASS1","PRS_XPASS2")
PRS <- data.frame(FID=fam$V1,IID=fam$V2,PRS)
}
# evalueate R2 using external summary statistics
#'@title Evaluate the approximated predictive R2 using an independent GWAS summary statistics.
#'@description This function approximates the predictive R2 of XPASS using an independent GWAS summary statistics.
#'
#' @param pm The output `mu` obtained from the `XPASS` function, which stores the posterior means computed by XPASS.
#' @param file_z_pred Summary statistics file of the test data. The format is the same as XPASS input.
#' @param file_predgeno Prefix of the reference genotypes in plink format.
#'
#' @return Approximated predictive R^2.
#'
#' @examples
#' See vignette.
#' @export
evalR2_XPASS <- function(pm,file_z_pred,file_predgeno){
psnp <- nrow(pm)
geno <- read_data(file_predgeno,"zero")
X <- geno$X
rm(geno)
ref <- fread(paste0(file_predgeno,".bim"),data.table=F)
zf <- fread(file_z_pred,data.table=F)
# find common SNPs in pm, test sumstats and test geno
snps <- intersect(pm$SNP,ref$V2)
snps <- intersect(snps,zf$SNP)
if(length(snps)!=psnp){
warning("Test data and training data have different SNPs. This may reduce prediction accuracy. To avoid this problem, pecify the test genotype file in the `file_predgeno` argument in XPASS function to pre-match SNPs.")
}
# match SNPs in pm, test sumstats and test geno
idx_snps <- match(snps,ref$V2)
X <- X[,idx_snps]
ref <- ref[idx_snps,]
idx_snps <- match(snps,pm$SNP)
pm <- pm[idx_snps,]
idx_snps <- match(snps,zf$SNP)
zf <- zf[idx_snps,]
# flip alleles in test geno
idx_flip1 <- which(ref$V5%in%c("A","T")&pm$A1%in%c("C","G"))
idx_flip2 <- which(ref$V5%in%c("C","G")&pm$A1%in%c("A","T"))
idx_flip <- c(idx_flip1,idx_flip2)
X[,idx_flip] <- 2-X[idx_flip]
# flip alleles in test sumstats
zs <- zf$Z
idx_flip1 <- which(zf$A1%in%c("A","T")&pm$A1%in%c("C","G"))
idx_flip2 <- which(zf$A1%in%c("C","G")&pm$A1%in%c("A","T"))
idx_flip <- c(idx_flip1,idx_flip2)
zs[idx_flip] <- -zs[idx_flip]
# compute R2
betas <- data.matrix(pm[,c("mu1","mu2","mu_XPASS1","mu_XPASS2")])
denom <- sqrt(colMeans((X%*%betas)^2))
# Xsd <- c(scaleC(X)$Xs)
xsd <- apply(X,2,sd)
R2 <- (colSums(zs*betas*xsd/sqrt(median(zf$N)))/denom)^2
names(R2) <- c("PRS1","PRS2","PRS_XPASS1","PRS_XPASS2")
return(R2)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.