prs_gw <- function(weights_file, genotypes_file, samples_file, weight_col = 'WEIGHT_') {
if (is.null(weights_file)) {
stop("Provide weights file.")
}
if (is.null(genotypes_file)) {
stop("Provide VCF/BCV/savvy file with genotypes and dosages.")
}
lookahead <- read.table(weights_file, header = TRUE, nrows = 1, stringsAsFactors = FALSE) # read first line to determine columns and setup their types (needed for faster read.table)
col_classes <- sapply(lookahead, class)
col_classes[c("CHROM", "OA", "EA")] <- "character"
col_classes["POS"] <- "integer"
col_classes[which(startsWith(colnames(lookahead), weight_col))] <- "numeric"
col_classes[-c(which(colnames(lookahead) %in% c("CHROM", "POS", "OA", "EA")), which(startsWith(colnames(lookahead), weight_col)))] <- list(NULL)
weights <- read.table(weights_file, header = TRUE, stringsAsFactors = FALSE, colClasses = col_classes)
weights <- weights[with(weights, order(CHROM, POS, decreasing = FALSE)),] # ensure that positions are sorted
# set samples to use
if (!is.null(samples_file)) {
samples <- readLines(samples_file)
} else {
samples <- as.character(c()) # use all samples
}
genotypes <- new(Genotypes, genotypes_file) # create VCF/BCF/savvy reader
genotypes$open(samples) # open file for reading and set samples
geno_samples <- genotypes$get_selected_samples() # get samples in the same order as in the genotype file
weight_cols <- colnames(weights)[which(startsWith(colnames(weights), weight_col))]
individual_prs <- data.frame(matrix(nrow = length(geno_samples), ncol = length(weight_cols), data = 0, dimnames = list(samples = geno_samples, scores = paste("PRS_", weight_cols, sep=""))))
for (row_idx in 1:nrow(weights)) {
ea <- weights[row_idx, "EA"]
oa <- weights[row_idx, "OA"]
dosage <- genotypes$read_variant(weights[row_idx, "CHROM"], weights[row_idx, "POS"], ea, oa)
if (nrow(dosage) == 0) {
next
}
for (i in seq_along(weight_cols)) {
weight <- weights[row_idx, weight_cols[i]]
if (weight > 0) {
individual_prs[, i] <- individual_prs[, i] + weight * dosage
} else if (weight < 0) {
individual_prs[, i] <- individual_prs[, i] + weight * (dosage - 2.0)
}
}
if (row_idx %% 10000 == 0) {
message(sprintf("Processed %d/%d variants", row_idx, nrow(weights)))
}
}
message("Done")
individual_prs <- cbind(IID = rownames(individual_prs),individual_prs)
if (length(samples) > 0) {
individual_prs <- individual_prs[samples,]
}
rownames(individual_prs) <- NULL
return(individual_prs)
}
prs_pt <- function(weights_file, genotypes_file, samples_file, pvalues = c(1.0)) {
if (is.null(weights_file)) {
stop("Provide weights file.")
}
if (is.null(genotypes_file)) {
stop("Provide VCF/BCV/savvy file with genotypes and dosages.")
}
weights <- read.table(weights_file, header = TRUE, stringsAsFactors = FALSE, colClasses = c("character", "integer", "character", "character", "numeric", "numeric"))
weights <- weights[with(weights, order(CHROM, POS, decreasing = FALSE)),] # ensure that positions are sorted
weights <- weights[weights$PVALUE <= max(pvalues) & weights$WEIGHT != 0, ] # subset to weights which satisfy all p-value thresholds; skip SNPs with 0 effects
pvalues <- sort(pvalues, decreasing = TRUE)
# set samples to use
if (!is.null(samples_file)) {
samples <- readLines(samples_file)
} else {
samples <- as.character(c()) # use all samples
}
genotypes <- new(Genotypes, genotypes_file) # create VCF/BCF/savvy reader
genotypes$open(samples) # open file for reading and set samples
geno_samples <- genotypes$get_selected_samples() # get samples in the same order as in the genotype file
individual_prs <- data.frame(matrix(nrow = length(geno_samples), ncol = length(pvalues), data = 0, dimnames = list(samples = geno_samples, scores = paste("PRS_", pvalues, sep=""))))
for (row_idx in 1:nrow(weights)) {
weight <- weights[row_idx, "WEIGHT"]
if (weight >= 0) {
ra <- weights[row_idx, "EA"]
pa <- weights[row_idx, "OA"]
risk <- weight
} else {
ra <- weights[row_idx, "OA"]
pa <- weights[row_idx, "EA"]
risk <- -1 * weight
}
pvalue <- weights[row_idx, 'PVALUE']
if (row_idx %% 10000 == 0) {
message(sprintf("Processed %d/%d variants", row_idx, nrow(weights)))
}
dosage <- genotypes$read_variant(weights[row_idx, "CHROM"], weights[row_idx, "POS"], ra, pa)
if (nrow(dosage) == 0) {
next
}
for (i in seq_along(pvalues)) {
if (pvalue <= pvalues[i]) {
individual_prs[, i] <- individual_prs[, i] + risk * dosage
} else {
break
}
}
}
message("Done")
individual_prs <- cbind(IID = rownames(individual_prs),individual_prs)
if (length(samples) > 0) {
individual_prs <- individual_prs[samples,]
}
rownames(individual_prs) <- NULL
return(individual_prs)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.