opts_chunk$set(comment = NA, results = 'markup', tidy = F, message = F, warning = F, echo = T, fig.width = 12, fig.height = 6)
library(utils) # for downloads library(BEDMatrix) # to read plink data efficiently library(RSpectra) # to perform PCA with a few components, e.g. 2 library(magrittr) library(dplyr) #library(bigcov) library(devtools) #load_all("~/git/variani/bigcov/") load_all()
plot_pop <- function(mod, labs, ...) { plot(mod$vectors[, 1], mod$vectors[, 2], type = "n", xlab = "PC1", ylab = "PC2", ...) text(mod$vectors[, 1], mod$vectors[, 2], label = labs, col = as.numeric(labs)) }
We define a path to 1K dataset based on the output of Sys.info()[["username"]]
.
nodename <- Sys.info()[["user"]] dpath <- switch(nodename, "your_username" = "~/Data/1KGenome/", "redpr" = "/udd/redpr/mixed_model/1kG/", stop("unknown username"))
f <- file.path(dpath, "phase3_common_EUR503_pruned_100ksubset_bed.bed") bmat <- BEDMatrix(f) bmat dim(bmat) ids <- rownames(bmat) ids <- strsplit(ids, "_") %>% sapply(function(x) x[1]) head(ids) pop <- read.table(file.path(dpath, "pop_label_phase3_2504_without_rels"), header = TRUE, stringsAsFactors = FALSE) %>% mutate(pop = factor(pop)) %>% left_join(data_frame(sample = ids), ., by = "sample") system.time({ grm <- bigdat_grm(bmat, batch_size = 1e4, verbose = 1) }) mod <- eigs(grm, k = 2) mod2<-prcomp(grm) names(mod2)[2]<-'vectors' labs <- pop$pop par(mfrow=c(1, 2)) plot_pop(mod, labs, main = "GRM on common markers, using eigs") plot_pop(mod2,labs,main='GRM on common markers, using base::prcomp')
f <- file.path(dpath, "phase3_rare_EUR503_nosingle_100ksubset_bed.bed") bmat <- BEDMatrix(f) bmat dim(bmat) ids <- rownames(bmat) ids <- strsplit(ids, "_") %>% sapply(function(x) x[1]) head(ids) pop <- read.table(file.path(dpath, "pop_label_phase3_2504_without_rels"), header = TRUE, stringsAsFactors = FALSE) %>% mutate(pop = factor(pop)) %>% left_join(data_frame(sample = ids), ., by = "sample") system.time({ grm <- bigdat_grm(bmat, batch_size = 1e4, verbose = 1) }) mod <- eigs(grm, k = 2) mod2<-prcomp(grm) names(mod2)[2]<-'vectors' labs <- pop$pop par(mfrow=c(1, 2)) plot_pop(mod, labs, main = "GRM on rare markers, using eigs") plot_pop(mod2,labs,main='GRM on rare markers, using base::prcomp')
f <- file.path(dpath, "phase3_rare_EUR503_nosingle_100ksubset_bed.bed") bmat <- BEDMatrix(f) bmat dim(bmat) ids <- rownames(bmat) ids <- strsplit(ids, "_") %>% sapply(function(x) x[1]) head(ids) pop <- read.table(file.path(dpath, "pop_label_phase3_2504_without_rels"), header = TRUE, stringsAsFactors = FALSE) %>% mutate(pop = factor(pop)) %>% left_join(data_frame(sample = ids), ., by = "sample") system.time({ grm <- bigdat_grm(bmat, "Jacard", batch_size = 1e4, verbose = 1) }) mod <- eigs(grm, k = 2) mod2<-prcomp(grm) names(mod2)[2]<-'vectors' labs <- pop$pop par(mfrow=c(1, 2)) plot_pop(mod, labs, main = "JAC on rare markers, using eigs") plot_pop(mod2,labs,main='JAC on rare markers, using base::prcomp')
f <- file.path(dpath, "phase3_rare_EUR503_nosingle_100ksubset_bed.bed") bmat <- BEDMatrix(f) bmat dim(bmat) ids <- rownames(bmat) ids <- strsplit(ids, "_") %>% sapply(function(x) x[1]) head(ids) pop <- read.table(file.path(dpath, "pop_label_phase3_2504_without_rels"), header = TRUE, stringsAsFactors = FALSE) %>% mutate(pop = factor(pop)) %>% left_join(data_frame(sample = ids), ., by = "sample") system.time({ grm <- bigdat_grm(bmat, "Jacard", batch_size = 1e4, maf_max = 5e-3, verbose = 1) }) mod <- eigs(grm, k = 2) mod2<-prcomp(grm) names(mod2)[2]<-'vectors' labs <- pop$pop par(mfrow=c(1, 2)) plot_pop(mod, labs, main = "JAC on rare markers, using eigs") plot_pop(mod2,labs,main='JAC on rare markers, using base::prcomp')
sessionInfo()
This document is licensed under the Creative Commons Attribution 4.0 International Public License.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.