## ---- include = FALSE---------------------------------------------------------
# Global knitr chunk options: collapse source and output into one block and
# prefix printed output lines with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
## ----setup, include=FALSE-----------------------------------------------------
library(SNPFastImputeMac)
library(tidyverse)
library(ggplot2)
library(ggpubr)
#library(doParallel)
#library(foreach)
#library(doRNG)
## ----vcf, echo=T--------------------------------------------------------------
# Load the bundled `full_vcf` data set, then show its dimensions and a preview
# of the first five rows (columns 1-7 and 9-14).
data(list = "full_vcf")
dim(full_vcf)
full_vcf[seq_len(5), c(seq_len(7), 9:14)]
## ----vcf2df, message=FALSE----------------------------------------------------
# Convert the VCF object with the package's `vcf2df()` helper.
full_df <- vcf2df(full_vcf)
## ----showdf, echo=F-----------------------------------------------------------
# Preview the top-left 4 x 4 corner of the converted data.
full_df[seq_len(4), seq_len(4)]
# Number of leading columns of `full_df` kept by the subset step that follows.
sub <- 200
## ----subset-------------------------------------------------------------------
# Work on the first `sub` columns only.
SNP_orig_sub <- full_df[, seq_len(sub)]
## ----introduceNA--------------------------------------------------------------
# Missing ratios to simulate: 0.05, 0.10, ..., 0.25.
ratios <- seq(0.05, 0.25, by = 0.05)
ratios_len <- length(ratios)
# Preallocate one slot per ratio instead of growing the list inside the loop.
SNP_NA_dfs <- vector("list", ratios_len)
#set.seed(20980)
# seq_len() is used rather than 1:n so an empty `ratios` yields zero iterations.
for (i in seq_len(ratios_len)) {
  # Each result is a list; its `NA_percent_generate` element is printed so the
  # reported percentage can be checked against the requested ratio.
  SNP_NA_dfs[[i]] <- NA_Generator(SNP_orig_sub, ratios[i])
  print(SNP_NA_dfs[[i]]$NA_percent_generate)
}
# Label each result by its ratio, e.g. "missing 0.05".
names(SNP_NA_dfs) <- paste("missing", as.character(ratios))
## ----classification error calculation for different missing rate--------------
# One classification error per missing ratio: compare the original subset with
# the i-th filled data set at the positions recorded for the i-th NA data set.
# NOTE(review): `df_fills` is not defined anywhere in the visible script; it is
# presumably produced by an imputation chunk that is not included here --
# confirm it exists before this chunk runs.
# NOTE(review): `NP_generate_positions` is read from the NA_Generator() result;
# verify the element name, since `$` on a list returns NULL for a wrong name.
errors <- rep(NA, ratios_len)
# seq_len() avoids the 1:0 trap when `ratios_len` is zero.
for (i in seq_len(ratios_len)) {
  errors[i] <- classification_error(
    SNP_orig_sub,
    df_fills[[i]],
    SNP_NA_dfs[[i]]$NP_generate_positions
  )
}
## ----calculate missing ratio vs error rate and time, message=FALSE------------
# Pair each missing ratio with its classification error and processing time.
# NOTE(review): `proctimes` is not defined in the visible script -- presumably
# recorded by an imputation chunk that is not included here; confirm.
errors_df <- data.frame(ratios, errors)
times_df <- data.frame(ratios, proctimes)
## ----plot missing ratio vs error rate and time, echo = T, eval = T, fig.width=8, message = F, tidy = T, tidy.opts=list(width.cutoff=35)----
# Left panel: error vs. missing ratio, y axis fixed to [0, 0.2].
p1 <- ggplot(data = errors_df, mapping = aes(x = ratios, y = errors)) +
  geom_point() +
  ylim(c(0, 0.2))
# Right panel: processing time vs. missing ratio, y axis fixed to [0, 20].
p2 <- ggplot(data = times_df, mapping = aes(x = ratios, y = proctimes)) +
  geom_point() +
  ylim(c(0, 20))
# Show both panels side by side in a single row.
ggarrange(p1, p2, nrow = 1)
## ----windows size list--------------------------------------------------------
# Window sizes to compare: 10, 20, ..., 50.
sizes <- seq(from = 10, to = 50, by = 10)
sizes_len <- length(sizes)
# Use the fourth generated data set (ratio 0.20 in `ratios`) for this section.
SNP_NA_df02 <- SNP_NA_dfs[[4]]$SNP_NA_df
## ----classification error calculation for different windows size--------------
# One classification error per window size. This section works on a single
# data set, SNP_NA_dfs[[4]] (the source of SNP_NA_df02), so every filled result
# is scored against that data set's masked positions, and the loop runs over
# the window sizes rather than the missing ratios.
# NOTE(review): `df_fills` is not defined in the visible script; presumably it
# holds one filled data set per window size here -- confirm.
# NOTE(review): verify the element name `NP_generate_positions` against the
# NA_Generator() return value.
errors <- rep(NA, sizes_len)
for (i in seq_len(sizes_len)) {
  errors[i] <- classification_error(
    SNP_orig_sub,
    df_fills[[i]],
    SNP_NA_dfs[[4]]$NP_generate_positions
  )
}
## ----calculate windows size vs error rate and time, message=FALSE-------------
# Pair each window size with its classification error and processing time.
# NOTE(review): `proctimes` is not defined in the visible script -- presumably
# recorded by an imputation chunk that is not included here; confirm.
errors_df <- data.frame(sizes, errors)
times_df <- data.frame(sizes, proctimes)
## ----plot windows size vs error rate and time, echo = T, eval = T, fig.width=8, message = F, tidy = T, tidy.opts=list(width.cutoff=35)----
# Left panel: error vs. window size, y axis fixed to [0, 0.2].
p1 <- ggplot(data = errors_df, mapping = aes(x = sizes, y = errors)) +
  geom_point() +
  ylim(c(0, 0.2))
# Right panel: processing time vs. window size, y axis fixed to [0, 20].
p2 <- ggplot(data = times_df, mapping = aes(x = sizes, y = proctimes)) +
  geom_point() +
  ylim(c(0, 20))
# Show both panels side by side in a single row.
ggarrange(p1, p2, nrow = 1)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.