# this file was generated by combining and extracting all homozygous carriers.
# it requires first, that 05_find_homo_plof.sh to be run. And then, the
# resulting data to be combined row-wise.
# todo: executeable script
#plot_vpc_homozygous <- function(df, n = 480000, cols = c('ID1', 'ID2', 'CHROM','BP','SNP','MAF')){
########################
### run on the server ##
########################
x <- lapply(1:22, function(x) read.table(paste0('ukb_mfi_homo_plof_chr',x,'.txt'), skip = 2))
df <- do.call(rbind, x)
colnames(df) <- c('ID1', 'ID2', 'CHROM','BP','SNP','MAF')
n = 479230 # all
#n = 402133 # wb
# summarize data
inds <- as.data.frame(table(df$ID2, df$SNP))
snp_count <- aggregate(Freq ~ Var2, data = inds, FUN = sum)
colnames(snp_count) <- c('SNP','observed_snps')
snp_maf <- df[,c('SNP','MAF')]
snp_maf <- snp_maf[!duplicated(snp_maf),]
res <- merge(snp_count, snp_maf)
res$homo_freq <- res$MAF ** 2
res$expected_snps <- res$homo_freq * n
write.csv(res, '/well/lindgren/flassen/210131_ukbb_homozyogus_carriers_count.csv', quote = F, row.names = F)
###############
# run locally #
###############
# plot data
res <- read.csv('~/Desktop/210131_ukbb_homozyogus_carriers_count.csv')
res$AF <- res$MAF
title = paste0('Observed vs Expected high impact variant count in UKBB (All)')
subtitle = paste0('Samples: ',479230,' SNPs: ', length(unique(res$SNP)))
p1 <- ggplot(res, aes(x=observed_snps, y=expected_snps, color = AF)) +
geom_point() +
geom_abline(slope=1, intercept=0, col = 'red') +
annotate("text", x = 184, y = 170, col = 'red', label = 'x=y') +
xlab('Observed homozygous carriers (n)') +
ylab('Expected homozygous carries (n)') +
ggtitle(title, subtitle) +
theme_bw()
# plot data WB
wb <- read.csv('~/Desktop/210131_ukbb_wb_homozyogus_carriers_count.csv')
wb$AF <- wb$MAF
title = paste0('Observed vs Expected high impact variant count in UKBB (WB)')
subtitle = paste0('Samples: ',402133,' SNPs: ', length(unique(wb$SNP)))
p2 <- ggplot(wb, aes(x=observed_snps, y=expected_snps, color = AF)) +
geom_point() +
geom_abline(slope=1, intercept=0, col = 'red') +
annotate("text", x = 180, y = 170, col = 'red', label = 'x=y') +
xlab('Observed homozygous carries (n)') +
ylab('Expected homozygous carries (n)') +
ggtitle(title, subtitle) +
theme_bw()
ggsave('210131_ukb_all_samples_obs_vs_expected_high_imapct_count.png', p1, width = 6, height = 6)
ggsave('210131_ukb_all_samples_obs_vs_expected_high_imapct_WB_count.png', p2, width = 6, height = 6)
#}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.