# function for reading in 23andMe files
read_23andMe <- function(filepath){
data <- read.table(filepath, col.names = c('id', 'chr', 'pos', 'gt'))
print(paste('Processed file with ', nrow(data), ' variants!'))
return(data)
}
# karyoplot function
plot_karyotype <- function(data, chrs = "auto", main = NULL){
if(chrs != "auto"){
chrs <- paste0('chr', chrs)}
sub <- subset(data, data$chr %in% c(seq(1,22),'X', 'Y'))
gr <- GRanges(seqnames = paste0('chr', sub$chr), ranges = IRanges(start = sub$pos, width = 1, names = sub$id))
c_tab <- c(rep('#68DAFC', 9), '#014571', '#68DAFC', '#030303')
names(c_tab) <- names(getCytobandColors(color.schema = 'only.centromeres'))
pp <- getDefaultPlotParams(plot.type = 1)
pp$topmargin <- 280
kp <- plotKaryotype("hg19", chromosomes = chrs, ideogram.plotter = NULL, main = main, plot.params = pp)
kpAddCytobandsAsLine(kp,lwd = 4, color.table = c_tab)
kpPlotDensity(kp, data=gr, col = '#FAA3A3')
}
# zoom plot function
zoom_plot <- function(data, chr_num, range){
plot_data <- subset(data, data$chr == as.character(chr_num) & data$pos >= range[1] & data$pos <= range[2])
plot_data <- separate(plot_data, gt, into = c('allele1', 'allele2'), 1)
plot_data <- pivot_longer(plot_data, cols = c(allele1, allele2), names_to = 'allele')
allele_cols <- c('#595454', '#eb0505', '#0f46db', '#09bd21', '#fafa2a')
names(allele_cols) <- c('-', 'A', 'C', 'G', 'T')
plot_data$value <- factor(plot_data$value, levels = c('A', 'C', 'G', 'T', '-'))
if(nrow(plot_data) < 2000){
tryCatch(
ggplotly(ggplot(plot_data, aes(x = pos, fill = value, text = id)) +
geom_segment(aes(x=pos, xend=pos, y=0.53, yend=1), size = 0.1, show.legend = F) +
geom_segment(aes(x=pos, xend=pos, y=0, yend=0.47), size = 0.1, show.legend = F) +
geom_point(data = subset(plot_data, plot_data$allele == 'allele1'), size=5, alpha=0.8, shape=21, stroke=0.1, aes(y = 1, fill = value))+
geom_point(data = subset(plot_data, plot_data$allele == 'allele2'), size=5, alpha=0.8, shape=21, stroke=0.1, aes(y = 0, fill = value))+
#geom_hline(yintercept = 0.53, col = 'black', size = 8, lty =1) +
geom_hline(yintercept = 0.53, col = "#C8C8C8", size = 2.4) +
#geom_hline(yintercept = 0.47, col = 'black', size = 0.5, lty =1) +
geom_hline(yintercept = 0.47, col = "#C8C8C8", size = 2.4) +
scale_fill_manual(values = allele_cols[levels(plot_data$value)])+
scale_y_continuous(limits = c(-0.25,1.25))+
scale_x_continuous(labels = comma) +
labs(title = paste(' Chromosome', chr_num, ':\n', prettyNum(range[1],big.mark=",", scientific = F), '-', prettyNum(range[2], big.mark=",", scientific = F)), x = NULL, y = NULL, fill = NULL) +
theme_classic() +
theme(axis.text.x = element_text(angle = 90),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
legend.position = 'none'), tooltip = 'text'),
error = function(e) {print("This region is either not valid or doesn't contain any variants!")}
)
} else {
print('Choose a smaller genomic region')
}}
# print my DNA function
print_DNA <- function(data){
# create list to store plots
plots <- list()
# loop through data one chr at a time
chrs <- c(seq(1:22), 'X', 'Y', 'MT')
data <- subset(data, data$chr %in% chrs)
gt_cols <- paletteer_d('Polychrome::glasbey', length(levels(data$gt))) # create colour palette
gt_cols <- sample(gt_cols) # shuffle colours in palette
names(gt_cols) <- levels(data$gt) # name colours
for(i in chrs){
plot_data <- subset(data, data$chr == i) # subset to chr
plot_data <- plot_data %>% group_by(gt) %>% tally() # get counts for each genotype
plot_data$percent <- plot_data$n/sum(plot_data$n) # calculate percentages for each gt
plot_data$scale_n <- log2(plot_data$n) # log scale counts to decrease range
plot_data$scale_per <- plot_data$percent*360 # scale percentage so that it should cover full 360 degrees
plot_data <- plot_data[order(-plot_data$n),] # order by size
plot_data$gt <- factor(plot_data$gt, levels = plot_data$gt) # set factor levels so biggest gets plotted first
plot <- eval(substitute(ggplot(plot_data) + # initialize plot
geom_ellipse(aes(x0 = 0, y0 = 0, a = scale_n, b = scale_n*2.5, angle = scale_per, fill = gt, col = gt), alpha = 0.7, show.legend = F) + # draw ellipses
geom_text( x = -Inf, y = -Inf, hjust = 0, vjust = 0, label = i, col = 'gray95', size = 3) + # annotate with chr number
scale_fill_manual(values=gt_cols[levels(plot_data$gt)]) + # set fill colours
scale_color_manual(values = gt_cols[levels(plot_data$gt)]) + # set outline colours
theme_classic() +
theme(axis.title = element_blank(), # get rid of axes
axis.text = element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
panel.background = element_rect(fill = "grey8", colour = NA), # set black background
plot.background = element_rect(fill = "grey8", colour = NA),
plot.margin=margin(l=-1, r = -1, unit="cm")) + # reduce width of plots
coord_fixed(ratio = 1),list(i = i)))
plots[[i]] <- plot # save plot to list
}
# arrange all plots in a grid with black background
g <- plot_grid(plotlist = plots, align = 'hv')
ggdraw(g) +
theme(plot.background = element_rect(fill="grey8", color = NA))
}
# function to read in vcf and convert to 23andMe format
vcf_to_23 <- function(input_filepath, return_df = F, zip = T){
vcf <- read.vcfR(input_filepath, verbose = F) # read in vcf
id <- vcf@fix[,'ID'] # get rsIDs
chr <- gsub('chr', '', vcf@fix[,'CHROM']) # get chr column
pos <- vcf@fix[,'POS'] # get pos column
alleles <- as.data.frame(extract.gt(vcf, return.alleles = T, IDtoRowNames = F)) %>% separate(1, c('A1', 'A2'), '[/|]', fill = 'right') %>% replace_na(list(A2='-')) # get genotypes for all variants and separate into A1 and A2 columns
alleles$gt <- ifelse(alleles$A1 == alleles$A2, 1, 0) # determine whether each allele is homo/heterozygous
multialleles <- intersect(which(grepl(',',vcf@fix[,'ALT'], fixed = T)), which(nchar(gsub(',', '', vcf@fix[,'ALT'])) == 2)) # find multiallelic sites (should contain ',')
ins <- setdiff(which(nchar(vcf@fix[,'ALT']) > 1 ), multialleles) # find insertions
dels <- which(nchar(vcf@fix[,'REF']) > 1) # find deletions
hom_ins <- intersect(which(alleles$gt == 1), ins) # determine which insertions are homozygous
hom_dels <- intersect(which(alleles$gt == 1), dels) # determine which deletions are homozygous
alleles[ins, 'A2'] <- '+' # replace all indels with +/_
alleles[hom_ins, 'A1'] <- '+'
alleles[dels, 'A2'] <- '-'
alleles[hom_dels, 'A1'] <- '-'
het_dels <- which(nchar(alleles$A1) > 1)
alleles[het_dels, 'A1'] <- substr(alleles[het_dels, 'A1'],1,1 )
final_gts <- rep('NA', nrow(alleles)) # get list of gts in 23andMe format
final_gts[which(alleles[,'gt'] == 0)]<- paste0(alleles[which(alleles[,'gt'] == 0), 'A1'], alleles[which(alleles[,'gt'] == 0), 'A2'])
final_gts[which(alleles[,'gt'] == 1)] <- paste0(alleles[which(alleles[,'gt'] == 1), 'A2'], alleles[which(alleles[,'gt'] == 1), 'A2'])
data <- data.frame( id, chr, pos, final_gts) # construct 23andMe df
colnames(data) <- c('# rsid', 'chromosome', 'position', 'genotype')
if(return_df == F){
output_txt <- gsub("\\.vcf|\\.vcf.gz",'_23andMe.txt', input_filepath) # create name for output files
output_zip <- gsub("\\.vcf|\\.vcf.gz",'_23andMe.zip', input_filepath)
write.table(data, output_txt, sep = '\t', quote = F, row.names = F) # write output file
if(zip == T){
system(paste('zip ', output_zip, output_txt)) # zip output file
output_filepath <- output_zip } else{
output_filepath <- output_txt
}
print(paste('Created file', as.character(output_filepath), 'with', nrow(data), 'variants!'))
} else {
return(data)
}
}
# rainfall karyoplot
rainfall_karyoplot <- function(data, chrs = "auto", range = NULL){
if(chrs != "auto"){
chrs <- paste0('chr', chrs)}
if(!is.null(range)){
range <- GRanges(seqnames = rep(chrs, 2), ranges = IRanges(start = range[1], width = range[2]- range[1]))
}
sub <- subset(data, data$chr %in% c(seq(1,22),'X', 'Y'))
gr <- GRanges(seqnames = paste0('chr', sub$chr), ranges = IRanges(start = sub$pos, width = 1, names = sub$id))
kp <- plotKaryotype("hg19", chromosomes = chrs, plot.type=4, labels.plotter = NULL, zoom = range)
kpPlotRainfall(kp, data=gr, col = 1:5)
kpAddChromosomeNames(kp, srt=45)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.