# genetics
library(vcfR)
devtools::install_github("nickbrazeau/NFBtools")
library(NFBtools)
# tidy
library(tidyverse)
library(DT)
# plot
library(RColorBrewer)
library(grid)
# networks
library(ggraph)
library(tidygraph)

Read in Data

# download this file: 

url <- "ftp://ngs.sanger.ac.uk/production/malaria/pf-crosses/1.0/3d7_hb3.combined.final.vcf.gz" 
destfile <- "~/Downloads/temp.vcf.gz"
httr::GET(url=url, httr::write_disk(path=destfile, overwrite = F))
vcfRobject <- vcfR::read.vcfR(file=destfile)

vcfRobject <- vcfRobject[vcfR::is.biallelic(vcfRobject)] # biallelic 
vcfRobject <-vcfR::extract.indels(vcfRobject, return.indels = F) # subset to SNPs
vcfRobject <- NFBtools::vcfR2segsites(vcfRobject)

Plotting the WHOLE Cross Data Subset

plotObjlist <- NFBtools::vcfR2GTCov(vcfR = vcfRobject)

# this is the dataframe we are using to plot, plots by chromosome
head(plotObjlist$snpdflist$Pf3D7_01_v3)
plot(plotObjlist$plot)

Overview

Cross experiment. Two parents. Different levels of relatedness -- nice test/use case. Let's subset to make our life easier... Of note, the LD weights here are going to be very strong since this is a one-generation cross. This approach is assuming random draws from a population, not meiotic siblings.

Subset and Plot Chrom1 Cross Data Subset

# two chromosomes to show that this parallelizes well 
vcfRobject <- NFBtools::vcfR2SubsetChrom(vcfRobject = vcfRobject, chromvect = c("Pf3D7_01_v3"))
plotObjlist <- NFBtools::vcfR2GTCov(vcfR = vcfRobject)
# smaller plot now
plot(plotObjlist$plot)

Run the Genetic Distance Calcuator

ret <- NFBtools::vcfR2Fw_pairwisegendist(vcfRobject = vcfRobject, biallelicsnps = T, segsites = NULL) # I did segsites above. this is a required feature
colnames(ret) <- c("pair1", "pair2", "dab")

# Wrangle this into distance format

ret_pairwise_df <- tidyr::spread(ret, key=pair1, value=dab)
row.names(ret_pairwise_df) <- smpl <- levels(ret$pair2)
# this insanity to create a proper off diagonal
ret_pairwise_df <- rbind.data.frame(rep(NA, ncol(ret_pairwise_df)), ret_pairwise_df)
row.names(ret_pairwise_df)[1] <- levels(ret$pair1)[1]
ret_pairwise_df <- cbind.data.frame(ret_pairwise_df, rep(NA, nrow(ret_pairwise_df)))

# returning to normal code
smpl <- c(levels(ret$pair1)[1], smpl)
ret_pairwise_df <- ret_pairwise_df[,-1] # drop pair
retdist <- as.dist(ret_pairwise_df)

Gen Dist Return

DT::datatable(ret,
              extensions = 'Buttons', options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'print')
  ))

PCA

fit <- cmdscale(retdist,eig=TRUE, k=3) # k is the number of dim

#fit # view results

eigpoints <- as.data.frame(fit$points)
colnames(eigpoints) <- c("PC1", "PC2", "PC3")

## color schematics
n <- length(smpl)
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))


colorsample <- data.frame(smpl = smpl,
                    colorsample = sample(col_vector,length(smpl)))

eigpoints$smpl <- smpl  

eigpoints <- left_join(eigpoints, colorsample, by=c("smpl"))

########################
#          PCA         #
########################
library(plotly)
Sys.setenv("plotly_username"="nbrazeau1")
Sys.setenv("plotly_api_key"="ePqTRUO0K7qUlYc8DffD")

psample <- plot_ly(eigpoints, x = ~PC1, y = ~PC3, z = ~PC2, color = ~smpl, colors = as.character(eigpoints$colorsample)) %>%
  add_markers() %>%
  layout(scene = list(xaxis = list(title = 'PC1'),
                     yaxis = list(title = 'PC3'),
                     zaxis = list(title = 'PC2')))

psample

Network Analysis

ret_prune <- ret %>% 
  dplyr::filter(dab < 1e-3)

f.graph <- as_tbl_graph(ret_prune, directed = FALSE)

plotobj <- ggraph(f.graph) +
  geom_edge_fan(aes(width=dab), colour = "#d9d9d9", alpha=0.8) +
  scale_edge_width(range = c(0.8, 1.5)) +
  geom_node_point(aes(color = name, size=2.5)) +
  geom_node_text(aes(label = name), size = 2, repel = TRUE) +
  theme_graph() +
  theme(legend.position = "none")

plot(plotobj)


nickbrazeau/NFBtools documentation built on May 17, 2019, 6:37 a.m.