knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(data.table)
library(RColorBrewer)
library(viridis)
library(cowplot)
library(GGally)
library(CDBNgenomics)
source("../data-raw/ignore/scorelocalfunctions.R")
source("~/Github/Functions_ggplot-theme-adjustments_2018-01-03.R")

Figure S1

Correlation plot of BLUPs from ASReml

blups <- read.table("~/Github/CDBNgenomics/data-raw/ASReml_coefficients_13_GAPIT_2020-01-12.txt", header = TRUE, sep = "\t")

blup_cor <- blups[2:20] %>%
  cor(use = "pairwise.complete") # correlation matrix between phenotype breeding values

phe_key <- tibble(Phenotype_abbr = row.names(blup_cor), Phenotype = c("Seed yield (kg/ha)", "Seed weight (mg)",
                         "Halo blight damage score", "Rust damage score",
                        "Seed appearance score", "White mold damage score", 
                        "Blackroot BCMV response",
                        "CBB damage score", "BCMV damage score", 
                        "CTV damage score", "Early vigor", 
                        "Growth habit", "Plant height", 
                        "Biomass (kg)", "Days to flowering", 
                        "Days to maturity", "Harvest index (%)", 
                        "Lodging score", "Seedfill duration (days)"))
min(blup_cor) # minimum correlation between pairs

(length(which(abs(blup_cor)>0.2))-19) / (length(blup_cor)-19) # fraction of correlations > 0.2

blup_cor %>%
  CDBNgenomics:::reorder_cormat(.) %>%
  ggcorr(data = NULL, cor_matrix = ., geom = "circle", layout.exp = 5, min_size = 0.2, max_size = 2.5) + 
  scale_color_viridis(option = "B")

save_plot("Supplement_Fig_1_ASReml.svg", plot = last_plot(), base_width = 4)

Local score supplement

FDRannos <- readRDS("~/Github/CDBNgenomics/data-raw/ignore/10perFDR_annosWithin100kbp_GAPIT_BLUP_PC0_phenotypedonly_2020-01-17.rds")
all_data <- readRDS("~/Github/CDBNgenomics/data-raw/ignore/local_scores_22_CDBN_phenotypes_ksi3_list.rds")
sigzones <- readRDS("~/Github/CDBNgenomics/data-raw/ASReml/GAPIT-for-mash/All_significant_regions_localscore_ksi3.rds")
pathpt1 <- file.path("I:", "OneDrive", "NSF Common Bean", "CDBN Genomics", "Pvulgaris-genome", 
                        "annotation", "Pvulgaris") 

  txdb <- AnnotationDbi::loadDb(file = file.path(pathpt1, "v2.1", "annotation", 
                                  "Pvulgaris_442_v2.1.gene.sqlite"))
  anno_info <- read.table(file = file.path(pathpt1, "v2.1", "annotation", 
                                 "Pvulgaris_442_v2.1.annotation_info.txt"), 
                          sep = "\t", fill = TRUE, header = TRUE)
  Pv_kegg <- read.table(file = file.path(pathpt1, "v2.1", "annotation", 
                                         "commonbean_kegg.txt"), 
                        sep = "\t", header = FALSE)
  Pv_GO <- read.table(file = file.path(pathpt1, "v2.1", "annotation", 
                                       "commonbean_go.txt"),
                      sep = "\t", header = FALSE)

SY

all_data[[20]] %>%
  filter(Chromosome == 1 & between(Position, 42180000,  42380000)) %>%
  ggplot(aes(x = Position, y = lindley)) +
  geom_line() + geom_point() + geom_hline(yintercept = 3.719524, linetype = 2)
all_data[[20]] %>%
  filter(Chromosome == 1 & between(Position,42205769,   42238454)) %>%
  ggplot(aes(x = Position, y = lindley)) +
  geom_line() + geom_point() + geom_hline(yintercept = 3.719524, linetype = 2)

SW

FDRannos$SW %>%
  left_join(all_data[[19]], by = c("Marker Name" = "SNP"))
FDRannos$SW %>%
  arrange((`p value`))
sigzones$SW

S05_27269508 5 27269508 27269508 4.828511 Phvul.005G087118
S02_30559854 2 30559854 30559854 4.467288 Phvul.002G151800 Phvul.002G151900 S08_62833499 8 62833423 62833499 5.899035 Phvul.008G290500 Phvul.008G290600 Phvul.008G290700 S03_4406253 3 4406253 4406260 4.971953 Phvul.003G039900

3 5259067 5261518 53.945135

all_data[[19]] %>%
  filter(Chromosome == 3 & between(Position,4366253,    5301518)) %>%
  ggplot(aes(x = Position, y = lindley)) +
  geom_line() + geom_point() + geom_hline(yintercept = 3.671113, linetype = 2)

FDRannos$SW %>% mutate(Posbin = ceiling(Position/1000000)) %>%
  #group_by(Chromosome, Posbin) %>% summarise(count = n())
  ggplot(aes(x = Posbin, y = -log10(`p value`))) +
  geom_point() + facet_wrap(~Chromosome)

LG

FDRannos$LG %>%
  left_join(all_data[[19]], by = c("Marker Name" = "SNP"))
FDRannos$LG %>%
  group_by(Chromosome) %>%
  summarise(max = max(`p value`), effect = mean(abs(`SNP Effect`)))

0.04077616+0.05782306+0.01812062
FDRannos$LG %>%
  arrange((`p value`))
sigzones$LG

2932290-2992352

0.04077616+0.05782306+0.01812062 S04_2932290 4 2990488 2992352 4.393083 2869697 3154155 S07_34512482 S07_34455942 S07_34459776 7 34459716 34488947 23.988031
S08_18222478 8 16680874 16680881 5.669153

all_data[[13]] %>%
  filter(Chromosome == 4 & between(Position, 2839697,   3184155)) %>%
  ggplot(aes(x = Position, y = lindley)) +
  geom_line() + geom_point() + geom_hline(yintercept = 3.672843, linetype = 2)

all_data[[13]] %>%
  filter(Chromosome == 7 & between(Position, 34429716,  34558947)) %>%
  ggplot(aes(x = Position, y = lindley)) +
  geom_line() + geom_point() + geom_hline(yintercept = 3.672843, linetype = 2)

all_data[[13]] %>%
  filter(Chromosome == 8 & between(Position, 16680874,  18252478)) %>%
  ggplot(aes(x = Position, y = lindley)) +
  geom_line() + geom_point() + geom_hline(yintercept = 3.672843, linetype = 2)
all_data[[13]] %>%
  filter(Chromosome == 8 & between(Position, 18222478,  18252478))

GH

FDRannos$GH %>%
  arrange((`p value`))
sigzones$GH %>%
  arrange(Chromosome, end)

S01_6284049 1 6284049 6284049 4.730580
S01_42228454 1 42266737 42280778 14.154966 complicated... 38886673 47706619 S09_30938127 9 30938118 30938127 7.708725
S10_42797085 10 42791454 42797324 26.235210

42280778 - 42228454
all_data[[10]] %>%
  filter(Chromosome == 1 & between(Position, 42185896,  42300778)) %>%
  ggplot(aes(x = Position, y = lindley)) +
  geom_line() + geom_point() + geom_hline(yintercept = 3.672843, linetype = 2)
all_data[[10]] %>%
  filter(Chromosome == 1 & between(Position, 35006673,  47876619)) %>%
  ggplot(aes(x = Position, y = lindley)) +
  geom_line() + geom_point() + geom_hline(yintercept = 3.672843, linetype = 2)
all_data[[10]] %>%
  filter(Chromosome == 9 & between(Position, 30918118,  30958127)) %>%
  ggplot(aes(x = Position, y = lindley)) +
  geom_line() + geom_point() + geom_hline(yintercept = 3.672843, linetype = 2)
all_data[[10]] %>%
  filter(Chromosome == 10 & between(Position, 42701454, 42997324)) %>%
  ggplot(aes(x = Position, y = lindley)) +
  geom_line() + geom_point() + geom_hline(yintercept = 3.672843, linetype = 2)

RU

FDRannos$RU %>%
  arrange((`p value`))
sigzones$RU %>%
  arrange(Chromosome, end)

S11_50690130 11 50650488 50670920 56.499461

Mash for 17

mash <- readRDS("../data-raw/ASReml/GAPIT-for-mash/Strong_Effects30114SNPs.rds")
get_significant_results(m=mash)[1:10]
bf <- CDBNgenomics:::get_log10bf(m=mash)
CDBNgenomics::mash_plot_effects(m = mash, i = 1771)
CDBNgenomics::mash_plot_effects(m = mash, i = 3217)
CDBNgenomics::mash_plot_effects(m = mash, i = 7065)
CDBNgenomics::mash_plot_effects(m = mash, i = 10808)
CDBNgenomics::mash_plot_effects(m = mash, i = 14290)
mash_plot_effects(m= mash, i =5784)
mash_plot_effects(m= mash, i =8008)
mash_plot_effects(m= mash, i =21703)
bf[10808]
bf[21703:21800]
df <- CDBNgenomics:::get_marker_df(m=mash)
df[which(bf > 1.8),]
which(bf > 6)
head(bf)
df %>%
  filter(grepl("S07_3", Marker))

df[which(bf > 2.),] %>%
  df %>%
  separate(Marker, into = c("S", "Chr", "_", "Pos"), sep = c(1,3,4)) %>%
  mutate(#bf = bf[which(bf>2)],
         Chr = as.numeric(Chr),
         Pos = as.numeric(Pos),
         Posbin = ceiling(Pos/1000)) %>%
  filter(Chr == 7 & Pos > 30000000) %>%
  group_by(Posbin) %>% tally() %>%
  mutate(Poslag = Posbin - lag(Posbin))
  ggplot(aes(x = Posbin, y = bf)) + 
  geom_point() + geom_line()

get_significant_results(m = mash, conditions = "Bhat_PH")

6 to 21 Mb (5.3Mb to 20.5Mb) 35 to 46 Mb (34.1Mb to 45.4Mb)

S01_762752 1771 S01_3173121 3217
S01_34203637 7065

[1] 1771 3217 4613 5757 5784 6135 7065 8008 8034 8040 8041 [12] 10808 14290

S01_0.762752 1771
S01_3.173121 3217 Phvul.001G033800 PROTEIN PHOSPHATASE 2C 61-RELATED S01_6.244853 4613 growth habit
S01_15.360832 5784 days to flowering S01_34.203637 7065 Phvul.001G123900. Glutamate synthase (ferredoxin) / Ferredoxin-dependent glutamate synthase S01_43.841457 8008 Phvul.001G180600 SERINE/THREONINE PROTEIN PHOSPHATASE 2A S02_49.069219 10808
S04_2.486436 14290 lodging score

Phvul.004G021500.1 (primary)

Location: Chr04:2488480..2491886 forward Phvul.004G021400.1 (primary) Location: Chr04:2479850..2482645 forward

pairwise <- mash_plot_pairwise_sharing(effectRDS = "../data-raw/ASReml/GAPIT-for-mash/Pairwise_sharing_Strong_Effects_30114SNPs.rds", label = TRUE)

pairwise$gg_corr
library(GGally)
?ggcorr()


Alice-MacQueen/CDBNgenomics documentation built on Aug. 18, 2020, 4:39 p.m.