R/mod_genotypic6.R

Defines functions mod_genotypic6_server mod_genotypic6_ui

#' genotypic6 UI Function
#'
#' @description A shiny Module.
#'
#' @param id,input,output,session Internal parameters for {shiny}.
#'
#' @noRd 
#'
#' @importFrom shiny NS tagList 
mod_genotypic6_ui <- function(id){
  ns <- NS(id)
  tagList(
    shiny::sidebarLayout(
      shiny::sidebarPanel(tags$h3("Genetic divergence among populations"),
                          tags$br(),
                          tags$p('We may also be interested in finding out how genetically
                                 divergent are the populations. For this, we can calculate
                                 the fixation index Fst, which measures the difference 
                                 between the mean gene diversity within populations (Hs)
                                 and the total genetic diversity expected in the whole 
                                 population (Ht). The higher the difference between Ht 
                                 and Hs, the higher the fixation index Fst and therefore
                                 the higher the populations we are comparing. We will 
                                 use the program hierfstat to calculate Fst indexes.'),
                          tags$br(),
                          shiny::fileInput(ns("filevcf"), "Choose a VCF file",
                                           multiple = F,
                                           accept = ".vcf",
                                           buttonLabel = "Uploading..."),
                          shiny::fileInput(ns("filetxt"), "Choose a TXT file",
                                           multiple = F,
                                           accept = ".txt",
                                           buttonLabel = "Uploading...")
      ),
      mainPanel(tags$h2("Results ML"),
                tags$p('A useful plot to explore the overall difference between 
                       Hs and Ht. We can see that Hs is lower than Ht, therefore
                       we can envision that there will be certain degree of 
                       differentiation among populations (in this case we are 
                       comparing genepools in Lima bean) as measured by Fst 
                       (Fst=(Ht-Hs)/Ht).'),
                shiny::plotOutput(ns('boxplot')),
                tags$p('to calculate Fis and Fst on one level hierarchy 
                       (the populations -or genepools- grouped within the 
                       total population) by the method of Weir and Cockerham (1984)'),
                shiny::verbatimTextOutput(ns("summary1")),
                tags$p('To obtain pariwise Fst values among genepools'),
                shiny::verbatimTextOutput(ns("summary2")),
                tags$p('Cluster Validity by NbCLust (and factoextra) 
                       using 30 indices from the scientific literature'),
                shiny::plotOutput(ns('plot1')),
                tags$p('Cluster Validity by OptCluster (an improvement of ClValid)'),
                shiny::plotOutput(ns('plot2')),
                tags$p('Vizualization of OptCluster Output by means of UPGMA dendogram'),
                shiny::plotOutput(ns('plot3')),
      )
    )
  )
}
    
#' genotypic6 Server Functions
#' @import plotly
#' @import adegenet
#' @import randomForest
#' @import ggplot2
#' @import hierfstat
#' @import ape
#' @import poppr
#' @import pegas
#' @import vcfR
#' @import RColorBrewer
#' @import readr
#' @import tidyverse
#' @import mice
#' @import imputeTS
#' @import knitr
#' @import rgl
#' @import caTools
#' @import randomForestExplainer
#' @import dplyr
#' @import ranger
#' @import vegan
#' @import pvclust
#' @import ape
#' @import nnet
#' @import NeuralNetTools
#' @import tidyr
#' @import openxlsx
#' @import cluster
#' @import ggdendro
#' @import factoextra
#' @import foreach
#' @import gridExtra
#' @import caret
#' @import NbClust
#' @import dendextend
#' @import pals
#' @import expss
#' @import e1071
#' @import ROCR
#' @import pROC
#' @import aricode
#' @import dendextend
#' @import fpc
#' @import mclust
#' @import corrplot
#' @import scales
#' @import ggpubr
#' @import grDevices
#' @import optCluster
#' @noRd 
mod_genotypic6_server <- function(id){
  moduleServer( id, function(input, output, session){
    ns <- session$ns
    
    output$boxplot <- renderPlot({
      tryCatch(
        {
          LimaBeanGBS = read.vcfR(input$filevcf$datapath)
        },
        error = function(e){
          stop(safeError(e))
        }
      )
      
      tryCatch(
        {
          data = read.table(input$filetxt$datapath, header = TRUE)
        },
        error = function(e){
          stop(safeError(e))
        }
      )
      Genepool = as.character(data$Genepool)
      LimaBeanData3 <- vcfR2genlight(LimaBeanGBS)
      ploidy(LimaBeanData3) <- 2
      pop(LimaBeanData3) <- Genepool
      
      LimaBeanData2 = vcfR2genind(LimaBeanGBS, 
                                  pop= Genepool, 
                                  NA.char= "NA")
      
      diversity.clusters <- basic.stats(LimaBeanData2, diploid=TRUE, digits=2)
      boxplot(diversity.clusters$perloc[,2:3])
    })
    
    output$summary1 <- renderPrint({
      tryCatch(
        {
          LimaBeanGBS = read.vcfR(input$filevcf$datapath)
        },
        error = function(e){
          stop(safeError(e))
        }
      )
      
      tryCatch(
        {
          data = read.table(input$filetxt$datapath, header = TRUE)
        },
        error = function(e){
          stop(safeError(e))
        }
      )
      Genepool = as.character(data$Genepool)
      LimaBeanData3 <- vcfR2genlight(LimaBeanGBS)
      ploidy(LimaBeanData3) <- 2
      pop(LimaBeanData3) <- Genepool
      
      LimaBeanData2 = vcfR2genind(LimaBeanGBS, 
                                  pop= Genepool, 
                                  NA.char= "NA")
      
      global.Fst.weir_cock <- wc(LimaBeanData2) 
      #to calculate Fis and Fst on one level hierarchy (the populations -
      #or genepools- grouped within the total population) by the method of Weir and Cockerham (1984).
      
       # We can see that genetic differentiation among 
      #genepools is high (Fst=0.59), and that deviation from HWE is also high, 
      #with deficit of heterozygous (Fis=0.69).
      
      print(global.Fst.weir_cock)
    })
    
    output$summary2 <- renderPrint({
      tryCatch(
        {
          LimaBeanGBS = read.vcfR(input$filevcf$datapath)
        },
        error = function(e){
          stop(safeError(e))
        }
      )
      
      tryCatch(
        {
          data = read.table(input$filetxt$datapath, header = TRUE)
        },
        error = function(e){
          stop(safeError(e))
        }
      )
      Genepool = as.character(data$Genepool)
      LimaBeanData3 <- vcfR2genlight(LimaBeanGBS)
      ploidy(LimaBeanData3) <- 2
      pop(LimaBeanData3) <- Genepool
      
      LimaBeanData2 = vcfR2genind(LimaBeanGBS, 
                                  pop= Genepool, 
                                  NA.char= "NA")
      
      pairwise.fst.genepools <- genet.dist(LimaBeanData2, method="WC84") 
      # To obtain pariwise Fst values among genepools
       #We can see that the highest Fst values (highest genetic 
      #differentiation) were observed among genepools Dom_MI vs Dom_AI, 
      #Dom_MI vs Wild_AII and Dom_AI vs wild_AII. The lowest genetic 
      #differentiation was observed among genepools Dom_ADM vs wild MII, 
      #Wild_AI vs Dom_AI (Andean wild ancestor and their domesticated counterpart) 
      #and Wild_MI and Dom_MI (Mesoamerican wild ancestor and their domesticated counterpart).
      
      print(pairwise.fst.genepools)
    })
    
    output$plot1 <- renderPlot({
      tryCatch(
        {
          LimaBeanGBS = read.vcfR(input$filevcf$datapath)
        },
        error = function(e){
          stop(safeError(e))
        }
      )
      
      tryCatch(
        {
          data = read.table(input$filetxt$datapath, header = TRUE)
        },
        error = function(e){
          stop(safeError(e))
        }
      )
      Genepool = as.character(data$Genepool)
      LimaBeanData3 <- vcfR2genlight(LimaBeanGBS)
      ploidy(LimaBeanData3) <- 2
      pop(LimaBeanData3) <- Genepool
      LimaBeanPCA <- glPca(LimaBeanData3, nf = 3)
      Limapcascores <- as.data.frame(LimaBeanPCA$scores)
      Limapcascores$pop <- pop(LimaBeanData3)
      distance = "euclidean" #"euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski"
      algorithm = "kmeans" #"ward.D2", "single", "complete", "average", "mcquitty", "median", "centroid", "kmeans"
      index = c("all")
      res.nbclust <- NbClust(as.data.frame(Limapcascores[,-4]), distance = distance,
                             min.nc = 4, max.nc = 10,
                             method = algorithm, index =index)
      fviz_nbclust(res.nbclust) + theme_minimal()
      res.nbclust$Best.nc[1,]
    })
    
    output$plot2 <- renderPlot({
      tryCatch(
        {
          LimaBeanGBS = read.vcfR(input$filevcf$datapath)
        },
        error = function(e){
          stop(safeError(e))
        }
      )
      
      tryCatch(
        {
          data = read.table(input$filetxt$datapath, header = TRUE)
        },
        error = function(e){
          stop(safeError(e))
        }
      )
      Genepool = as.character(data$Genepool)
      LimaBeanData3 <- vcfR2genlight(LimaBeanGBS)
      ploidy(LimaBeanData3) <- 2
      pop(LimaBeanData3) <- Genepool
      LimaBeanPCA <- glPca(LimaBeanData3, nf = 3)
      Limapcascores <- as.data.frame(LimaBeanPCA$scores)
      Limapcascores$pop <- pop(LimaBeanData3)
      algorithms =  c("pam","kmeans","clara","diana","agnes") #agglomerative =agnes &hclust "kmeans","clara","diana","agnes","som","sota"
      # Select the k range
      k = 4:10
      # Select the valitadion method: "CE", "GA"
      validation_method="CE"
      # run
      PVCA <-as.data.frame(Limapcascores[,1:3])
      set.seed(2022)
      norm1 <- optCluster(as.matrix.data.frame(Limapcascores[,-4]), k, clMethods = algorithms, 
                          rankMethod = validation_method, hierMethod = "ward")
      summary(norm1)
      
      putput <- optAssign(norm1)
      CLUSTER <- as.data.frame(putput$cluster)
      colnames(CLUSTER) <- "CLUSTER"
      
      final_cluster_data <- cbind(PVCA$PC1, PVCA$PC2, CLUSTER)
      rownames(final_cluster_data) <- rownames(Limapcascores)
      final_cluster_data <- as.data.frame(final_cluster_data)
      final_cluster_data$CLUSTER <- as.factor(final_cluster_data$CLUSTER)
      colnames(final_cluster_data) <- c("V1","V2","CLUSTER")
      
      my_pal <- c("darkgreen","darkblue", "orangered","darkred","lightslateblue","orange","purple4","darkred","green","red","pink","yellow","black","deeppink4","darkturquoise","khaki3")
      my_fill <- c("darkgreen","darkblue", "orangered","darkred","lightslateblue","orange","purple4","darkred","green","red","pink","yellow","black","deeppink4","darkturquoise","khaki3")
      p1 <- ggplot(final_cluster_data, aes(x = V1, y = V2,color = CLUSTER))
      p1 <- p1 + geom_point(size = 3, aes(fill = CLUSTER),alpha =0.5)
      p1 <- p1 + geom_hline(yintercept = 0) 
      p1 <- p1 + geom_vline(xintercept = 0) 
      p1 <- p1 + geom_point(size=3)
      p1 <- p1 + theme_bw()
      p1 <- p1 + scale_color_manual(values=c(my_pal))
      p1 <- p1 + scale_fill_manual(values=c(paste(my_fill)))+
        xlab(sprintf("PC1 %f percent", 100*LimaBeanPCA$eig[1]/sum(LimaBeanPCA$eig))) + ylab(sprintf("PC2 %f percent", 100*LimaBeanPCA$eig[2]/sum(LimaBeanPCA$eig)))
      p1
    })
    
    output$plot3 <- renderPlot({
      tryCatch(
        {
          LimaBeanGBS = read.vcfR(input$filevcf$datapath)
        },
        error = function(e){
          stop(safeError(e))
        }
      )
      
      tryCatch(
        {
          data = read.table(input$filetxt$datapath, header = TRUE)
        },
        error = function(e){
          stop(safeError(e))
        }
      )
      Genepool = as.character(data$Genepool)
      LimaBeanData3 <- vcfR2genlight(LimaBeanGBS)
      ploidy(LimaBeanData3) <- 2
      pop(LimaBeanData3) <- Genepool
      # metodo distancia
      pop(LimaBeanData3) <- final_cluster_data$CLUSTER
      tree <- aboot(LimaBeanData3, tree = "upgma", distance = nei.dist, sample = 100, showtree = F, cutoff = 50, quiet = T)
      # cols <- brewer.pal(n = nPop(gl.rubi), name = "Dark2")
      plot.phylo(tree, cex = 0.3, font = 2, adj = 0, tip.color =  my_pal[pop(LimaBeanData3)])
      nodelabels(tree$node.label, adj = c(1.3, -0.5), frame = "n", cex = 0.3,font = 3, xpd = TRUE)
      axis(side = 1)
      title(xlab = "Genetic distance (proportion of loci that are different)")
    })
    
  })
}
    
## To be copied in the UI
# mod_genotypic6_ui("genotypic6_ui_1")
    
## To be copied in the server
# mod_genotypic6_server("genotypic6_ui_1")
Viinky-Kevs/microsoftAI documentation built on April 10, 2022, 12:01 p.m.