library(shiny)
library(ranger)
library(kmer)
library(ape)
library(DT)
library(plotly)
options(shiny.maxRequestSize = 1000*1024^2)
library(rintrojs)
library(shinythemes)
# library(magrittr)
# library(dplyr)
library(shinyjs)
#for mac, without this Rstudio crashes
Sys.setenv(LIBGL_ALWAYS_SOFTWARE=1)
# library(tibble)
library(RColorBrewer)
library(parallel)
# library(shinylogs)
ui <- fluidPage(
theme = shinytheme("simplex"),
introjsUI(), # must include in UI
introBox(titlePanel(div("Covidex",
img(style="width: 50px", src = "covidex_coronavirusazul2.png"))),div("Enabled by data from",tags$a(img(style="width: 50px", src = "GISAID.png"),href="https://www.gisaid.org/")),
data.step = 1,
data.intro = "Covidex is an ultra fast and accurate subtyping tool of hcov-SARS-CoV-2 genomes. The classification is performed using 3 machine learning models (random forest) of a k-mer database"
),
sidebarLayout(
sidebarPanel(
actionButton("btn","Help"),
introBox( fileInput("file", label = h3("Query file (multi-fasta format)"),
accept = c(".text",".fasta",".fas",".fasta")),
data.step = 2,
data.intro = "Please select the fasta file with the sequences to subtype (max size 10 Mb if running in server)"
),
textInput("user", "User name (optional)", "anonymous"),
introBox( actionButton("go", "RUN"),
data.step = 3,
data.intro = "When ready press RUN to begin the analysis"
),
strong(textOutput("text")),
strong("Questions?"),
tags$a(actionButton(inputId = "email1", label = "Contact Admin",
icon = icon("envelope", lib = "font-awesome")),
href="mailto:cacciabue.marco@inta.gob.ar;aguilera.pablo@inta.gob.ar"),
br(),
strong("or visit"),
tags$a(img(style="width: 100px", src = "sourceforge.png"),href="https://sourceforge.net/projects/covidex/"),
br(),
strong("or by Twitter"),
tags$a(img(style="width: 25px", src = "twitter.png"),href="https://twitter.com/marcocacciabue"),
div(class = "footer",
includeHTML("www/footer.html")),
),
mainPanel(
introBox( downloadButton('report',"Generate report (ENG)"),
data.step = 5,
data.intro="Press this button to generate a report with classification data."),
tabsetPanel(
tabPanel(title="Results", introBox( DT::dataTableOutput("table"),
data.step = 4,
data.intro="In the Results tab,
all uploaded sequences with the classification results will appear with an associated confidence value.")),
tabPanel("Classification model information",DT::dataTableOutput("table2")))
)
),
fluidRow(
column(12, align="center",
strong("GISAID data provided on this website are subject to GISAID's"),
tags$a(href="https://www.gisaid.org/DAA/",strong("Terms and Conditions")),
)
)
)
server <- shinyServer(function(input, output, session) {
# track_usage(storage_mode = store_json(path = "logs/"))
introjs(session)
hintjs(session, options = list("hintButtonLabel"="Done!"))
observeEvent(input$btn,
introjs(session))
observeEvent(input$go, {
if(length(input$file)==0){
showModal(modalDialog(
title = "Important message", easyClose = TRUE,
"Please load the fasta file first and then press RUN.
Also, remember that the file must NOT exceed 10 MB in size (about 350 complete sequences). If you need to process larger files consider downloading Covidex from the Sourceforge repository.
"
))}})
model1 <- readRDS(paste("models/",sep="","1.rds"))
model2 <- readRDS(paste("models/",sep="","2.rds"))
model3 <- readRDS(paste("models/",sep="","3.rds"))
model1_data<-data.frame(Model=model1$info,date=model1$date,trees=model1$num.trees,Oob= round(model1$prediction.error,4))
model2_data<-data.frame(Model=model2$info,date=model2$date,trees=model2$num.trees,Oob= round(model2$prediction.error,4))
model3_data<-data.frame(Model=model3$info,date=model3$date,trees=model3$num.trees,Oob= round(model3$prediction.error,4))
model_data<-rbind(model1_data,model2_data,model3_data)
data_reactive<- eventReactive(input$go,{
req(input$file)
progress <- shiny::Progress$new()
on.exit(progress$close())
progress$set(message = "processing data", value = 0)
progress$inc(0.2, detail = paste("Reading fasta"))
#Read input file in fasta format
query <- read.FASTA(input$file[1,4], type = "DNA")
model<-model1
if (length(query)>2000){
# progress$inc(0.4, detail = paste("Sample larger than 1000 sequences: runing in multithread mode"))
#get number of available cores in the machine to see how much to split the data.
progress$inc(0.4, detail = paste("Counting kmers in multithread mode"))
cores<-detectCores()-1
step<-ceiling(length(query)/cores)
last<-step
first<-1
query_split<-0
#split query sequences
for (j in 1:cores) {
#check to get the correct number of sequences at the end
if ((first+step)<length(query)){
query_split[j]<-list(query[first:last])
first<-first+step
last<-last+step}else{
query_split[j]<-list(query[first:length(query)])
}
}
#create core cluster
cl <- makeCluster(detectCores()-1)
# progress$inc(0.6, detail = paste("Kmer counting in multithread mode"))
#Use parallel function to send each split dataset to a different thread.
query_count_split<-parLapply(cl,query_split,count_parallel,model$kmer)
stopCluster(cl)
progress$inc(0.8, detail = paste("merging outputs"))
#Combine the results from the different threads in a single dataset
query_count<-as.numeric()
for (i in 1:length(query_count_split)) {
query_count<-rbind(query_count, query_count_split[[i]])
}
#normalize Kmer counts acording to kmer size and sequence length
genome_length<-0
n_length<-0
for(H in 1:length(query_count[,1])){
k<-query[H]
k<-as.matrix(k)
query_count[H,]<- query_count[H,]*model$kmer/(length(k))
genome_length[H]<-length(k)
n_length[H]<-round(100*base.freq(k,all = TRUE)[15],2)
}}
else{
#Calculate k-mer counts from query sequences
# progress$inc(0.4, detail = paste("Runing in Single-thread mode"))
progress$inc(0.4, detail = paste("Counting kmers in single-thread mode"))
query_count<-kcount(query , k=model$kmer)
genome_length<-0
n_length<-0
for(i in 1:length(query_count[,1])){
k<-query[i]
k<-as.matrix(k)
query_count[i,]<- query_count[i,]*model$kmer/(length(k))
genome_length[i]<-length(k)
n_length[i]<-round(100*base.freq(k,all = TRUE)[15],2)
}
}
progress$inc(0.9, detail = paste("Predicting"))
calling<-predict(model,query_count)
#Run the predict method from de Ranger package, retaining the classification result from each tree in the model (to calculate a probability value for each classification)
calling_all<-predict(model,query_count,predict.all = TRUE)
probability <- rep(0, length(calling_all$predictions[,1]))
for (i in 1:length(calling_all$predictions[,1])) {
#extract predictions for each query sample in temp vector,
#count the number of correct predictions and divide by number of trees to get a probability.
temp<-calling_all$predictions[i,]
probability[i] <- sum(temp==which(model$forest$levels==calling$predictions[i]))/model$num.trees
}
model<-model2
calling2<-predict(model,query_count)
#Run the predict method from de Ranger package, retaining the classification result from each tree in the model (to calculate a probability value for each classification)
calling_all2<-predict(model,query_count,predict.all = TRUE)
probability2 <- rep(0, length(calling_all2$predictions[,1]))
for (i in 1:length(calling_all2$predictions[,1])) { #
#extract predictions for each query sample in temp vector,
#count the number of correct predictions and divide by number of trees to get a probability.
temp2<-calling_all2$predictions[i,]
probability2[i] <- sum(temp2==which(model$forest$levels==calling2$predictions[i]))/model$num.trees
}
model<-model3
calling3<-predict(model,query_count)
#Run the predict method from de Ranger package, retaining the classification result from each tree in the model (to calculate a probability value for each classification)
calling_all3<-predict(model,query_count,predict.all = TRUE)
probability3 <- rep(0, length(calling_all3$predictions[,1]))
for (i in 1:length(calling_all3$predictions[,1])) { #count the number of correct predictions and divide by number of trees to get a probability.
temp3<-calling_all3$predictions[i,]
probability3[i] <- sum(temp3==which(model$forest$levels==calling3$predictions[i]))/model$num.trees
}
# QC<-as.character(probability>input$QC)
n_QC<-(n_length<1)
Length_QC<-(genome_length>29500)
VOC<-(calling2$prediction=="B.1.351")|(calling2$prediction=="B.1.1.7")|(calling2$prediction=="P.1")|(calling2$prediction=="B.1.427")|(calling2$prediction=="B.1.429")
VOI<-(calling2$prediction=="P.2")|(calling2$prediction=="B.1.525")|(calling2$prediction=="B.1.526")
FLAG<-(VOI|VOC)
data_out <- data.frame(label= row.names(query_count),Nextstrain=calling$prediction, probability_N=probability,Rambaut=calling2$prediction,probability_R=probability2,GISAID=calling3$prediction,probability_G=probability3,FLAG=FLAG,VOC=VOC,VOI=VOI,Length=genome_length,Length_QC=Length_QC,N=n_length,N_QC=n_QC)
rm(query_count,calling,calling_all,calling2,calling_all2)
list(message="Done!",data_out=data_out)
})
output$text <- renderText({
data_reactive()$message
})
table<-reactive({
data_out<-data_reactive()$data_out
data_out
})
output$table <- DT::renderDataTable({
col<-brewer.pal(5,"Blues")
col2<-brewer.pal(5,"Reds")
table<-table()
datatable(table,selection = 'single',
options = list(
columnDefs = list(list(targets = c(8,9,10,12,14), visible = FALSE)),
dom = 'Bfrtip',
lengthMenu = list(c(5, 15, -1), c('5', '15', 'All')),
pageLength = 15,
buttons =
list('copy', 'print', list(
extend = 'collection',
buttons = list(
list(extend = 'csv', filename = paste(input$file[1,1],"covidex_results"),sep=""),
list(extend = 'excel', filename = paste(input$file[1,1],"covidex_results"),sep=""),
list(extend = 'pdf', filename = paste(input$file[1,1],"covidex_results"),sep="")),
text = 'Download'
)
)
))%>% formatStyle("Rambaut","FLAG",
backgroundColor = styleEqual(c(0, 1), c(col[1], col[3])))%>% formatStyle("Length","Length_QC",
backgroundColor = styleEqual(c(0, 1), c(col2[3], col2[1])))%>% formatStyle("N","N_QC",
backgroundColor = styleEqual(c(0, 1), c(col2[3], col2[1])))
})
#
# output$table <- DT::renderDataTable({
# req(input$go)
# req(input$file)
# data_reactive()$data_out
#
#
# })
model_table<-reactive({
model_data
})
output$table2 <- DT::renderDataTable({
datatable(model_table())
})
output$report <- downloadHandler(
# For PDF output, change this to "report.pdf"
filename = "report.html",
content = function(file) {
# Copy the report file to a temporary directory before processing it, in
# case we don't have write permissions to the current working dir (which
# can happen when deployed).
tempReport <- file.path(tempdir(), "report.rmd")
file.copy("report.rmd", tempReport, overwrite = TRUE)
# Set up parameters to pass to Rmd document
# Knit the document, passing in the `params` list, and eval it in a
# child of the global environment (this isolates the code in the document
# from the code in this app).
rmarkdown::render(tempReport, output_file = file)
}
)
version<-reactive({
version<-2.1
return(version)})
count_parallel<-function(x,kmer) ({
library(kmer)
kcount(x,k= kmer)})
##uncomment this to end R session when the windows closes
# session$onSessionEnded(function() {
# stopApp()
# q("no")
# })
})
shinyApp(ui, server)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.