## ---- include = FALSE-----------------------------------------------------------------------------
options(width = 100)
knitr::opts_chunk$set(echo = TRUE, dev = c('png','pdf'), fig.width= 6,
fig.height=4,
collapse = TRUE,
comment = "#>"
)
## ---- echo=FALSE,eval=TRUE------------------------------------------------------------------------
Rscript <- readLines(file.path("..","inst","extscript","align_sequences.R"))
begin <- which(Rscript == "#- Begin blast -#") + 1
end <- which(Rscript == "#- End blast -#") - 1
Blast_Rcsript <- Rscript[begin:end]
begin <- which(Rscript == "#- Begin get_closest_seq -#") + 1
end <- which(Rscript == "#- End get_closest_seq -#") - 1
get_closest_seq_Rcsript <- Rscript[begin:end]
begin <- which(Rscript == "#- Begin align -#") + 1
end <- which(Rscript == "#- End align -#") - 1
align_Rcsript <- Rscript[begin:end]
begin <- which(Rscript == "#- Begin postprocess_align_gaps -#") + 1
end <- which(Rscript == "#- End postprocess_align_gaps -#") - 1
postprocess_align_gaps_Rcsript <- Rscript[begin:end]
begin <- which(Rscript == "#- Begin postprocess_align_identical -#") + 1
end <- which(Rscript == "#- End postprocess_align_identical -#") - 1
postprocess_align_identical_Rcsript <- Rscript[begin:end]
begin <- which(Rscript == "#- Begin postprocess_align_DRM -#") + 1
end <- which(Rscript == "#- End postprocess_align_DRM -#") - 1
postprocess_align_DRM_Rcsript <- Rscript[begin:end]
Rscript <- readLines(file.path("..","inst","extscript","build_trees.R"))
begin <- which(Rscript == "#- Begin build_tree -#") + 1
end <- which(Rscript == "#- End build_tree -#") - 1
build_tree_Rcsript <- Rscript[begin:end]
begin <- which(Rscript == "#- Begin postprocess_tree -#") + 1
end <- which(Rscript == "#- End postprocess_tree -#") - 1
postprocess_tree_Rcsript <- Rscript[begin:end]
Rscript <- readLines(file.path("..","inst","extscript","Ancestral_state_to_clades.R"))
begin <- which(Rscript == "#- Begin relabelling -#") + 1
end <- which(Rscript == "#- End relabelling -#") - 1
relabelling_Rcsript <- Rscript[begin:end]
begin <- which(Rscript == "#- Begin phyloscanner -#") + 1
end <- which(Rscript == "#- End phyloscanner -#") - 1
phyloscanner_Rcsript <- Rscript[begin:end]
begin <- which(Rscript == "#- Begin postprocess_phyloscanner -#") + 1
end <- which(Rscript == "#- End postprocess_phyloscanner -#") - 1
postprocess_phyloscanner_Rcsript <- Rscript[begin:end]
begin <- which(Rscript == "#- Begin extract_clades -#") + 1
end <- which(Rscript == "#- End extract_clades -#") - 1
extract_clades_Rcsript <- Rscript[begin:end]
## ---- echo=TRUE,eval=FALSE,code=readLines("../inst/extjson/options.json",warn=FALSE)--------------
# {
# "job_index": "NA",
# "path_RDS": "./extdata/FakeData/",
# "RDS_names": [
# "sequences_meta_modified.rds",
# "person_modified.rds",
# "Country_db.rds"
# ],
#
#
# "path_LANL_seq": "./extdata/LANL/",
# "LANL_seq_names": [
# "LANL_Subtype_01_AE.fasta",
# "LANL_Subtype_A1.fasta"
# ],
# "path_local_seq": "./extdata/Local/",
# "local_seq_names": [
# "newseq_Subtype_01_AE.fasta",
# "newseq_Subtype_A1.fasta"
# ],
#
#
# "path_aligned": "./extdata/Align/",
# "seq_names": [
# "newseq_LANL_Subtype_01_AE",
# "newseq_LANL_Subtype_A1"
# ],
#
#
# "path_tree": "./extdata/Tree/",
# "tree_names": [
# "newseq_LANL_Subtype_01_AE_mafft_aligned_ndrm",
# "newseq_LANL_Subtype_A1_mafft_aligned_ndrm"
# ],
# "path_colored_tree":"./extdata/Colored_tree/",
# "path_clades":"./extdata/Clades/",
#
#
# "local_group": "WA",
# "focus_group": "KC",
#
# "focus_subgroup": null,
# "index_subgroup": null,
# "name_subgroup": null,
#
# "bs.n": 2,
# "subtypes": ["01_AE","A1"],
# "verbose": true
# }
## ---- echo=TRUE,eval=FALSE,code=Blast_Rcsript-----------------------------------------------------
# ### Make a blast database ----
# makeblastdb(LANL_seq,LANL_db,
# job_index,verbose)
# # Run the following command for each subtype: (mixing between R and Linux command)
# # makeblastdb -in inputs -dbtype nucl -title
# # gsub(".fasta","",inputs)
# # -out outputs
# # Where:
# # -in: the sequence file to make the database
# # -dbtype: the type of the sequences (nucleotide)
# # -title : the title of the job
# # -out : the name of the output file
#
# ### Run blastn ----
# blastn(Local_seq,LANL_db,max_closest,
# job_index,verbose)
# # Run the following command for each subtype: (mixing between R and Linux command)
# # blastn -query name_Subtype_new_sequences -db names_db -out
# # gsub("_stripped.db","_closest.txt",names_db)
# # -max_target_seqs nbre_close -outfmt 6
# # Where:
# # -query: the sequence files ("studied sequences") for which we are looking for the closest in names_db
# # -db: the blast database
# # -out: the name of the output
# # -max_target_seqs: the number of closest sequences to find in the database for each studied sequence
## ---- echo=TRUE,eval=FALSE,code=get_closest_seq_Rcsript-------------------------------------------
# ### Get closest sequences ----
# closest_result <- get_closest_seq(LANL_seq,Local_seq,
# path_aligned,seq_names,
# max_closest, # maximal number of closest sequences
# nbre_close, # number of closest sequences
# min_percent, # minimal percent of closest sequences
# name_HXB2_file,
# job_index,verbose)
# if(is.na(job_index) || job_index == "NA"){
# save(closest_result,file=paste(path_aligned,"Closest_result.rda",sep=""))
# }else{
# save(closest_result,file=paste(path_aligned,"Closest_result_",job_index,".rda",sep=""))
# }
#
# ### Check that HXB2 is in each file ----
# names_file <- paste(path_aligned,seq_names,".fasta",sep="")
# check_HXB2(names_file,name_HXB2_file,
# job_index,verbose)
## ---- echo=TRUE,eval=FALSE,code=align_Rcsript-----------------------------------------------------
# ### Add an outgroup sequences from the closest subtype ----
# outgroup <- add_outgroup(subtypes,
# path_aligned,LANL_seq,seq_names,
# outgroup_size,
# job_index,verbose)
# if(is.na(job_index) || job_index == "NA"){
# save(outgroup,file=paste(path_aligned,"outgroup.rda",sep=""))
# }else{
# save(outgroup,file=paste(path_aligned,"outgroup_",job_index,".rda",sep=""))
# }
#
#
# ### Align ----
# align_seq(names_file,aligner,thread,
# job_index,verbose)
# # Run the following command for each subtype: (mixing between R and Linux command)
# # mafft --auto --thread 5
# # --add tmpdir/names_split[j]
# # tmpdir/names_split_aligned[j-1] > tmpdir/names_split_aligned[j]
# # Where:
# # --auto : to get the defaut options in terms of speed and accuracy
# # --thread: number of threads (parallele running)
# # -add : to merge the output to a given file
## ---- echo=TRUE,eval=FALSE,code=postprocess_align_gaps_Rcsript------------------------------------
# ### Remove some gaps column ----
# names_file_aligned <- gsub(x=names_file,".fasta",paste("_",aligner,"_aligned.fasta",sep=""))
#
# for(name in names_file_aligned){
# seq <- read.dna(name,format="fa")
# seq <- seq.strip.gap(seq, strip.pc=1-nbre_col_rm_strip/nrow(seq))
# write.dna(seq,name,format='fasta',colsep='', nbcol=-1)
# }
## ---- echo=TRUE,eval=FALSE,code=postprocess_align_identical_Rcsript-------------------------------
# ### Remove the identical sequences ----
# indexes <- find_identical_sequences(names_file_aligned,path_sequences_meta,
# dna_region,
# job_index,verbose)
# if(is.na(job_index) || job_index == "NA"){
# save(indexes,file=paste(path_aligned,"indexes_identical.rda",sep=""))
# }else{
# save(indexes,file=paste(path_aligned,"indexes_identical_",job_index,".rda",sep=""))
# }
## ---- echo=TRUE,eval=FALSE,code=postprocess_align_DRM_Rcsript-------------------------------------
# ### Remove Drug Resistance Mutations ----
# rm_DRM(names_file_aligned,name_HXB2,
# job_index,verbose)
## ---- echo=TRUE,eval=FALSE,code=build_tree_Rcsript------------------------------------------------
# ### Building trees ----
# building_tree(names_file_aligned_ndrm,path_tree,
# tree_builder,
# bs.n,bs.id,
# verbose)
# path_tree <- paste(normalizePath(path_tree),"/",sep="")
## ---- echo=TRUE,eval=FALSE,code=postprocess_tree_Rcsript------------------------------------------
# ### Postprocess trees (rooting and remove outgroup sequences) ----
# tree_names <-
# paste(path_tree,"fasttree_",gsub(x=basename(names_file_aligned_ndrm),".fasta","/"),
# gsub(x=basename(names_file_aligned_ndrm),".fasta",".newick"),sep='')
#
# postprocess_tree(tree_names,path_outgroup,name_HXB2,
# bs.n,bs.id,
# verbose)
## ---- echo=TRUE,eval=FALSE,code=relabelling_Rcsript-----------------------------------------------
# ### Relabelling ----
# tips_relabelling(tree_names,
# person,sequences_meta,Country_db,
# vars_for_label,
# local_group=local_group,
# focus_group=focus_group,
# focus_subgroup=focus_subgroup,
# index_subgroup=index_subgroup,
# name_subgroup=name_subgroup,
# negative=negative,
# local_indicator=local_indicator,
# focus_indicator=focus_indicator,
# separator=separator,
# blacklist_paths=blacklist_paths,
# blacklist_names=blacklist_names,
# Unknown = Unknown,
# local_tip_split=local_tip_split,
# external_tip_split=external_tip_split,
# local_index_location=local_index_location,
# external_index_location=external_index_location,
# bs.n=bs.n,bs.id=bs.id,
# verbose =verbose)
## ---- echo=TRUE, eval=FALSE-----------------------------------------------------------------------
# "focus_subgroup": ["HSX_F","HSX_M"], # the categories to focus
# "index_subgroup": 7, # in which spot is this information in the tip labels
# "name_subgroup": "HSX" # the name of the subgroup that will be use to relabel the tips
## ---- echo=TRUE,eval=FALSE,code=phyloscanner_Rcsript----------------------------------------------
# ### Phyloscanner ----
# wrap_phyloscanner_analyse_tree(renamed_tree_names,path_colored_tree,
# opt_regex=opt_regex,
# bs.n = bs.n,bs.id = bs.id,
# verbose =verbose)
## ---- echo=TRUE,eval=FALSE,code=postprocess_phyloscanner_Rcsript----------------------------------
# ### Phyloscanner postprocess ----
# if(!is.null(focus_subgroup)) focus_group <- paste(focus_group,focus_subgroup,sep="")
# phyloscanner_postprocess(renamed_tree_names,path_colored_tree,
# focus_group=focus_group,separator=separator,
# bs.n = bs.n,bs.id = bs.id,
# verbose =verbose)
## ---- echo=TRUE,eval=FALSE,code=extract_clades_Rcsript--------------------------------------------
# ### Clades extracting for each bootstrap index ----
# clades_extracting(renamed_tree_names,
# path_colored_tree,path_clades,
# focus_group,
# bs.n=bs.n,bs.id=bs.id,
# verbose=verbose)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.