#' Simulate circRNA and tandem RNA sequencing data
#'
#' Simulates RNA sequencing data for both circRNA and tandem RNA based on the
#' given back-splice junction (BSJ) information. A random subset of the rows of
#' \code{BSJ_Info}, sized by \code{tandem_rate}, is passed to
#' \code{simulate_tandem}; the remaining rows are passed to
#' \code{simulate_circRNA}.
#'
#' @param BSJ_Info a data frame that contains 5 columns: Chr, start_EXONSTART,
#'   end_EXONEND, GENEID and cCount. Chr is the chromosome name, formatted as
#'   1:22, X, Y, Mt. start_EXONSTART is the starting position of the starting
#'   exon of the circRNA, end_EXONEND is the ending position of the ending exon
#'   of the circRNA, GENEID is the ID of the gene that contains the circRNA
#'   (used to get the gene model) and cCount is the number of read pairs to
#'   generate for the target circRNA.
#' @param tandem_rate the rate of tandem RNA that you wish to simulate,
#'   e.g. 0.05. Must be a single number between 0 and 1; default is 0.
#' @param error_rate sequencing error rate, default is 0.005
#' @param set.seed seed passed to \code{set.seed()} for reproducibility,
#'   default is 2018. Note that this resets the session's random number
#'   generator state.
#' @param gtfSqlite path to your annotation file, Sqlite formatted (generated
#'   by GenomicFeatures)
#' @param genomeFastaFile path to your genome fasta file
#' @param txFastaFile path to your transcript fasta file (cDNA)
#' @param out_name prefix of the output folders and of the saved settings
#'   file, default is "circHunter_simuation"
#' @param out_dir the directory that contains the output, default is the
#'   current directory. It is created (recursively) if it does not exist.
#' @param ... currently unused.
#' @return A list with three elements: \code{BSJ_Info} (the input, unchanged),
#'   \code{circInfo} (the value returned by \code{simulate_circRNA}) and
#'   \code{tandemInfo} (the value returned by \code{simulate_tandem}, or a
#'   zero-row subset of \code{BSJ_Info} when no tandem RNA is simulated).
#'   As a side effect, \code{circInfo} and \code{tandemInfo} are saved to
#'   \code{<out_name>_simulation_setting.Rdata}.
#' @export
circHunter_simulate <- function(BSJ_Info,
                                tandem_rate = 0,
                                error_rate = 0.005,
                                set.seed = 2018,
                                gtfSqlite,
                                genomeFastaFile,
                                txFastaFile,
                                out_name = "circHunter_simuation",
                                out_dir = "./",
                                ...) {
  # Fail fast, before the expensive annotation/genome loading, instead of
  # letting sample() raise an obscure error later on.
  if (!is.numeric(tandem_rate) || length(tandem_rate) != 1L ||
      is.na(tandem_rate) || tandem_rate < 0 || tandem_rate > 1) {
    stop("tandem_rate must be a single number between 0 and 1", call. = FALSE)
  }

  genes.exon.all <- process_Sqlite(gtfSqlite)
  fasta_genome <- process_genome(genomeFastaFile)

  # Load the transcript sequences and keep the first space-delimited token of
  # each FASTA header as the lookup name for cDNA sequences.
  # vapply() guarantees a character vector even when there are no sequences.
  tx.all.fasta <- readDNAStringSet(txFastaFile)
  tx.all.NAME <- vapply(
    names(tx.all.fasta),
    function(x) unlist(strsplit(x, " "))[1],
    character(1)
  )

  # Seed immediately before sampling so the tandem/circRNA split is
  # reproducible regardless of what the loading steps above did.
  set.seed(set.seed)
  n_bsj <- nrow(BSJ_Info)
  tandem_num <- round(tandem_rate * n_bsj)
  pick <- sample.int(n_bsj, tandem_num)
  tandemInfo <- BSJ_Info[pick, ]

  # Create the output directory only when needed; this avoids the warning
  # dir.create() emits for an existing directory and supports nested paths.
  if (!dir.exists(out_dir)) {
    dir.create(out_dir, recursive = TRUE)
  }

  # With an empty pick, BSJ_Info[-pick, ] would select zero rows, so negative
  # indexing is only applied when at least one tandem RNA was drawn.
  circInfo <- if (tandem_num > 0) BSJ_Info[-pick, ] else BSJ_Info
  circInfo <- simulate_circRNA(circInfo, error_rate, genes.exon.all,
                               fasta_genome, tx.all.fasta, tx.all.NAME,
                               out_name, out_dir = out_dir)
  if (tandem_num > 0) {
    tandemInfo <- simulate_tandem(tandemInfo, error_rate, genes.exon.all,
                                  fasta_genome, tx.all.fasta, tx.all.NAME,
                                  out_name, out_dir = out_dir)
  }

  # NOTE(review): the settings file path is built from out_name only, so it is
  # written relative to the working directory rather than inside out_dir.
  # Kept as-is for backward compatibility -- confirm whether that is intended.
  save(circInfo, tandemInfo,
       file = paste0(out_name, "_simulation_setting.Rdata"))

  list(BSJ_Info = BSJ_Info, circInfo = circInfo, tandemInfo = tandemInfo)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.