tests/testthat/manualTest.R

#' Testing for RNASeqPipeline.R
#'
#' Manual testing due to time considerations.
#'
library(RNASeqPipelineR)
library(data.table)
library(Biobase)



## manually check that the read values match between the generated read
## count table and the original FASTQ file.
testSummation <- function(counts=count_eset, prefix=tmp) {
  
  PREFIX <- prefix 
  
  dat <- exprs(count_eset)
  
  rsem <- read.table(paste0(PREFIX, "/test/RSEM/test1Aligned.toTranscriptome.out.genes.results"), sep="\t", header=TRUE, row.names=NULL)
  counts <- rsem[,c("gene_id", "expected_count")]
  
  annoFile <- RNASeqPipelineR:::ucschg38Table
  syms <- as.character(unique(annoFile$hg38.kgXref.geneSymbol))
  
  tol = .Machine$double.eps ^ 0.5
  
  for(g in syms) {
    
    ids <- unique(annoFile[annoFile$hg38.kgXref.geneSymbol==g,]$hg38.knownIsoforms.clusterId)
    read <- dat[rownames(dat)==g,1]
    
    rDat <- 0
    for(id in ids) {
       rDat <- rDat + counts[counts$gene_id==id,]$expected_count
    }
    
    if(rDat - read > tol) {
      cat(g, ": ",rDat, " ", read, "\n")
    }
    
    rDat - read  > tol
    
  } ## end for g
  
} ## end testSummation



##source("/home/cmurie/cmurie_working/git/RNASeqPipelineR/R/RNASeqPipelineR.R")

## local instance of reference genome
utils_dir <- "/shared/silo_researcher/Gottardo_R/10_ref_files/Reference_Genome/Homo_sapiens/UCSC/hg38/"

## get temp directory for project creation
tmp <- tempdir()

createProject(project_name = "test", path=tmp, load_from_immport = FALSE)
loadProject(project_dir=tmp, name="test")

## load test fastq files stored in extdata
## contains test1_R1.fastq, test1_R2.fastq, test2_R1.fastq, test2_R2.fastq,
fpath <- system.file("extdata", "testData.zip", package="RNASeqPipelineR")
unzip(zipfile=fpath, exdir=paste0(tmp, "/test/FASTQ/"))
fpath2 <-  system.file("extdata", "testPheno.csv", package="RNASeqPipelineR")
file.copy(from=fpath2, to=paste0(tmp, "/test/RAW_ANNOTATIONS/anno.csv"))

## reference should already be built. test this function separately
buildReference(path=utils_dir, gtf_file="UCSCDec2016.gtf", fasta_file="hg38.fa", isoformsFile="UCSCKnownIsoformsDec2016.txt", doSTAR=TRUE, threads=6, name="hg38")

AlignmentSTAR(parallel_threads=1, star_threads=1, paired=TRUE, paired_pattern=c("_R1.fastq", "_R2.fastq"))
RSEMCalculateExpression(parallel_threads=1,bowtie_threads=1,paired=TRUE, nchunks=1,slurm=FALSE, fromBAM=TRUE, fromSTAR=TRUE)
RSEMAssembleExpressionMatrix(force=TRUE)
##BioCAnnotate("TxDb.Hsapiens.UCSC.hg38.knownGene",force = TRUE,lib = 'org.Hs.eg.db', na.rm=TRUE)
annotateUCSC(genome="hg38", force=TRUE)  ## replaces BioCAnnotate
runFastQC(ncores=2)
qc_matrix <- QualityControl(paired=TRUE)

mData <- mergeData()

## multiple cluster ids can map to same gene symbol so sum them and return ExpressionSet
count_eset <- sumDuplicates(mData$counts, mData$featureData, mData$annoData)
tpm_eset <- sumDuplicates(mData$tpms, mData$featureData, mData$annoData)

save(count_eset, tpm_eset, file=paste0(tmp, "/test/OUTPUT/test.RData"))


## remove temp directory and all contents (just to be safe)
unlink(tmp, recursive=TRUE, force=TRUE)

testSummation(count_eset, tmp)


######################################################################################
################### test kallisto pseudo-alignment ###################################
######################################################################################

## local instance of reference genome
utils_dir <- "/shared/silo_researcher/Gottardo_R/10_ref_files/Reference_Genome/Homo_sapiens/Ensembl/kallisto/"

## get temp directory for project creation
tmp <- tempdir()

createProject(project_name = "test", path=tmp, load_from_immport = FALSE)
loadProject(project_dir=tmp, name="test")

## load test fastq files stored in extdata
## contains test1_R1.fastq, test1_R2.fastq, test2_R1.fastq, test2_R2.fastq,
fpath <- system.file("extdata", "testData.zip", package="RNASeqPipelineR")
unzip(zipfile=fpath, exdir=paste0(tmp, "/test/FASTQ/"))
fpath2 <-  system.file("extdata", "testPheno.csv", package="RNASeqPipelineR")
file.copy(from=fpath2, to=paste0(tmp, "/test/RAW_ANNOTATIONS/anno.csv"))

setGenomeReference(utils_dir, "GRCH38_R91.idx")

kallistoBuildTranscriptIndex(path=utils_dir,
                             fasta_file="Homo_sapiens.GRCh38.cdna.all.fa",
                             name="GRCH38_R91.idx", force=FALSE)

kallistoAlign(kallisto_threads=3, paired=TRUE, mail="cmurie@fredhutch.org",
              minutes_requested=5, slurm=TRUE, slurm_partition=NULL,
              force=TRUE,
              paired_pattern=c("_R1.fastq", "_R2.fastq"))

runFastQC(ncores=2)
kallistoAssembleOutput(paired=TRUE)
kallistoReadQC()
kallistoAssembleQC(paired=TRUE, doAnnotation=TRUE) 


unlink(tmp, recursive=TRUE, force=TRUE)
RGLab/RNASeqPipelineR documentation built on Jan. 19, 2020, 12:31 a.m.