R/03.AllClasses.R

# #These functions were (loosely) based on similar functions created for the DEXSeq package.
# #
# # Note that DEXSeq is licensed under the GPL v3. Therefore this
# #   code packaged together is licensed under the GPL v3, as noted in the LICENSE file.
# # Some code snippets are "united states government work" and thus cannot be
# #   copyrighted. See the LICENSE file for more information.
# #
# # The current versions of the original functions upon which these were based can be found
# #    here: http://github.com/Bioconductor-mirror/DEXSeq
# #
# # Updated Authorship and license information can be found here:
# #   here: http://github.com/Bioconductor-mirror/DEXSeq/blob/master/DESCRIPTION
#
#
#
# setClass( "JunctionSeqCountSet",
#           contains = "eSet",
#           representation = representation(
#             designColumns = "character",
#             dispFitCoefs = "numeric",
#             fittedMu = "matrix",
#             dispFunctionType = "list",
#             dispFunction = "function",
#             dispFunctionJct  = "function",
#             dispFunctionExon = "function",
#             formulas = "list",
#             annotationFile = "character",
#             geneCountData = "matrix",
#             countVectors = "matrix",
#             altSizeFactors = "data.frame",
#             plottingEstimates = "list",
#             plottingEstimatesVST = "list", #DEPRECATED! VST-xform is fast enough that it's better to calculate them as needed.
#             geneLevelPlottingEstimates = "list",
#             modelFitForHypothesisTest = "list", #USUALLY unused.
#             modelFitForEffectSize = "list", #USUALLY unused.
#             flatGffData = "data.frame",
#             flatGffGeneData = "list",
#             analysisType = "character",
#             DESeqDataSet = "DESeqDataSet",
#             modelCoefficientsSample    = "list", #USUALLY unused.
#             modelCoefficientsGene      = "list"  #USUALLY unused.
#           ),
#           prototype = prototype( new( "VersionedBiobase",
#                                       versions = c( classVersion("eSet"), JunctionSeqCountSet = "0.0.5" ) ) )
# )
#
#
# makeDESeqDataSetFromJSCS <- function(jscs, test.formula1){
#   countData <- jscs@countVectors
#   colData <- rbind.data.frame(
#     cbind.data.frame(data.frame(sample = rownames(pData(jscs))) , pData(jscs) ),
#     cbind.data.frame(data.frame(sample = rownames(pData(jscs))) , pData(jscs) )
#   )
#   colData$countbin <- factor(c(  rep("this",ncol(countData)/2)  ,   rep("others",ncol(countData)/2)   ), levels = c("this","others"))
#   for(i in 1:ncol(colData)){
#     colData[[i]] <- factor(colData[[i]])
#   }
#   colData <- DataFrame(colData)
#   rownames(colData) <- colnames(countData)
#
#   jscs@DESeqDataSet <- DESeqDataSetFromMatrix(countData = countData, colData = colData, design = test.formula1, ignoreRank = TRUE)
#   return(jscs)
# }
#
# getModelFit <- function(jscs, featureID, geneID, countbinID, modelFitType = c("fitH0","fitH1","fitEffect")){
#   if(is.null(featureID)){
#     if(is.null(geneID) | is.null(countbinID)){
#       stop("ERROR: getModelFit(): either featureID or geneID AND countbinID must be set!")
#     } else {
#       featureID <- paste0(geneID, ":", countbinID)
#     }
#   }
#
#   out <- list()
#   if(any(modelFitType == "fitH0")){
#     if(is.null(jscs@modelFitForHypothesisTest)){
#       stop("ERROR: getModelFit(): no modelFitForHypothesisTest found. Run testJunctionsForDJU().")
#     }
#     out[["fitH0"]] <- jscs@modelFitForHypothesisTest[[featureID]][["fitH0"]]
#   }
#   if(any(modelFitType == "fitH1")){
#     if(is.null(jscs@modelFitForHypothesisTest)){
#       stop("ERROR: getModelFit(): no modelFitForHypothesisTest found. Run testJunctionsForDJU().")
#     }
#     out[["fitH1"]] <- jscs@modelFitForHypothesisTest[[featureID]][["fitH1"]]
#   }
#   if(any(modelFitType == "fitEffect")){
#     if(is.null(jscs@modelFitForEffectSize)){
#       stop("ERROR: getModelFit(): no modelFitForHypothesisTest found. Run testJunctionsForDJU().")
#     }
#     out[["fitEffect"]] <- jscs@modelFitForEffectSize[[featureID]]
#   }
#   return(out)
# }
#
#
# newJunctionSeqCountSet <- function( countData, geneCountData, design, geneIDs, countbinIDs, featureIntervals=NULL, transcripts=NULL){
#
#   countData <- as.matrix( countData )
#   if( any( round( countData ) != countData ) ){
#     stop( "The countData is not integer." )}
#   mode( countData ) <- "integer"
#
#   geneCountData <- as.matrix( geneCountData )
#   if( any( round( geneCountData ) != geneCountData ) ){
#     stop( "The geneCountData is not integer." )}
#   mode( geneCountData ) <- "integer"
#
#   if( is( design, "matrix" ) ){
#     design <- as.data.frame( design )}
#
#   rownames(countData) <- paste(geneIDs, countbinIDs, sep=":")
#   if( any( duplicated(rownames(countData) ) ) ) {
#     stop("The geneIDs or countbinIDs are not unique")
#   }
#   if(any(duplicated(rownames(geneCountData)))){
#     stop("The geneIDs in the geneCountData are not unique")
#   }
#   if(any(duplicated(colnames(geneCountData)))){
#     stop("The sample ID's in the geneCountData are not unique")
#   }
#
#   phenoData <- annotatedDataFrameFrom( countData, byrow=FALSE )
#   featureData <- annotatedDataFrameFrom( countData, byrow=TRUE )
#
#   phenoData$sizeFactor <- rep( NA_real_, ncol(countData) )
#   varMetadata( phenoData )[ "sizeFactor", "labelDescription" ] <- "size factor (relative estimate of sequencing depth)"
#
#   geneIDs <- as.factor( geneIDs )
#   if( length(geneIDs) != nrow(countData) )
#     stop( "geneIDs must be of the same length as the number of columns in countData")
#
#   featureData$geneID <- geneIDs
#   varMetadata( featureData )[ "geneID", "labelDescription" ] <- "ID of gene to which the exon belongs"
#
#   countbinIDs <- as.character( countbinIDs )
#   if( length(countbinIDs) != nrow(countData) )
#     stop( "countbinIDs must be of the same length as the number of columns in countData")
#
#   featureData$countbinID <- countbinIDs
#   varMetadata( featureData )[ "countbinID", "labelDescription" ] <- "feature ID (unique only within a gene)"
#
#   if( is.null(featureIntervals) ){
#     featureIntervals <- data.frame(
#       chr    = rep( NA_character_, nrow( countData ) ),
#       start  = rep( NA_integer_,   nrow( countData ) ),
#       end    = rep( NA_integer_,   nrow( countData ) ),
#       strand = rep( NA_character_, nrow( countData ) ) ) }
#
#   featureData$testable <- rep( NA_real_, nrow( countData ) )
#   varMetadata( featureData )[ "testable", "labelDescription" ] <- "slot indicating if an feature should be considered in the test."
#   featureData$status <- rep( "TBD", nrow( countData ) )
#
#   featureData$allZero <- rep( NA_real_, nrow( countData ) )
#   varMetadata( featureData )[ "allZero", "labelDescription" ] <- "slot indicating if the feature count is zero across all samples."
#
#
#   featureData$status <- rep( "TBD", nrow( countData ) )
#   varMetadata( featureData )[ "status", "labelDescription" ] <- "Feature status (either 'OK' or the reason that the feature is untestable)."
#
#   featureData$baseMean <- rep( NA_real_, nrow( countData ) )
#   varMetadata( featureData )[ "baseMean", "labelDescription" ] <- "Mean normalized counts across all samples."
#   featureData$baseVar <- rep( NA_real_, nrow( countData ) )
#   varMetadata( featureData )[ "baseVar", "labelDescription" ] <- "Simple variance of normalized counts across all samples."
#
#   featureData$dispBeforeSharing <- rep( NA_real_, nrow( countData ) )
#   varMetadata( featureData )[ "dispBeforeSharing", "labelDescription" ] <- "feature dispersion (Cox-Reid estimate)"
#
#   featureData$dispFitted <- rep( NA_real_, nrow( countData ) )
#   varMetadata( featureData )[ "dispFitted", "labelDescription" ] <- "Fitted mean-variance estimate."
#
#   featureData$dispersion <- rep( NA_real_, nrow( countData ) )
#   varMetadata( featureData )[ "dispersion", "labelDescription" ] <- "Final dispersion estimate."
#
#   featureData$pvalue <- rep( NA_real_, nrow( countData ) )
#   varMetadata( featureData )[ "pvalue", "labelDescription" ] <- "p-value from testForDEU"
#
#   featureData$padjust <- rep( NA_real_, nrow( countData ) )
#   varMetadata( featureData )[ "padjust", "labelDescription" ] <- "BH adjusted p-value"
#
#
#   featureIntervals <- as.data.frame( featureIntervals )
#
#   # in case it was a GRanges object before, change the colname:
#   if( "seqnames" %in% colnames(featureIntervals) ){
#     colnames(featureIntervals)[ colnames(featureIntervals) == "seqnames" ] <- "chr"   }
#
#   if( !all( c( "chr", "start", "end", "strand" ) %in% colnames(featureIntervals) ) ){
#     stop( "featureIntervals must be a data frame with columns 'chr', 'start', 'end', and 'strand'." )}
#
#   if(is.null(transcripts)){
#     transcripts <- rep(NA_character_, nrow( countData ) )}
#
#   if(!is(transcripts, "character")){
#     stop("transcript information must be a character vector")}
#
#   featureData$chr    <- factor( featureIntervals$chr )
#   featureData$start  <- featureIntervals$start
#   featureData$end    <- featureIntervals$end
#   featureData$strand <- factor( featureIntervals$strand )
#   featureData$transcripts <- transcripts
#   varMetadata( featureData )[ "chr",    "labelDescription" ] <- "chromosome of feature"
#   varMetadata( featureData )[ "start",  "labelDescription" ] <- "start of feature"
#   varMetadata( featureData )[ "end",    "labelDescription" ] <- "end of feature"
#   varMetadata( featureData )[ "strand", "labelDescription" ] <- "strand of feature"
#   varMetadata( featureData )[ "transcripts", "labelDescription" ] <- "transcripts in which this feature is contained"
#
#   featureData$baseMean <- rep( NA_real_, nrow( countData ) )
#   varMetadata( featureData )[ "baseMean", "labelDescription" ] <- "The mean normalized counts across all samples."
#
#
#   if( is( design, "data.frame" ) || is( design, "AnnotatedDataFrame" ) ) {
#     stopifnot( nrow( design ) == ncol( countData ) )
#     stopifnot( all( unlist( lapply(design, class) ) == "factor" ) )
#     design <- as( design, "AnnotatedDataFrame" )
#     dimLabels(design) <- dimLabels(phenoData)
#     rownames( pData(design) ) <- rownames( pData(phenoData) )
#     phenoData <- combine( phenoData, design )
#     rvft <- c( `_all` = NA_character_ )
#     designColumns <- varLabels(design)
#   } else {
#     design <- factor( design, levels=unique(design))
#     stopifnot( length( design ) == ncol( countData ) )
#     phenoData$`condition` <- factor( design )
#     varMetadata( phenoData )[ "condition", "labelDescription" ] <- "experimental condition, treatment or phenotype"
#     designColumns <- "condition"
#   }
#   jscs <- new( "JunctionSeqCountSet",
#                assayData = assayDataNew( "environment", counts=countData ),
#                phenoData = phenoData,
#                featureData = featureData,
#                designColumns = designColumns,
#                dispFitCoefs = c( NA_real_, NA_real_ ),
#                geneCountData = geneCountData
#   )
#   jscs
# }
#
# setValidity( "JunctionSeqCountSet", function( object ) {
#
#   if( !all( object@designColumns %in% names(pData(object)) ) )
#     return( "Not all designColumns appear in phenoData." )
#
#   if( ! "sizeFactor" %in% names(pData(object)) )
#     return( "phenoData does not contain a 'sizeFactor' column.")
#   if( ! is( pData(object)$`sizeFactor`, "numeric" ) )
#     return( "The 'sizeFactor' column in phenoData is not numeric." )
#
#   if( ! "geneID" %in% names(fData(object)) )
#     return( "featureData does not contain a 'geneID' column.")
#   if( ! is( fData(object)$geneID, "factor" ) )
#     return( "The 'geneID' column in fData is not a factor." )
#
#   if( ! "countbinID" %in% names(fData(object)) )
#     return( "featureData does not contain an 'countbinID' column.")
#   if( ! is( fData(object)$countbinID, "character" ) )
#     return( "The 'countbinID' column in fData is not a character vector." )
#
#   if( ! "chr"  %in% names(fData(object)) )
#     return( "featureData does not contain a 'chr' column.")
#   if( ! is( fData(object)$chr, "factor" ) )
#     return( "The 'chr' column in fData is not a factor." )
#
#   if( ! "start"  %in% names(fData(object)) )
#     return( "featureData does not contain a 'start' column.")
#   if( ! is( fData(object)$start, "integer" ) )
#     return( "The 'start' column in fData is not integer." )
#
#   if( ! all(featureNames(object) == paste(geneIDs(object), countbinIDs(object), sep=":") ) )
#     return( "The featureNames do not match with the geneIDs:countbinIDs" )
#
#   if( ! all(rownames( counts(object) ) == featureNames(object) ) )
#     return( "The rownames of the count matrix do not coincide with the featureNames" )
#
#   if( ! all(rownames( fData( object ) ) == featureNames( object ) ) )
#     return( "The rownames of the featureData do not coincide with the featureNames" )
#
#
#
#   if( ! "end"  %in% names(fData(object)) )
#     return( "featureData does not contain a 'end' column.")
#   if( ! is( fData(object)$end, "integer" ) )
#     return( "The 'end' column in fData is not integer." )
#
#   if( ! "strand"  %in% names(fData(object)) )
#     return( "featureData does not contain a 'strand' column.")
#   if( ! is( fData(object)$strand, "factor" ) )
#     return( "The 'strand' column in fData is not a factor." )
#   if( !is(fData(object)$dispersion, "numeric")){
#     return( "The 'dispersion' is not numeric")}
#   if( !is(fData(object)$dispFitted, "numeric")){
#     return( "The 'dispFitted' is not numeric")}
#   if( !is(fData(object)$dispBeforeSharing, "numeric")){
#     return( "The 'dispBeforeSharing' column is not numeric")}
#   if( !is(fData(object)$pvalue, "numeric")){
#     return( "The 'pvalue' values are not numeric")}
#   if( !is(fData(object)$padjust, "numeric")){
#     return( "The 'padjust' values are not numeric")}
#   if( !is.integer( assayData(object)[["counts"]] ) )
#     return( "The count data is not in integer mode." )
#
#   if( any( assayData(object)[["counts"]] < 0 ) )
#     return( "The count data contains negative values." )
#
#   if( length( object@dispFitCoefs ) != 2 )
#     return( "dispFitCoefs is not a vector of length 2." )
#
#   TRUE
# } )
#
#
# setMethod("counts", signature(object="JunctionSeqCountSet"),
#           function( object, normalized=FALSE) {
#             cds <- object
#             if(!normalized){
#               assayData(cds)[["counts"]]
#             } else {
#               if(any(is.na( sizeFactors(cds)))) {
#                 stop( "Please first calculate size factors or set normalized=FALSE")
#               } else {
#                 t(t( assayData( cds )[["counts"]] ) / sizeFactors(cds) )
#               }
#             }
#           })
# setReplaceMethod("counts", signature(object="JunctionSeqCountSet", value="matrix"),
#                  function( object, value ) {
#                    cds <- object
#                    assayData(cds)[[ "counts" ]] <- value
#                    validObject(cds)
#                    cds
#                  })
#
# setMethod("sizeFactors",  signature(object="JunctionSeqCountSet"),
#           function(object) {
#             cds <- object
#             sf <- pData(cds)$sizeFactor
#             names( sf ) <- colnames( counts(cds) )
#             sf
#           })
#
# setReplaceMethod("sizeFactors",  signature(object="JunctionSeqCountSet", value="numeric"),
#                  function(object, value ) {
#                    cds <- object
#                    pData(cds)$sizeFactor <- value
#                    validObject( cds )
#                    cds
#                  })
#
# setMethod("design", signature(object="JunctionSeqCountSet"),
#           function( object, drop=TRUE, asAnnotatedDataFrame=FALSE ) {
#             cds <- object
#             if( asAnnotatedDataFrame )
#               return( phenoData(cds)[, cds@designColumns ] )
#             ans <- pData(cds)[, cds@designColumns, drop=FALSE ]
#             if( ncol(ans) == 1 && drop ) {
#               ans <- ans[,1]
#               names(ans) <- colnames( counts(cds) ) }
#             else
#               rownames( ans ) <- colnames( counts(cds) )
#             ans
#           })
# setReplaceMethod("design", signature(object="JunctionSeqCountSet"),
#                  function( object, value ) {
#                    cds <- object
#                    ## Is it multivariate or just a vector?
#                    if( ncol(cbind(value)) > 1 )
#                      value <- as( value, "AnnotatedDataFrame" )
#                    else {
#                      value <- new( "AnnotatedDataFrame",
#                                    data = data.frame( condition = value ) )
#                      varMetadata( value )[ "condition", "labelDescription" ] <-
#                        "experimental condition, treatment or phenotype" }
#
#                    rownames( pData(value) ) <- rownames( pData(cds) )
#                    dimLabels( value ) <- dimLabels( phenoData(cds) )
#                    phenoData(cds) <- combine(
#                      phenoData(cds)[ , !( colnames(pData(cds)) %in% cds@designColumns ), drop=FALSE ],
#                      value )
#                    cds@designColumns <- colnames( pData(value) )
#                    validObject(cds)
#                    cds
#                  })
#
# geneIDs <- function( ecs ) {
#   stopifnot( is( ecs, "JunctionSeqCountSet" ) )
#   g <- fData(ecs)$geneID
#   names(g) <- rownames( counts(ecs) )
#   g
# }
#
# `geneIDs<-` <- function( ecs, value ) {
#   stopifnot( is( ecs, "JunctionSeqCountSet" ) )
#   fData(ecs)$geneID <- value
#   validObject(ecs)
#   ecs
# }
#
# countbinIDs <- function( ecs ) {
#   stopifnot( is( ecs, "JunctionSeqCountSet" ) )
#   g <- fData(ecs)$countbinID
#   names(g) <- rownames( counts(ecs) )
#   g
# }
#
# `countbinIDs<-` <- function( ecs, value ) {
#   stopifnot( is( ecs, "JunctionSeqCountSet" ) )
#   fData(ecs)$countbinID <- value
#   validObject(ecs)
#   ecs
# }
#
#
# subsetByGenes <- function( ecs, genes ) {
#   stopifnot( is( ecs, "JunctionSeqCountSet" ) )
#   stopifnot( all( genes %in% levels(geneIDs(ecs)) ) )
#   ecs2 <- ecs[ as.character(geneIDs(ecs)) %in% genes, ]
#   ecs2
# }
#
# geneCountTable <- function( ecs ) {
#   stopifnot( is( ecs, "JunctionSeqCountSet" ) )
#   do.call( rbind,
#            tapply( seq_len(nrow(ecs)), geneIDs(ecs), function(rows)
#              colSums( counts(ecs)[rows,,drop=FALSE] ) ) )
# }
#
# DEUresultTable <- function(ecs)
# {
#   result <- data.frame(
#     geneID=geneIDs(ecs),
#     countbinID=countbinIDs(ecs),
#     dispersion=featureData(ecs)$dispersion,
#     pvalue=fData(ecs)$pvalue,
#     padjust=fData(ecs)$padjust,
#     baseMean=rowMeans(counts(ecs, normalized=TRUE)))
#
#   extracol <- regexpr("log2fold", colnames(fData(ecs)))==1
#   if(any(extracol)){
#     w <- which(extracol)
#     result <- data.frame(result, fData(ecs)[,w])
#     colnames(result)[7:(6+length(w))] <- colnames(fData(ecs))[w]
#   }
#   result
# }
#
aiminy/PathwaySJ documentation built on May 10, 2019, 7:38 a.m.