## R/QSarray.R
##
## Defines functions qsTable newQSarray
##
## Documented in newQSarray qsTable

###################################################
## This file contains a technical description of the "QSarray" class, 
## and S4 methods for working with QSarray objects.
##
## Author: Christopher Bolen
## Updated: 2013-03-15
## (c) 2013 Yale University. All rights reserved.

###The QSarray object contains data generated when running the Q-Sage method.
## It is primarily generated by the functions "q.sage","makeComparisons", or "calcIndividualExpressions"
## This method is designed to create a new QSarray object (if one of the above functions is not used).
## You can input a list object containing the fields of the new QSarray object, input these fields
## individually, or do a combination of both.

###Possible Fields:
## mean - The average fold change of each gene, calculated using makeComparisons or calcIndividualExpressions.
## SD - The standard deviation around the fold change.
## dof - Degrees of freedom for each gene.
## var.method -- one of c("Welch's","Pooled"), indicating the method used to calculate the variance
## sd.alpha - The factor each sd is multiplied by (either due to the min.variance.factor parameter in makeComparison or because of the Bayesian shrinkage of the SDs). This is used when calculating the VIF in order to correct for genes with 0 (or very small) variance.
## labels - The labels as input in makeComparisons, describing the group structure of the data.
## pairVector - A vector indicating which samples should be treated as pairs.
## contrast - A string describing which of the two groups in labels was compared.
##Pathway stuff:
## pathways - the list of genes in each gene set.
## path.mean - vector describing the mean fold change for each of the pathways provided to AggregateGeneSet
## path.PDF - Matrix describing the probability distributions for each of the pathways provided to AggregateGeneSet, where each column is a different gene set, and each row is a different point where this set was sampled. See the details section for how to find the x-coordinates for each PDF.
## path.size - The number of features in each pathway that mapped to the input data.
## ranges - the (uncorrected) range that all PDFs were calculated over. Together with n.points  and (optionally) vif, this is used to find the x-coordinates for the PDF.
## n.points - The number of points that the PDF was calculated at. This is equal to the number of rows in path.PDF
## vif - the Variance Inflation Factor for each pathway, as calculated by calcVIF
## absolute.p - p-values for the absolute gene set test, as generated by absoluteTest()
## homogeneity - a measurement of the consistency of the genes in each gene set.



## Formal (S4) class "QSarray": a plain list with a class label attached, so
## fields are stored and retrieved exactly as they would be in a named list.
setClass("QSarray", contains = "list")

##a constructor for the object
## Builds a QSarray from a list of fields (obj), from named fields passed
## through '...', or from both.
##
## Arguments:
##   obj - NULL, or a list whose named elements become fields of the result.
##   ... - additional named fields. A field given here replaces a field of the
##         same name taken from obj (with a warning).
## Returns: a new "QSarray" object holding the collected fields.
## Errors:  stops with "Field not used: ..." if any field name is not one of
##          the recognised field names listed below.
## Warnings: a non-list obj, unnamed elements, and fields defined more than
##          once are reported rather than being handled silently.
newQSarray <- function(obj=NULL,
                       ...
                       ){
  output <- list()

  ## Returns the usable names of 'fields'. Elements without a name cannot be
  ## stored as a field, so they are reported instead of vanishing silently.
  fieldNames <- function(fields, source){
    nm <- names(fields)
    usable <- !is.na(nm) & nzchar(nm)
    n.unnamed <- length(fields) - sum(usable)
    if(n.unnamed > 0){
      warning(n.unnamed, " unnamed element(s) in ", source, " ignored. All fields must be named.")
    }
    nm[usable]
  }

  if(!is.null(obj)){
    if(is.list(obj)){
      for(n in fieldNames(obj, "obj")){
        if(!is.null(output[[n]])){
          ## obj[[n]] always resolves to the first element of that name, so the
          ## first value is the one that is kept.
          warning("Parameter '",n,"' defined more than once in obj. Keeping the first value.")
        }else{
          output[[n]] <- obj[[n]]
        }
      }
    }else{
      warning("'obj' is not a list and was ignored.")
    }
  }

  dots <- list(...)
  if(length(dots)>0){
    for(n in fieldNames(dots, "...")){
      if(!is.null(output[[n]])){
        warning("Parameter '",n,"' defined more than once. Overwriting parameter provided in obj")
      }
      output[[n]] <- dots[[n]]
    }
  }

  ##check that required fields are there, and that all the fields are allowed.
  ## (there are currently no required fields, so only the second check is active)
  reqField <- c()
  optField <- c("mean","SD","dof","sd.alpha","var.method","labels","contrast","pairVector",
                "pathways","path.mean","path.PDF","path.size","ranges","n.points","vif",
                "QSlist","n.samples","absolute.p","homogeneity")
#   if(!any(reqField %in% names(output))){
#     stop("Missing required fields: ",paste(reqField[!(reqField %in% names(output))],collapse=", "))
#   }
  unknown <- names(output)[!(names(output) %in% c(reqField,optField))]
  if(length(unknown)>0){
    stop("Field not used: ",paste(unknown,collapse=", "))
  }

  return(new("QSarray", output))
}


## Default plotting behavior for QSarray objects.
##  - objects carrying a QSlist field are handed to plotCombinedPDF
##  - otherwise fewer than 10 pathways are drawn with plotDensityCurves,
##    and 10 or more with plotCIs
## When a path.index argument is supplied through '...', its length is used as
## the pathway count instead of the number of pathways stored in the object.
setGeneric("plot", function(x,y,...) standardGeneric("plot"))
setMethod("plot", "QSarray", definition = 
  function(x, ##a QSarray object containing data from running aggregateGeneSet
           y, ##not used
           ...
          ){
    ##combined PDFs have their own plotting function
    if(!is.null(x$QSlist)){
      return(plotCombinedPDF(x,...))
    }

    nSets <- numPathways(x)
    if(is.na(nSets)){
      stop("Nothing to plot. Run aggregateGeneSet first.")
    }

    ##an explicit selection of pathways decides how many will be drawn
    picked <- list(...)$path.index
    if(!is.null(picked)){
      nSets <- length(picked)
    }

    if(nSets >= 10){
      return(plotCIs(x,...))
    }
    invisible(plotDensityCurves(x,...))
  }
)


##override the normal "print" function for QSarrays
## Prints every field of the object under a "$ <name>" header, abbreviating the
## large ones:
##  - path.PDF: the dimensions, then the head of (at most) the first 5 columns
##  - pathways: a summary of the list (only the first 5 sets when there are more than 10)
##  - anything else: the head of the field when it has more than 30 entries
## Returns x invisibly.
## NOTE(review): the generic is declared with only 'x' while the method below
## also accepts '...' -- confirm this is intended.
setGeneric("print", function(x) standardGeneric("print"))
setMethod("print", "QSarray", definition = 
  function(x,...){
    for(n in names(x)){
      field <- x[[n]]
      cat("\n$",n,"\n")
      if(n %in% c("path.PDF")){
        cat("[",nrow(field),"Points x",ncol(field),"Gene Sets ]\n")

        ##going the long way about this to preserve the colnames of path.PDF
        ##seq_len() (rather than 1:k) keeps a PDF matrix with zero columns from
        ##being indexed with c(1,0), which would fail with a subscript error.
        shown <- seq_len(min(ncol(field),5))
        print(head(as.matrix(field[,shown,drop=FALSE])))
      }
      else if(n %in% "pathways"){
        if(length(field)>10){
          cat("[",length(field),"Entries ]\n")
          print(summary(field[1:5]))
        }else{
          print(summary(field))
        }
      }
      else{
        if(length(field)>30){
          cat("[",length(field),"Entries ]\n")
          print(head(field))
        }else{
          print(field)
        }
      }
    }
    invisible(x)
  }
)
## head() of a QSarray simply prints the object and hands back, invisibly,
## whatever print() returned.
setMethod("head", "QSarray", definition = 
  function(x, ...){
    printed <- print(x)
    invisible(printed)
  }
)
          
## Short summary of a QSarray.
##  - with pathway data present: the gene set table produced by qsTable()
##  - without pathway data: a QSarray restricted to whichever of the fields
##    contrast, var.method, mean and SD are present
setMethod("summary", "QSarray", definition = 
  function(object, ...){
    ##pathway results available: the table is the summary
    if(!is.na(numPathways(object))){
      return(qsTable(object))
    }

    basicFields <- c("contrast","var.method","mean","SD")
    present <- basicFields %in% names(object)
    ret <- object[basicFields[present]]
    class(ret) <- "QSarray"
    return(ret)
  }
)


## This function builds a table with information on the gene sets in QSarray
##
## Arguments:
##   QSarray - a QSarray object containing pathway-level results.
##   number  - maximum number of rows (gene sets) to return.
##   sort.by - one or more of "fdr", "p", "logFC" giving the columns to order
##             by (earlier entries take precedence), or "none"/NULL to keep
##             the original order.
## Returns: a data.frame with columns pathway.name (the column names of
##   path.PDF, when it has any), log.fold.change (path.mean, or NA when that
##   field is absent), p.Value (from pdf.pVal(), defined elsewhere) and FDR
##   (p.Value adjusted with the "fdr" method).
## Errors: stops when the object holds no pathway information, or when
##   sort.by contains an unrecognised value.
qsTable <- function(QSarray, number=20, sort.by=c("fdr","p","logFC")){
  n.path <- numPathways(QSarray)
  ## numPathways() signals "no pathway data" with NA rather than NULL, so both are tested.
  if(is.null(n.path) || is.na(n.path)){stop("No pathway information found.")}
  number <- min(number, n.path)
  log.fold.change <- QSarray$path.mean
  p.Value <- pdf.pVal(QSarray)
  FDR <- p.adjust(p.Value, method="fdr")
  #homogeneity.score = QSarray$homogeneity
  pathway.name <- colnames(QSarray$path.PDF)

  if(is.null(log.fold.change)){log.fold.change <- rep(NA, length(p.Value))}
  results <- data.frame(pathway.name,log.fold.change,p.Value,FDR)#,homogeneity.score)
  rownames(results) <- NULL

  if(length(sort.by)>0 && !identical(sort.by[1],"none")){
    ## Sort columns are looked up by name: when path.PDF has no column names the
    ## pathway.name column is absent, and positional indices would be off by one.
    sort.keys <- c("","logFC","p","fdr")#,"homogeneity")
    sort.cols <- c("pathway.name","log.fold.change","p.Value","FDR")
    idx <- match(sort.by, sort.keys)
    if(any(is.na(idx))){
      stop("Unknown sort.by value: ",paste(sort.by[is.na(idx)],collapse=", "))
    }
    ## unname() so that column names are never mistaken for arguments of order()
    o <- do.call(order, unname(as.list(results[,sort.cols[idx],drop=FALSE])))
    results <- results[o,]
  }
  ## seq_len() returns no rows when number is 0, whereas 1:0 would select row 1
  return(results[seq_len(number),])
}


# ###Override the functions for subsetting a QSarray object. 
# ## With this, you can subset a QSarray object using the [x] format, where x will select pathways.
# "[.QSarray" <- function(QSarray, x,...,drop){
#   if(length(list(...)>0)){stop("Incorrect number of dimensions")}
#   output = QSarray
#   for(n in names(QSarray)){
#     if(n %in% c("path.mean","path.size","vif")){
#       output[[n]] = QSarray[[n]][x]
#     }
#     if(n %in% c("path.PDF")){
#       output[[n]] = as.matrix(QSarray[[n]][,x,drop=F])
#     }
#   }
#   return(output)
# }

##a method to be called when trying to set a value of a QSarray object using the "$" operator
# "$<-.QSarray" <- function(QSarray, n, value){
#   if(n %in% c("dof","var.method","labels","contrast","design","range")){
#     if(!is.null(QSarray[[n]])){warning("This parameter was used in the generation of the data in this object. Changing this parameter may affect the results of other methods.")}
#     QSarray[[n]] <- value
#   }
#   if(n %in% c("mean","SD")){    ##things dependent upon nrow
#     if( is.null(QSarray[[n]]) && !is.na(nrow(QSarray)) && nrow(QSarray) != length(value) ||
#       !is.null(QSarray[[n]]) && length(QSarray[[n]])!=length(value) ){
#       stop("Replacement length not equal to the number of genes. If you are trying to subset the values of the QSarray object, use the '[,]' operator.")
#     }
#     QSarray[[n]] = value
#   }
#   if(n %in% c("path.mean","path.size","vif")){  ##things dependent upon ncol
#     if( is.null(QSarray[[n]]) && !is.na(ncol(QSarray)) && ncol(QSarray) != length(value) ||
#         !is.null(QSarray[[n]]) && length(QSarray[[n]])!=length(value) ){
#       stop("Replacement length not equal to the number of pathways. If you are trying to subset the values of the QSarray object, use the '[,]' operator.")
#     }
#     QSarray[[n]] = value
#   }
#   if(n %in% c("path.PDF")){  ##dependent on ncol and $n.points
#     if(!is.matrix(value)){value = as.matrix(value)}
#     if(!is.matrix(value)){stop("argument can not be coerced to a matrix")}
#     if( is.null(QSarray[[n]]) && !is.na(ncol(QSarray)) && ncol(QSarray) != ncol(value) ||
#       !is.null(QSarray[[n]]) && ncol(QSarray[[n]])!=ncol(value) ){
#       stop("Replacement length not equal to the number of pathways. If you are trying to subset the values of the QSarray object, use the '[,]' operator.")
#     }
#     QSarray[[n]] = value
#     if(!is.null(QSarray$n.points) && nrow(value) != QSarray$n.points){
#       warning("Number of rows does not match n.points. Updating n.points to match.")
#     }
#     QSarray$n.points = nrow(value)
#   }
#   if(n %in% c("n.points")){  ##dependent on $path.PDF
#     if(!is.numeric(value) || length(value) != 1){stop("value must be of length 1")}
#     if(!is.null(QSarray$path.PDF) && nrow(QSarray$path.PDF)!=value){
#       stop("n.points does not match nrows of path.PDF. To update this value, provide a new path.PDF matrix")
#     }
#     QSarray[[n]] = value
#   }
#   return(QSarray)
# }

##a method to be called when trying to set a value of a QSarray object using the "[" operator
##essentially: you can't do it.
# setMethod("[<-", "QSarray", definition = 
#   function(x, i,j,...,value){
#     stop("Can not replace parts of QSarray object.")
#   }
# )          
          
## Number of features (genes) in a QSarray: the length of the mean field or,
## failing that, of the SD field. NA when neither field is present.
setGeneric("numFeatures", function(x) standardGeneric("numFeatures"))
setMethod("numFeatures", "QSarray", definition = function(x){
    ##the first field that is present decides the answer
    if(!is.null(x$mean)){
      return(length(x$mean))
    }
    if(!is.null(x$SD)){
      return(length(x$SD))
    }
    return(NA)
  }
)
## Number of pathways (gene sets) in a QSarray, taken from the first of these
## fields that is present: path.mean (length), path.size (length),
## path.PDF (number of columns), vif (length). NA when none is present.
setGeneric("numPathways", function(x) standardGeneric("numPathways"))
setMethod("numPathways", "QSarray", definition = function(x){
    if(!is.null(x$path.mean)){
      return(length(x$path.mean))
    }
    if(!is.null(x$path.size)){
      return(length(x$path.size))
    }
    if(!is.null(x$path.PDF)){
      return(ncol(x$path.PDF))
    }
    if(!is.null(x$vif)){
      return(length(x$vif))
    }
    return(NA)
  }
)

## Dimensions of a QSarray: c(number of genes, number of pathways), as reported
## by numFeatures() and numPathways().
setMethod("dim", "QSarray", definition = 
  function(x){
    nGenes <- numFeatures(x)
    nSets <- numPathways(x)
    return(c(nGenes, nSets))
  }
)



# 
# ###The QScomb object contains data generated when running combinePDFs to 
# ## combine PDFs from different QuSage results objects.
# 
# ###Required Fields:
# ## x.coords        - Matrix describing the x coordinates of PDF(s), where each column is a 
# ##                   different gene set, and each row is a different point where this set 
# ##                   was sampled. 
# ## path.PDF        - Matrix describing the probability distributions for each of the 
# ##                   pathways provided to AggregateGeneSet, where each column is a 
# ##                   different gene set, and each row is a different point where this set 
# ##                   was sampled. 
# ## QSlist          - the original list of QSarray objects provided to combinePDFs. Each
# ##                   element of the list should be a valid QSarray object
# ## n.samples       - the number of samples in each dataset used to generate QSlist.
# ## path.names      - the vector of pathway names of each gene set.
# ## n.points        - the number of points that the PDF is sampled at. Should match 
# ##                   nrow(combined.PDF) and nrow(x.coords)
# 
# setClass("QScomb", representation("list"))
# newQScomb = function(obj=NULL,
#                              ...
# ){
#   output = list()
#   
#   if(!is.null(obj) & is.list(obj)){
#     for(n in names(obj)){
#       if(!is.null(output[[n]])){
#         warning("Parameter '",n,"' defined more than once. Overwriting parameter provided in obj")
#       }else{
#         output[[n]] = obj[[n]]
#       }
#     }
#   }
#   if(length(list(...))>0){
#     obj = list(...)
#     for(n in names(obj)){
#       if(!is.null(output[[n]])){
#         warning("Parameter '",n,"' defined more than once. Overwriting parameter provided in obj")
#       }
#       output[[n]] = obj[[n]]
#     }    
#   }
#   
#   ##check that requried fields are there, and that all the fields are allowed.
#   reqField = c("x.coords","path.PDF","QSlist","n.samples", "path.names", "n.points")
#   if(!any(reqField %in% names(output))){
#     stop("Missing required fields: ",paste(reqField[!(reqField %in% names(output))],collapse=", "))
#   }
#   if(!all(names(output) %in% reqField)){
#     stop("Field not used: ",paste(names(output)[!(names(output) %in% reqField)],collapse=", "))
#   }
#   
#   return(new("QScomb", output))
# }
# 
# 
# setGeneric("numDatasets", function(x) standardGeneric("numDatasets"))
# setMethod("numDatasets", "QScomb", definition = function(x){
#   ##calculate nrow from one of (mean, SD)
#   datasets = NA
#   if(!is.null(x$QSlist)){datasets = length(x$QSlist)}
#   else if(!is.null(x$n.samples)){datasets = length(x$n.samples)}
#   return(datasets)
# }
# )
# 
# setMethod("numPathways", "QScomb", definition = function(x){
#   ##calculate ncol from one of (path.mean, path.size, path.PDF, vif)
#   pathways = NA
#   if(!is.null(x$path.PDF)){pathways = ncol(x$path.PDF)}
#   else if(!is.null(x$path.names)){pathways = length(x$path.names)}
#   return(pathways)
# }
# )
# 
# ##a method to return the dimensions of a QSarray object (i.e. the number of genes X the number of pathways)
# setMethod("dim", "QScomb", definition = 
#             function(x){
#               return(c(numDatasets(x),numPathways(x)))
#             }
# )
# 
# 
# 
# ## This is a wrapper function for the default plotting behavior of the QScomb object.
# setMethod("plot",c(x="QScomb",y="missing"),function(x,  ##a QScomb object
#                                                     y,  ##when this is missing
#                                                     ...){
#   plotCombinedPDF(x,...)
# })
# setMethod("plot",c(x="QScomb",y="ANY"),function(x,  ##a QScomb object
#                                                 y,  ##if not missing, assumed to be path.index
#                                                 ...){
#   plotCombinedPDF(x,path.index=y,...)
# })

## Try the qusage package in your browser
##
## Any scripts or data that you put into this service are public.
##
## qusage documentation built on Nov. 8, 2020, 8:09 p.m.