R/classes2.R
In tmod: Feature Set Enrichment Analysis for Metabolomics and Transcriptomics

Documented in check_tmod_gs makeTmodGS

## we need to use the tmod data set internally
.myDataEnv <- new.env(parent=emptyenv())

if(!exists("tmod", .myDataEnv)) {
  data("tmod", package="tmod", envir=.myDataEnv)
}

.gettmod <- function() {
  .myDataEnv[["tmod"]]
}

## check user provided mset for sanity
.mset_sanity_check_gs <- function(mset, gs=NULL) {

  required_members <-  c( "gs", "gv", "gs2gv")
  # sanity checks
  if(!all(required_members %in% names(mset))) {
    stop("Required members missing from the list mset parameter")
  }

  if(any(unlist(lapply(required_members, is.null)))) {
    stop("Required members missing from the list mset parameter")
  }

  stopifnot(is(mset$gs, "data.frame"))
  stopifnot("ID" %in% colnames(mset$gs))
  stopifnot(is(mset$gs2gv, "list"))
    
  if(any(duplicated(mset[["gs"]]$ID))) 
    stop("Gene set IDs must not be duplicated")

  stopifnot(length(mset$gs2gv) == nrow(mset$gs)) 

  if(!is.null(gs)) {

    if(!all(gs %in% mset[["gs"]]$ID )) {
      stop("Gene sets specified with the gs parameter are missing from definitions in the mset parameter")
    }
  }

  if(!"tmodGS" %in% class(mset)) {
    class(mset) <- c(class(mset), "tmodGS")
  }

  mset
}



## prepare the modules set
.getmodules_gs <- function(modules=NULL, mset="all", known.only=FALSE, skipcheck=FALSE) {

  if(is(mset, "tmod")) {
    warning(
"You are loading an obsolete version of tmod R object.
The class `tmod` has been retired.
The data will still work, but it will incur a penalty
on the computational time. Please use the `tmod2tmodGS`
function to convert your object to the new tmodGS class.")
    mset <- tmod2tmodGS(mset)
  }


  # user provided mset
  if(is(mset, "list")) {
    mset <- .mset_sanity_check_gs(mset, modules)
    if(!is.null(modules)) { mset <- mset[ modules ] }
  } else {

    tmod <- .gettmod()

    mset <- match.arg(mset, c( "all", unique(tmod$gs$SourceID)))
    if(mset != "all") { 
      if(is.null(modules)) modules <- tmod$gs$ID
      sel <- tmod$gs$SourceID[ match(modules, tmod$gs$ID) ] == mset
      modules <- modules[ sel ]
    }

    if(!is.null(modules)) {
      modules <- modules[ modules %in% tmod$gs$ID ]
      mset <- tmod[ modules ]
    } else {
      mset <- tmod
    }
  }


  if(known.only && "Title" %in% colnames(mset$gs)) {
    mset <- mset[ ! is.na(mset$gs$Title) & ! mset$gs$Title %in% c( "TBA", "Undetermined", ""), ]
  }

  mset
}



## this is the new and more efficient tmod class

#' Check an object of class tmodGS
#'
#' Check an object of class tmodGS
#' @param object an object of class tmodGS
#' @export
check_tmod_gs <- function(object) {

  if(is.null(object$gs))  return("Required list member `gs` missing")
  if(is.null(object$gs2gv)) return("Required list member `gs2gv` missing")

  if(!is(object$gs, "data.frame")) return("List member `gs` must be a data frame")
  if(!is(object$gs2gv, "list")) return("List member `gs2gv` must be a list")

  if(is.null(object$gs$ID))    return("List member `gs` must have column `ID`")

  if(nrow(object$gs) != length(object$gs2gv)) 
    return("Number of gene sets in `gs2gv` different from number of rows of `gs`")

  if(!is.null(object$weights)) {
    if(nrow(object$gs) != length(object$weights)) {
      return("Number of gene sets in `weights` different from number of rows of `gs`")
    }
  }

  TRUE

}



#' S3 class for tmod gene set collections
#'
#' S3 class for tmod gene set collections
#'
#' An object of class tmod contains the gene set annotations (`tmod$gs`), 
#' a character vector of gene identifiers (`tmod$gv`)
#' and a mapping between gene sets and gene identifiers (`tmod$gs2gv`).
#' Optionally, a vector of numeric weights of the same length as `gs2gv` may
#' be provided (`tmod$weights`).
#'
#' Furthermore, it may contain additional information about the gene set
#' (`tmod$info`).
#'
#' `tmod$gs` is a data frame which must contain the column "ID". Additional
#' optional columns `Title` and `Description` are recognized by some
#' functions. Any further columns may contain additional information on the
#' gene sets. The number of rows of that data frame is equal to the number
#' of gene sets in a gene set collection.
#'
#' Each element of the tmod$g2m list corresponds to the respective row of
#' the `tmod$gs` data frame. Each element is an integer vector containing
#' the positions of the gene identifiers in the `tmod$gv` character vector.
#' 
#' Objects of class tmodGS should be constructed 
#' by calling the function makeTmodGS(). This function check the validity and
#' consistency of the provided objects.
#'
#' The makeTmod function remains for compatibility with previous versions
#' of the package. It produces the objects of the new class tmodGS,
#' however.
#'
#' See the package vignette for more on constructing custom module sets.
#'
#' @rdname tmodGS-class
#' @importFrom methods setClass setMethod loadMethod is new representation signature
#' @seealso tmod-data
#' @param ... further arguments passed to `print()`
#' @param gs2gene,modules2genes A list with module IDs as names. Each member of the list is a character vector with IDs of genes contained in that module
#' @param gs,modules [Optional] A data frame with at least columns ID and Title
#' @param genes2modules,genes Ignored
#' @param weights [Optional] a named numeric vector of weights for each gene set
#' @param info [Optional] a list containing meta-information about the gene set collection
#' @examples
#' # A minimal example
#' gs <- data.frame(ID=letters[1:3], Title=LETTERS[1:3])
#' gs2gv <- list(a=c("g1", "g2"), b=c("g3", "g4"), c=c("g1", "g2", "g4"))
#' mymset <- makeTmodGS(gs2gene=gs2gv, gs=gs)
#' str(mymset)
#' @export
makeTmodGS <- function(gs2gene, gs=NULL, weights=NULL, info=NULL) {

  if(!is(gs2gene, "list")) stop("`gs2gene` must be a list")

  if(!is.null(gs)) {
    if(!is(gs, "data.frame")) {
      stop("`gs` must be a data frame")
    } else {
      gs <- tibble(gs)
    }
    if(is.null(gs$ID))    stop("`gs` must have column ID")
    stopifnot(all(gs$ID %in% names(gs2gene)))
  } 

  if(is.null(names(gs2gene))) {
    names(gs2gene) <- paste0("GS_", sprintf("%05d", 1:length(gs2gene)))
  }


  if(is.null(gs)) {
    gs <- tibble(ID=names(gs2gene))
  } else {
    stopifnot(all(names(gs2gene) %in% gs$ID))
    gs <- gs[ match(names(gs2gene), gs$ID), ]
  }


  gv <- sort(unique(unlist(gs2gene)))
  gs2gv <- lapply(gs2gene, function(.) { match(., gv) })
  names(gs2gv) <- NULL

  ret <- list( 
    gs=gs,
    gs2gv=gs2gv,
    gv=gv,
    info=info,
    weights=weights)
  class(ret) <- c(class(ret), "tmodGS")

  ret
}

#' @rdname tmodGS-class
#' @export
makeTmod <- function(modules, modules2genes, genes2modules=NULL, genes=NULL) {
  makeTmodGS(gs2gene=modules2genes, gs=modules, weights=NULL, info=NULL)
}


#' @rdname tmodGS-class
#' @param check_sanity whether the tmodGS object should be tested for correctness
#' @export
as_tmodGS <- function(x, check_sanity=FALSE) {
  stopifnot(is.list(x)) 
  
  if(!"tmodGS" %in% class(x)) {
    class(x) <- c("tmodGS", class(x))
  }

  if(check_sanity) {
    x <- .mset_sanity_check_gs(x)
  }

  x
}


## how to display a tmod object

#' @param x a tmod object
#' @rdname tmodGS-class
#' @export
print.tmodGS <- function(x, ...) {
    .catf("An object of class \"tmodGS\"\n" )
    .catf("\t%d gene sets, %d genes\n", 
      nrow(x$gs),
      length(x$gv))
}

#' Query and set IDs of gene sets in a tmodGS object
#' 
#' Query and set IDs (tmod_id) or Titles (tmod_title) of gene sets in a tmodGS object
#' @param x an object of class tmodGS
#' @param value a character vector of unique IDs
#' @return Returns character vector corresponding to x$gs$ID
#' @examples
#' data(tmod)
#' mset <- tmod[ c("LI.M37.0", "LI.M75", "LI.M3") ]
#' tmod_ids(mset)
#' tmod_ids(mset) <- c("em", "pstrem", "bzdrem")
#' tmod_titles(mset) <- c("foo", "bar", "baz")
#' mset$gs
#' @export
tmod_ids <- function(x) {
  x$gs$ID
}

#' @rdname tmod_ids
#' @export
`tmod_ids<-` <- function(x, value) {
  stopifnot(length(value) == nrow(x$gs))
  stopifnot(all(!duplicated(value)))
  x$gs$ID <- value
  x
}

#' @rdname tmod_ids
#' @export
tmod_titles <- function(x) {
  if("Title" %in% names(x$gs)) {
    return(x$gs$Title)
  } else {
    return(NULL)
  }
}

#' @rdname tmod_ids
#' @export
`tmod_titles<-` <- function(x, value) {
  stopifnot(length(value) == nrow(x$gs))
  stopifnot(all(!duplicated(value)))
  x$gs$Title <- value
  x
}


#' @rdname tmodGS-class
#' @export
length.tmodGS <- function(x) {
    nrow(x$gs)
}  

#' @param x a tmodGS or tmod object
#' @param i indices specifying elements to extract or replace
#' @rdname tmodGS-class
#' @export
`[.tmodGS` <- function(x, i) {

  object  <- x

  if(is.logical(i)) {
    stopifnot(length(i) == nrow(object$gs))
    i <- which(i)
  }


  if(!is.numeric(i)) {
    stopifnot(all(i %in% object$gs$ID))
    sel <- match(i, object$gs$ID)
  } else {
    sel <- i
  }

  #modules <- modules[ modules %in% object$MODULES$ID ]

  gs  <- object$gs[sel, , drop=FALSE]
  gs2gv <- object$gs2gv[sel]

  ## gene list: complicated

  gv <- object$gv[ unique(unlist(gs2gv)) ]

  gs2gv <- lapply(gs2gv, function(x) {
                  orig_gv <- object$gv[ x ]
                  new_gv  <- match(orig_gv, gv)
  })


  if(!is.null(object$weights)) {
    weights <- object$weights[sel]
  } else {
    weights <- NULL
  }

  as_tmodGS(list( 
    gs=gs,
    gs2gv=gs2gv,
    gv=gv,
    info=object$info,
    weights=weights), check_sanity=FALSE)
}


## Obsolete: old tmod objects. We need to handle them, though

#' @param x a tmodGS or tmod object
#' @param i indices specifying elements to extract or replace
#' @rdname tmodGS-class
#' @export
`[.tmod` <- function(x, i) {

    warning(
"You are loading an obsolete version of tmod R object.
The class `tmod` has been retired.
The data will still work, but it will incur a penalty
on the computational time. Please use the `tmod2tmodGS`
function to convert your object to the new tmodGS class.")
    x <- tmod2tmodGS(x)

  x[ i ]
}

#' Convert the old tmod objects to the tmodGS objects
#'
#' Convert the old tmod objects to the tmodGS objects
#'
#' The old tmod representation was very inefficient. This function converts
#' the objects to a new representation which is both allowing faster
#' computations and more memory efficient.
#' @param x an object of class tmod
#' @return Returns an object of class tmodGS.
#' @importFrom tibble tibble
#' @export
tmod2tmodGS <- function(x) {
  stopifnot(check_tmod(x))

  gs <- tibble(x$MODULES)
  gs2gv <- x$MODULES2GENES[ gs$ID ]
  gv <- sort(unique(unlist(gs2gv)))

  gs2gv <- lapply(gs2gv, function(.) {
                  match(., gv)
  })
  names(gs2gv) <- NULL

  if(!is.null(x$WEIGHTS)) {
    weights <- x$WEIGHTS[ gs$ID ]
  } else {
    weights <- NULL
  }

  as_tmodGS(list( 
    gs=gs,
    gs2gv=gs2gv,
    gv=gv,
    info=x$info,
    weights=weights))
}


.invert_hash <- function(l) {
  if(is.null(names(l))) names(l) <- seq_along(l)
  ret <- split(rep(names(l), lengths(l)), unlist(l))
  ret
}


## checking anold style tmod object
check_tmod <- function(object) {

  if(is.null(object$MODULES))       return("Required list member MODULES missing")
  if(is.null(object$MODULES2GENES)) return("Required list member MODULES2GENES missing")

  if(!is(object$MODULES, "data.frame")) return("MODULES must be a data frame")
  if(!is(object$MODULES2GENES, "list")) return("MODULES2GENES must be a list")

  if(is.null(object$MODULES$ID))    return("MODULES must have columns ID and Title")
  if(is.null(object$MODULES$Title)) return("MODULES must have columns ID and Title")

  if(!all(object$MODULES$ID %in% names(object$MODULES2GENES)))
    return("All MODULES$ID must be found in names of MODULES2GENES")


  if(!is.null(object$WEIGHTS)) {
    if(!all(object$MODULES$ID %in% names(object$WEIGHTS)))
      return("All MODULES$ID must be found in names of WEIGHTS")
  }

  TRUE

}

Any scripts or data that you put into this service are public.

tmod documentation built on March 31, 2023, 9 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

tmod
Feature Set Enrichment Analysis for Metabolomics and Transcriptomics

R/classes2.R
In tmod: Feature Set Enrichment Analysis for Metabolomics and Transcriptomics

Defines functions makeTmodGS check_tmod_gs .getmodules_gs .mset_sanity_check_gs .gettmod

Documented in check_tmod_gs makeTmodGS

Try the tmod package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

tmod Feature Set Enrichment Analysis for Metabolomics and Transcriptomics

R/classes2.R In tmod: Feature Set Enrichment Analysis for Metabolomics and Transcriptomics

Defines functions makeTmodGS check_tmod_gs .getmodules_gs .mset_sanity_check_gs .gettmod

Documented in check_tmod_gs makeTmodGS

Try the tmod package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

tmod
Feature Set Enrichment Analysis for Metabolomics and Transcriptomics

R/classes2.R
In tmod: Feature Set Enrichment Analysis for Metabolomics and Transcriptomics