R/geomx_import_data.R

Defines functions readGeoMxFromDGE geomx_import_fun readGeoMx

Documented in readGeoMx readGeoMxFromDGE

#' Build a spatial experiment object from GeoMx DSP count and annotation tables
#'
#' Each input can be given either as a path to a tsv file or as a data frame
#' that is already loaded in the session.
#'
#' @param countFile tsv file or a dataframe object. Count matrix with features/genes in rows and samples in columns; one column (named by `colnames.as.rownames[1]`) holds the gene names/ids.
#' @param sampleAnnoFile tsv file or a dataframe object. Sample annotations.
#' @param featureAnnoFile tsv file or a dataframe object. Feature/Gene annotations. Default is NA, in which case a placeholder annotation is generated.
#' @param rmNegProbe Logical. If TRUE (default), the rows of the count matrix whose gene name equals `NegProbeName` are taken out of the counts and kept in the metadata of the returned object; if FALSE they are left in the count matrix.
#' @param NegProbeName Character. Name of negative probe genes, default is NegProbe-WTX.
#' @param colnames.as.rownames Vector of characters, length of 3. Column names holding the gene names in countFile, the sample names in sampleAnnoFile and the gene names in featureAnnoFile, respectively.
#' @param coord.colnames Vector of characters, length of 2. Column names of sampleAnnoFile holding the ROI coordinates.
#'
#' @return A SpatialExperiment object.
#' @export
#'
#' @examples
#' library(ExperimentHub)
#' 
#' eh <- ExperimentHub()
#' query(eh, "standR")
#' countFile <- eh[["EH7364"]]
#' sampleAnnoFile <- eh[["EH7365"]]
#'
#' spe <- readGeoMx(countFile, sampleAnnoFile, rmNegProbe = FALSE)
#'
readGeoMx <- function(countFile, sampleAnnoFile, featureAnnoFile = NA,
                      rmNegProbe = TRUE, NegProbeName = "NegProbe-WTX",
                      colnames.as.rownames = c("TargetName", "SegmentDisplayName", "TargetName"),
                      coord.colnames = c("ROICoordinateX", "ROICoordinateY")) {
  # Cheap argument checks up front, before any table is read.
  stopifnot(
    is.character(NegProbeName),
    length(colnames.as.rownames) == 3,
    length(coord.colnames) == 2
  )

  # All the actual work is delegated to the internal importer.
  geomx_import_fun(
    countFile = countFile,
    sampleAnnoFile = sampleAnnoFile,
    featureAnnoFile = featureAnnoFile,
    rmNegProbe = rmNegProbe,
    NegProbeName = NegProbeName,
    colnames.as.rownames = colnames.as.rownames,
    coord.colnames = coord.colnames
  )
}


# Internal helper: return one input table as a base data.frame.
# `x` is either a data frame (used directly) or a path handed to
# readr::read_tsv(). Coercing with as.data.frame() leaves a plain data.frame
# untouched and turns tibble-like inputs into base data frames, so the
# row-name handling further down behaves the same for every input.
.geomx_read_table <- function(x) {
  if (!is.data.frame(x)) {
    x <- readr::read_tsv(x)
  }
  as.data.frame(x, optional = TRUE)
}


# Internal helper: drop the `id_col` column from `tbl`, use its values as row
# names and reorder the rows to follow `ids`.
# `drop = FALSE` keeps the result a data frame even when only one column is
# left after removing `id_col`.
.geomx_key_by <- function(tbl, id_col, ids) {
  keyed <- tbl[, !colnames(tbl) %in% id_col, drop = FALSE]
  rownames(keyed) <- tbl[[id_col]]
  keyed[ids, , drop = FALSE]
}


# The importing function itself (internal; called by readGeoMx()).
#
# Arguments
#   countFile, sampleAnnoFile, featureAnnoFile
#       tsv path or data frame; featureAnnoFile may be NA (no feature
#       annotation, a placeholder `Type = "gene"` table is used instead).
#   rmNegProbe
#       if TRUE, rows of the count table whose gene name is in NegProbeName are
#       removed from the counts and stored in metadata as `NegProbes`;
#       if FALSE they stay in the count matrix and no metadata is added.
#   NegProbeName
#       name(s) of the negative probe rows.
#   colnames.as.rownames
#       length-3 character vector: gene-name column of the count table,
#       sample-name column of the sample annotation, gene-name column of the
#       feature annotation.
#   coord.colnames
#       length-2 character vector: coordinate columns of the sample annotation.
#
# Returns a SpatialExperiment with assays `counts` (raw) and `logcounts`
# (edgeR::cpm(log = TRUE)), colData ordered like the count columns and rowData
# ordered like the count rows.
#
# Stops with an error when a required column is missing or, with
# rmNegProbe = TRUE, when NegProbeName does not occur in the count table.
geomx_import_fun <- function(countFile, sampleAnnoFile, featureAnnoFile,
                             rmNegProbe, NegProbeName,
                             colnames.as.rownames,
                             coord.colnames) {
  gene_col <- colnames.as.rownames[1]
  sample_col <- colnames.as.rownames[2]
  feature_col <- colnames.as.rownames[3]

  # ---- count table ----
  count_tbl <- .geomx_read_table(countFile)
  # make sure count data have the gene column name as pre-defined,
  # such as TargetName.
  if (!gene_col %in% colnames(count_tbl)) {
    stop("colnames.as.rownames[1] is not in the column names of your count file.")
  }
  gene_ids <- as.character(count_tbl[[gene_col]])
  counts_all <- count_tbl[, !colnames(count_tbl) %in% gene_col, drop = FALSE]

  if (rmNegProbe) {
    # make sure the name of negprobe is in the gene column of count data.
    if (!all(NegProbeName %in% gene_ids)) {
      stop("NegProbeName is not found in your count file.")
    }
    # %in% never yields NA, so a missing gene name cannot create NA row indices.
    is_neg <- gene_ids %in% NegProbeName

    # negprobe raw count, kept aside for the metadata slot
    neg_counts <- counts_all[is_neg, , drop = FALSE]
    neg_names <- gene_ids[is_neg]
    if (anyDuplicated(neg_names) > 0) {
      # several rows share the probe name: number them to get unique row names
      neg_names <- paste0(neg_names, "_", seq_along(neg_names))
    }
    rownames(neg_counts) <- neg_names

    # raw count without negprobes
    counts <- counts_all[!is_neg, , drop = FALSE]
    rownames(counts) <- gene_ids[!is_neg]
    extra_metadata <- list(NegProbes = neg_counts)
  } else {
    # leave the NegProbe genes in the count matrix
    counts <- counts_all
    rownames(counts) <- gene_ids
    extra_metadata <- list()
  }

  # ---- feature / gene annotation ----
  if (!all(is.na(featureAnnoFile))) {
    feature_tbl <- .geomx_read_table(featureAnnoFile)
    if (!feature_col %in% colnames(feature_tbl)) {
      stop("colnames.as.rownames[3] is not in the column names of your feature annotation file.")
    }
    if (rmNegProbe) {
      # drop the negprobe rows here as well, before the names become row names
      keep <- !feature_tbl[[feature_col]] %in% NegProbeName
      feature_tbl <- feature_tbl[keep, , drop = FALSE]
    }
    # arrange the gene meta in the same order as the count table.
    gene_meta <- .geomx_key_by(feature_tbl, feature_col, rownames(counts))
  } else {
    gene_meta <- data.frame(Type = rep("gene", nrow(counts)))
  }

  # ---- sample annotation ----
  sample_tbl <- .geomx_read_table(sampleAnnoFile)
  if (!sample_col %in% colnames(sample_tbl)) {
    stop("colnames.as.rownames[2] is not in the column names of your sample annotation file.")
  }
  # arrange according to the columns of the count table.
  sample_meta <- .geomx_key_by(sample_tbl, sample_col, colnames(counts))
  if (!all(coord.colnames %in% colnames(sample_meta))) {
    stop("coord.colnames are not all in the column names of your sample annotation file.")
  }

  # ---- assemble the SpatialExperiment ----
  SpatialExperiment::SpatialExperiment(
    assays = list(
      counts = counts,
      logcounts = edgeR::cpm(counts, log = TRUE)
    ),
    colData = sample_meta,
    rowData = gene_meta,
    metadata = extra_metadata,
    spatialCoords = as.matrix(sample_meta[, coord.colnames])
  )
}



#' Import GeoMX DSP data into a spatial experiment object from DGEList object
#'
#' The raw counts of the DGEList are stored in the `counts` assay and their
#' log-CPM values, computed with `edgeR::cpm(log = TRUE)`, in the `logcounts`
#' assay. The `samples` and `genes` components of the DGEList become the
#' column and row annotations, respectively.
#'
#' @param dge_object a DGEList object (created using edgeR::DGEList).
#' @param spatialCoord a matrix with coordinates of samples, rownames must be consistent with the colnames of dge_object. Default is NULL.
#'
#' @return A SpatialExperiment object.
#' @export
#'
#' @examples
#' # making a simple DGEList object
#' ng <- 1000
#' ns <- 10
#' Counts <- matrix(rnbinom(ng * ns, mu = 5, size = 2), ng, ns)
#' rownames(Counts) <- seq(ng)
#' y <- edgeR::DGEList(counts = Counts, group = rep(seq(2), each = 5))
#'
#' # transfer into spatial experiment object
#' coords <- matrix(rnorm(2 * ns), 10, 2)
#' spe <- readGeoMxFromDGE(dge_object = y, spatialCoord = coords)
#' spe
#'
readGeoMxFromDGE <- function(dge_object, spatialCoord = NULL) {
  # The constructor call is the last expression (not an assignment), so the
  # object is returned visibly and prints when the function is called at the
  # console without capturing the result.
  SpatialExperiment::SpatialExperiment(
    assays = list(
      counts = dge_object$counts,
      logcounts = edgeR::cpm(dge_object, log = TRUE)
    ),
    colData = dge_object$samples,
    rowData = dge_object$genes,
    spatialCoords = spatialCoord
  )
}


# Declare `.` as a known global variable name for the package's code checks
# (presumably for `.` placeholders used elsewhere in the package -- no use is
# visible in this file).
utils::globalVariables(".")
DavisLaboratory/standR documentation built on June 28, 2024, 11:32 a.m.