R/utils.R

Defines functions calculate_vst raise_err get_gene_meta create_listw_from_dist create_listw_from_knn create_listw load_images color_parse tissue_names spatial_metadata count_cores

Documented in get_gene_meta load_images spatial_metadata tissue_names

##
# Utility and helper functions

##
# Get the number of cores to use in parallel as a function of the number of
# data to process. It will not yield a higher number of cores than half of the
# total available cores. Will default to 1 core if not Unix
# @param n an integer representing the number of units to process
# @return the number of cores to use
#
# @importFrom methods as is new
#
#
count_cores = function(n){
  cores = 1
  if(.Platform$OS.type == 'unix'){
    # Use parallelization (if possible) to read data.
    avail_cores = (parallel::detectCores()) / 2
    if(avail_cores <= n){
      cores = avail_cores
    } else{
      cores = ifelse(n > 0, n, 1)
    }
  }
  return(cores)
}


##
#' @title spatial_metadata: Prints the names of the available spot/cell annotations
#' @description returns a character vector with the names of the annotations in the
#' `x@spatial_meta` slot.
#'
#' @param x an STList object
#'
#' @export
#
#
spatial_metadata = function(x){
  meta_res = list()
  for(i in seq(x@spatial_meta)){
    meta_res[[i]] = base::colnames(x@spatial_meta[[i]][, -c(1:3)])
  }
  names(meta_res) = names(x@spatial_meta)

  return(meta_res)
}


##
#' @title tissue_names: Prints the names of the tissue samples in the STlist
#' @description returns a character vector with the names of tissue samples in the
#' STlist.
#'
#' @param x an STList object
#'
#' @export
#
#
tissue_names = function(x){
  sample_res = names(x@counts)

  return(sample_res)
}


##
# @title color_parse: Creates a color palette
# @description Uses Khroma or RColorBrewer to return the colors of a palette name.
# @details
# This function takes a character string and uses either khroma or RColoBrewer to
# create a color palette. The function first looks in khroma, then RColoBrewer. In
# other words, khroma colors have priority.
#
# @param color_pal A name of a Khroma or RColorBrewer. Alternatively, a vector with
# colors of lenght equal or larger than number of categories.
# @param n_cats The number of colors to produce. If NULL, assumes 5 colors.
# @return cat_cols A color palette
#
#' @importFrom methods as is new
#' @importFrom grDevices colorRampPalette
#
#
color_parse = function(color_pal=NULL, n_cats=NULL){
  # Get color palette and number of colors needed.
  # Get names of Khroma colors.
  khroma_cols = khroma::info()
  khroma_cols = khroma_cols$palette

  # Assume 5 categories if n_cats not provided (for kriging/quilts).
  if(is.null(n_cats)){
    n_cats = 5
  }

  # Test if input is a Khroma name or RColorBrewer.
  # If so, create palette.
  if(color_pal[1] %in% khroma_cols){
    cat_cols = as.vector(khroma::colour(color_pal[1], force=T)(n_cats))
  }else if(color_pal[1] %in% rownames(RColorBrewer::brewer.pal.info)){
    cat_cols = grDevices::colorRampPalette(RColorBrewer::brewer.pal(n_cats, color_pal[1]))(n_cats)
  }else{ # Test if user provided a vector of colors.
    if(length(color_pal) >= n_cats){
      cat_cols = color_pal[1:n_cats]
    } else{
      stop('Provide enough colors to plot or an appropriate Khroma/RColorBrewer palette.')
    }
  }

  # Subset colors is more colors in palette than categories
  if(length(cat_cols) > n_cats){
    cat_cols = cat_cols[1:n_cats]
  }
  return(cat_cols)
}


##
#' @title load_images: Place tissue images within STlist
#' @description Loads the images from tissues to the appropriate STlist slot.
#' @details
#' This function looks for `.PNG` or `.JPG` files within a folder matching the
#' sample names in an existing STlist. Then, loads the images to the STlist which
#' can be used for plotting along with other spatialGE plots.
#'
#' @param x an STlist
#' @param images a string indicating a folder to load images from
#' @return an STlist with images
#'
#' @export
#'
#' @importFrom methods as is new
#
#
load_images = function(x=NULL, images=NULL){
  # Test if an STList has been input.
  if(is.null(x) | !is(x, 'STlist')){
    stop("The input must be a STlist.")
  }

  if(is.null(images)){
    stop("Please, provide a vector with images file paths.")
  } else{
    if(length(images) == 1 & dir.exists(images)){ # In case a directory was provided
      images = list.files(images, full.names=T)
    }
  }

  # Process each image.
  for(i in names(x@counts)){
    fp = grep(paste0(i, '\\.'), images, value=T)
    if(length(fp) == 0){
      cat(paste0("Image for sample ", i, " was not found.\n"))
      next
    }

    # Return warning if more than one file matches the sample name
    if(length(fp) > 1){
      cat(paste0("Multiple image files matched sample ", i, ". Using the first match (", fp[1], ")."))
    }

    if(grepl('jpg$', fp[1])){ # CosMx uses jpeg files
      img_obj = jpeg::readJPEG(fp[1])
    } else{
      img_obj = png::readPNG(fp[1])
    }

    # Downsize image if too large
    if(any(dim(img_obj) > 1000)){
      img_obj = EBImage::resize(img_obj, w=1000)
    }

    x@misc[['sp_images']][[i]] = img_obj
  }
  return(x)
}


##
# @title create_listw
# @param x an STlist
#
create_listw = function(x=NULL){
  # Define cores available
  cores = count_cores(length(x@spatial_meta))

  # Create distance matrix based on the coordinates of each sampled location.
  listw_list = parallel::mclapply(seq(names(x@spatial_meta)), function(i){
    subj_dists = as.matrix(dist(x@spatial_meta[[i]][, c('ypos', 'xpos')]))
    subj_dists[subj_dists == 0] = 0.0001
    subj_dists_inv = 1/subj_dists
    diag(subj_dists_inv) = 0
    subj_dists_inv=spdep::mat2listw(subj_dists_inv, style='B')
    return(subj_dists_inv)
  }, mc.cores=cores, mc.preschedule=F)
  names(listw_list) = names(x@spatial_meta)
  return(listw_list)
}


##
# @title create_listw_from_knn
# @param x an STlist
# @param ks
#
create_listw_from_knn = function(x=NULL, ks=NULL){
  # Define cores available
  #cores = count_cores(length(x@spatial_meta))
  cores = 1

  # Create distance matrix based on the coordinates of each sampled location.
  listw_list = parallel::mclapply(seq(names(x@spatial_meta)), function(i){
    coords_mtx = as.matrix(x@spatial_meta[[i]][, c('libname', 'ypos', 'xpos')] %>% tibble::column_to_rownames('libname'))
    subj_listw = spdep::nb2listw(spdep::knn2nb(spdep::knearneigh(coords_mtx, k=ks, longlat=F)), style='B')
    return(subj_listw)
  }, mc.cores=cores, mc.preschedule=F)
  names(listw_list) = names(x@spatial_meta)
  return(listw_list)
}


##
# @title create_listw_from_dist
# @param x an STlist
# @param ks
#
create_listw_from_dist = function(x=NULL, cores=NULL){
  # Define cores available
  if(is.null(cores)){
    cores = count_cores(length(x@spatial_meta))
  } else{
    cores = as.integer(cores)
    if(is.na(cores)){
      stop('Could not recognize number of cores requested')
    }
  }

  # Create distance matrix based on the coordinates of each sampled location.
  listw_list = parallel::mclapply(seq(names(x@spatial_meta)), function(i){
    coords_mtx = as.matrix(x@spatial_meta[[i]][, c('libname', 'ypos', 'xpos')] %>% tibble::column_to_rownames('libname'))
    adj = as.matrix(stats::dist(coords_mtx))
    adj = adj/max(adj)
    diag(adj) = NA
    colnames(adj) = x@spatial_meta[[i]][['libname']]
    rownames(adj) = x@spatial_meta[[i]][['libname']]

    neighbours = list()
    weights = list()
    for(id in 1:nrow(adj)){
      idx = 1:nrow(adj)
      neighbours[[id]] = idx[!(rownames(adj) %in% rownames(adj)[id])]
      names(neighbours[[id]]) = idx[!(rownames(adj) %in% rownames(adj)[id])]
      weights[[id]] = as.vector(adj[id, ])
      #weights[[id]] = weights[[id]][!(rownames(adj) %in% rownames(adj)[id])]
      weights[[id]] = 1/(weights[[id]][!(rownames(adj) %in% rownames(adj)[id])])
    }
    class(neighbours) = 'nb'

    subj_listw = list(style='B',
                      neighbours=neighbours,
                      weights=weights)
    class(subj_listw) = c("listw", "nb")

    return(subj_listw)
  }, mc.cores=cores, mc.preschedule=F)
  names(listw_list) = names(x@spatial_meta)
  return(listw_list)
}


##
#' @title get_gene_meta: Extract gene-level metadata and statistics
#' @description Extracts gene-level metadata and spatial statistics (if already computed)
#' @details
#' This function extracts data from the `x@gene_meta` slot, optionally subsetting
#' only to those genes for which spatial statistics (Moran's I or Geary's C, see `SThet`)
#' have been calculated. The output is a data frame with data from all samples in the
#' STlist
#'
#' @param x an STlist
#' @param sthet_only logical, return only genes with spatial statistics
#' @return a data frame with gene-level data
#'
#' @examples
##'
#' # Using included melanoma example (Thrane et al.)
#' # Download example data set from spatialGE_Data
#' thrane_tmp = tempdir()
#' unlink(thrane_tmp, recursive=TRUE)
#' dir.create(thrane_tmp)
#' lk='https://github.com/FridleyLab/spatialGE_Data/raw/refs/heads/main/melanoma_thrane.zip?download='
#' download.file(lk, destfile=paste0(thrane_tmp, '/', 'melanoma_thrane.zip'), mode='wb')
#' zip_tmp = list.files(thrane_tmp, pattern='melanoma_thrane.zip$', full.names=TRUE)
#' unzip(zipfile=zip_tmp, exdir=thrane_tmp)
#' # Generate the file paths to be passed to the STlist function
#' count_files <- list.files(paste0(thrane_tmp, '/melanoma_thrane'),
#'                           full.names=TRUE, pattern='counts')
#' coord_files <- list.files(paste0(thrane_tmp, '/melanoma_thrane'),
#'                           full.names=TRUE, pattern='mapping')
#' clin_file <- list.files(paste0(thrane_tmp, '/melanoma_thrane'),
#'                         full.names=TRUE, pattern='clinical')
#' # Create STlist
#' library('spatialGE')
#' melanoma <- STlist(rnacounts=count_files[c(1,2)],
#'                    spotcoords=coord_files[c(1,2)],
#'                    samples=clin_file) # Only first two samples
#' melanoma <- transform_data(melanoma, method='log')
#' melanoma <- SThet(melanoma, genes=c('MLANA', 'TP53'), method='moran')
#' get_gene_meta(melanoma, sthet_only=TRUE)
#'
#' @export
#
get_gene_meta = function(x=NULL, sthet_only=F){
  # Test if an STList has been input.
  if(is.null(x) | !is(x, 'STlist')){
    stop("The input must be a STlist.")
  }
  # Test is gene_meta is empty (no normalization performed on STlist)
  if(rlang::is_empty(x@gene_meta)){
    stop('No gene-level metadata available in this STlist.')
  }

  # Loop through samples
  genemeta_dfs = list()
  for(i in names(x@gene_meta)){
    genemeta_dfs[[i]] = x@gene_meta[[i]] %>%
      tibble::add_column(sample=i, .before=1)
    # Subset genes if only SThet stats are required
    if(sthet_only & any(colnames(x@gene_meta[[i]]) %in% c('moran_i', 'geary_c'))){
      genemeta_dfs[[i]] = genemeta_dfs[[i]] %>%
        dplyr::filter(!is.na(moran_i) | !is.na(geary_c))
    }
  }
  res = do.call(dplyr::bind_rows, genemeta_dfs)

  # Check if sample meta data available and add to output
  if(nrow(x@sample_meta) > 0){
    df_tmp = x@sample_meta
    colnames(df_tmp)[1] = colnames(res)[1]
    res = dplyr::left_join(df_tmp, res, by="sample", multiple='all')

    rm(df_tmp) # Clean env
  }

  if(nrow(res) == 0){
    stop('No gene-level meta available. If only spatial statistics requested, please make sure SThet was run.')
  }
  return(res)
}


##
# @title raise_err
#
raise_err = function(err_code=NULL, samplename=NULL){
  pkg_fp = list.files(system.file(package='spatialGE'), full.names=T, pattern='err\\.csv')
  err_db = suppressWarnings(utils::read.csv(pkg_fp, header=F, quote="'"))
  str_to_print = err_db[[2]][ err_db[[1]] == err_code ]
  if(is.null(samplename)){
    stop(paste0('spatialGE exception: ', str_to_print), call.=F)
  } else{
    stop(paste0('spatialGE exception: ', str_to_print, ' Sample: ', samplename, '.'), call.=F)
  }
}


##
# @title calculate_vst: Calculate standardized variation (VST) using Seurat's implementation
# @param x an STlist or a matrix with spots/cells in columns
# @param samples the samples from `x` to calculate VST. Ignored if x is a matrix
# @param cores integer indicating the number of cores in parallelization
# @return an STlist with VST in @gene_meta
#
calculate_vst = function(x=NULL, samples=NULL, cores=NULL){

  # To prevent NOTES in R CMD check
  . = NULL

  if(is(x, 'STlist')){
    # Identify variable genes (Seurat's VST)
    vst_ls = parallel::mclapply(samples, function(i){
      # Find tops variable genes using Seurat approach
      # First check if VST hasn't been calculated. If not, calculate
      if(!any(colnames(x@gene_meta[[i]]) == 'vst.variance.standardized')){
        vst_tmp = Seurat_FindVariableFeatures(x@counts[[i]]) %>%
          tibble::rownames_to_column(var='gene') %>%
          dplyr::select('gene', 'vst.variance.standardized') %>%
          dplyr::left_join(x@gene_meta[[i]], ., by='gene')
        return(vst_tmp)
      } else{
        return(NULL)
      }
    }, mc.cores=cores)
    names(vst_ls) = samples

    for(i in samples){
      if(!is.null(vst_ls[[i]])){
        x@gene_meta[[i]] = vst_ls[[i]]
      }
    }
    return(x)
  } else{
    #vst_tmp = Seurat_FindVariableFeatures(x@counts[[i]]) %>%
    vst_tmp = Seurat_FindVariableFeatures(x) %>%
      tibble::rownames_to_column(var='gene') %>%
      dplyr::select('gene', 'vst.variance.standardized')
    return(vst_tmp)
  }
}
FridleyLab/spatialGE documentation built on April 14, 2025, 9:37 a.m.