#' Calculate Ripley's K
#'
#' @param mif object of class `mif` created with `create_mif`
#' @param mnames cell phenotype markers to calculate Ripley's K for
#' @param r_range radius range (including 0)
#' @param num_permutations number of permutations to use to estimate CSR. If `keep_perm_dis` is set to FALSE, this will be ignored
#' @param edge_correction edge correction method to pass to `Kest`. can take one of "translation", "isotropic", "none", or 'border'
#' @param method not used currently
#' @param permute whether to use CSR estimate or use permutations to determine CSR
#' @param keep_permutation_distribution whether to find mean of permutation distribution or each
#' permutation calculation
#' @param workers number of cores to use for calculations
#' @param overwrite whether to overwrite the `univariate_Count` slot within `mif$derived`
#' @param xloc the location of the center of cells. If left `NULL`, `XMin`, `XMax`, `YMin`, and `YMax` must be present.
#' @param yloc the location of the center of cells. If left `NULL`, `XMin`, `XMax`, `YMin`, and `YMax` must be present.
#' @param big the number of cells at which to flip from an edge correction method other than 'none' to 'none' due to size
#'
#' @description
#' ripleys_k() calculates the emperical Ripley's K measurement for the cell types specified by mnames in the mIF object. This
#' is very useful when exploring the spatial clustering of single cell types on TMA cores or ROI spots following proccessing
#' with a program such as HALO for cell phenotyping.
#'
#' In the `ripleys_k` function, there is the ability to perform permutations in order to assess whether the clustering
#' of a cell type is significant, or the ability to derive the exact CSR and forgo permutations for much faster sample
#' processing. Permutations can be helpful if the significance of clustering wasnts to be identified - run 1000 permutations
#' and if observed is outside 95-percentile then significant clustering. We, however, recommend using the exact CSR estimate
#' due to speed.
#'
#' Some things to be aware of when computing the exact Ripley's K estimate, if your spatial file is greater than
#' the `big` size, the edge correction will be converted to 'none' in order to save on resources and compute time.
#' Due to the introduction of Whole Slide Imaging (WSI), this can easily be well over 1,000,000 cells, and calculating
#' edge correction for these spatial files will not succeed when attempting to force an edge correction on it.
#'
#' @return object of class `mif`
#' @export
#'
#' @examples
#' x <- spatialTIME::create_mif(clinical_data =spatialTIME::example_clinical %>%
#' dplyr::mutate(deidentified_id = as.character(deidentified_id)),
#' sample_data = spatialTIME::example_summary %>%
#' dplyr::mutate(deidentified_id = as.character(deidentified_id)),
#' spatial_list = spatialTIME::example_spatial,
#' patient_id = "deidentified_id",
#' sample_id = "deidentified_sample")
#' mnames = x$spatial[[1]] %>%
#' colnames() %>%
#' grep("Pos|CD", ., value =TRUE) %>%
#' grep("Cyto|Nucle", ., value =TRUE, invert =TRUE)
#' x2 = ripleys_k(mif = x,
#' mnames = mnames[1],
#' r_range = seq(0, 100, 1),
#' num_permutations = 100,
#' edge_correction = "translation",
#' method = "K",
#' permute = FALSE,
#' keep_permutation_distribution =FALSE,
#' workers = 1,
#' overwrite =TRUE)
ripleys_k = function(mif,
mnames,
r_range = seq(0, 100, 1),
num_permutations = 50,
edge_correction = "translation",
method = "K",
permute = FALSE,
keep_permutation_distribution = FALSE,
workers = 1,
overwrite = FALSE,
xloc = NULL,
yloc = NULL,
big = 10000){
if(keep_permutation_distribution == TRUE & permute == FALSE){
stop("Conflicting `perm` and `keep_permutation_distribution` parameters. Using estimate\n
\tIf wanting to use permutatations, set `permute = TRUE`")
}
#check if 0 is contained within the range of r
if(!(0 %in% r_range)){
r_range = c(0, r_range)
}
if(!inherits(mif, "mif")){
stop("mIF should be of class `mif` created with function `createMIF()`\n\tTo check use `inherits(mif, 'mif')`")
}
out = parallel::mclapply(mif$spatial, function(spat){
#get center of the cells
if(is.null(xloc) | is.null(yloc)){
spat = spat %>%
dplyr::mutate(xloc = (XMax + XMin)/2,
yloc = (YMax + YMin)/2)
} else {
#rename columns to follow xloc and yloc names
spat = spat %>%
dplyr::rename("xloc" = !!xloc,
"yloc" = !!yloc)
}
if(nrow(spat) > big){
edge_correction = 'none'
}
#select only needed columns
spat = spat %>%
dplyr::select(!!mif$sample_id, xloc, yloc, !!mnames)
#window
win = spatstat.geom::convexhull.xy(spat$xloc, spat$yloc)
#begin calculating the ripley's k and the permutation/estimate
if(nrow(spat)<10000 & permute == TRUE){ #have to calculate the permutation distribution
dists = as.matrix(dist(spat[,c("xloc", "yloc")]))
area = spatstat.geom::area(win)
#calculate the edge corrections
if(edge_correction %in% c("translation", "trans")){
edge = spatstat.explore::edge.Trans(spatstat.geom::ppp(x = spat$xloc, y = spat$yloc, window = win), W = win)
} else if(edge_correction %in% c("isotropic", "iso")){
edge = spatstat.explore::edge.Ripley(spatstat.geom::ppp(x = spat$xloc, y = spat$yloc, window = win),
r = spatstat.geom::pairdist(spatstat.geom::ppp(x = spat$xloc, y = spat$yloc, window = win)))
} else if(edge_correction == "none"){
edge = matrix(nrow = nrow(spat), ncol = nrow(spat), data = 1)
}
#dists = fast_mm(dists, edge)
res = parallel::mclapply(mnames, function(marker){
#find the rows that are positive for marker
pos = which(spat[marker] == 1)
#if there are less than 3 cells then just return NA table because cannot calculate
if(length(pos) < 3){
d = data.frame(iter = as.character(seq(num_permutations)),
Label = spat[1,mif$sample_id],
Marker = marker,
`Observed K` = NA,
`Permuted CSR` = NA,
`Exact CSR` = NA,
check.names =FALSE)
d = dplyr::full_join(d, expand.grid(iter = as.character(seq(num_permutations)),
r = r_range), by = "iter") %>%
dplyr::mutate(`Theoretical CSR` = pi * r^2)
return(d)
}
if(edge_correction == "border"){
ppp = spatstat.geom::ppp(spat$xloc, spat$yloc, window = win, marks = as.factor(spat[[marker]]))
obs = spatstat.explore::Kest(subset(ppp, marks == 1), r = r_range, correction = 'border') %>%
data.frame(check.names = FALSE) %>% rename("Theoretical CSR" = 2, "Observed K" = 3)
perms = parallel::mclapply(seq(num_permutations), function(x){
pout = spatstat.explore::Kest(subset(ppp, sample(1:nrow(spat), sum(spat[[marker]]), replace = FALSE)),
r = r_range, correction = 'border') %>%
data.frame(check.names = FALSE) %>%
mutate(iter = as.character(x), .before = 1)
return(data.frame(iter = pout$iter,
r = pout$r,
`Theoretical CSR` = pout$theo,
`Permuted CSR` = pout$border,
`Exact CSR` = NA,
check.names = FALSE))
}, mc.allow.recursive = TRUE, mc.preschedule = FALSE) %>%
do.call(dplyr::bind_rows, .)
final = dplyr::full_join(obs, perms, by = c("r", "Theoretical CSR"))
final$Marker = marker
final$Label = spat[1,1] #hard coded for example
} else {
edge_pos = edge[pos, pos]
counts = sapply(r_range, function(r) sum(edge_pos[which(dists[pos, pos] > 0 & dists[pos, pos] < r)]))
obs = (counts * area)/(length(pos)*(length(pos)-1))
theo = pi * r_range^2
perms = parallel::mclapply(seq(num_permutations), function(x){
n_pos = sample(1:nrow(spat), length(pos), replace =FALSE)
edge_pos = edge[n_pos, n_pos]
counts = sapply(r_range, function(r) sum(edge_pos[which(dists[n_pos, n_pos] > 0 & dists[n_pos, n_pos] < r)]))
permed = (counts * area)/(length(pos)*(length(pos)-1))
theo = pi * r_range^2
return(data.frame(iter = as.character(x),
r = r_range,
`Theoretical CSR` = theo,
`Permuted CSR` = permed,
`Exact CSR` = NA,
check.names = FALSE))
}, mc.allow.recursive = TRUE, mc.preschedule = FALSE) %>%
do.call(dplyr::bind_rows, .)
final = dplyr::full_join(data.frame(r = r_range,
`Theoretical CSR` = theo,
`Observed K` = obs,
check.names = FALSE),
perms, by = c("r", "Theoretical CSR"))
final$Marker = marker
final$Label = spat[1,1] #hard coded for example
}
return(final[,c(4,8, 7, 1,2,3,5,6)])
}, mc.allow.recursive = TRUE, mc.preschedule = FALSE) %>% #collapse all markers for spat
do.call(dplyr::bind_rows, .) %>%
dplyr::mutate(`Degree of Clustering Permutation` = `Observed K` - `Permuted CSR`,
`Degree of Clustering Theoretical` = `Observed K` - `Theoretical CSR`,
`Degree of Clustering Exact` = `Observed K` - `Exact CSR`)
} else if(permute == TRUE & nrow(spat)>10000){ #if we need perm distribution but too many cells
res = parallel::mclapply(mnames, function(marker){
#select the center of cells and marker column
dat = spat %>%
dplyr::select(xloc, yloc, !!marker)
#select columns that are postive for marker
dat2 = dat %>% dplyr::filter(get(marker) != 0)
#calculate the observed K
kobs = spatstat.geom::ppp(dat2$xloc, dat2$yloc, window = win) %>%
spatstat.explore::Kest(r = r_range, correction = edge_correction) %>%
data.frame() %>%
dplyr::rename("Theoretical CSR" = 2, "Observed K" = 3) %>%
dplyr::mutate(Label = unique(spat[[mif$sample_id]]),
Marker = marker, .before=1)
#calculate the permuted CSR
kperms = parallel::mclapply(seq(num_permutations), function(perm){
dat2 = dat[sample(seq(nrow(dat)), sum(dat[[marker]]), replace=F),]
spatstat.geom::ppp(dat2$xloc, dat2$yloc, window = win) %>%
spatstat.explore::Kest(r = r_range, correction = edge_correction) %>%
data.frame() %>%
dplyr::rename("Theoretical CSR" = 2, "Permuted CSR" = 3) %>%
dplyr::mutate(Label = unique(spat[[mif$sample_id]]),
Marker = marker,.before=1) %>%
dplyr::mutate(iter = as.character(perm), .before = 1)
}, mc.allow.recursive = TRUE, mc.preschedule = FALSE) %>%
do.call(dplyr::bind_rows, .) %>%
mutate(`Exact CSR` = NA)
K = dplyr::full_join(kobs, kperms,
by = c("Label", "Marker", "r", "Theoretical CSR")) %>%
relocate(iter, .before = 1)
}, mc.allow.recursive = TRUE, mc.preschedule = FALSE) %>%
do.call(dplyr::bind_rows, .) %>%
dplyr::mutate(`Degree of Clustering Permutation` = `Observed K` - `Permuted CSR`,
`Degree of Clustering Theoretical` = `Observed K` - `Theoretical CSR`,
`Degree of Clustering Exact` = `Observed K` - `Exact CSR`)
} else if(permute == FALSE){
marker_res = parallel::mclapply(mnames, function(marker){
#select the center of cells and marker column
dat = spat %>%
dplyr::select(xloc, yloc, !!marker)
#select columns that are postive for marker
dat2 = dat %>% dplyr::filter(get(marker) != 0)
if(nrow(dat2) < 3){
return(data.frame(iter = "Estimator",
Label = unique(spat[[mif$sample_id]]),
Marker = marker,
r = r_range,
`Theoretical CSR` = pi * r_range^2,
`Observed K` = NA,
check.names =FALSE))
}
#calculate the observed K
kobs = spatstat.geom::ppp(dat2$xloc, dat2$yloc, window = win) %>%
spatstat.explore::Kest(r = r_range, correction = edge_correction) %>%
data.frame() %>%
dplyr::rename("Theoretical CSR" = 2, "Observed K" = 3) %>%
dplyr::mutate(Label = unique(spat[[mif$sample_id]]),
Marker = marker, .before=1,
r = r)
return(kobs)
}, mc.allow.recursive = TRUE, mc.preschedule = FALSE) %>% #collapse all markers for spat
do.call(dplyr::bind_rows, .)
if(nrow(spat) < big){
pp_obj = spatstat.geom::ppp(x = spat$xloc, y = spat$yloc, window = win)
k_est = spatstat.explore::Kest(pp_obj, r = r_range, correction = edge_correction)
gc(full=T)
k_est2 = k_est %>%
data.frame(check.names = FALSE) %>%
dplyr::select(1,3) %>%
dplyr::rename("Exact CSR" = 2) %>%
dplyr::mutate(r = r,
`Permuted CSR` = NA, .before = `Exact CSR`)
} else {
ns = nrow(spat)
slide = ceiling(ns / big)
ranges = getTile(slide = slide, l = ns, size = big)
counts = parallel::mclapply(ranges, function(i_range){
parallel::mclapply(ranges, function(j_range){
i_tmp = spatstat.geom::ppp(x = spat$xloc[i_range], y = spat$yloc[i_range], window = win)
j_tmp = spatstat.geom::ppp(x = spat$xloc[j_range], y = spat$yloc[j_range], window = win)
dists = spatstat.geom::crossdist(i_tmp, j_tmp)
dists[dists == 0 | dists > max(r_range)] = NA
if(edge_correction == "none"){
counts = cumsum(spatstat.univar::whist(dists,
spatstat.geom::handle.r.b.args(r_range, breaks=NULL, win, rmaxdefault = max(r_range))$val))
} else {
edge = spatstat.explore::edge.Trans(i_tmp, j_tmp)
edge[edge == 0] = NA
diag(edge) = NA
counts = sapply(r_range, function(r){sum(edge[which(dists < r)])})
}
rm(dists)
return(counts)
}, mc.allow.recursive = TRUE, mc.preschedule = FALSE)%>%
do.call(cbind, .) %>%
rowSums()
}, mc.allow.recursive = TRUE, mc.preschedule = FALSE)
counts = counts %>%
do.call(cbind, .) %>%
rowSums()
k = (counts * spatstat.geom::area(win))/(ns * (ns-1))
k_est2 = data.frame(r = r_range,
`Permuted CSR` = NA,
`Exact CSR` = k, check.names = FALSE)
}
res = dplyr::full_join(marker_res, k_est2) %>%
dplyr::mutate(iter = "Estimater", .before = 1) %>%
dplyr::mutate(`Degree of Clustering Permutation` = NA,
`Degree of Clustering Theoretical` = `Observed K` - `Theoretical CSR`,
`Degree of Clustering Exact` = `Observed K` - `Exact CSR`)
}
#return final results
return(res)
}, mc.cores = workers, mc.allow.recursive =TRUE, mc.preschedule =FALSE) %>% #set mclapply params
do.call(dplyr::bind_rows, .)
out = out%>% #collapse all samples to single data frame
dplyr::rename(!!mif$sample_id := Label)
if(permute == TRUE & keep_permutation_distribution == FALSE){
out = out %>%
dplyr::mutate(iter = "Permuted") %>%
dplyr::group_by(iter, across(mif$sample_id), Marker, r) %>%
dplyr::summarise_all(~mean(., na.rm=TRUE))
}
if(overwrite){ #overwrite existing data in univariate_Count slot
mif$derived$univariate_Count = out %>%
dplyr::mutate(Run = 1)
} else { #dont overwrite
mif$derived$univariate_Count = mif$derived$univariate_Count %>%
dplyr::bind_rows(out %>%
dplyr::mutate(Run = ifelse(exists("univariate_Count", mif$derived),
max(mif$derived$univariate_Count$Run) + 1,
1)))
}
return(mif)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.