# These raw data files can be obtained from https://data.mendeley.com/datasets/cydmwsfztj/1
# The most pertinent and easiest to integrate data are counts of various cell/tissue features
# by image in the raw output file "All_Image.csv".
# There is also an "All_Islet.csv" file that contains features by islet,
# but we skip that for now because including that data in the application
# will be somewhat more involved (but we can further process if there is demand for it).
# For using the data in "All_Image.csv", the goal here is to reduce the data to mean counts
# by donor for each image type, i.e. for each of 12 donors, multiple images were taken for
# two pancreas parts -- tail and body (see Metadata.csv).
library(data.table)
# Read the metadata table and the per-image feature table
meta <- fread("Metadata.csv")
imgs <- fread("All_Image.csv")
# We want the image-extracted feature counts data and key column from metadata file;
# other columns appear to be less important raw outputs spit out by their image processing method.
# The pattern is anchored so that only names starting with "Count_" (plus the
# exact key column "Metadata_Core") are kept, rather than any name that merely
# contains "Count".
imgs <- imgs[, grep("^Count_|^Metadata_Core$", names(imgs)), with = FALSE]
# Upon closer inspection and some deduction, select the count columns that seem most representative,
# e.g. leave out intermediate versions of the final feature, e.g. for below we keep only "Count_Cells"
# "Count_UnfilteredCells" -> "Count_RescaledCells" -> "Count_Cells"
# Note: we proceed with this and can check with authors later
feats <- c("Count_BloodVessels", "Count_Cells", "Count_Islets", "Count_Nuclei", "Count_Vessels")
# Spell out FALSE: the shorthand F is an ordinary variable that can be reassigned
imgs <- imgs[, c(feats, "Metadata_Core"), with = FALSE]
# Attach the metadata to every image row so that statistics can be computed by donor.
# The image ID lives in column Metadata_Core of imgs and in column image of meta.
imgs <- merge(x = imgs, y = meta, by.x = "Metadata_Core", by.y = "image")
# Lower-case the pancreas part labels in place
set(imgs, j = "part", value = tolower(imgs[["part"]]))
# Drop the "Count_" prefix from the feature names and rename the columns to match
feats_ <- gsub("Count_", "", feats)
setnames(imgs, feats, feats_)
# Means for each donor stratified by pancreas part,
# reshaped to one row per donor with columns named <feature>.<part>
means <- imgs[, lapply(.SD, mean), .SDcols = feats_, by = c("case", "part")]
means <- dcast(means, ... ~ part, value.var = feats_, sep = ".")
# Standard deviations for each donor stratified by pancreas part.
# The result is stored as `sds` (not `sd`) so that stats::sd() is not masked
# by a data object for the remainder of the script.
sds <- imgs[, lapply(.SD, sd), .SDcols = feats_, by = c("case", "part")]
sds <- dcast(sds, ... ~ part, value.var = feats_, sep = ".")
# Every column except the first (the donor case) gets an "_SD" suffix
setnames(sds, old = names(sds)[-1], new = paste0(names(sds)[-1], "_SD"))
# It appears that -- only for #6264 -- pancreas head tissue was used in place of pancreas body
# (probably because body tissue wasn't available)
# Means and standard deviations using all sample parts:
means2 <- imgs[, lapply(.SD, mean), .SDcols = feats_, by = "case"]
sds2 <- imgs[, lapply(.SD, sd), .SDcols = feats_, by = "case"]
setnames(sds2, old = names(sds2)[-1], new = paste0(names(sds2)[-1], "_SD"))
# Combine the four summary tables into one row per donor.
# The join column is named explicitly instead of being inferred from keys or
# shared column names.
dataset <- Reduce(
  function(x, y) merge(x, y, by = "case"),
  list(means, means2, sds, sds2)
)
setnames(dataset, "case", "ID")
# Write table of feature means and SDs by donor case, overall and by pancreas part
write.table(
  dataset,
  "PMID30713109_1_Damond-2019.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)
# NOTE: the two lines below are web-page boilerplate captured with this script, not R code.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.