## (No file description available.)
## I think the point is to extract/construct identifiers to use as row names.
## Search a (possibly nested) list for elements named `item`.
##
## Arguments:
##   lst   list to search; sub-lists are visited in order.
##   item  a single element name to look for.
## Value:
##   The unlist()-ed search result: for every node that has an element named
##   `item`, the chain of list positions leading to that element followed by
##   the element's length.  NULL when nothing is found.  A node that contains
##   `item` is not searched any deeper.
rnames <- function(lst, item) {
  ## `path` holds the list positions followed so far to reach `node`.
  walk <- function(node, path) {
    hit <- match(item, names(node), nomatch = 0L)
    if (hit > 0L) {
      ## Found it: report where it lives and how long it is.
      return(list(inds = c(path, hit), len = length(node[[hit]])))
    }
    if (all(is.atomic(unlist(node, recursive = FALSE))) || !is.list(node)) {
      ## Nothing below this node that could be descended into.
      return(NULL)
    }
    lapply(seq_along(node), function(k) walk(node[[k]], c(path, k)))
  }
  unlist(walk(lst, NULL))
}
## Make up sample names that include row number, padded with leading zeros
## Build identifiers of the form <prefix><zero-padded value>.
##
## Arguments:
##   prefix   string placed in front of every value.
##   values   non-negative numbers to turn into names.
##   ndigits  number of powers of ten (10^1 .. 10^ndigits) each value is
##            compared against; a value gets one "0" for every power it is
##            below.  Defaults to trunc(log10(max(values))), which pads all
##            values to the width of the largest one.
## Value:
##   Character vector, one name per element of `values` (character(0) for
##   empty input).
padnames <- function(prefix, values, ndigits = NULL) {
  if (length(values) == 0) {
    return(character(0))
  }
  if (is.null(ndigits)) {
    M <- max(values)
    ## log10() is only meaningful for M >= 1; smaller maxima need no padding.
    ndigits <- if (M >= 1) trunc(log10(M)) else 0
  }
  ## seq_len() (not 1:ndigits) so that ndigits == 0 yields no thresholds
  ## instead of the descending sequence c(1, 0).
  thresholds <- 10^seq_len(ndigits)
  padlength <- vapply(values, function(n) sum(n < thresholds), integer(1))
  pad <- strrep("0", padlength)
  paste0(prefix, pad, values)
}
## Convert one parsed karyotype JSON document into a status table and a
## binary loss/gain/fusion (LGF) table.
##
## Argument:
##   J  list for one JSON file.  The code reads `J$iscn2016_bands` (band
##      labels) and `J$output`, whose entries each supply `status`,
##      `karyotype` and, when the status is one of the two "success" values,
##      `parsing_result` (one item per clone, each holding
##      `loss_gain_fusion_computing` with `loss`, `gain` and `fusion`).
## Value:
##   list(Status = data frame with Status and Karyotype, one row per entry,
##        LGF    = data frame with ID, Clones, then the Loss_*, Gain_* and
##                 Fusion_* columns, one row per clone).
extractOneLGF <- function(J) {
  bands <- J$iscn2016_bands # cytoband labels/names
  ## Width of every loss/gain/fusion vector.  Taken from the band labels;
  ## 916 is the width that used to be hard-coded and is kept as the fallback
  ## when no labels are supplied.
  nBands <- if (length(bands) > 0) length(bands) else 916L
  OUT <- J$output
  if (length(OUT) < 1) {
    stop("No karyotype entries found in JSON 'output'.")
  }

  ## extract the status
  KY <- matrix(NA, ncol = 2, nrow = length(OUT),
               dimnames = list(NULL, c("Status", "Karyotype")))
  for (I in seq_along(OUT)) {
    KY[I, ] <- c(OUT[[I]]$status, OUT[[I]]$karyotype)
  }
  KY <- as.data.frame(KY, stringsAsFactors = FALSE)
  rownames(KY) <- padnames("RN", seq_along(OUT))
  KY$Status <- factor(KY$Status)

  ## extract the binary-mapped LGF data (NULL for entries that did not parse)
  okStatus <- c("Success", "Fixable grammar error and success")
  lgf <- lapply(OUT, function(x) {
    if (x$status %in% okStatus) {
      lapply(x$parsing_result, "[[", "loss_gain_fusion_computing")
    }
  })
  ## (sub)clone ids: 1..k within each parsed entry; seq_along() contributes
  ## nothing for failed (NULL) or empty entries.
  clone <- unlist(lapply(lgf, seq_along))

  ## compute row-name IDs; columns are entry index, clone index, position of
  ## "loss" inside the clone record, and length of the loss vector
  df.ID <- as.data.frame(matrix(rnames(lgf, "loss"), ncol = 4, byrow = TRUE))
  id <- df.ID$V1
  if (any(df.ID$V2 != clone)) {
    warning("Disagreement among clone IDs.")
  }
  if (any(df.ID$V4 != nBands)) {
    warning("Band count does not match the length of the LGF vectors.")
  }
  fullID <- paste(df.ID$V1, df.ID$V2, df.ID$V3, sep = ".")
  if (any(duplicated(fullID))) {
    warning("Duplicated identifiers should not happen.")
  }

  ## Extract one kind of event ("loss", "gain" or "fusion") for all clones as
  ## a data frame with one row per clone and one column per band.
  bandFrame <- function(event, label) {
    perClone <- lapply(lgf, function(x) lapply(x, "[[", event))
    res <- as.data.frame(matrix(unlist(perClone), ncol = nBands, byrow = TRUE),
                         stringsAsFactors = FALSE)
    colnames(res) <- paste(label, bands, sep = "_")
    res
  }

  ## Put them back together, with the two id columns moved to the front
  df.lgf <- cbind(bandFrame("loss", "Loss"),
                  bandFrame("gain", "Gain"),
                  bandFrame("fusion", "Fusion"))
  rownames(df.lgf) <- fullID
  df.lgf$ID <- id
  df.lgf$Clones <- clone
  nData <- ncol(df.lgf) - 2L
  df.lgf <- df.lgf[, c(nData + 1:2, seq_len(nData))]
  list(Status = KY, LGF = df.lgf)
}
## Draft of a formal S4 container for the LGF results.  Deliberately switched
## off ("maybe later"), so nothing inside this block is ever evaluated.
if (FALSE) {
  setClass(
    "LGF",
    slots = c(
      ## list of data frames; rows = clones, columns = LGF-cytobands
      raw = "list",
      ## rows = cytobands, columns = Loss, Gain, Fusion
      frequency = "data.frame",
      ## file names
      source = "character",
      ## number of clones in each file
      size = "numeric",
      ## cytoband locations
      CL = "data.frame"
    )
  )
}
## Summarise a binary LGF table as per-band event frequencies.
##
## Arguments:
##   df  data frame whose column names contain "Loss", "Gain" or "Fusion"
##       (the band label is what remains after removing "Loss_"); one row
##       per clone.
##   CL  object whose row names give the bands, and their order, to report.
## Value:
##   list(Frequency = data frame with columns Loss, Gain, Fusion holding the
##                    fraction of rows with a value > 0, rows ordered as
##                    rownames(CL),
##        N         = number of rows in df).
Idioformat <- function(df, CL) {
  ## drop = FALSE keeps a data frame even when only one band is present;
  ## otherwise the selection collapses to a vector and colMeans() fails.
  pick <- function(label) {
    df[, grepl(label, names(df)), drop = FALSE]
  }
  Loss <- pick("Loss")
  Gain <- pick("Gain")
  Fusion <- pick("Fusion")
  tags <- sub("Loss_", "", colnames(Loss))
  temp <- data.frame(Loss = colMeans(Loss > 0),
                     Gain = colMeans(Gain > 0),
                     Fusion = colMeans(Fusion > 0))
  rownames(temp) <- tags
  list(Frequency = temp[rownames(CL), ], N = nrow(df))
}
## Read one or more karyotype JSON files and summarise their LGF content.
##
## Arguments:
##   files    character vector of file names, resolved relative to `folder`;
##            NULL means every file in `folder` whose name ends in ".json".
##   folder   directory holding the files; NULL means the current directory.
##   verbose  if TRUE, report how many files are about to be read.
## Value:
##   list(source    = the file names,
##        raw       = per-file results of extractOneLGF(), named by file name
##                    without the ".json" extension,
##        frequency = per-file frequency tables bound column-wise,
##        size      = number of LGF rows per file,
##        CL        = the cytoband location table).
## Stops with an error when there are no files to read.
## NOTE(review): `fromJSON` and `cytobandLocations` are not defined in this
## part of the file; presumably they come from a JSON package and from
## package data -- confirm.
readLGF <- function(files = NULL, folder = NULL, verbose = TRUE) {
  ## Figure out which files we want to read
  if (is.null(folder)) {
    folder <- "."
  }
  if (is.null(files)) {
    ## `pattern` is a regular expression, not a glob: anchor and escape it so
    ## that only names ending in ".json" are picked up.
    files <- list.files(folder, pattern = "\\.json$",
                        full.names = FALSE) # character vector, one file per entry
  }
  if (length(files) < 1) {
    stop("No JSON input files to read.")
  }
  if (verbose) {
    message("Reading ", length(files), " file(s) from '", folder, "'.\n")
  }
  ## make sure that, when done, we leave the working directory in the same
  ## state that we found it.
  home <- getwd()
  on.exit(setwd(home), add = TRUE)
  setwd(folder)
  ## a list, one element per JSON file
  myJSON <- lapply(files, function(x) fromJSON(file = x))
  raw <- lapply(myJSON, extractOneLGF)
  ## strip only a trailing ".json" extension
  names(raw) <- sub("\\.json$", "", basename(files))
  ## Get the summary statistics
  ick <- lapply(raw, function(R) {
    Idioformat(R$LGF, CL = cytobandLocations)
  })
  bundle <- do.call(cbind, lapply(ick, function(s) s$Frequency))
  size <- vapply(ick, function(s) s$N, integer(1))
  list(source = files, raw = raw, frequency = bundle, size = size,
       CL = cytobandLocations)
}
## Note from the hosting site: any scripts or data that you put into this
## service are public.
## To embed this code, add the site's embed snippet to your website.
## For more information on customizing the embed code, read "Embedding Snippets".