# hetset analysis tools =======================================================
# summarize set of selected features ------------------------------------------
evaluate_set <- function(H, verbose = T){
if (is.null(H@metadata$slf)){
stop("No features selected: try scan_hetset(H)")
}
old_opt <- options()
options(digits = 2)
rep.sum <- data.frame("Feature" = H@metadata$slf,"sqHell.rest" = NA,"delta" = NA,"expl.dist" = NA,"rel.importance" = NA)
for (i in 1:nrow(rep.sum)){
H_r <- H
H_r@metadata$slf <- H@metadata$slf[-i]
rep.sum$sqHell.rest[i] <- calculate_dist(estimate_densities(H_r))@metadata$sqHell
rep.sum$delta[i] <- H@metadata$sqHell - rep.sum$sqHell.rest[i]
rep.sum$expl.dist[i] <- rep.sum$delta[i] / H@metadata$sqHell
}
CumDelta <- sum(rep.sum$delta)
rep.sum$rel.importance <- rep.sum$delta / CumDelta
rep.sum <- rep.sum[order(-rep.sum$rel.importance),]
if(verbose){
print(rep.sum)
}
options(old_opt)
rep.sum
}
# show subpopulations
show_partition <- function(H,cutoff = 0.99,inner = 0.95){
slf <- H@metadata$slf
N_acc <- 10^5
mu_A <- H@metadata$prm.A$mean
Sigma_A <- H@metadata$prm.A$cov
mu_B <- H@metadata$prm.B$mean
Sigma_B <- H@metadata$prm.B$cov
# increase robustness for Mahalanobis distance (Sigma^-1)
diag(Sigma_A) <- diag(Sigma_A) + 10^-3 * diag(H@metadata$prm.full$cov)
diag(Sigma_B) <- diag(Sigma_B) + 10^-3 * diag(H@metadata$prm.full$cov)
rmah_a <- mahalanobis(
x = mvtnorm::rmvnorm(n = N_acc,mean = mu_A,sigma = Sigma_A),
center = mu_A,
cov = Sigma_A)
rmah_b <- mahalanobis(
x = mvtnorm::rmvnorm(n = N_acc,mean = mu_B,sigma = Sigma_B),
center = mu_B,
cov = Sigma_B)
xmah_a <- mahalanobis(
x = t(assay(H)[slf,]),
center = mu_A,
cov = Sigma_A)
xmah_b <- mahalanobis(
x = t(assay(H)[slf,]),
center = mu_B,
cov = Sigma_B)
rep.sum <- data.frame("ID" = colnames(H),"group" = NA,"dist.A" = NA,"dist.B" = NA,"rel" = NA)
for (i in 1:dim(H)[2]){
rep.sum$"dist.A"[i] <- mean(rmah_a < xmah_a[i])
rep.sum$"dist.B"[i] <- mean(rmah_b < xmah_b[i])
if (min(rep.sum[i,3:4]) < inner){
d_A <- mvtnorm::dmvnorm(x = assay(H)[slf,i],mean = mu_A,sigma = Sigma_A)
d_B <- mvtnorm::dmvnorm(x = assay(H)[slf,i],mean = mu_B,sigma = Sigma_B)
crel <- max(d_A,d_B)/(d_A+d_B)
rep.sum$"rel"[i] <- crel
if(crel >= cutoff){
rep.sum$"group"[i] <- c("A","B")[1+(d_B > d_A)]
}
}
}
if (is.null(H$ID)){
H$ID <- colnames(H)
}
output <- merge.data.frame(x = rep.sum,y = SummarizedExperiment::colData(H),by = "ID")
output
}
# fisher.test for independence of fitted subpopulations and clinical factors --
test_independence <- function(H,feature,S){
type <- typeof(S)
t_out <- NULL
switch(type,
"double" = {
eval(parse(text =
paste0("t_out <- fisher.test(table(H$",
feature,">=",S,",H$prt))")))
},
"character" = {
eval(parse(text =
paste0("t_out <- fisher.test(table(H$",
feature,"== S,H$prt))")))
},
"logical" = {
eval(parse(text =
paste0("t_out <- fisher.test(table(H$",
feature,"==",S,",H$prt))")))
})
t_out
}
# present categorical dependencies --------------------------------------------
show_dependencies <- function(H,dep_list,S_list,type = "print"){
N_dep <- length(dep_list)
L_dep <- data.frame("var" = as.character(dep_list),
"S" = NA,
"OR" = NA,
"OR.lo" = NA,
"OR.up" = NA)
for (i in 1:N_dep){
aus <- test_independence(H = H,feature = dep_list[i],S = S_list[[i]])
L_dep[i,2:5] <- c(S_list[[i]],
round(aus$estimate,digits = 2),
round(aus$conf.int[1:2],digits = 2))
}
switch(type,
"print" = {
print(L_dep)
},
"plot" = {
l_sig <- sum(!is.na(H@metadata$slf))
plot_type <- as.character(min(3,l_sig))
par(new=T)
par(mar = c(0,0,0,0),mfrow=c(1,1))
switch(plot_type,
'1' = {
layout(mat = matrix(c(0,0,0,0,1,0),ncol = 2),
widths = c(4/5,1/5),
heights = c(0.25,0.17,0.58))
plot(x = c(0,1),y = c(0,1),xaxt='n',yaxt='n',type = 'n',ann=F,axes=F)
n_tests <- dim(L_dep)[1]
text(0.5,0.95,labels = "odds ratios",cex = 2)
for (j in 1: n_tests){
text(x = 0.1,y = 0.9-0.8*j/n_tests,cex = 1.5,
labels = substring(text = L_dep[j,1],
first = 1,
last = min(3,nchar(as.character(L_dep[j,1])))))
ausgabe <- paste0(L_dep[j,3]," in [",L_dep[j,4],"; ",L_dep[j,5],"]")
text(x = 0.6,y = 0.9-0.8*j/n_tests,labels = ausgabe,cex = 1.5)
}
},
'2' = {
layout(mat = matrix(c(0,0,1,0),ncol = 2),
widths = c(3/4,1/4),
heights = c(1/4,3/4))
par(mar = c(1,1,1,1))
plot(x = c(0,1),y = c(0,1),xaxt='n',yaxt='n',type = 'n',ann=F,axes=F)
n_tests <- dim(L_dep)[1]
text(0.5,0.95,labels = "odds ratios",cex = 1.5)
for (j in 1: n_tests){
text(x = 0.1,y = 0.9-0.8*j/n_tests,cex = 1.25,
labels = substring(text = L_dep[j,1],
first = 1,
last = min(3,nchar(as.character(L_dep[j,1])))))
ausgabe <- paste0(L_dep[j,3]," in [",L_dep[j,4],"; ",L_dep[j,5],"]")
text(x = 0.6,y = 0.9-0.8*j/n_tests,labels = ausgabe,cex = 1.25)
}
},
'3' = {
n_field <- ceiling(sqrt(l_sig/2*(l_sig-1)+1))
layout(mat = matrix(c(0,0,0,1),2),
widths = c((n_field-1)/n_field,1/n_field),
heights = c((n_field-1)/n_field,1/n_field))
plot(x = c(0,1),y = c(0,1),xaxt='n',yaxt='n',type = 'n')
n_tests <- dim(L_dep)[1]
text(0.5,0.95,labels = "odds ratios")
for (j in 1: n_tests){
text(x = 0.1,y = 0.9-0.8*j/n_tests,
labels = substring(text = L_dep[j,1],
first = 1,
last = min(3,nchar(as.character(L_dep[j,1])))))
ausgabe <- paste0(L_dep[j,3]," in [",L_dep[j,4],"; ",L_dep[j,5],"]")
text(x = 0.6,y = 0.9-0.8*j/n_tests,labels = ausgabe,cex = 0.5+4/l_sig)
}
})
par(mfrow = c(1,1),mar = c(5,4,4,2)+0.1,mgp = c(3,1,0))
})
}
# make categorical factors ----------------------------------------------------
make_partition_by_logical <- function(H,log_vec){
if (dim(H)[2]!=length(log_vec)){
stop(" length of log_vec must equal the number of samples in H /n")
}
H$prt <- NA
H$prt <- factor(x = c("A","B")[(!log_vec)+1],levels = c("A","B"))
H
}
# subset ---------------------------------------------------------------------
subset_hetset <- function(H,prt = NULL,
keep.samples = NULL,remove.samples = NULL,
keep.features = NULL,remove.features = NULL){
old_signature <- H@metadata$slf
old_sample_size <- ncol(H)
# reduce to subpopulation
if (!is.null(prt)){
if (length(prt) != 1){
stop("prt must equal one of the subpopulations 'A' or 'B' ")
} else {
if (prt %in% c("A","B")){
keep.samples <- (H$prt == prt)
H$prt <- NA
} else {
stop("prt must equal one of the subpopulations 'A' or 'B' ")
}
}
}
keep.select <- rep(T,ncol(H))
if (!is.null(keep.samples)){
if (is.logical(keep.samples)){
keep.select <- keep.samples
} else {
keep.select[!(colnames(H) %in% keep.samples)] <- F
}
}
if (!is.null(remove.samples)){
if (is.logical(remove.samples)){
keep.select[remove.samples] <- F
} else {
keep.select[(colnames(H) %in% remove.samples)] <- F
}
}
keep.subset <- rep(T,nrow(H))
if (!is.null(keep.features)){
if (is.logical(keep.features)){
keep.subset <- keep.features
} else {
keep.subset[!(H@NAMES %in% keep.features)] <- F
}
}
if (!is.null(remove.features)){
if (is.logical(remove.features)){
keep.subset[remove.features] <- F
} else {
keep.subset[(H@NAMES %in% remove.features)] <- F
}
}
H <- SummarizedExperiment::subset(H,
subset = keep.subset,
select = keep.select)
if ((sum(!(old_signature %in% H@NAMES)) > 0) | (ncol(H) < old_sample_size)){
nochdrin <- (old_signature %in% H@NAMES)
H@metadata$slf <- H@metadata$slf[nochdrin]
if (sum(nochdrin) == 0){
H@metadata$prm.full <- list("mean" = NA,"cov" = NA)
H@metadata$prm.A <- list("mean" = NA,"cov" = NA)
H@metadata$prm.B <- list("mean" = NA,"cov" = NA)
H@metadata$prp.A <- 0.5
H@metadata$sqHell <- 0
} else {
H@metadata$prm.full$mean <- H@metadata$prm.full$mean[nochdrin]
H@metadata$prm.A$mean <- H@metadata$prm.A$mean[nochdrin]
H@metadata$prm.B$mean <- H@metadata$prm.B$mean[nochdrin]
if (length(old_signature)>1){
H@metadata$prm.full$cov <- H@metadata$prm.full$cov[nochdrin,nochdrin]
H@metadata$prm.A$cov <- H@metadata$prm.A$cov[nochdrin,nochdrin]
H@metadata$prm.B$cov <- H@metadata$prm.B$cov[nochdrin,nochdrin]
}
H@metadata$sqHell <- calculate_dist(H)@metadata$sqHell
}
}
H
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.