knitr::opts_chunk$set(echo = TRUE)
require(ggplot2) require(plyr) require(tie) require(robustbase) require(tidyverse) require(plm) require(gridExtra) # a function for regressing the effect size of interest onto the statistic of interest reg.ana <- function(stat.x, stat.y) { tryCatch({ rm.rows <- is.na(stat.x) | is.na(stat.y) stat.x <- stat.x[!rm.rows]; stat.y <- stat.y[!rm.rows] reg.fit <- lm(stat.y ~ stat.x); coef.fit <- coef(reg.fit) # basic LM # sandwich variance estimates sandwich_se <- diag(vcovHC(reg.fit, type="HC"))^0.5 z_stat <- coef.fit/sandwich_se # run a one-sided Z test p_values <- pnorm(z_stat, lower.tail=FALSE) min.x=min(stat.x); max.x=max(stat.x) return(list(slope=coef.fit["stat.x"], intercept=coef.fit["(Intercept)"], p.value=p_values["stat.x"], min.x=min.x, max.x=max.x)) }, error=function(e) {return(list(slope=NaN, intercept=NaN, p.value=NaN, min=NaN, max=NaN))}) }
results <- readRDS('../data/real/dep_wt_fmri_results.rds') dset.cols <- readRDS("../data/real/dset_colors.rds") merged.dat <- results$processed$statistics %>% # merge with dcorr results dplyr::left_join(results$processed$dcor, by=c("Reg", "FF", "Scr", "GSR", "Parcellation", "xfm", "Dataset", "nsub", "nses", "nroi", "nscans")) %>% dplyr::rename(Method=method, Algorithm=alg, Task=task) %>% # purge IPCAS6 dataset with only 2 subjects and no variance # effect size as measured by DCorr dplyr::filter(Method == "DCorr" & xfm == "P" & embed == "Raw" & Dataset != "IPCAS6" & !(Algorithm %in% c("Kernel", "HSIC"))) %>% # make it ordered so we can force the plot format dplyr::mutate(Task=factor(Task, levels=c("Sex", "Age"), ordered=TRUE), Algorithm=factor(Algorithm, levels=c("Discr", "PICC", "I2C2", "FPI", "DISCO"), ordered=TRUE)) %>% # add information about the letter for the subplot dplyr::mutate(Task=recode_factor(Task, "Sex"="(A) Sex", "Age"="(B) Age"), Algorithm=recode_factor(Algorithm, "Discr" = "(i) Discr.", "PICC" = "(ii) PICC", "I2C2" = "(iii) I2C2", "FPI"="(iv) Finger.", "DISCO"="(v) Kernel")) alpha = .05
Statistic on the graphs, vs. MGC/Dcorr for age (regression) and sex (classification) task on the graphs
output figure should be 7 x 4.5 and then edited in illustrator
stat.vs.perf <- merged.dat %>% dplyr::group_by(xfm, Task, Algorithm) %>% # normalize by the max/min statistics across datasets for cross-statistic # comparisons; does not effect p-values as is a affine xfm dplyr::mutate(stat.x=(stat.x - min(stat.x, na.rm=TRUE))/(max(stat.x, na.rm=TRUE) - min(stat.x, na.rm=TRUE)), stat.y=(stat.y - min(stat.y, na.rm=TRUE))/(max(stat.y, na.rm=TRUE) - min(stat.y, na.rm=TRUE))) %>% dplyr::group_by(xfm, Task, Algorithm, Dataset, nsub) %>% # regress effect size onto the statistic bow(tie(Slope, Intercept, p.value, min.x, max.x) := reg.ana(stat.x, stat.y)) neuro.ct <- stat.vs.perf %>% dplyr::group_by(Task) %>% dplyr::summarise(N.Datasets=length(unique(Dataset))) # analyze p-values per-algorithm/task test.results <- stat.vs.perf %>% dplyr::group_by(xfm, Task, Algorithm) %>% # compute Fisher-corrected p-value and the weighted mean slope (by subjects) dplyr::summarize(pval.ct = sum(p.value < alpha, na.rm=TRUE), slope.ct = sum(Slope > 0, na.rm=TRUE), p.value=round(median(p.value, na.rm=TRUE), digits=3), Slope=weighted.mean(Slope, nsub, na.rm=TRUE), Intercept=weighted.mean(Intercept, nsub, na.rm=TRUE) ) %>% dplyr::mutate(Alg.Title=ifelse(p.value == 0, sprintf("Mean(Slope)=%.3f, p-value<%.3f", Slope, p.value + .001), sprintf("Mean(Slope)=%.3f, p-value=%.3f", Slope, p.value))) tabl.neuro <- test.results %>% dplyr::mutate(Task=recode_factor(Task, "(A) Sex"="(I.A) Sex, Neuro", "(B) Age"="(I.B) Age, Neuro")) %>% dplyr::ungroup() %>% dplyr::select(Algorithm, Task, slope.ct, pval.ct) %>% dplyr::left_join(neuro.ct %>% dplyr::ungroup() %>% dplyr::mutate(Task=recode_factor(Task, "(A) Sex"="(I.A) Sex, Neuro", "(B) Age"="(I.B) Age, Neuro")), by="Task") %>% dplyr::mutate(Algorithm=recode_factor(Algorithm, "(i) Discr."="Discr.", "(ii) PICC"="PICC", "(iii) I2C2"="I2C2", "(iv) Finger."="Finger.", "(v) Kernel"="Kernel")) neuro.plt <- stat.vs.perf %>% tidyr::gather("Condition", "xval", min.x, max.x) %>% dplyr::mutate(y.pos=Slope*xval + Intercept) %>% # add the pvalues and slopes dplyr::left_join(test.results, by=c("xfm", "Algorithm", "Task")) %>% # plot it ggplot(aes(x=xval, y=y.pos, color=Dataset, size=log(nsub), group=Dataset)) + geom_line(alpha=0.6) + geom_abline(data=test.results, aes(slope=Slope, intercept=Intercept), color="black", size=1.5) + facet_grid(Task ~ Algorithm) + theme_bw() + xlab("Reference Statistic") + ylab("Effect Size") + scale_color_manual(values=dset.cols) + scale_y_continuous(expand=c(.01, .01)) + scale_x_continuous(expand=c(.01, .01)) + coord_cartesian(ylim=c(0, 1), xlim=c(0,1)) + guides(color=FALSE) + scale_size(range=c(0.5, 3)) + guides(size=FALSE, color=FALSE) + ggtitle("(I) Neuroimaging") + theme(text=element_text(size=18)) neuro.plt
genetics.sex <- readRDS('../data/real/genomics_sex.rds') genomics.sex.results <- genetics.sex$Reference %>% dplyr::filter(xfm %in% c("Raw", "Rank", "Log") & !(Algorithm %in% c("HSIC", "Kernel"))) %>% dplyr::left_join(genetics.sex$Effect %>% dplyr::rename("Statistic"=statistic), by=c("Data", "xfm")) %>% dplyr::group_by(Algorithm.x) %>% # normalize by the max/min statistics across datasets for cross-statistic # comparisons; does not effect p-values as is a affine xfm dplyr::mutate(Statistic.x=(Statistic.x - min(Statistic.x))/(max(Statistic.x) - min(Statistic.x)), Statistic.y=(Statistic.y - min(Statistic.y))/(max(Statistic.y) - min(Statistic.y))) %>% dplyr::mutate(Task="(A) Sex") %>% dplyr::rename(Resolution=Data) genetics.cancer <- readRDS('../data/real/genomics_cancer.rds') genomics.canc.results <- genetics.cancer$Reference %>% dplyr::filter(xfm %in% c("Raw", "Rank", "Log") & !(Algorithm %in% c("HSIC", "Kernel"))) %>% dplyr::left_join(genetics.cancer$Effect %>% dplyr::rename("Statistic"=statistic), by=c("Resolution", "xfm")) %>% dplyr::group_by(Algorithm.x) %>% # normalize by the max/min statistics across datasets for cross-statistic # comparisons; does not effect p-values as is a affine xfm dplyr::mutate(Statistic.x=(Statistic.x - min(Statistic.x))/(max(Statistic.x) - min(Statistic.x)), Statistic.y=(Statistic.y - min(Statistic.y))/(max(Statistic.y) - min(Statistic.y))) %>% dplyr::mutate(Task="(B) Cancer") genomics.results <- genomics.canc.results %>% rbind(genomics.sex.results) %>% dplyr::mutate(Algorithm.x=recode_factor(Algorithm.x, "DISCO"="Kernel")) reg.results <- genomics.results %>% dplyr::group_by(Algorithm.x, Task) %>% # regress effect size onto the statistic bow(tie(Slope, Intercept, p.value, min.x, max.x) := reg.ana(Statistic.x, Statistic.y)) tabl.gen <- reg.results %>% dplyr::rename(Algorithm="Algorithm.x") %>% dplyr::group_by(Algorithm, Task) %>% dplyr::summarise(slope.ct = sum(Slope > 0), pval.ct = sum(p.value < .05), N.Datasets=n()) %>% dplyr::select(Task, Algorithm, slope.ct, pval.ct, N.Datasets) %>% dplyr::mutate(Task=recode_factor(Task, "(A) Sex"="(II.A) Sex, Genomics", "(B) Cancer"="(II.B) Cancer, Genomics")) res.shapes=c(19,18,17,16,15,19,18) names(res.shapes) <- c("chr", "50kb", "500kb", "5MB", "amplicon", "CPM", "counts") genomics.result <- reg.results %>% dplyr::mutate(Task=factor(Task, levels=rev(c("(A) Sex", "(B) Cancer")), ordered=TRUE)) %>% dplyr::mutate(Algorithm.x = recode_factor(Algorithm.x, "Discr" = "(i) Discr.", "PICC" = "(ii) PICC", "I2C2" = "(iii) I2C2", "FPI"="(iv) Finger.", "DISCO"="(v) Kernel")) %>% gather("Condition", "xval", min.x, max.x) %>% mutate(y.pos=Slope*xval + Intercept) %>% ggplot(aes(x=xval, y=y.pos, group)) + geom_line() + geom_point(data=genomics.results %>% dplyr::mutate(Algorithm.x=recode_factor(Algorithm.x, "Discr" = "(i) Discr.", "PICC" = "(ii) PICC", "I2C2" = "(iii) I2C2", "FPI"="(iv) Finger.", "DISCO"="(v) Kernel")), aes(x=Statistic.x, y=Statistic.y, color=xfm, shape=Resolution)) + facet_grid(Task~Algorithm.x) + scale_shape_manual(values=res.shapes) + scale_y_continuous(expand=c(.01, .01)) + scale_x_continuous(expand=c(.01, .01)) + coord_cartesian(xlim=c(0,1), ylim=c(0,1)) + xlab("Reference Statistic") + ylab("Effect Size") + ggtitle("(II) Genomics") + theme_bw() + guides(shape=FALSE, color=FALSE) + theme(text=element_text(size=18))
tabl.overall <- tabl.neuro %>% rbind(tabl.gen %>% dplyr::mutate(Algorithm=recode_factor(Algorithm,"Discr"="Discr.", "FPI"="Finger."))) total.row <- tabl.overall %>% dplyr::group_by(Algorithm) %>% dplyr::summarize(N.Datasets=sum(N.Datasets), pval.ct=sum(pval.ct), slope.ct=sum(slope.ct)) %>% dplyr::mutate(Task="Total") tabl.overall <- tabl.overall %>% rbind(total.row) tab.plt <- tabl.overall %>% dplyr::mutate(pval.frac = pval.ct/N.Datasets, slope.frac=slope.ct/N.Datasets) %>% pivot_longer(cols=c("pval.frac", "slope.frac"), names_to="Type", values_to="Fraction") %>% dplyr::mutate(Text=sprintf("%.2f", Fraction)) %>% dplyr::mutate(Task=factor(Task, levels=c("(I.A) Sex, Neuro", "(I.B) Age, Neuro", "(II.A) Sex, Genomics", "(II.B) Cancer, Genomics", "Total"), ordered=TRUE), Type=factor(Type, levels=rev(c("pval.frac", "slope.frac")), ordered=TRUE), TypeAlg=paste0(Algorithm, Type)) %>% dplyr::select(Task, Text, TypeAlg, Fraction) %>% rbind(data.frame(TypeAlg="#Datasets", Task=c("(I.A) Sex, Neuro", "(I.B) Age, Neuro", "(II.A) Sex, Genomics", "(II.B) Cancer, Genomics", "Total"), Text=c("n=22", "n=22", "n=1", "n=1", "n=46"), Fraction=NA)) %>% dplyr::mutate(TypeAlg=factor(TypeAlg, levels=c("Discr.slope.frac", "Discr.pval.frac", "PICCslope.frac", "PICCpval.frac", "I2C2slope.frac", "I2C2pval.frac", "Finger.slope.frac", "Finger.pval.frac", "Kernelslope.frac", "Kernelpval.frac", "#Datasets"))) %>% ggplot(aes(x=0, y=0, fill=Fraction)) + geom_tile() + geom_text(color="white", aes(label=Text), size=14) + scale_fill_gradient(low="red", high="forestgreen") + scale_x_continuous(expand=c(0,0)) + scale_y_continuous(expand=c(0,0)) + facet_grid(Task ~ TypeAlg, switch="y") + theme_bw() + theme(panel.spacing=unit(0, "lines"), strip.text.y.left=element_text(angle=0), legend.position = "bottom", axis.title.x=element_blank(), axis.ticks.x=element_blank(), axis.text.x=element_blank(), axis.title.y=element_blank(), axis.ticks.y=element_blank(), axis.text.y=element_blank()) + ggtitle("(III) Comparison") + theme(text=element_text(size=18))
13 x 11
plot(arrangeGrob(neuro.plt, genomics.result, tab.plt, heights=c(.5, .5, .5)))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.