exploratory/Q2.R

library(ROL)
library(ggplot2)
library(gridExtra)
library(driver)

# get distributions of distances between: (1) "baselines": empirical average ALR abundances
#                                         (2) "dynamics": microbial covariance matrices
model_list <- get_fitted_model_list(tax_level="ASV", MAP=TRUE)
baselines <- matrix(NA, model_list$D-1, length(model_list$model_list))
for(i in 1:length(model_list$model_list)) {
  data <- readRDS(model_list$model_list[i])
  props <- alrInv_array(data$alr_ys, coords=2)
  ilr_ys <- ilr_array(props, parts=2)
  baselines[,i] <- colMeans(data$alr_ys)
}
baseline_dist <- as.matrix(dist(t(baselines)))
dynamics_dist <- readRDS("output/Sigma_distance_ASV_MAP.rds")$distance_mat

# order of hosts in baseline and dynamics distance matrices is demonstrably
# identical as in model_list$hosts

baseline_distro <- baseline_dist[upper.tri(baseline_dist, diag=F)]
dynamics_distro <- dynamics_dist[upper.tri(dynamics_dist, diag=F)]

# visualize distributions over these distances
if(FALSE) {
  df.baseline <- data.frame(distance=c(baseline_distro))
  df.dynamics <- data.frame(distance=c(dynamics_distro))

  # plot histograms of these distributions
  p.1 <- ggplot(df.baseline) +
    geom_histogram(aes(x=distance)) +
    xlab("baseline distance (Euclidean)")
  p.2 <- ggplot(df.dynamics) +
    geom_histogram(aes(x=distance)) +
    xlab("dynamics distance (Riemannian)")
  p <- grid.arrange(p.1, p.2, nrow=1)
  ggsave("output/distance_distributions.png", p, dpi=150, units="in", height=4, width=8)
}

n_hosts <- length(model_list$hosts)
unique_combos <- combn(1:n_hosts, 2)
n_unique_combos <- ncol(unique_combos)

quantile_mat.baseline <- matrix(NA, n_hosts, n_hosts)
quantile_mat.dynamics <- matrix(NA, n_hosts, n_hosts)

for(i in 1:n_hosts) {
  for(j in 1:n_hosts) {
    if(i < j) {
      # only consider unique combos
      distance <- baseline_dist[i,j]
      distance.quantile <- sum(baseline_distro < distance)/n_unique_combos
      quantile_mat.baseline[i,j] <- distance.quantile
      distance <- dynamics_dist[i,j]
      distance.quantile <- sum(dynamics_distro < distance)/n_unique_combos
      quantile_mat.dynamics[i,j] <- distance.quantile
    }
  }
}

# for random pairs
n_sample <- 1000
iterate_combos <- sample(1:n_unique_combos)[1:n_sample]
selected_dist.baseline <- c()
selected_dist.dynamics <- c()
for(i in 1:n_sample) {
  hosts <- c(unique_combos[1,iterate_combos[i]], unique_combos[2,iterate_combos[i]])
  selected_dist.baseline <- c(selected_dist.baseline, quantile_mat.baseline[min(hosts), max(hosts)])
  selected_dist.dynamics <- c(selected_dist.dynamics, quantile_mat.dynamics[min(hosts), max(hosts)])
}

# plot any evidence of correlation?
df <- data.frame(x=selected_dist.baseline, y=selected_dist.dynamics)

p <- ggplot(df) +
  geom_point(aes(x=x, y=y)) +
  xlab("baseline quantile") +
  ylab("dynamics quantile")
ggsave("output/distance_quantile_correlations.png", p, dpi=150, units="in", height=6, width=6)

# visualize via heatmaps
df <- gather_array(quantile_mat.baseline, "quantile", "host.1", "host.2")
p.1 <- ggplot(df, aes(x=host.1, y=host.2)) +
  geom_tile(aes(fill = quantile)) +
  scale_fill_viridis_c(option = "B", direction = -1) +
  xlab("host 1") +
  ylab("host 2") +
  theme_light()
df <- gather_array(quantile_mat.dynamics, "quantile", "host.1", "host.2")
p.2 <- ggplot(df, aes(x=host.1, y=host.2)) +
  geom_tile(aes(fill = quantile)) +
  scale_fill_viridis_c(option = "B", direction = -1) +
  xlab("host 1") +
  ylab("host 2") +
  theme_light()
p <- grid.arrange(p.1, p.2, nrow=2)
ggsave("output/quantile_heatmaps.png", p, dpi=150, units="in", height=8, width=5)
kimberlyroche/ROL documentation built on Dec. 10, 2020, 2:18 a.m.