inst/doc/TNBC.R

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

# knitr::opts_chunk$set(eval = !is_check)

## ----setup--------------------------------------------------------------------
library(funkycells)

## ----define_functions, echo=FALSE---------------------------------------------
full_run <- FALSE # This runs only computations that are feasibly quick
paperRun <- FALSE * full_run  # This creates and saves figures as in paper
save_path <- tempdir() # Location to save files
specify_decimal <- function(x, k) {
  trimws(format(round(x, k), nsmall = k))
}

## ----getData------------------------------------------------------------------
TNBC_pheno <- TNBC_pheno
TNBC_meta <- TNBC_meta

## ----specifyPheno-------------------------------------------------------------
phenos <- unique(TNBC_pheno$Phenotype)
pheno_interactions <- rbind(
  data.frame(t(combn(phenos, 2))),
  data.frame("X1" = phenos, "X2" = phenos)
)

phenos_subset <- c(
  "Tumor", "CD3T", "CD4T", "CD8T", "B", "DC",
  "DCMono", "Macrophage", "MonoNeu", "NK", "Treg"
)
pheno_interactions_subset <- data.frame(
  Var1 = rep("Tumor", 11),
  Var2 = c(
    "Tumor", "CD3T", "CD4T", "CD8T", "B", "DC",
    "DCMono", "Macrophage", "MonoNeu", "NK", "Treg"
  )
)

## ----TNBCInfo-----------------------------------------------------------------
data_pheno_stat <- data.frame("pheno" = phenos)
for (person in unique(TNBC_pheno$Person)) {
  tmp <- as.data.frame(table(TNBC_pheno[TNBC_pheno$Person == person, "Phenotype"]))
  colnames(tmp) <- c("pheno", person)
  data_pheno_stat <- merge(x = data_pheno_stat, y = tmp, all.x = TRUE)
}

agent_info_subset <- data.frame(data_pheno_stat[1],
  "mean" = rowMeans(data_pheno_stat[-1], na.rm = TRUE),
  "med" = apply(data_pheno_stat[-1], 1, median, na.rm = TRUE)
)

## ----paper_K_example, echo=FALSE, eval=paperRun-------------------------------
tnbc_fig <- plotPP(TNBC_pheno[TNBC_pheno$Person == 2, c("cellx", "celly", "Phenotype")],
  ptSize = 3, colorGuide = "none", dropAxes = T
)

png(paste0(save_path,"/paper/tnbc_person2_phenotypes.png"), 
    width = 800, height = 800)
print(tnbc_fig)
dev.off()

kf <- getKFunction(
  data = TNBC_pheno[TNBC_pheno$Person == 2, -1],
  agents = c("Tumor", "MonoNeu"),
  unit = "Person",
  rCheckVals = seq(0, 50, 1)
)

k_fig <- ggplot2::ggplot() +
  ggplot2::geom_line(ggplot2::aes(x = r, y = K1), data = kf, linewidth = 2) +
  ggplot2::geom_line(ggplot2::aes(x = seq(0, 50, 1), y = pi * seq(0, 50, 1)^2),
    linewidth = 2, col = "red", linetype = "dashed"
  ) +
  ggplot2::theme_bw() +
  ggplot2::theme(
    axis.title = ggplot2::element_blank(),
    axis.text = ggplot2::element_blank()
  )

png(paste0(save_path,"/paper/tnbc_person2_tumor_mononeu_k.png"), 
    width = 800, height = 800)
print(k_fig)
dev.off()

## ----buildNUllData------------------------------------------------------------
set.seed(123)
dat0 <- simulatePP(
  agentVarData =
    data.frame(
      "outcome" = c(0, 1),
      "c1" = c(0, 0),
      "c2" = c(1 / 25, 1 / 25), "c3" = c(1 / 50, 1 / 50),
      "c4" = c(0, 0),
      "c5" = c(0, 0), "c6" = c(0, 0),
      "c7" = c(0, 0), "c8" = c(1 / 100, 1 / 100),
      "c9" = c(1 / 20, 1 / 20), "c10" = c(1 / 250, 1 / 250),
      "c11" = c(1 / 100, 1 / 100), "c12" = c(1 / 80, 1 / 80),
      "c13" = c(0, 0), "c14" = c(0, 0),
      "c15" = c(0, 0), "c16" = c(1 / 10, 1 / 10)
    ),
  agentKappaData = data.frame(
    "agent" = paste0("c", 1:16),
    "clusterAgent" = c(NA, "c1", "c1", rep(NA, 12), "c1"),
    "kappa" = c(
      rbinom(1, 100, 0.5),
      rbinom(1, 100, 0.5),
      rbinom(1, 30, 0.5),
      rbinom(1, 80, 0.5),
      rbinom(1, 350, 0.5),
      rbinom(1, 100, 0.5),
      rbinom(1, 120, 0.5),
      rbinom(1, 150, 0.5),
      rbinom(2, 250, 0.5),
      rbinom(1, 600, 0.5),
      rbinom(1, 60, 0.5),
      rbinom(2, 140, 0.5),
      rbinom(1, 20, 0.5),
      rbinom(1, 5, 0.5)
    )
  ),
  unitsPerOutcome = 15,
  replicatesPerUnit = 1,
  silent = FALSE
)
dat1 <- simulatePP(
  agentVarData =
    data.frame(
      "outcome" = c(0, 1),
      "c1" = c(0, 0),
      "c2" = c(1 / 25, 1 / 25), "c3" = c(1 / 50, 1 / 50),
      "c4" = c(0, 0),
      "c5" = c(0, 0), "c6" = c(0, 0),
      "c7" = c(0, 0), "c8" = c(1 / 100, 1 / 100),
      "c9" = c(1 / 20, 1 / 20), "c10" = c(1 / 250, 1 / 250),
      "c11" = c(1 / 100, 1 / 100), "c12" = c(1 / 80, 1 / 80),
      "c13" = c(0, 0), "c14" = c(0, 0),
      "c15" = c(0, 0), "c16" = c(1 / 10, 1 / 10)
    ),
  agentKappaData = data.frame(
    "agent" = paste0("c", 1:16),
    "clusterAgent" = c(NA, "c1", "c1", rep(NA, 12), "c1"),
    "kappa" = c(
      rbinom(1, 100, 0.5),
      rbinom(1, 100, 0.5),
      rbinom(1, 30, 0.5),
      rbinom(1, 80, 0.5),
      rbinom(1, 350, 0.5),
      rbinom(1, 100, 0.5),
      rbinom(1, 120, 0.5),
      rbinom(1, 150, 0.5),
      rbinom(2, 250, 0.5),
      rbinom(1, 600, 0.5),
      rbinom(1, 60, 0.5),
      rbinom(2, 140, 0.5),
      rbinom(1, 20, 0.5),
      rbinom(1, 5, 0.5)
    )
  ),
  unitsPerOutcome = 2,
  replicatesPerUnit = 1,
  silent = F
)
dat1$unit <- ifelse(dat1$unit == "u1", "u31",
  ifelse(dat1$unit == "u2", "u32",
    ifelse(dat1$unit == "u3", "u33",
      ifelse(dat1$unit == "u4", "u34", NA)
    )
  )
)
dat1$replicate <- ifelse(dat1$replicate == "1", "31",
  ifelse(dat1$replicate == "2", "32",
    ifelse(dat1$replicate == "3", "33",
      ifelse(dat1$replicate == "4", "34", NA)
    )
  )
)
dat <- rbind(dat0, dat1)

## ----tnbc_image, fig.width=5, fig.height=5, fig.cap="TNBC Patient Image"------
plotPP(
  data = TNBC_pheno[TNBC_pheno$Person == 2, c("cellx", "celly", "Phenotype")],
  ptSize = 1, dropAxes = T, colorGuide = "none"
)

## ----sim_image, fig.width=5, fig.height=5, fig.cap="Simulated Patient Image"----
plotPP(dat[dat$replicate == 18, c("x", "y", "type")],
  ptSize = 1, dropAxes = T, colorGuide = "none"
)

## ----paper_simtnbc_comparision, echo=FALSE, eval=paperRun---------------------
tmp <- length(table(dat[dat$replicate == 1, c("type")]))
clrs <- scales::hue_pal()(tmp)

tnbc_image <- plotPP(
  data = TNBC_pheno[TNBC_pheno$Person == 2, c("cellx", "celly", "Phenotype")],
  ptSize = 3, dropAxes = T, colorGuide = "none",
  colors = clrs[-length(clrs)]
)
png(paste0(save_path,"/paper/tnbc_image.png"), 
    width = 800, height = 800)
print(tnbc_image)
dev.off()

sim_image <- plotPP(
  data = dat[dat$replicate == 18, c("x", "y", "type")],
  ptSize = 3, dropAxes = T, colorGuide = "none",
  colors = c(clrs[1:2], clrs[16], clrs[3:7], clrs[15], clrs[8:14])
)
png(paste0(save_path,"/paper/sim_image.png"), 
    width = 800, height = 800)
print(sim_image)
dev.off()

## ----simulateNoInteraction, eval=full_run-------------------------------------
#  cells <- paste0("c", 1:16)
#  cells_interactions <- rbind(
#    data.frame(t(combn(cells, 2))),
#    data.frame("X1" = cells, "X2" = cells)
#  )
#  
#  set.seed(12345)
#  pcaData <- getKsPCAData(
#    data = dat, replicate = "replicate",
#    agents_df = cells_interactions,
#    xRange = c(0, 1), yRange = c(0, 1),
#    silent = F
#  )
#  pcaMeta <- simulateMeta(pcaData,
#    metaInfo = data.frame(
#      "var" = c("age"),
#      "rdist" = c("rnorm"),
#      "outcome_0" = c("25"),
#      "outcome_1" = c("25")
#    )
#  )

## ----analyzeNoEffect, eval=full_run-------------------------------------------
#  set.seed(123)
#  rfcv <- funkyModel(
#    data = pcaMeta,
#    outcome = "outcome",
#    unit = "unit",
#    metaNames = c("age")
#  )

## ----plot_noeffect_full, fig.width=5, fig.height=5, fig.cap="No Effect Variable Importance (All)", eval=full_run----
#  rfcv$viPlot

## ----plot_noeffect_top25, fig.width=5, fig.height=5, fig.cap="No Effect Variable Importance (Top 25)", eval=full_run----
#  rfcv$subset_viPlot

## ----paper_noEffect, echo=FALSE, eval=paperRun--------------------------------
# Get Vars
viData <- rfcv$VariableImportance
accData <- rfcv$AccuracyEstimate
NoiseCutoff <- rfcv$NoiseCutoff
InterpolationCutoff <- rfcv$InterpolationCutoff
subsetPlotSize <- rfcv$AdditionalParams$subsetPlotSize

# Plot Full
maxVal <- max(InterpolationCutoff, NoiseCutoff, viData$est)
plot_full <- ggplot2::ggplot(
  data = viData,
  mapping = ggplot2::aes(
    x = factor(stats::reorder(var, est)),
    y = ifelse(est / maxVal > 1, 1,
      ifelse(est / maxVal < 0, 0,
        est / maxVal
      )
    ),
    group = 1
  )
) +
  ggplot2::geom_errorbar(
    ggplot2::aes(
      ymin = ifelse((est - sd) / maxVal < 0, 0, (est - sd) / maxVal),
      ymax = ifelse((est + sd) / maxVal > 1, 1, (est + sd) / maxVal)
    ),
    color = "black", width = 0.2
  ) +
  ggplot2::geom_point(color = "black", size = 3) +
  ggplot2::geom_hline(
    ggplot2::aes(yintercept = max(0, min(1, NoiseCutoff / maxVal))),
    color = "red", linetype = "dotted", linewidth = 2
  ) +
  ggplot2::coord_flip(ylim = c(0, 1)) +
  ggplot2::xlab(NULL) +
  ggplot2::ylim(c(0, 1)) +
  ggplot2::ylab(NULL) +
  ggplot2::theme_bw() +
  ggplot2::theme(
    axis.text = ggplot2::element_blank(),
    axis.ticks.y = ggplot2::element_blank(),
    panel.grid.major.y = ggplot2::element_blank(),
    axis.title = ggplot2::element_text(size = 20)
  ) +
  ggplot2::geom_line(ggplot2::aes(
    x = ordered(viData[order(-est), "var"]),
    y = InterpolationCutoff / maxVal
  ), color = "orange", linetype = "dashed", linewidth = 2) +
  ggplot2::ylab(paste0(
    "OOB (",
    specify_decimal(accData$OOB, 2),
    "), Guess (",
    specify_decimal(accData$guess, 2),
    "), Bias (",
    specify_decimal(accData$bias, 2), ")"
  ))

png(paste0(save_path,"/paper/noeffect_variableimportance.png"))
print(plot_full)
dev.off()

# Plot Subset
InterpolationCutoff <- InterpolationCutoff[1:subsetPlotSize]
viData <- viData[order(-viData$est), ]
viData <- viData[1:subsetPlotSize, ]

maxVal <- max(InterpolationCutoff, NoiseCutoff, viData$est)
plot_top25 <- ggplot2::ggplot(
  data = viData,
  mapping = ggplot2::aes(
    x = factor(stats::reorder(var, est)),
    y = ifelse(est / maxVal > 1, 1,
      ifelse(est / maxVal < 0, 0,
        est / maxVal
      )
    ),
    group = 1
  )
) +
  ggplot2::geom_errorbar(
    ggplot2::aes(
      ymin = ifelse((est - sd) / maxVal < 0, 0, (est - sd) / maxVal),
      ymax = ifelse((est + sd) / maxVal > 1, 1, (est + sd) / maxVal)
    ),
    color = "black", width = 0.2
  ) +
  ggplot2::geom_point(color = "black", size = 3) +
  ggplot2::geom_hline(
    ggplot2::aes(yintercept = max(0, min(1, NoiseCutoff / maxVal))),
    color = "red", linetype = "dotted", linewidth = 2
  ) +
  ggplot2::coord_flip(ylim = c(0, 1)) +
  ggplot2::xlab(NULL) +
  ggplot2::ylim(c(0, 1)) +
  ggplot2::ylab(NULL) +
  ggplot2::theme_bw() +
  ggplot2::theme(
    axis.text = ggplot2::element_text(size = 18),
    axis.text.x = ggplot2::element_blank(),
    axis.ticks.y = ggplot2::element_blank(),
    panel.grid.major.y = ggplot2::element_blank(),
    axis.title = ggplot2::element_text(size = 20)
  ) +
  ggplot2::geom_line(ggplot2::aes(
    x = ordered(viData[order(-est), "var"]),
    y = InterpolationCutoff / maxVal
  ), color = "orange", linetype = "dashed", linewidth = 2) +
  ggplot2::ylab(paste0(
    "OOB (",
    specify_decimal(accData$OOB, 2),
    "), Guess (",
    specify_decimal(accData$guess, 2),
    "), Bias (",
    specify_decimal(accData$bias, 2), ")"
  ))


png(paste0(save_path,"/paper/noeffect_variableimportance_top25.png"))
print(plot_top25)
dev.off()

## ----effectSimulationAndModel, eval=full_run----------------------------------
#  set.seed(123456)
#  dat0 <- simulatePP(
#    agentVarData =
#      data.frame(
#        "outcome" = c(0, 1),
#        "c1" = c(0, 0),
#        "c2" = c(1 / 25, 1 / 60), "c3" = c(1 / 50, 1 / 10),
#        "c4" = c(0, 0),
#        "c5" = c(0, 0), "c6" = c(0, 0),
#        "c7" = c(0, 0), "c8" = c(1 / 100, 1 / 100),
#        "c9" = c(1 / 20, 1 / 20), "c10" = c(1 / 250, 1 / 250),
#        "c11" = c(1 / 100, 1 / 100), "c12" = c(1 / 80, 1 / 80),
#        "c13" = c(0, 0), "c14" = c(0, 0),
#        "c15" = c(0, 0), "c16" = c(1 / 10, 1 / 10)
#      ),
#    agentKappaData = data.frame(
#      "agent" = paste0("c", 1:16),
#      "clusterAgent" = c(NA, "c1", "c1", rep(NA, 12), "c1"),
#      "kappa" = c(
#        rbinom(1, 100, 0.5),
#        rbinom(1, 100, 0.5),
#        rbinom(1, 30, 0.5),
#        rbinom(1, 80, 0.5),
#        rbinom(1, 350, 0.5),
#        rbinom(1, 100, 0.5),
#        rbinom(1, 120, 0.5),
#        rbinom(1, 150, 0.5),
#        rbinom(2, 250, 0.5),
#        rbinom(1, 600, 0.5),
#        rbinom(1, 60, 0.5),
#        rbinom(2, 140, 0.5),
#        rbinom(1, 20, 0.5),
#        rbinom(1, 5, 0.5)
#      )
#    ),
#    unitsPerOutcome = 15,
#    replicatesPerUnit = 1,
#    silent = F
#  )
#  dat1 <- simulatePP(
#    agentVarData =
#      data.frame(
#        "outcome" = c(0, 1),
#        "c1" = c(0, 0),
#        "c2" = c(1 / 25, 1 / 25), "c3" = c(1 / 50, 1 / 50),
#        "c4" = c(0, 0),
#        "c5" = c(0, 0), "c6" = c(0, 0),
#        "c7" = c(0, 0), "c8" = c(1 / 100, 1 / 100),
#        "c9" = c(1 / 20, 1 / 20), "c10" = c(1 / 250, 1 / 250),
#        "c11" = c(1 / 100, 1 / 100), "c12" = c(1 / 80, 1 / 80),
#        "c13" = c(0, 0), "c14" = c(0, 0),
#        "c15" = c(0, 0), "c16" = c(1 / 10, 1 / 10)
#      ),
#    agentKappaData = data.frame(
#      "agent" = paste0("c", 1:16),
#      "clusterAgent" = c(NA, "c1", "c1", rep(NA, 12), "c1"),
#      "kappa" = c(
#        rbinom(1, 100, 0.5),
#        rbinom(1, 100, 0.5),
#        rbinom(1, 30, 0.5),
#        rbinom(1, 80, 0.5),
#        rbinom(1, 350, 0.5),
#        rbinom(1, 100, 0.5),
#        rbinom(1, 120, 0.5),
#        rbinom(1, 150, 0.5),
#        rbinom(2, 250, 0.5),
#        rbinom(1, 600, 0.5),
#        rbinom(1, 60, 0.5),
#        rbinom(2, 140, 0.5),
#        rbinom(1, 20, 0.5),
#        rbinom(1, 5, 0.5)
#      )
#    ),
#    unitsPerOutcome = 2,
#    replicatesPerUnit = 1,
#    silent = F
#  )
#  dat1$unit <- ifelse(dat1$unit == "u1", "u31",
#    ifelse(dat1$unit == "u2", "u32",
#      ifelse(dat1$unit == "u3", "u33",
#        ifelse(dat1$unit == "u4", "u34", NA)
#      )
#    )
#  )
#  dat1$replicate <- ifelse(dat1$replicate == "1", "31",
#    ifelse(dat1$replicate == "2", "32",
#      ifelse(dat1$replicate == "3", "33",
#        ifelse(dat1$replicate == "4", "34", NA)
#      )
#    )
#  )
#  dat <- rbind(dat0, dat1)
#  
#  cells <- paste0("c", 1:16)
#  cells_interactions <- rbind(
#    data.frame(t(combn(cells, 2))),
#    data.frame("X1" = cells, "X2" = cells)
#  )
#  
#  pcaData <- getKsPCAData(
#    data = dat, replicate = "replicate",
#    agents_df = cells_interactions,
#    xRange = c(0, 1), yRange = c(0, 1),
#    silent = F
#  )
#  pcaMeta <- simulateMeta(pcaData,
#    metaInfo = data.frame(
#      "var" = c("age"),
#      "rdist" = c("rnorm"),
#      "outcome_0" = c("25"),
#      "outcome_1" = c("27")
#    )
#  )
#  
#  rfcv <- funkyModel(
#    data = pcaMeta,
#    outcome = "outcome",
#    unit = "unit",
#    metaNames = c("age")
#  )

## ----plot_effect_full, fig.width=5, fig.height=5, fig.cap="Effect Simulation Variable Importance (All)", eval=full_run----
#  rfcv$viPlot

## ----plot_effect_top25, fig.width=5, fig.height=5, fig.cap="Effect Simulation Variable Importance (Top 25)", eval=full_run----
#  rfcv$subset_viPlot

## ----paper_effect, echo=FALSE, eval=paperRun----------------------------------
# Get Vars
viData <- rfcv$VariableImportance
accData <- rfcv$AccuracyEstimate
NoiseCutoff <- rfcv$NoiseCutoff
InterpolationCutoff <- rfcv$InterpolationCutoff
subsetPlotSize <- rfcv$AdditionalParams$subsetPlotSize

# Plot Full
maxVal <- max(InterpolationCutoff, NoiseCutoff, viData$est)
plot_full <- ggplot2::ggplot(
  data = viData,
  mapping = ggplot2::aes(
    x = factor(stats::reorder(var, est)),
    y = ifelse(est / maxVal > 1, 1,
      ifelse(est / maxVal < 0, 0,
        est / maxVal
      )
    ),
    group = 1
  )
) +
  ggplot2::geom_errorbar(
    ggplot2::aes(
      ymin = ifelse((est - sd) / maxVal < 0, 0, (est - sd) / maxVal),
      ymax = ifelse((est + sd) / maxVal > 1, 1, (est + sd) / maxVal)
    ),
    color = "black", width = 0.2
  ) +
  ggplot2::geom_point(color = "black", size = 3) +
  ggplot2::geom_hline(
    ggplot2::aes(yintercept = max(0, min(1, NoiseCutoff / maxVal))),
    color = "red", linetype = "dotted", linewidth = 2
  ) +
  ggplot2::coord_flip(ylim = c(0, 1)) +
  ggplot2::xlab(NULL) +
  ggplot2::ylim(c(0, 1)) +
  ggplot2::ylab(NULL) +
  ggplot2::theme_bw() +
  ggplot2::theme(
    axis.text = ggplot2::element_blank(),
    axis.ticks.y = ggplot2::element_blank(),
    panel.grid.major.y = ggplot2::element_blank(),
    axis.title = ggplot2::element_text(size = 20)
  ) +
  ggplot2::geom_line(ggplot2::aes(
    x = ordered(viData[order(-est), "var"]),
    y = InterpolationCutoff / maxVal
  ), color = "orange", linetype = "dashed", linewidth = 2) +
  ggplot2::ylab(paste0(
    "OOB (",
    specify_decimal(accData$OOB, 2),
    "), Guess (",
    specify_decimal(accData$guess, 2),
    "), Bias (",
    specify_decimal(accData$bias, 2), ")"
  ))

png(paste0(save_path,"/paper/effect_variableimportance.png"))
print(plot_full)
dev.off()

# Plot Subset
InterpolationCutoff <- InterpolationCutoff[1:subsetPlotSize]
viData <- viData[order(-viData$est), ]
viData <- viData[1:subsetPlotSize, ]

maxVal <- max(InterpolationCutoff, NoiseCutoff, viData$est)
plot_top25 <- ggplot2::ggplot(
  data = viData,
  mapping = ggplot2::aes(
    x = factor(stats::reorder(var, est)),
    y = ifelse(est / maxVal > 1, 1,
      ifelse(est / maxVal < 0, 0,
        est / maxVal
      )
    ),
    group = 1
  )
) +
  ggplot2::geom_errorbar(
    ggplot2::aes(
      ymin = ifelse((est - sd) / maxVal < 0, 0, (est - sd) / maxVal),
      ymax = ifelse((est + sd) / maxVal > 1, 1, (est + sd) / maxVal)
    ),
    color = "black", width = 0.2
  ) +
  ggplot2::geom_point(color = "black", size = 3) +
  ggplot2::geom_hline(
    ggplot2::aes(yintercept = max(0, min(1, NoiseCutoff / maxVal))),
    color = "red", linetype = "dotted", linewidth = 2
  ) +
  ggplot2::coord_flip(ylim = c(0, 1)) +
  ggplot2::xlab(NULL) +
  ggplot2::ylim(c(0, 1)) +
  ggplot2::ylab(NULL) +
  ggplot2::theme_bw() +
  ggplot2::theme(
    axis.text = ggplot2::element_text(size = 18),
    axis.text.x = ggplot2::element_blank(),
    axis.ticks.y = ggplot2::element_blank(),
    panel.grid.major.y = ggplot2::element_blank(),
    axis.title = ggplot2::element_text(size = 20)
  ) +
  ggplot2::geom_line(ggplot2::aes(
    x = ordered(viData[order(-est), "var"]),
    y = InterpolationCutoff / maxVal
  ), color = "orange", linetype = "dashed", linewidth = 2) +
  ggplot2::ylab(paste0(
    "OOB (",
    specify_decimal(accData$OOB, 2),
    "), Guess (",
    specify_decimal(accData$guess, 2),
    "), Bias (",
    specify_decimal(accData$bias, 2), ")"
  ))

png(paste0(save_path,"/paper/effect_variableimportance_top25.png"))
print(plot_top25)
dev.off()

## ----tnbc_pheno_model, eval=full_run------------------------------------------
#  dataPCA_pheno <- getKsPCAData(
#    data = TNBC_pheno, unit = "Person",
#    agents_df = pheno_interactions,
#    rCheckVals = seq(0, 50, 1)
#  )
#  
#  dataPCAAge_pheno <- merge(dataPCA_pheno, TNBC_meta)
#  
#  set.seed(123456)
#  rfcv <- funkyModel(
#    data = dataPCAAge_pheno, K = 10,
#    outcome = "Class", unit = "Person",
#    metaNames = c("Age"), synthetics = 100,
#    alpha = 0.05, silent = FALSE,
#    subsetPlotSize = 25
#  )

## ----tnbc_full, fig.width=5, fig.height=5, fig.cap="TNBC Phenotype Variable Importance (All)", eval=full_run----
#  rfcv$viPlot

## ----tnbc_top25, fig.width=5, fig.height=5, fig.cap="TNBC Phenotype Variable Importance (Top 25)", eval=full_run----
#  rfcv$subset_viPlot

## ----paper_tnbc_variable_importance, echo=FALSE, eval=paperRun----------------
# Get Vars
viData <- rfcv$VariableImportance
accData <- rfcv$AccuracyEstimate
NoiseCutoff <- rfcv$NoiseCutoff
InterpolationCutoff <- rfcv$InterpolationCutoff
subsetPlotSize <- rfcv$AdditionalParams$subsetPlotSize

# Plot Full
maxVal <- max(InterpolationCutoff, NoiseCutoff, viData$est)
plot_full <- ggplot2::ggplot(
  data = viData,
  mapping = ggplot2::aes(
    x = factor(stats::reorder(var, est)),
    y = ifelse(est / maxVal > 1, 1,
      ifelse(est / maxVal < 0, 0,
        est / maxVal
      )
    ),
    group = 1
  )
) +
  ggplot2::geom_errorbar(
    ggplot2::aes(
      ymin = ifelse((est - sd) / maxVal < 0, 0, (est - sd) / maxVal),
      ymax = ifelse((est + sd) / maxVal > 1, 1, (est + sd) / maxVal)
    ),
    color = "black", width = 0.2
  ) +
  ggplot2::geom_point(color = "black", size = 3) +
  ggplot2::geom_hline(
    ggplot2::aes(yintercept = max(0, min(1, NoiseCutoff / maxVal))),
    color = "red", linetype = "dotted", linewidth = 2
  ) +
  ggplot2::coord_flip(ylim = c(0, 1)) +
  ggplot2::xlab(NULL) +
  ggplot2::ylim(c(0, 1)) +
  ggplot2::ylab(NULL) +
  ggplot2::theme_bw() +
  ggplot2::theme(
    # axis.text = ggplot2::element_text(size = 18),
    axis.text = ggplot2::element_blank(),
    axis.ticks.y = ggplot2::element_blank(),
    panel.grid.major.y = ggplot2::element_blank(),
    axis.title = ggplot2::element_text(size = 20)
  ) +
  ggplot2::geom_line(ggplot2::aes(
    x = ordered(viData[order(-est), "var"]),
    y = InterpolationCutoff / maxVal
  ), color = "orange", linetype = "dashed", linewidth = 2) +
  ggplot2::ylab(paste0(
    "OOB (",
    specify_decimal(accData$OOB, 2),
    "), Guess (",
    specify_decimal(accData$guess, 2),
    "), Bias (",
    specify_decimal(accData$bias, 2), ")"
  ))

png(paste0(save_path,"/paper/tnbc_variableimportance.png"))
print(plot_full)
dev.off()

# Plot Subset
InterpolationCutoff <- InterpolationCutoff[1:subsetPlotSize]
viData <- viData[order(-viData$est), ]
viData <- viData[1:subsetPlotSize, ]

maxVal <- max(InterpolationCutoff, NoiseCutoff, viData$est)
plot_top25 <- ggplot2::ggplot(
  data = viData,
  mapping = ggplot2::aes(
    x = factor(stats::reorder(var, est)),
    y = ifelse(est / maxVal > 1, 1,
      ifelse(est / maxVal < 0, 0,
        est / maxVal
      )
    ),
    group = 1
  )
) +
  ggplot2::geom_errorbar(
    ggplot2::aes(
      ymin = ifelse((est - sd) / maxVal < 0, 0, (est - sd) / maxVal),
      ymax = ifelse((est + sd) / maxVal > 1, 1, (est + sd) / maxVal)
    ),
    color = "black", width = 0.2
  ) +
  ggplot2::geom_point(color = "black", size = 3) +
  ggplot2::geom_hline(
    ggplot2::aes(yintercept = max(0, min(1, NoiseCutoff / maxVal))),
    color = "red", linetype = "dotted", linewidth = 2
  ) +
  ggplot2::coord_flip(ylim = c(0, 1)) +
  ggplot2::xlab(NULL) +
  ggplot2::ylim(c(0, 1)) +
  ggplot2::ylab(NULL) +
  ggplot2::theme_bw() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_blank(),
    axis.text.y = ggplot2::element_text(size = 14),
    axis.ticks.y = ggplot2::element_blank(),
    panel.grid.major.y = ggplot2::element_blank(),
    axis.title = ggplot2::element_text(size = 17)
  ) +
  ggplot2::geom_line(ggplot2::aes(
    x = ordered(viData[order(-est), "var"]),
    y = InterpolationCutoff / maxVal
  ), color = "orange", linetype = "dashed", linewidth = 2) +
  ggplot2::ylab(paste0(
    "OOB (",
    specify_decimal(accData$OOB, 2),
    "), Guess (",
    specify_decimal(accData$guess, 2),
    "), Bias (",
    specify_decimal(accData$bias, 2), ")"
  ))

png(paste0(save_path,"/paper/tnbc_variableimportance_top25.png"))
print(plot_top25)
dev.off()

## ----significantTNBCK---------------------------------------------------------
tmp <- getKFunction(TNBC_pheno[TNBC_pheno$Class == 0, -1],
  c("Tumor", "Tumor"),
  unit = "Person",
  rCheckVals = seq(0, 50, 1)
)
tmp1 <- getKFunction(TNBC_pheno[TNBC_pheno$Class == 1, -1],
  c("Tumor", "Tumor"),
  unit = "Person",
  rCheckVals = seq(0, 50, 1)
)

tmp_1 <- tidyr::pivot_longer(data = tmp, cols = -r)
tmp1_1 <- tidyr::pivot_longer(data = tmp1, cols = -r)

data_plot <- rbind(
  data.frame(
    "r" = tmp_1$r,
    "K" = tmp_1$value,
    "unit" = tmp_1$name,
    "outcome" = "0"
  ),
  data.frame(
    "r" = tmp1_1$r,
    "K" = tmp1_1$value,
    "unit" = paste0(tmp1_1$name, "_1"),
    "outcome" = "1"
  )
)

## ----significantTNBCKPlot, fig.width=5, fig.height=5, fig.cap="TNBC Phenotype Significant K Function"----
plot_K_functions(data_plot)

## ----paper_sig_K_functions, echo=FALSE, eval=paperRun-------------------------
data <- data_plot

# Prep Legend
info <- unique(data[, c("unit", "outcome")])
info$Missing <- NA
for (i in 1:nrow(info)) {
  info[i, 3] <- nrow(data[data$unit == info$unit[i] &
    !stats::complete.cases(data), ]) > 0
}

info_labels <- data.frame(
  "outcome" = unique(data$outcome),
  "units" = NA,
  "Missing" = NA,
  "Label" = NA
)
for (i in 1:nrow(info_labels)) {
  info_labels[i, 2] <- nrow(info[info$outcome == info_labels[i, 1], ])
  info_labels[i, 3] <- nrow(info[info$outcome == info_labels[i, 1] &
    info$Missing, ])
  # Only label outcomes with numbers if at least one is missing in whole set
  if (sum(info$Missing) != 0) {
    info_labels[i, 4] <- paste0(
      info_labels[i, 1], " (",
      info_labels[i, 2] - info_labels[i, 3], "/",
      info_labels[i, 2], ")"
    )
  } else {
    info_labels[i, 4] <- info_labels[i, 1]
  }
}

# Build Averages
data_wide <- tidyr::pivot_wider(data,
  names_from = "unit",
  values_from = "K"
)
data_avg <- data.frame("r" = unique(data_wide$r))

for (i in 1:nrow(info_labels)) {
  data_avg[[info_labels[i, "outcome"]]] <-
    rowMeans(data_wide[data_wide$outcome == info_labels[i, "outcome"], -c(1:2)],
      na.rm = TRUE
    )
}
data_avg <- tidyr::pivot_longer(data_avg, cols = -r)
colnames(data_avg) <- c("r", "outcome", "Value")

# Plot
return_plot <-
  ggplot2::ggplot(
    data = stats::na.omit(data),
    ggplot2::aes(
      x = r, y = K,
      group = unit, color = outcome
    )
  ) +
  ggplot2::geom_line(alpha = 0.2, linewidth = 1.25) +
  ggplot2::geom_line(
    ggplot2::aes(
      x = r, y = Value,
      group = outcome, color = outcome
    ),
    data = data_avg, linewidth = 2
  ) +
  ggplot2::theme_bw() +
  ggplot2::theme(
    axis.text = ggplot2::element_text(size = 18),
    axis.text.y = ggplot2::element_blank(),
    axis.ticks.y = ggplot2::element_blank(),
    panel.grid.major.y = ggplot2::element_blank(),
    # axis.title = ggplot2::element_text(size = 22),
    axis.title = ggplot2::element_blank(),
    legend.position = "none"
  ) # +
# ggplot2::geom_line(ggplot2::aes(x = r, y = pi * r^2,
#                                 group = outcome),
#                    data = data_avg, linewidth = 1.25,
#                    color = "black", linetype = "dashed"
# )

png(paste0(save_path,"/paper/TNBC_sig_KFunctions.png"))
print(return_plot)
dev.off()

## ----insignificantTNBCK-------------------------------------------------------
tmp <- getKFunction(TNBC_pheno[TNBC_pheno$Class == 0, -1],
  c("CD4T", "Endothelial"),
  unit = "Person",
  rCheckVals = seq(0, 50, 1)
)
tmp1 <- getKFunction(TNBC_pheno[TNBC_pheno$Class == 1, -1],
  c("CD4T", "Endothelial"),
  unit = "Person",
  rCheckVals = seq(0, 50, 1)
)

tmp_1 <- tidyr::pivot_longer(data = tmp, cols = -r)
tmp1_1 <- tidyr::pivot_longer(data = tmp1, cols = -r)

data_plot <- rbind(
  data.frame(
    "r" = tmp_1$r,
    "K" = tmp_1$value,
    "unit" = tmp_1$name,
    "outcome" = "0"
  ),
  data.frame(
    "r" = tmp1_1$r,
    "K" = tmp1_1$value,
    "unit" = paste0(tmp1_1$name, "_1"),
    "outcome" = "1"
  )
)

## ----insignificantTNBCKPlot, fig.width=5, fig.height=5, fig.cap="TNBC Phenotype Insignficant K Function"----
plot_K_functions(data_plot)

## ----paper_insig_K_functions, echo=FALSE, eval=paperRun-----------------------
data <- data_plot

# Prep Legend
info <- unique(data[, c("unit", "outcome")])
info$Missing <- NA
for (i in 1:nrow(info)) {
  info[i, 3] <- nrow(data[data$unit == info$unit[i] &
    !stats::complete.cases(data), ]) > 0
}

info_labels <- data.frame(
  "outcome" = unique(data$outcome),
  "units" = NA,
  "Missing" = NA,
  "Label" = NA
)
for (i in 1:nrow(info_labels)) {
  info_labels[i, 2] <- nrow(info[info$outcome == info_labels[i, 1], ])
  info_labels[i, 3] <- nrow(info[info$outcome == info_labels[i, 1] &
    info$Missing, ])
  # Only label outcomes with numbers if at least one is missing in whole set
  if (sum(info$Missing) != 0) {
    info_labels[i, 4] <- paste0(
      info_labels[i, 1], " (",
      info_labels[i, 2] - info_labels[i, 3], "/",
      info_labels[i, 2], ")"
    )
  } else {
    info_labels[i, 4] <- info_labels[i, 1]
  }
}

# Build Averages
data_wide <- tidyr::pivot_wider(data,
  names_from = "unit",
  values_from = "K"
)
data_avg <- data.frame("r" = unique(data_wide$r))

for (i in 1:nrow(info_labels)) {
  data_avg[[info_labels[i, "outcome"]]] <-
    rowMeans(data_wide[data_wide$outcome == info_labels[i, "outcome"], -c(1:2)],
      na.rm = TRUE
    )
}
data_avg <- tidyr::pivot_longer(data_avg, cols = -r)
colnames(data_avg) <- c("r", "outcome", "Value")

# Plot
return_plot <-
  ggplot2::ggplot(
    data = stats::na.omit(data),
    ggplot2::aes(
      x = r, y = K,
      group = unit, color = outcome
    )
  ) +
  ggplot2::geom_line(alpha = 0.2, linewidth = 1.25) +
  ggplot2::geom_line(
    ggplot2::aes(
      x = r, y = Value,
      group = outcome, color = outcome
    ),
    data = data_avg, linewidth = 2
  ) +
  ggplot2::theme_bw() +
  ggplot2::theme(
    axis.text = ggplot2::element_text(size = 18),
    axis.text.y = ggplot2::element_blank(),
    axis.ticks.y = ggplot2::element_blank(),
    panel.grid.major.y = ggplot2::element_blank(),
    # axis.title = ggplot2::element_text(size = 22),
    axis.title = ggplot2::element_blank(),
    legend.position = "none"
  ) # +
# ggplot2::geom_line(ggplot2::aes(x = r, y = pi * r^2,
#                                 group = outcome),
#                    data = data_avg, linewidth = 1.25,
#                    color = "black", linetype = "dashed"
# )

png(paste0(save_path,"/paper/TNBC_insig_KFunctions.png"))
print(return_plot)
dev.off()

Try the funkycells package in your browser

Any scripts or data that you put into this service are public.

funkycells documentation built on Aug. 9, 2023, 5:10 p.m.