suppressPackageStartupMessages({
  library(dplyr)
  library(ggplot2)
  library(tidyr)
  library(viridis)
  library(lubridate)
  library(Matrix)
  library(lymphclon)
  library(doMC)
  library(gridExtra)
  library(ape)
  library(grid)
  library(ggthemes)
  library(reshape2)
  library(readxl)
  library(stringr)
  library(tsne)
})

if (params$parallel) {
  doMC::registerDoMC(cores=parallel::detectCores())
}

knitr::opts_chunk$set(
  echo = params$devmode,
  message = params$devmode,
  warning = params$devmode
)

patients <- c(
  "SCID00001",
  "SCID00003",
  "SCID00004",
  "SCID00005",
  "SCID00007"
)

createColorPalette <- function (n) {
   library(RColorBrewer)
   library(grDevices)
   colorRampPalette(brewer.pal(12, "Paired"))(n)
}


default_family <- "sans"
default_size <- 14
fig.path <- function(name) {file.path(params$root_fp, "figures", name)}

theme_set(theme_classic(base_size=default_size, base_family=default_family) +
    theme(
      plot.title = element_text(hjust=0),
      axis.ticks = element_line(size=0.75),
      axis.line.x = element_line(colour = 'black', size=0.6, linetype='solid'),
      axis.line.y = element_line(colour = 'black', size=0.6, linetype='solid'),
      strip.background = element_blank()
    ))

data(tcr)
data(mb)
data(intsites)

int <- plyr::ldply(intsites, function(is) {
  is$popsize %>% filter(celltype %in% c("Tcells", "PBMC")) %>%
    select(patient, "nmonths"=timepoint_m, "chao1"=S.chao1, Gini) %>%
    mutate(chao1=as.numeric(str_trim(chao1))) %>%
    mutate(Gini=as.numeric(str_trim(Gini))) %>%
    as.data.frame()
})

# Temporary virome data
vir.samples <- stringr::str_replace(c(
  "SCID00005.ST.7.4",
  "SCID00005.ST.6.5",
  "SCID00005.ST.6.4",
  "SCID00005.ST.1.4",  
  "SCID00004.ST.5.2",
  "SCID00004.ST.2.3",  
  "SCID00004.ST.1.4",    
  "SCID00003.ST.3.2",
  "SCID00003.ST.2.3",
  "SCID00003.ST.1.3",  
  "SCID00001.ST.8.2",    
  "SCID00001.ST.7.6",    
  "SCID00001.ST.4.6"     
), "\\.[0-9]$", "")


vir <- mb$mdata %>% 
  mutate(SourceID = stringr::str_replace(SampleID, "\\.[0-9]$", "")) %>%
  select(-c(SampleID, ExtrNo, VialNo, ExtrTechnician, ExtrPlate, ExtrPlateWell, ExtrDate, Reads)) %>%
  distinct() %>%
  filter(SourceID %in% vir.samples)

# Filter TCR data to just these subjects and healthy controls, and just T cells
tcr$mdata <- tcr$mdata %>% 
  # filter(patient %in% c(patients, as.character(tcr$controls))) %>% 
  filter(cell.type %in% c("Tcells", "PBMC")) %>%
  droplevels()
tcr$seqs <- tcr$seqs[tcr$seqs$accn %in% tcr$mdata$accn, ]

tcr$mdata$accnRep <- factor(with(tcr$mdata, paste(accn, replicate, sep="_")))
tcr$seqs$accnRep <- factor(with(tcr$seqs, paste(accn, replicate, sep="_")))
tcr$seqs <- tcr$seqs %>% 
  filter(sequenceStatus == "In") %>%
  mutate(aminoAcid = factor(aminoAcid)) %>%
  group_by(accnRep) %>%
  mutate(freq = count/sum(count))

make_sparsemat <- function(df, r, c, x) {
  sm <- sparseMatrix(as.integer(df[[r]]), as.integer(df[[c]]), x=df[[x]])
  rownames(sm) <- levels(df[[r]])
  colnames(sm) <- levels(df[[c]])
  sm
}

tcr$freqmat <- make_sparsemat(tcr$seqs, "aminoAcid", "accnRep", "freq")
tcr$countmat <- make_sparsemat(tcr$seqs, "aminoAcid", "accnRep", "count")
invisible(with(data.frame(), {

  # browser()

  mb.timepoints <- mb$timepoints %>% 
    # filter(SubjectID %in% patients) %>%
    mutate(TimepointID = paste(SubjectID, Num, sep=".")) %>%
    select(SubjectID, TimepointID, TimeSinceTransplant) %>%
    mutate(mb = round(TimeSinceTransplant / 30)) %>%
    select(SubjectID, TimepointID, mb)

  vir <- vir %>%
    mutate(TimepointID = paste(Subject, SampleNo, sep="."))
  vir.timepoints <- mb.timepoints %>%
    filter(TimepointID %in% vir$TimepointID) %>%
    rename(vr=mb)

  tcr.timepoints <- tcr$mdata %>% 
    # filter(patient %in% patients) %>%
    select(SubjectID=patient, tcr=nmonths)

  int.timepoints <- tcr.timepoints %>%
    select("int"=tcr, SubjectID)

  timepoints <- left_join(mb.timepoints, tcr.timepoints) %>%
    left_join(vir.timepoints) %>%
    left_join(int.timepoints) %>%
    tidyr::gather(key="type", value="months", mb, tcr, vr, int) %>%
    distinct(SubjectID, type, months) %>%
    mutate(type = forcats::fct_relevel(type, "vr", "mb", "tcr"))# %>%


  p <- ggplot(timepoints, aes(x=months, y=type, color=type)) +
    # geom_hline(yintercept=0, color="grey20") +
    geom_line() +
    geom_point(size=2, aes(shape=months>0), fill="white") +

    scale_shape_manual(values=c("TRUE"=19, "FALSE"=21), labels=c("TRUE"="After therapy", "FALSE"="Before therapy")) +
    # scale_x_continuous(breaks=c(0, 6, 12, 18, 24)) +
    scale_y_discrete(expand = c(0, 1.5)) +
    ggsci::scale_color_nejm(
      breaks=c("int","tcr", "mb", "vr"),
      labels=c(
        int="Integration sites",
        mb="Microbiome",
        tcr="T cell sequencing",
        vr="Virome")) +
    # ggthemes::scale_color_fivethirtyeight() +
    theme_minimal() +
    # ggthemes::theme_tufte(base_family = "sans") +
    facet_grid(SubjectID ~ ., switch="y") +
    theme(
      panel.grid = element_line(color="white", size=0),
      panel.grid.major.x = element_line(color="grey50", linetype=3, size=0.4),
      panel.grid.major.y = element_line(color="grey50", linetype=3, size=0.4),
      panel.margin.y=unit(0, "lines"),
      axis.text.y = element_blank(), 
      axis.title.y = element_blank(),
      axis.ticks.y = element_blank(),
      legend.title= element_blank(),
      strip.text.y = element_text(angle=180)
    )

  plot(p)
  ggsave(fig.path("Fig1-Timeline.pdf"), width=7, height=4)

}))
invisible(with(data.frame(), {
  dat <- plyr::ldply(intsites, function(is) {
    is$popsize %>% filter(celltype == "Tcells") %>%
      select(patient, "nmonths"=timepoint_m, "chao1"=S.chao1, Gini) %>%
      mutate(chao1=as.numeric(str_trim(chao1))) %>%
      mutate(Gini=as.numeric(str_trim(Gini))) %>%
      as.data.frame()
  })

  dat <- dat %>% gather(metric, value, chao1, Gini) %>%
    mutate(metric = forcats::fct_recode(
      metric,
      "Est. int. sites"="chao1",
      "Gini index"="Gini"
    ))
  p <- ggplot(dat, aes(nmonths, value, color=patient)) +
    geom_line() +
    geom_point() +
    facet_wrap(~ metric, scales="free", ncol=1, switch="y") +
    ggsci::scale_color_aaas(name="Subject") +
    scale_x_continuous(
      "Months since therapy",
      limits=c(0, 37), breaks=c(0,6,12,18,24, 30, 36), expand=c(0,0)) +
    theme(
      axis.title.y=element_blank(),
      panel.margin.y=unit(1.5,"lines"),
      strip.background=element_blank(),
      strip.text=element_text(size=12)
    )
  plot(p)
  ggsave(fig.path("Fig2-IntSiteStats.pdf"), width=6, height=4)
}))
tcr$alphadiv <- with(tcr, {
  plyr::ddply(mdata, plyr::.(accn), function(accn_df) {

    sub.mat <- countmat[, as.character(accn_df$accnRep), drop=FALSE]
    sub.mat <- as.matrix(sub.mat[rowSums(sub.mat) > 0, , drop=FALSE])

    x <- data.frame(fossil::spp.est(sub.mat, abund=FALSE))
    colnames(x) <- c(
      "n.samples", "s.obs", "s.obs.upper", "s.obs.lower",
      "chao2", "chao2.upper", "chao2.lower",
      "ice", "ice.upper", "ice.lower", "jack1", "jack1.upper", "jack1.lower")
    x <- filter(x, n.samples == max(n.samples))
    if (ncol(sub.mat) >= 3) {
      x$clonality <- as.vector(infer.clonality(sub.mat)$lymphclon.clonality)

    } else {
      x$clonality <- NA
    }
    print(x$clonality)
    x$evenness <- mean(vegan::diversity(t(sub.mat))/log(vegan::specnumber(t(sub.mat))))
    x
  },
  .parallel=params$parallel,
  .paropts=list(.packages=c('fossil','lymphclon'), .export=c('countmat', 'meta')))
})
invisible(with(tcr, {
  patients <- mdata %>% filter(grepl("SCID", patient))
  p <- ggplot(patients, aes(nmonths, Productive.Uniques, color=patient, fill=patient)) +
    geom_point() + 
    geom_smooth(alpha=0.2) +
    ggsci::scale_color_aaas(name="Subject") +
    ggsci::scale_fill_aaas(name="Subject") +
    scale_x_continuous(breaks=c(6,12,18,24), limits=c(6,24))
  plot(p)
  ggsave(fig.path("Fig3X-TCRProdUniques.pdf"), width=6, height=4)
}))
invisible(with(tcr, {

  metric.labels = c("oof"="Frameshift", "stop"="Stop codon", "prod"="Productive")
  metric.colors = c("oof"="#fc8d59", "stop"="#d73027", "prod"="#4575b4")

  plot_tcr_stats <- function(dat) {
    ggplot(dat, aes(yaxis, vmed, color=metric, fill=metric)) +
      geom_bar(stat="identity", width=0.8) +
      facet_grid(. ~ Patient + trial, scales="free", space="free", switch="x") +
      scale_y_continuous("Unique TCRs", expand=c(0,0)) +
      scale_color_manual(labels=metric.labels, values=metric.colors) +
      scale_fill_manual(labels=metric.labels, values=metric.colors) +
      theme(
        axis.text.x = element_text(size=10,angle=90, hjust=1, vjust=0.5),
        legend.title=element_blank(),
        axis.title.x = element_blank(),
        strip.text.x = element_text(angle=90, vjust=1)
      )
  }


  # browser()
  dat <- mdata %>% 
    # filter(cell.type == "Tcells") %>%
    select(accn, nmonths, patient, trial, timepoint, patient_at_timept, cell.type, Productive.Uniques, Out.of.Frame.Uniques, Has.Stop.Uniques) %>%
    tidyr::gather(metric, value, Productive.Uniques:Has.Stop.Uniques) %>%
    mutate(metric = forcats::fct_recode(
      metric,
      "prod"="Productive.Uniques",
      "oof" = "Out.of.Frame.Uniques",
      "stop" = "Has.Stop.Uniques")) %>%
    mutate(metric = forcats::fct_relevel(metric, "prod", "oof", "stop")) %>%
    mutate(Patient = forcats::fct_collapse(
      patient, 
      "Healthy children"=c("060", "062", "063", "064", "065"), 
      "Healthy adults"=c("ND378", "ND390", "ND422"))) %>%
    mutate(Patient = forcats::fct_reorder(Patient, as.integer(trial))) %>%
    mutate(patient = forcats::fct_recode(
      patient, 
      "HC1"="060", "HC2"="064", "HC3"="065", "HC4"="063", "HC5"="062",
      "HA1"="ND378", "HA2"="ND390", "HA3"="ND422")) %>%
    mutate(yaxis = ifelse(
      trial == "Control", as.character(patient), paste0(nmonths, "m"))) %>%
    mutate(yaxis = forcats::fct_reorder(yaxis, nmonths)) %>%
    # mutate(yaxis = forcats::fct_rev(yaxis)) %>%
    group_by(yaxis, Patient, patient_at_timept, trial, cell.type, metric) %>%
    summarize(
      vmed=mean(value),
      vmin=min(value),
      vmax=max(value)
    )
  # browser()
  plyr::d_ply(dat, c("cell.type"), function(x) {
    cell.type <- as.character(distinct(x, cell.type)[1,1])
    p <- plot_tcr_stats(x)
    plot(p)
    ggsave(
      fig.path(sprintf("Fig3B_TCR_Stats_%s.pdf", cell.type)), 
      width=7, height=5, device = cairo_pdf)
  })

}))
invisible(with(tcr, {
  # browser()
  mdata <- left_join(mdata, alphadiv)
  metrics <- mdata %>% 
    filter(cell.type=="Tcells") %>%
    mutate(chao2 = ifelse(n.samples ==1 , NA, chao2)) %>%
    select(accn, patient, nmonths, chao2, clonality, evenness) %>%
    distinct() %>%
    tidyr::gather(metric, value, chao2, evenness) %>%
    mutate(metric = forcats::fct_relevel(metric, "chao2", "clonality" ,"evenness")) %>%
    mutate(metric = forcats::fct_recode(
      metric,
      "Est. unique T cells"="chao2",
      "Species evenness"="evenness"
    )) %>%
    filter(!is.na(value))
  healthy.adults <- metrics %>% filter(grepl("ND", patient)) %>%
    group_by(metric) %>% 
    summarize(lower=quantile(value, 0.025), upper=quantile(value, 0.975))
  healthy.children <- metrics %>% filter(grepl("06", patient)) %>%
    group_by(metric) %>% 
    summarize(lower=quantile(value, 0.025), upper=quantile(value, 0.975))
  patients <- metrics %>% filter(grepl("SCID", patient))

  xr <- c(3,25) # Plots' x-range

  p <- ggplot(patients, aes(nmonths, value, color=patient)) + 
    geom_rect(
      data=healthy.adults, 
      inherit.aes = FALSE,
      alpha=0.5,
      aes(ymin=lower, ymax=upper, xmin=xr[1], xmax=xr[2], fill="Adults")) +
    # geom_hline(data=healthy.adults, aes(yintercept=upper, linetype="Adults")) +
    # geom_hline(data=healthy.adults, aes(yintercept=lower, linetype="Adults")) +
    geom_rect(
      data=healthy.children, 
      inherit.aes = FALSE,
      alpha=0.5,
      aes(ymin=lower, ymax=upper, xmin=xr[1], xmax=xr[2], fill="Children")) +
    # geom_hline(data=healthy.children, aes(yintercept=upper, linetype="Children")) +
    # geom_hline(data=healthy.children, aes(yintercept=lower, linetype="Children")) +
    scale_x_continuous(
      "Months since therapy",
      limits=xr, breaks=c(0,6,12,18,24), expand=c(0,0)) +
    scale_linetype_manual(
      "Healthy (95% CI)", values=c(1,1)
    ) +
    scale_fill_manual(
      "Healthy (95% CI)",
      values=c("#fbb4ae", "#b3cde3")
      # values=rev(ggsci::pal_material(n=6, palette="grey")(6)[c(2,3)])
      ) +
    geom_point() +
    geom_line() +
    facet_wrap(~ metric, scales = "free", ncol=1, switch="y") +
    ggsci::scale_color_lancet(name="Subject")+
    # ggsci::scale_color_aaas(name="Subject") +
    # theme_minimal() +
    theme(
      axis.title.y=element_blank(),
      panel.margin.y=unit(1.5,"lines"),
      strip.background=element_blank(),
      strip.text=element_text(size=12)
    )
  plot(p)
  # browser()
  ggsave(fig.path("Fig3C-TCRLongitudinal.pdf"), width=6, height=4)
}))
tcr$vj <- with(tcr, {
  scid.accns <- unique((mdata %>% filter(grepl("SCID", patient), nmonths>4))$accn)
  # scid.accns <- unique((mdata %>% filter(group == "patient", nmonths>4))$accn)
  vj.clones <- seqs %>%
    filter(sequenceStatus == "In", accn %in% scid.accns) %>%
    group_by(accn, aminoAcid) %>%
    summarize(count=sum(count)) %>%
    mutate(frequency = count/sum(count)) %>%
    left_join(
      distinct(
        subset(seqs, select=-c(nucleotide, count, frequency, cdr3Length)), 
        accn, aminoAcid, .keep_all=TRUE)) %>%
    filter(vGeneName != "unresolved", jGeneName != "unresolved") %>%
    merge(mdata, by='accnRep') %>%
    as.data.frame() %>%
    group_by(vGeneName, jGeneName, patient, nmonths) %>%
    summarize(sumFreq = sum(frequency)) %>%
    group_by(patient, nmonths) %>%
    mutate(sumFreq = sumFreq/sum(sumFreq)) %>%
    mutate(comboRank = percent_rank(sumFreq))

  vj.clones %>%
    group_by(patient, vGeneName) %>% 
    mutate(vFreq = sum(sumFreq)) %>%
    group_by(patient) %>%
    mutate(vRank = percent_rank(vFreq)) %>%
    filter(vRank > 0.65) %>%
    complete(nmonths, nesting(vGeneName, jGeneName), fill=list(sumFreq=0, comboRank=0)) %>%
    mutate(Months = sprintf("%dm", as.integer(nmonths))) %>%
    mutate(Months = reorder(Months, nmonths)) %>%
    mutate_each(funs(sub("TCRB", "", .)), vGeneName, jGeneName)

})
invisible(with(tcr, {
  p <- ggplot(vj, aes(vGeneName, jGeneName, fill=sumFreq)) +
    geom_tile(color="black", size=0.3) +
    # scale_fill_gradientn(colors=c("#2c7bb6","#ffffbf","#d7191c"), labels=scales::percent) +
    scale_fill_viridis("Frequency", option="C", labels=scales::percent) +
    coord_equal() +
    facet_grid(Months~patient, scales="free", space="free") +
    guides(fill=guide_colorbar(title.position = "top", direction = "horizontal", title.hjust = 0.5)) +
    theme_classic(base_size=14, base_family=default_family) +
    theme(
      axis.text = element_blank(),
      # axis.text.x = element_text(angle=-45, vjust=1, hjust=0),
      strip.text = element_text(size=14, hjust=0.5),
      strip.background=element_blank(),
      axis.ticks=element_blank(),
      panel.margin=unit(0.1, "line"),
      panel.margin.x = unit(1, "line"),
      plot.title=element_text(hjust=0, face="bold"),
      # legend.position=c(0.29,0.3),
      legend.justification=c(0,0)
      # axis.title=element_blank()
    ) +
    labs(x="V Genes", y="J Genes")
  plot(eclectic::make_square(p))
  browser()
  ggsave(fig.path("Fig3C-TCRVJUsage.pdf"), width=9, height=7)
}))
tcr$dists <- with(tcr, {
  # browser()
  dists <- plyr::adply(combn(mdata$accnRep, 2), 2, function(pair) {
    x <- countmat[, pair[1]]
    y <- countmat[, pair[2]]
    # print(pair)
    # print(dim(x))
    # print(dim(y))
    data.frame(x=pair[1], y=pair[2], cj=fossil::jaccard(x, y))
  }, .parallel=params$parallel, .paropts=list(.packages='fossil', .export=c("countmat", "mdata")), .id=NULL)

  # dists <- dists %>% select(-X1)
  .tmp1 <- dists[!is.na(dists$cj), ]
  .tmp2 <- data.frame(x=.tmp1$y, y=.tmp1$x, cj=.tmp1$cj)
  rbind(.tmp1, .tmp2) %>% distinct(x, y, .keep_all=TRUE)
})
tcr$bcdists <- with(tcr, {
  bcdists <- vegan::vegdist(t(as.matrix(countmat)))
})
tcr$jdists <- with(tcr, {
  jdists <- vegan::vegdist(t(as.matrix(countmat)), "jaccard")
})
invisible(with(tcr, {


  .dists <- dists %>%
    complete(x, nesting(y)) %>%
    merge(tcr$mdata, by.x="x", by.y="accnRep") %>%
    # filter(patient == params$subject, cell.type=="Tcells") %>%
    filter(y %in% x) %>%
    droplevels()

  # Order factors based on timepoint
  .dists$x <- with(.dists, reorder(x, nmonths, ordered=TRUE))
  .dists$y <- factor(.dists$y, levels(.dists$x))

  .dists <- .dists %>% group_by(patient) %>%
    filter(y %in% x)

  p <- .dists %>% filter(group=="patient") %>% #filter(grepl("SCID", patient)) %>% 
    ggplot(aes(y, x, fill=cj)) +
    geom_tile(color="black", size=0.3) +
    scale_x_discrete(labels=eclectic::named_vector(.dists, "x", "timepoint")) +
    scale_y_discrete(labels=eclectic::named_vector(.dists, "x", "timepoint")) +
    coord_fixed() +
    facet_wrap(~ patient, scales = "free") +
    scale_fill_viridis("Jaccard similarity", option="D", breaks=scales::extended_breaks(n=5)) +
    # theme_minimal(base_size = 16) +
    guides(fill=guide_colorbar(title.position="top", title.hjust=0.5, barwidth = 15, barheight=0.8)) +
    theme(
      axis.line = element_blank(),
      plot.title=element_text(hjust=0),
      legend.direction="horizontal",
      legend.position="bottom",
      axis.title=element_blank(),
      axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))
  plot(eclectic::make_square(p))
  browser()
  ggsave(fig.path("Fig3D-TCRRepSimilarity.pdf"), width=6, height=7)
}))
invisible(with(tcr, {

  to_omit <- c()
  dists_filtered <- dists %>% filter(!x %in% to_omit, !y %in% to_omit)
  d <- reshape2::acast(dists_filtered, x~y, value.var="cj")
  d[is.na(d)] <- 0
  ds <- sweep(d, 2, colSums(d), FUN="/")

  set.seed(10)
  dr <- data.frame(tsne(sqrt(bcdists), max_iter = 1000, perplexity=40))
  dr$accnRep <- rownames(ds)
  axes <- left_join(dr, mdata)
  axes <- axes %>%
    mutate(patient = ifelse(
      trial == "Control",
      as.character(Group),
      as.character(patient))) %>%
    mutate(patient = forcats::fct_reorder(patient, as.integer(Group))) %>%
    mutate(patient = forcats::fct_recode(patient, "Healthy adults"="ctrl_adult", "Healthy children"="ctrl_child"))

  browser()

  p <- ggplot(axes, aes(X1, X2, fill=patient, shape=patient)) +
    geom_point(size=3) +
    # stat_ellipse(aes(group=patient, color=patient), show.legend = F, alpha=0.5) +
    scale_shape_manual(values=c(rep(23, 2), rep(22, 5), rep(21,8))) +
    scale_fill_manual(values=createColorPalette(15)) +
    scale_color_manual(values=createColorPalette(15)) +
    labs(
      x="Axis 1",
      y="Axis 2"
    ) +
    theme(aspect.ratio=1)
  plot(p)
  # ggsave(fig.path("_TCR_tSNE_All.pdf"), width=6, height=5)

  print(vegan::adonis(bcdists ~ patient_at_timept*cell.type, data=mdata))
}))
invisible(with(tcr, {
  browser()
  int.richness <- int %>% filter(nmonths <= 24) %>%
    select("est.is"=chao1, nmonths, patient)
  tcr.richness <- alphadiv %>% 
    left_join(distinct(mdata, accn, .keep_all=TRUE)) %>%
    filter(Group == "SCIDn2") %>%
    select(nmonths, "est.tcrs"=chao2, patient)
  combined <- right_join(int.richness, tcr.richness) %>%
    mutate(cell.divs = log(est.tcrs/est.is)/log(2)) %>%
    mutate(dist = est.tcrs - est.is) %>%
    mutate(patient = as.factor(patient))

  ggplot(combined, aes(x=nmonths)) +
    geom_line(aes(y=est.is)) +
    geom_point(aes(y=(est.is), fill="Integration sites"), shape=21, size=3) +
    geom_line(aes(y=(est.tcrs))) +
    geom_point(aes(y=(est.tcrs), fill="Unique T cells"), shape=21, size=3) +
    geom_segment(aes(xend=nmonths, y=(est.is), yend=(est.tcrs)), linetype=3) +
    geom_label(aes(y=(dist/10 + (est.is)), label=sprintf("%1.1f", cell.divs))) +
    scale_fill_discrete("") +
    scale_x_continuous("Months since therapy", breaks=c(6,12,18,24)) +
    annotation_logticks(sides="l", scaled=TRUE) +
    scale_y_log10("Estimated unique t cells or integration sites") +
    facet_wrap(~patient) +
    ggtitle("Cell divisions") +
    theme_bw()

  ggsave(fig.path("Fig4-CellDivisions.pdf"), width=10, height=7)
}))
invisible(with(tcr, {
  convert_gene_name <- function(tcrb_name) {
    str_replace(tcrb_name, "TCRB", "TRB") %>%
      str_replace_all("0([0-9])", "\\1")
  }
  browser()
  accns <- unique(filter(mdata, patient=="SCID00001", nmonths==6)$accn)
  test <- seqs %>% filter(sequenceStatus == "In", accn %in% accns) %>%
    left_join(mdata, by="accn") %>%
    select("CDR3b"=aminoAcid, "TRBV"=vGeneName, "TRBJ"=jGeneName)
  test %>%
    mutate_each(funs(convert_gene_name), TRBV, TRBJ) %>%
    distinct() %>% 
    write.table(
      file="/home/ecl/data/100_SCID/test_gliph.txt", sep="\t", 
      quote=FALSE, row.names = FALSE)
}))

SCIDn1 Diagnostic Figures

invisible(with(tcr, {
  # browser()
  p <- mdata %>%
    group_by(patient_at_timept) %>%
    filter(n_distinct(cell.type)==2) %>%
  ggplot(aes(patient_at_timept, Unique)) +
    geom_boxplot(aes(color=cell.type), position=position_dodge(0.4)) +
    theme_bw() +
    labs(title="PBMCs show lower unique TCRs than sorted CD3+ cells", x="", y="Unique TCRs")
  ggsave(fig.path("_PBMC_vs_TCells.pdf"), plot = p, width=7, height=5)
}))
invisible(with(tcr, {
  browser()
  mdat <- mdata %>% filter(group == "patient")
  model <- lm(Unique ~ cell.type * nmonths, data=mdat)
  summary(model)
  p <- ggplot(mdat, aes(x=nmonths, y=Unique, color=cell.type)) +
    theme_bw() +
    geom_point(size=2) +
    # scale_shape_manual(values=c("PBMC"=1, "Tcells"=19)) +
    labs(title="PBMCs show lower overall TCRs (p < 0.0001)")
  ggsave(fig.path("_PBMC_vs_TCells_overall.pdf"), plot = p, width=7, height=5)
}))


eclarke/scid-multiomics-paper documentation built on May 13, 2019, 3:07 a.m.