library(knitr) opts_chunk$set(cache=FALSE, echo=TRUE, message=FALSE, warning=FALSE, fig.width=8, fig.height=4, dpi=100, fig.align="center") options(width=80) ## Custom hooks to add caption to figures knit_hooks$set(hfig.cap = function(before, options, envir) { if(!before & options$hfig.cap) { paste0('<div class="caption">',options$fig.cap,"</div>\n", sep="") } }) ## Custom hooks to add caption to figures knit_hooks$set(htab.cap = function(before, options, envir) { if(before & options$htab.cap) { paste0('<div class="caption">',options$fig.cap,"</div>", sep="") } else if(options$htab.cap) { paste0("\n\n") } })
library(ggplot2) library(reshape2) library(data.table) library(RColorBrewer) library(Rlabkey) library(Biobase) library(pheatmap) library(glmnet) library(ISpaceR)
mypalette <- rev(brewer.pal(name="PiYG", n=11)) brewer.qual <- "Paired" IS_theme <- theme_bw(base_size=14)
lub <- labkey.url.base <- "https://posey.fhcrc.org/" lup <- labkey.url.path <- "/Emory/SDY61/" param <-list(day = 7, GEA_acc = "GEA1", EM_acc = "EXPM0001", FDR_thresh = 0.02, FC_thresh = 0)
GEM <- read_exprs_mat(param$EM_acc)[[1]] GER_filter <- makeFilter(c("analysis_accession", "EQUAL", param$GEA_acc), c("adj_p_val", "LESS_THAN", param$FDR_thresh)) ds_GER <- labkey.selectRows(baseUrl = labkey.url.base, folderPath = labkey.url.path, schemaName = "lists", queryName = "gene_expression_analysis_result_wide", colNameOpt="rname", colFilter=GER_filter) GER <- data.table(ds_GER) HAI_filter <- makeFilter(c("subject_accession", "IN", paste(GEM$subject_accession, collapse=";"))) ds_HAI <- labkey.selectRows(baseUrl=labkey.url.base, schemaName="study", folderPath=labkey.url.path, queryName ="hai", colNameOpt="rname", colFilter=HAI_filter) HAI <- data.table(ds_HAI)
#Filter-subset #Filter out subjects with missing timepoints #Filter selected probes pd <- data.table(pData(GEM)) timepoints <- c(0, param$day) pd <- pd[,keep:=(sum(study_time_reported%in%timepoints)==length(timepoints)), by="biosample_accession_name,subject_accession"] pd <- pd[keep==TRUE] pd <- pd[study_time_reported %in% timepoints] probes_selected <- unique(GER$feature_id) GES <- GEM[probes_selected, pd$biosample_accession] #sorted by subject, time HAI <- HAI[subject_accession %in% GES$subject_accession & biosample_accession_name %in% GES$biosample_accession_name]
#lFC lFC <- exprs(GES)[, GES$study_time_reported == param$day] - exprs(GES)[, GES$study_time_reported == 0]
#hai #Class subjects as high/low responders HAI <- HAI[,list(study_time_collected=study_time_collected, hai_response=value_reported/value_reported[study_time_collected==0]),by="virus_strain,biosample_accession_name,subject_accession"] HAI <- HAI[study_time_collected==28] HAI <- HAI[,list(hai_response=max(hai_response)),by="subject_accession"] HAI <- HAI[,responder:=ifelse(hai_response<=2, "Low", ifelse(hai_response>=4, "High", "Medium"))]
#glmnet x <- t(lFC) y <- log2(HAI$hai_response) fit_hai <- glmnet(x, y, alpha=0.5) cv_fit_hai <- cv.glmnet(x, y) coef_hai <- predict(fit_hai, s=cv_fit_hai$lambda.min, type="coefficients")
#Selected probes selected_probes <- names(which(coef_hai[,1] > 0)) selected_probes <- fData(GES)[selected_probes[grep("Intercept", selected_probes, invert=TRUE)],] #If the user selected less variables than observation, no selection is done if(length(probes_selected) < nrow(HAI)){ x.selected <- x selected_probes <- fData(GES)[probes_selected,] } else{ x.selected <- x[, colnames(x) %in% selected_probes$feature_id, drop=FALSE] } if(nrow(selected_probes) < 2){ opts_chunk$set(eval=FALSE, cache=FALSE) stop("No probes were selected as predictive. You may try to change the filtering criteria.") }
#lasso relasso <- lm(y~x.selected) sum_relasso <- summary(relasso) sum_relasso_coef <- sum_relasso$coefficients predictor_table <- cbind(selected_probes, sum_relasso_coef[,c("t value","Pr(>|t|)")][-1,]) setnames(predictor_table, c("t value","Pr(>|t|)"), c("t-value","p-value"))
#Display #Table predictor_table[,"gene_symbol"] <- paste0('<a href="http://www.genenames.org/cgi-bin/quick_search.pl?.cgifields=type&type=equal&num=50&search=', gsub(";","+",predictor_table[,"gene_symbol"]),'&submit=Submit" target="_blank">', predictor_table[,"gene_symbol"], '</a>')
kable(predictor_table, digits=2, format="html", row.names=FALSE, table.attr="id=\"res_table\"")
options(markdown.HTML.header = unlist(sapply(system.file("misc", c("vignette.css", "datatables.txt"), package = "knitr"), readLines)))
#Heatmap annotation <- data.frame(hai=log2(HAI$hai_response)) rownames(annotation) <- colnames(lFC) mat <- lFC[ as.character(predictor_table$feature_id), order(annotation$hai)] pheatmap(mat, dendrogram="none", cluster_cols=FALSE, cluster_rows=TRUE, breaks = seq(-1, 1, length.out = length(mypalette) + 1), show_rownames=TRUE, show_colnames=FALSE, scale="none",cluster_method="ward", cluster_distance="correlation",color=mypalette, annotation=annotation, annotation_colors=list(hai=grey(10:0/10)))
#Prediction on training set x.test <- data.frame(t(lFC[ as.character(selected_probes$feature_id),]), check.names=FALSE) hai_pred <- as.matrix(x.test)%*%as.vector((coef(relasso)[-1]))+coef(relasso)[1] df <- data.frame(observed=HAI$hai_response, fitted=relasso$fitted.values) ggplot(df, aes(x=log2(observed), y=fitted)) + geom_point() + geom_smooth(method="lm") +IS_theme
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.