library(gridExtra) library(pheatmap) library(BiostatsALL) library(affy) library(ggplot2) library(preprocessCore) library(limma) library(plyr) library(sigaR) library(MergeMaid) getPCAs<-function (eset, maxpc = 3) { if (!is.matrix(eset)) { ex <- exprs(eset) out <- pData(eset) } else { out <- NULL ex <- eset } pca.res <- prcomp(t(ex)) x <- pca.res$x[, 1:maxpc] colnames(x) <- paste("PC", 1:maxpc, sep = ".") varex = round(100 * summary(pca.res)$importance[2, ]) db <- cbind.data.frame(x) if (!is.null(out)) db <- cbind(db, out) return(list(db = as.data.frame(db), varex = varex)) } plotPCA<-function(pca.db,title=paste0('PCA'), color.var='study2',var.shape='pathogen',var.size='months2' ){ p1<-ggplot(mutate(as.data.frame(pca.db$db), months=round((study_time_collected/28),1), months2=ifelse(months<0, -1, months)+2), aes_string(x='PC.1',y='PC.2', color=color.var, size=var.size, shape=var.shape)) + geom_jitter() + theme_bw() + scale_color_manual(values=ann.colors[[color.var]]) + labs(x=paste('PC-1 (',pca.db$varex[1],'%)',sep=''), y=paste('PC-2 (',pca.db$varex[2],'%)',sep=''), title=title) return(p1) } my.plot.pvca<-function (pvcaObj, cex.percentage = 1, fname = NULL, ht = 4, wd = 5, title = fname, race.vars=NULL) { require(stringr) require(ggplot2) title <- mgsub(c("/", ".", "PVCA"), rep("", 3), fixed = TRUE, fname) labels <- gsub(":", " x ", pvcaObj$label) labels <- gsub("_imputed", " ", labels) db <- data.frame(perc = t(pvcaObj$dat), labels = factor(labels, levels = labels)) if (!is.null(race.vars)) { db<-rbind(db, data.frame(perc=sum(subset(db, labels%in%race.vars)$perc), labels='race')) %>% subset(!(labels%in%race.vars)) %>% arrange(1*(labels=='resid'), perc) } p <- ggplot(db, aes(x = reorder(labels, perc), y = perc)) + geom_bar(stat = "identity", fill = "blue") + theme_bw() + scale_y_continuous(limits = c(0, 1.1)) + geom_text(aes(x = labels, y = perc + 0.05, label = paste0(as.character(round(100 * perc, 1)), "%"))) + labs(x = "Effects", y = "Weighted average proportion variance", title = paste("PVCA ", title)) + scale_x_discrete(labels = function(x) str_wrap(x, width = 8)) if (!is.null(fname)) { ggsave(file = paste0(fname, ".pdf"), height = ht, width = wd, plot = p) } return(p) } impute.na<-function(x){x[is.na(x)]<-mean(x,na.rm=T); return(x)}
dir.path<-'/Users/maytesuarezfarinas/Desktop/DHIPC/SignaturesIOF_Rproj/' eset<-readRDS(paste0(dir.path,"CURRENT_2020_08_10/2020_08_10_all_noNorm_noResponse_eset.rds")) eset.n<-readRDS(paste0(dir.path,"CURRENT_2020_08_10/2020_08_10_all_norm_noResponse_eset.rds")) Nfeatures.pca<-5000 eset$pathogen_adjuvant<-interaction(eset$pathogen, eset$adjuvant) eset$time<-eset$study_time_collected eset$age_group<-ifelse(eset$age_imputed<60,'young','old') eset$sex<-eset$gender_imputed eset.n$pathogen_adjuvant<-interaction(eset.n$pathogen, eset.n$adjuvant) eset.n$time<-eset.n$study_time_collected eset.n$sex<-eset.n$gender_imputed eset.n$age_group<-ifelse(eset.n$age_imputed<60,'young','old') race.vars<-c('Hispanic','White','Black','Asian')
load("/Users/maytesuarezfarinas/Desktop/DHIPC/SignaturesIOF_Rproj/adj_path_vt_colors.rda") # add for batch sc<-c("magenta","purple","orangered3","firebrick2","gray","green","lightblue4","yellow2", "cyan","firebrick4","royalblue3","darkorange2", "brown",'olivedrab','aquamarine','red','gold','dodgerblue','darkgray','lightsalmon1','darkgreen','royalblue3','orchid4', 'tan3','sandybrown','moccasin','pink','magenta','black','green','blue','violetred','snow4') ann.colors<-list(gender_imputed=c('red','blue'), study_accession=sc, study_accession2=c(sc,'black'), featureSetName2=sc, vaccine=sc[1:length(unique(eset$vaccine))]) ann.colors<-c(adj_path_vt_colors,ann.colors) path<-c(Meningitis="#80B1D3","Ebola"='gold', 'Hepatitis B'='purple', 'HIV'='red','Malaria'='cyan') ann.colors$pathogen<-c(path, ann.colors$pathogen[setdiff(names(ann.colors$pathogen), names(path))]) Nfeatures.pca<-5000
fname<-'Aug10' eset0<-eset[,eset$study_time_collected==0] pdf(paste0(fname,'BoxplotBaseline.4Paper.pdf'), height=3.5,width=12) boxplot(exprs(eset0),las=2) dev.off() features.pca.0<-featureNames(eset0)[which((rowMeans(is.na(exprs(eset0)))==0)&(apply(exprs(eset0),1,sd)>0))] #pca.db.0<-getPCAs(eset0[sample(features.pca.0, Nfeatures.pca, replace=FALSE),]) pca.db.0<-getPCAs(eset0[features.pca.0,]) pca.db.0$db$size=factor(1) p1<-plotPCA(pca.db.0,title='PCA Baseline', color.var='study_accession',var.shape='featureSetName2',var.size='size') p1<-p1+ scale_shape_manual(values=c(12,20,21:23,10,11,13:19)) ggsave(file=paste0(fname,'PCABaseline_4paper_v1.pdf'), plot=p1,height=6,width=9) p1<-plotPCA(pca.db.0,title='PCA Baseline', color.var='featureSetName2',var.shape='cell_type',var.size='size') p1<-p1+ scale_shape_manual(values=c(12,20,21:23,10,11,13:19)) ggsave(file=paste0(fname,'PCABaseline_4paper_v2.pdf'), plot=p1,height=6,width=9) eset.n0<-eset.n[,eset.n$study_time_collected==0] pdf(paste0(fname,'BoxplotBaselineNorm.4Paper.pdf'), height=3.5,width=12) boxplot(exprs(eset.n0),las=2) dev.off() features.pca.n0<-featureNames(eset.n0)[which((rowMeans(is.na(exprs(eset.n0)))==0)&(apply(exprs(eset.n0),1,sd)>0))] #pca.db.0<-getPCAs(eset0[sample(features.pca.0, Nfeatures.pca, replace=FALSE),]) pca.db.n0<-getPCAs(eset.n0[features.pca.n0,]) pca.db.n0$db$size=factor(1) p1<-plotPCA(pca.db.n0,title='PCA Baseline Norm+Batch', color.var='featureSetName2',var.shape='age_group',var.size='size') ggsave(file=paste0('PCABaselineNorm_4paper_v1.pdf'),plot=p1,height=6,width=9) #### shows the differences in old pca.db.n<-getPCAs(eset.n[features.pca.n0,]) pca.db.n$db$size=factor(1) p1<-plotPCA(pca.db.n,title='PCANorm+Batch', color.var='featureSetName2',var.shape='age_group',var.size='size') ggsave(file=paste0('PCANorm_4paper_v1_age.pdf'),plot=p1,height=6,width=9)
age.group='old' pca.db.n.y<-getPCAs(eset.n[features.pca.n0, eset.n$age_group==age.group]) pca.db.n.y$db$size=factor(1) p1<-plotPCA(pca.db.n.y,title=paste('PCANorm+Batch',age.group), color.var='featureSetName2',var.shape='cell_type',var.size='size') ggsave(file=paste0('PCANorm_4paper',age.group,'Only_v1.pdf'),plot=p1,height=4,width=7) pca.db.n.y$db$time.disc<-cut(pca.db.n.y$db$study_time_collected,c(-8,-7,0,7,28,Inf)) p1<-plotPCA(pca.db.n.y,title=paste('PCANorm+Batch',age.group), color.var='pathogen',var.shape='adjuvant',var.size='time.disc') p1<-p1+ scale_shape_manual(values=c(12,20,21:23,10,11,13:19)) ggsave(file=paste0('PCANorm_4paper',age.group,'Only_v2.pdf'),plot=p1,height=4,width=7) p1<-plotPCA(pca.db.n.y,title=paste('PCANorm+Batch',age.group), color.var='pathogen',var.shape='adjuvant',var.size='size') p1<-p1+ scale_shape_manual(values=c(12,20,21:23,10,11,13:19)) ggsave(file=paste0('PCANorm_4paper',age.group,'Only_v3.pdf'),plot=p1,height=4,width=7) p1<-p1+ scale_shape_manual(values=c(12,20,21:23,10,11,13:19)) ggsave(file=paste0('PCANorm_4paper',age.group,'Only_v3.legend.pdf'),plot=p1,height=7,width=7)
source('./functions pvca.R') eigen.0<-getEigen(eset0[features.pca.0,]) batch.vars<-c('age_group','gender_imputed',race.vars,'cell_type','study_accession') pvca0<-pvcaBatchAssess.MSF2(eset0[features.pca.n,], eigenData=eigen.0, batch.factors=batch.vars, include.inter=c( 'age_group:cell_type', 'gender_imputed:cell_type'), threshold = 0.8) p<-my.plot.pvca(pvca0,fname=paste0('PVCA_4paper_baseline_age_group'),ht=3,wd=6.2, race.vars = race.vars) save(pvca0,eigen.0,batch.vars,file='pvca0.rda')
### all data features.pca.n<-featureNames(eset.n)[which((rowMeans(is.na(exprs(eset.n)))==0)&(apply(exprs(eset.n),1,sd)>0))] eigen.all<-getEigen(eset.n[features.pca.n,]) # exposure=patogen_adjuvant batch.vars<-c('participant_id', 'age_group','gender_imputed',race.vars,'pathogen_adjuvant','time','cell_type','study_accession') a<-pvcaBatchAssess.MSF2(eset.n[features.pca.n,], eigenData=eigen.all, batch.factors=batch.vars, include.inter='none',threshold = 0.8) my.plot.pvca(a,fname=paste0('PVCA_AfterNorm4paper-age_group'),ht=3,wd=6.2, race.vars = race.vars) pv<-arrange(data.frame(Factor=a$label, perc=a$dat[1,]), desc(perc)) print(subset(pv,perc>0.01)) save(a,eigen.all,batch.vars,file='pvca_all.rda')
w<-which(eset.n$age_group=='young') eigen.y<-getEigen(eset.n[features.pca.n,w]) batch.vars<-c('participant_id','gender_imputed',race.vars,'pathogen_adjuvant','time','cell_type') a.y<-pvcaBatchAssess.MSF2(eset.n[features.pca.n,w], eigenData=eigen.y, batch.factors=batch.vars, threshold = 0.8, include.inter=c('pathogen_adjuvant:time','gender_imputed:pathogen_adjuvant:time')) my.plot.pvca(a.y,fname=paste0('PVCA_AfterNorm4paper_young'),ht=3,wd=5.5,race.vars=race.vars) w<-which(eset.n$age_group=='old') eigen.o<-getEigen(eset.n[features.pca.n,w]) a.o<-pvcaBatchAssess.MSF2(eset.n[features.pca.n,w], eigenData=eigen.o, batch.factors=batch.vars, threshold = 0.8, include.inter=c('pathogen_adjuvant:time','gender_imputed:pathogen_adjuvant:time')) my.plot.pvca(a.o,fname=paste0('PVCA_AfterNorm4paper_old'),ht=3,wd=5.5,race.vars=race.vars)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.