example/followup/TS-DLMO-PLSR-comparison.R

######################################################################
# TimeSignature addendum: performance with melatonin data and
# comparison to PLSR.
######################################################################

#=====================================================================
# First let's run the main example...
#   assumes TimeStampR/example/followup is the current working directory
#=====================================================================

setwd("..") # set current working dir to TimeStampR/example
source("TSexample.R")
setwd("followup") # set working dir to TimeStampR/example/letter

#=====================================================================
# Fig 1 : correlation between local time & hrs since DLMO
#=====================================================================

DLMObySubj <- sapply(with(all.meta,split(DLMO25, ID)),unique)
# median DLMO time is 23:10
summary((DLMObySubj +12)%%24-12)
# plots - Fig 1 of the letter
pdf("plots/letterFig1.pdf",width=7,height=4)
par(mfrow=c(1,2))
with(all.meta,hist(DLMObySubj,breaks=25,main="Training/Test, V1, V3 subjects",xlab="DLMO25% time",col="grey",xaxp=c(0,24,6)))
with(all.meta,plot(CPhrs,LocalTime,xlab="Hours since DLMO25%",ylab="Local time",main="",xaxp=c(0,24,6),yaxp=c(0,24,6)));abline(a=0,b=1,col=2)
dev.off()

#=====================================================================
# Re-train TS against CPhrs (DLMO phase) and apply
#=====================================================================

# select original training samples which had DLMO for training
CPtrain <- all.meta$train==1 & !is.na(all.meta$CPhrs)
all.foldid <- rep(0,length(all.meta$train))
all.foldid[all.meta$train==1] <- train.foldid
CPtrain.foldid <- all.foldid[CPtrain]
CPtrainDat <- all.expr[,CPtrain]
CPtrainSubjs <- all.meta[CPtrain,"ID"]
CPtrainTimes <- all.meta[CPtrain,"CPhrs"]
# within-subject normalized data (generated by TSexample.R) using two timepoints
CPtrainWSN <- all.calib.NA0[,CPtrain]

# training TS for circadian phase (hrs since DLMO25):
TSCP <- trainTimeStamp(
        expr=CPtrainWSN, # use within-subject normalized data
        subjIDs=CPtrainSubjs,
        times=CPtrainTimes,
        trainFrac=1, # no need to subset training samples; already done!
        recalib=FALSE, # no need to within-subj normalize; already done!
        a=0.5, s=exp(-1.42), # penalty params as used in the paper
        foldid=CPtrain.foldid, # foldIDs as in the paper
        plot=FALSE 
)

# TS predictions of circadian phase (hrs since DLMO25) from 
all.meta$TSCPpred <- predTimeStamp(TSCP,newx=all.calib.NA0,s=exp(-1.42))


#=====================================================================
# PLSR comparison -- our training/test datasets, Laing's code
#
#   NB: the data to re-run Laing's code with our training/test data
#   may be found in the matlab_PLSR_files/ subdir; below, we just 
#   load its output.
#
#   NB: the matlab_PLSR_files INPUT files were generated from the
#   source data used in this script as follows:
#      library(R.matlab)
#      # set NAs to 0
#      all.expr.NA0 <- all.expr
#      all.expr.NA0[is.na(all.expr.NA0)] <- 0
#      # write out training data 
#      time <- all.meta[CPtrain,"CPdeg"]
#      num <- t(all.expr.NA0[,CPtrain])
#      writeMat("matlab_PLSR_files/CPtrain.mat",dat=list(num=num,time=time))
#      # write out validation data 
#      time <- all.meta[,"CPdeg"]
#      num <- t(all.expr.NA0) # not standardized
#      writeMat("matlab_PLSR_files/CPall.mat",dat=list(num=num,time=time))  
#      # for the dPLSR, required are:
#      # indices_A -- the indices of the original samples
#      # indices_B -- the indices of the antipodal samples
#      # times_A -- the times of the original samples
#      antipodeInd <- 1:nrow(allAntipodes)
#      names(antipodeInd) <- rownames(allAntipodes)
#      indices_A <- antipodeInd[allAntipodes$samp]
#      indices_B <- antipodeInd[allAntipodes$asamp]
#      names(indices_A) <- names(indices_B) <- NULL
#      times_A <- all.meta[indices_A,"CPdeg"]
#      writeMat("matlab_PLSR_files/CPtrainPairs.mat", indices_A=indices_A[CPtrain], indices_B=indices_B[CPtrain], times_A=times_A[CPtrain])
#      writeMat("matlab_PLSR_files/CPallPairs.mat", indices_A=indices_A, indices_B=indices_B, times_A=times_A)
#
#   NB: to reproduce the output files from Laing's PLSR code,
#   see matlab_PLSR_files/README.txt
#=====================================================================

##### LOAD PLSR OUTPUT 

matPredDeg <- read.csv("matlab_PLSR_files/CP_Prediction_PLSR_1sample_5_100.csv",header=F)
colnames(matPredDeg) <- c("CPdeg","matPLpred")
all.meta$PLmatpred <- matPredDeg$matPLpred*24/360

matPred2Deg <- read.csv("matlab_PLSR_files/CP_Prediction_PLSR_2samples_5_100.csv",header=F)
colnames(matPred2Deg) <- c("CPdeg","matPL2pred")
all.meta$dPLmatpred <- matPred2Deg$matPL2pred*24/360

#=====================================================================
# Fig 2: prediction vs time since DLMO - TS/PLSR/dPLSR comparison
#=====================================================================

errplot <- function(trueHr,predHr,col=1,pch=21,main="Time of Day (24h)"){
  # plot predicted vs. true time of day , with grey bands at 2 & 4h MOE
  plot(trueHr,predHr,xlab="",ylab="",main=main,yaxs='i', xaxp=c(0,24,6),yaxp=c(0,24,6),col=col,pch=pch,xlim=c(0,24),    ylim=c(0,24),type="n")
  trueHr <- (trueHr+240)%%24
  predHr <- (predHr+240)%%24
  mtext("True",side=1,line=2.2,cex=0.8)
  mtext("Predicted",side=2,line=2.4,cex=0.8)
  abline(a=0,b=1,col="grey");
  abline(a=24,b=1,col="grey");
  abline(a=-24,b=1,col="grey");
  cols <- palette()[col]
  fills <- adjustcolor(cols,alpha=0.3)
  polygon(c(-48,48,48,-48),c(-48,48,48,-48)+c(-4,-4,4,4),border="grey",lty=3,col=adjustcolor("grey",alpha=0.2));
  polygon(c(-48,48,48,-48),c(-48,48,48,-48)+c(-2,-2,2,2),border="grey",lty=2,col=adjustcolor("grey",alpha=0.3));
  polygon(c(-48,48,48,-48),c(-48,48,48,-48)+c(-4,-4,4,4)+24,border="grey",lty=3,col=adjustcolor("grey",alpha=0.2));
  polygon(c(-48,48,48,-48),c(-48,48,48,-48)+c(-2,-2,2,2)+24,border="grey",lty=2,col=adjustcolor("grey",alpha=0.3));
  polygon(c(-48,48,48,-48),c(-48,48,48,-48)+c(-4,-4,4,4)-24,border="grey",lty=3,col=adjustcolor("grey",alpha=0.2));
  polygon(c(-48,48,48,-48),c(-48,48,48,-48)+c(-2,-2,2,2)-24,border="grey",lty=2,col=adjustcolor("grey",alpha=0.3));
  points(trueHr,predHr,col=cols,bg=fills,pch=pch)
}


plotAll <- function(all.meta2=all.meta){

  par(mfcol=c(2,3))
  palette(c("black", "orange2", "deepskyblue2", "green2", "cyan", "red", "yellow","gray"))
  
  statLegend4 <- function(){
    legend("bottomright", bty="n", text.font=c(3,2,2,2,2), lwd=1.5, cex=0.9, lty=c(0,1,3,2,4), col=c("white","black","blue3","purple","violet"), text.col=c("black","black","blue3","purple","violet"),
      legend=c("          AUC,  med",sprintf(
        "%s:  %.2f, %s",
        c("mTS"," oTS","mPL"," dPL"),
        c(TSauc$auc, oTSauc$auc,PLSauc$auc,dPLSauc$auc),
        asTime(c(TSauc$mederr, oTSauc$mederr, PLSauc$mederr,dPLSauc$mederr))
      )))
  }

  # split up the results by study
  all.meta.split <- split(all.meta2, all.meta2$study)
  
  # Train/Test data (Moller) - TEST SAMPLES ONLY 
  TSauc <- with(all.meta.split$TrTe[all.meta.split$TrTe$train==0,],
          predplot(CPhrs,TSCPpred,plot=T,main="Test Set (Moller &al)",col=cond))
  oTSauc <- with(all.meta.split$TrTe[all.meta.split$TrTe$train==0,],
          tolplot(CPhrs,TSpred, add=T,col="blue3",lty=3))
  PLSauc <- with(all.meta.split$TrTe[all.meta.split$TrTe$train==0,],
          tolplot(CPhrs, PLmatpred, add=T,col="purple",lty=2))
  dPLSauc <- with(all.meta.split$TrTe[all.meta.split$TrTe$train==0,], tolplot(CPhrs, dPLmatpred, add=T,col="violet",lty=4))
  xxx <- with(all.meta.split$TrTe[all.meta.split$TrTe$train==0,], tolplot(CPhrs,TSCPpred,add=T))
  statLegend4()
  
  # V1 validation data (Archer) 
  TSauc <- with(all.meta.split$V1,
          predplot(CPhrs,TSCPpred,plot=T,main="Validation V1 (Archer &al)",col=cond))
  oTSauc <- with(all.meta.split$V1,
          tolplot(CPhrs,TSpred, add=T,col="blue3",lty=3))
  PLSauc <- with(all.meta.split$V1,
          tolplot(CPhrs, PLmatpred, add=T,col="purple",lty=2))
  dPLSauc <- with(all.meta.split$V1, tolplot(CPhrs, dPLmatpred, add=T,col="violet",lty=4))
  xxx <- with(all.meta.split$V1, tolplot(CPhrs,TSCPpred,add=T))
  statLegend4()
  
  # V2 validation data (Arnardottir &al) 
  # no DLMO for Arnardottir!
  
  # V3 validation data (new RNA-seq) 
  TSauc <- with(all.meta.split$V3,
          predplot(CPhrs,TSCPpred,plot=T,main="Validation V3 (new RNA-seq)",col=1))
  oTSauc <- with(all.meta.split$V3,
          tolplot(CPhrs,TSpred, add=T,col="blue3",lty=3))
  PLSauc <- with(all.meta.split$V3,
          tolplot(CPhrs, PLmatpred, add=T,col="purple",lty=2))
  dPLSauc <- with(all.meta.split$V3, tolplot(CPhrs, dPLmatpred, add=T,col="violet",lty=4))
  xxx <- with(all.meta.split$V3, tolplot(CPhrs,TSCPpred,add=T))
  statLegend4()
}

pdf("plots/letterFig2.pdf",width=8, height=5.5)
plotAll()
dev.off()
braunr/TimeSignatR documentation built on Feb. 3, 2021, 2:39 p.m.