In brunocontrino/Doscheda: A DownStream Chemo-Proteomics Analysis Pipeline

library(hexbin)
library(vsn)
library(limma)
library(knitr)
if(params$datype == 'lFC'){

  inptypetab <- 'Log Fold Changes'
}else if(params$datype == 'intensity'){
  inptypetab <- 'Peptide Intensities'
}else{
  inptypetab <- 'Fold Changes'
}

This is a summary of the data processing and plots created in the DOSCHEDA shiny app.

Number of Channels | Number of Replicates | Input Type | Model fitted ---|---|---|--- r params$chans|r params$reps | r inptypetab | r ifelse(params$sigmodin == 'sigmoid','Sigmoidal','Linear')

   reptest <- ifelse(params$reps > 1,TRUE,FALSE)

Figure 1: Box plot of Data Columns

panel.shadeNtext <- function (x, y, corr = NULL, col.regions, ...)
{
  if (is.null(corr))
    corr <- cor(x, y, use = "pair")
  ncol <- 14
  pal <- col.regions(ncol)
  col.ind <- as.numeric(cut(corr, breaks = seq(from = -1, to = 1,
                                               length = ncol + 1), include.lowest = TRUE))
  usr <- par("usr")
  rect(usr[1], usr[3], usr[2], usr[4], col = pal[col.ind],
       border = NA)
  box(col = "lightgray")
  on.exit(par(usr))
  par(usr = c(0, 1, 0, 1))
  r <- formatC(corr, digits = 2, format = "f")
  cex.cor <- .8/strwidth("-X.xx")
  text(0.5, 0.5, r, cex = cex.cor)
}


vec<- params$channel
    vec<- length(vec)
       palette.bar <- rep(terrain.colors(params$reps),each = ifelse(params$datype != 'intensity',params$chans,params$chans - 1 ) )

    data.merged1<- params$data
    boxplot(data.merged1[,1:vec], col=palette.bar,las=2, cex.axis=0.6, main=c("Box Plots"),
            ylab=c("Log2(ratios)"))

        legend("topright",legend = paste("rep",1:params$reps,sep = ' '), fill = terrain.colors(params$reps))

Description:

Here we should observe the means centered at approximately zero across all channels and replicates.

Figure 2: Ranked plot of proteins FC and Density plot of Data Columns

    vec<- params$channel
    pal <- rainbow(length(vec))
    missing_val <- 0
    if(params$datype == 'intensity' & params$reps == 1 ){
     leg.nam <- paste0('rep1_C',0:(params$chans -2))
    }else{
        leg.nam <- params$finNam
   }
    par(mfrow = c(1,2))   
    plot(x=rank(data.merged1[,1]),y=data.merged1[,1], cex.axis=0.6,
         main=c(paste("N. of Missing val. ",missing_val ,
                      " \n Change Distribution after LOESS",sep="")),
         col=pal[1], ylab=c("Log2(ratios)"), xlab = c("Proteins"))
    if(length(vec) > 1){
      for(i in 2:length(vec)){
        points(x=rank(data.merged1[,i]),y=data.merged1[,i], cex.axis=0.6, main=c(""),col=pal[i])
      }
    }
   legend("topleft", legend = leg.nam, fill = pal)


    plot(density(x=data.merged1[,1]),col= pal[1], main=c(" Density after LOESS"), ylim = c(0,5 ))


    if(length(vec) > 1){
      for(i in 2:length(vec)){
        lines(density(x=data.merged1[,i]), col=pal[i], main = c(""))
      }
    }
 legend("topleft", legend = leg.nam, fill = pal)

Description:

These plots show the density distribution of each channel and the distribution of the ranked proteins. The Density plots should have a bell shaped distributions and be approximately centred at zero.

Figure 3: Venn Diagram

kinome1<- params$kin
kinome<-as.vector(toupper(kinome1$new.Symbol))

if((params$vennip) == TRUE){

      upload <- params$upVenn
      upload<- as.vector(toupper(upload))
      data.merged <- params$data

      proteome<-as.vector(toupper(data.merged$GeneID))
      universe <- unique(c(kinome,proteome,upload))

      count<- matrix(0, ncol = 3 , nrow = length(universe))
      colnames(count) <- c("kinome",'proteome','upload')

      for(i in 1:length(universe)){
        count[i,1]<- universe[i] %in% kinome
        count[i,2]<- universe[i] %in% proteome
        count[i,3]<- universe[i] %in% upload
      }

      vennDiagram(vennCounts(count), circle.col = c("blue","red","green"), cex = 1,lwd = 2)


    }else{

      data.merged <- params$data

      proteome<-as.vector(toupper(data.merged$GeneID))
      universe <- unique(c(kinome,proteome))
      # Generate a matrix, with the sets in columns and possible letters on rows
      Counts <- matrix(0, nrow=length(universe), ncol=2)
      # Populate the said matrix
      for (i in 1:length(universe)) {
        Counts[i,1] <- universe[i] %in% kinome
        Counts[i,2] <- universe[i] %in% proteome
        #Counts[i,3] <- universe[i] %in% Metacore
        #Counts[i,3] <- universe[i] %in% EXOCYTOSIS

      }

      colnames(Counts) <- c("Kinome","Proteome")
      cols<-c("Red", "Blue")

      #### VENN

      vennDiagram(vennCounts(Counts), circle.col=cols,
                  cex=1, #title size
                  lwd=2 #circle line size
      )
     }

Description:
The venn diagram shows the intersection of the kinome and the inputted proteins. There is also the option to load a list of protein names and see their intersection with the data.

Figure 3: Mean vs standard deviation plot

vsn::meanSdPlot(as.matrix(data.merged1[,1:length(vec)]))

Description:

The ranked row means versus the standard deviations for checking any dependence of the variance on the mean. The red line is a running median estimator with window-width at 10%. If this red line is approximately horizontal, this indicates no variance-mean dependence.

Figure 4: Mean vs SD plots for each Protein.

    rSu<- params$RSu
     for(i in 1:nrow(params$indexmat)){

    minxy <- min(c(min(rSu[,params$indexmat[i,5]]),min(rSu[,params$indexmat[i,6]])))
    maxxy <- max(c(max(rSu[,params$indexmat[i,5]]),max(rSu[,params$indexmat[i,6]])))

    print(tmd(
      xyplot(rSu[,params$indexmat[i,5]] ~ rSu[,params$indexmat[i,6]]), main=params$indexmat[i,1],xlim = c(minxy,maxxy),
      ylim = c(minxy,maxxy),
      panel=function(x, y, ...) {
        panel.xyplot(x, y, ...);
        ltext(x=x, y=y, labels=rSu$GeneID, pch=c(13,3,16), cex=0.5, lwd=2, pos=1, offset=1, pch = 19)
      }))

     }

Description:

The above plots are of the mean versus the difference in fold-change between replicates. The majority of proteins are expected to be scattered, as a cloud, without major axis dependency.

Figure 5: Pearson Correlation between Conditions

    index1 <- params$channel
    index1<- length(index1)
    cmat <-cor(params$RSu[,1:index1],use="pairwise", method = "pearson")
    corrgram(cmat, order=TRUE, lower.panel=panel.shadeNtext,
             upper.panel=panel.pie, text.panel=panel.txt,
             main="Corrgram Plots" )

Description:

The Pearson correlations (r) between all the different channels are displayed. None of the channels are expected to be anti-correlated (r < 0). The QC in DOSCHEDA will highlight whether there are anti-correlated pairs of channels.

Figure 6: Principal-Component Analysis of Data columns

if( params$datype == 'intensity'){
     if(params$reps == 1){
       nchan<- params$chans - 1
     }
  # else {
  #      nchan<- params$chans - 2
  #      
  #    }
    }else{

     nchan<- params$chans
    }

 if( params$datype == 'intensity'){

      nchan<- params$chans - 1

    }else{

      nchan<- params$chans
    }

    su <- params$RSu
    reps <- params$reps
    index <- params$channel

    pca <- prcomp(su[,1:length(index)], scale = FALSE)

    DTA<-data.frame( as.numeric(t(su[,1:length(index)])%*%pca$x[,1]),
                     as.numeric(t(su[,1:length(index)])%*%pca$x[,2]))


    p<-ggplot(DTA, aes(x=DTA$as.numeric.t.su...1.length.index........pca.x...1..,
                       y=DTA$as.numeric.t.su...1.length.index........pca.x...2..))
    # nchan/3 -> 3 if 3 replicates 2 if 2 reps and son on
        shapeval <- c(15:18,7:12)
    p <- p + geom_point(aes(colour = factor(rep(1:params$reps,each = (nchan)),labels = paste("Rep",1:params$reps))[index],
                            shape = factor(rep(1:nchan,params$reps),labels = c(paste("C",0:(nchan-1),sep = "")))[index] ), size = 5 ) + scale_shape_manual(values=shapeval[1:nchan]) + labs(x = paste0("PC1", " (", round(pca$sdev[1]/sum(pca$sdev)*100,0), "%)"),
                                                          y = paste0("PC2", " (", round(pca$sdev[2]/sum(pca$sdev)*100,0), "%)"), title="PCA") + labs(color = "Replicates", shape="Concentration")

    p

Description:

Plot of two highest principal components. Replicates should (roughly) cluster together. This plot highlights if any data points are vastly 'out of place' given the experimental design.

Figure 7: Replicate vs Replicate plots for each Channel.

for( i in 1:nrow(params$indexmat)){

  val = max(c(max(data.merged1[,params$indexmat[i,5]],na.rm = TRUE),max(data.merged1[,params$indexmat[i,6]],na.rm = TRUE)))

    plot(x=data.merged1[,params$indexmat[i,5]],y=data.merged1[,params$indexmat[i,6]], xlim=c(0,val +0.2), col="green" , ylim=c(0,val+0.2),
         cex.axis=1.2, main=paste("LogFC-LogFC Plot:", params$indexmat[i,1]),
         xlab=paste("rep",params$indexmat[i,3]),  ylab=paste("rep",params$indexmat[i,4]))
    text(data.merged1[,params$indexmat[i,5]], data.merged1[,params$indexmat[i,6]], labels=data.merged1$GeneID, cex= 0.7,pos=4)
    lines(x = c(0,val), y = c(0,val),col="red")
}

Description:

Scatterplots between replicates to identify targets (drug competed proteins) and potential off-targets. Points that have high fold change in both replicates and are close to the red x = y line are considered to be targets.

if(params$sigmodin == 'sigmoid'){
  show_text <- TRUE
} else{

  show_text <- FALSE
}

```{asis, echo=!show_text}

Linear Model Plots

The following plots are relevant if a linear model has been fitted to the data.

Figure 8: Distribution of p-values of model coefficients

```r

data.merged2 <- params$data2
    m0 <- ggplot(data.merged2, aes(x=data.merged2$P.Value))
    m0<-m0 + geom_histogram(aes(fill = ..count..),binwidth = 0.01) +
      scale_fill_gradient("Count", low = "green", high = "red")+
      xlab("P.val slope")


    m1 <- ggplot(data.merged2, aes(x=data.merged2$P.Value.x))
    m1<- m1 + geom_histogram(aes(fill = ..count..),binwidth = 0.01) +
      scale_fill_gradient("Count", low = "green", high = "red")+
      xlab("Pval intercept")

    m2 <- ggplot(data.merged2, aes(x=data.merged2$P.Value.y))
    m2 <- m2 + geom_histogram(aes(fill = ..count..),binwidth = 0.01) +
      scale_fill_gradient("Count", low = "green", high = "red")+
      xlab("Pval quadratic")

    grid.arrange(m0,m1,m2)

```{asis, echo=!show_text} Description:

The p-value distributions for each of the model coefficients are expected to not be uniform. The QC in DOSCHEDA will highlight whether a coeffcient does not contail any significant p-values.

Figure 9: Volcano plots of model coefficients

```r

    res<- data.merged2
    # avgthr=0.2 #sign threshold for the averege fold change 0.3(log2)  is 1.3 FC

    par(mar=c(5,5,5,10), xpd=TRUE)
    # Make a basic volcano plot
    with(res, plot(res$AveExpr.x, -log10(res$P.Value), pch=20, main="Volcano plot (slope pval )",
                   xlab=c("Log2_AvgFC"),ylab=c("-Log10(Pval)"),
                   xlim=c(-abs(max(res$AveExpr.x)+1),abs(max(res$AveExpr.x)+1))))

    # Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
    s=subset(res, P.Value< params$pvalsli )
    with(s, points(s$AveExpr.x, -log10(s$P.Value), pch=20, col="red"))

    s=subset(res, abs(res$AveExpr.x)>params$avthrsli)
    with(s, points(s$AveExpr.x, -log10(s$P.Value), pch=20, col="orange"))

    s=subset(res, P.Value< params$pvalsli & abs(res$AveExpr.x)>params$avthrsli)
    with(s, points(s$AveExpr.x, -log10(s$P.Value), pch=20, col="green"))

    # Label points with the textxy function from the calibrate plot
    s=subset(res, P.Value< params$pvalsli & abs(res$AveExpr.x)>params$avthrsli)
    with(s, textxy( s$AveExpr.x, -log10(s$P.Value),  labs=s$GeneID, cex=.7)
    )
        legend("bottomleft", title="Legend",cex = 0.7,
           c("Not significant",
             paste("P.Value",params$pvalsli, sep = " "), #red
             paste("AvgFC >", params$avthrsli, sep=""), #orange,
             paste("P.Value", params$pvalsli,"& AvgFC >", params$avthrsli, sep="") #green
           ),
           col=c("black","red","orange","green"),
           horiz=F, pch=c(19))
    # legend("topright", inset=c(-0.4,0.1), title="Legend",cex = 0.7,
    #        c("Not significant",
    #          "P.Value<.05", #red
    #          paste("AvgFC >", params$avthrsli, sep=""), #orange,
    #          paste("P.Value<.05 & AvgFC >", params$avthrssli, sep="") #green
    #        ),
    #        col=c("black","red","orange","green"),
    #        horiz=F, pch=c(19))

res<- data.merged2
    # avgthr=0.2 #sign threshold for the averege fold change 0.3(log2)  is 1.3 FC

    par(mar=c(5,5,5,10), xpd=TRUE)
    # Make a basic volcano plot
    with(res, plot(res$AveExpr.x, -log10(res$P.Value.x), pch=20, main="Volcano plot (Intercept pval )",xlab=c("Log2_AvgFC"),ylab=c("-Log10(Pval)"), xlim=c(-abs(max(res$AveExpr.x)+1),abs(max(res$AveExpr.x)+1))))

    # Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
    s=subset(res, P.Value.x<params$pvalsli )
    with(s, points(s$AveExpr.x, -log10(s$P.Value.x), pch=20, col="red"))

    s=subset(res, abs(res$AveExpr.x)> params$avthrsli)
    with(s, points(s$AveExpr.x, -log10(s$P.Value.x), pch=20, col="orange"))

    s=subset(res, P.Value.x< params$pvalsli & abs(res$AveExpr.x)> params$avthrsli)
    with(s, points(s$AveExpr.x, -log10(s$P.Value.x), pch=20, col="green"))

    # Label points with the textxy function from the calibrate plot
    s=subset(res, P.Value.x< params$pvalsli & abs(res$AveExpr.x)>params$avthrsli)
    with(s, textxy( s$AveExpr.x, -log10(s$P.Value.x),  labs=s$GeneID.x, cex=.7)
    )
            legend("bottomleft", title="Legend",cex = 0.7,
           c("Not significant",
             paste("P.Value",params$pvalsli, sep = " "), #red
             paste("AvgFC >", params$avthrsli, sep=""), #orange,
             paste("P.Value", params$pvalsli,"& AvgFC >", params$avthrsli, sep="") #green
           ),
           col=c("black","red","orange","green"),
           horiz=F, pch=c(19))
    # legend("topright", inset=c(-0.4,0.1), title="Legend",cex = 0.7,
    #        c("Not significant",
    #          "P.Value<.05", #red
    #          paste("AvgFC >", params$avthrsli, sep=""), #orange,
    #          paste("P.Value<.05 & AvgFC >", params$avthrsli, sep="") #green
    #        ),
    #        col=c("black","red","orange","green"),
    #        horiz=F, pch=c(19))

res<- data.merged2
    # avgthr=0.2 #sign threshold for the averege fold change 0.3(log2)  is 1.3 FC

    par(mar=c(5,5,5,10), xpd=TRUE)
    # Make a basic volcano plot
    with(res, plot(res$AveExpr.x, -log10(res$P.Value.y), pch=20, main="Volcano plot (Quadratic pval )",xlab=c("Log2_AvgFC"),ylab=c("-Log10(Pval)"), xlim=c(-abs(max(res$AveExpr.x)+1),abs(max(res$AveExpr.x)+1))))

    # Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
    s=subset(res, P.Value.y<params$pvalsli )
    with(s, points(s$AveExpr.x, -log10(s$P.Value.y), pch=20, col="red"))

    s=subset(res, abs(res$AveExpr.x)>params$avthrsli)
    with(s, points(s$AveExpr.x, -log10(s$P.Value.y), pch=20, col="orange"))

    s=subset(res, P.Value.y<params$pvalsli & abs(res$AveExpr.x)>params$avthrsli)
    with(s, points(s$AveExpr.x, -log10(s$P.Value.y), pch=20, col="green"))

    # Label points with the textxy function from the calibrate plot
    s=subset(res, P.Value.y<params$pvalsli & abs(res$AveExpr.x)>params$avthrsli)
    with(s, textxy( s$AveExpr.x, -log10(s$P.Value.y),  labs=s$GeneID.x, cex=.7)
    )
            legend("bottomleft", title="Legend",cex = 0.7,
           c("Not significant",
             paste("P.Value",params$pvalsli, sep = " "), #red
             paste("AvgFC >", params$avthrsli, sep=""), #orange,
             paste("P.Value", params$pvalsli,"& AvgFC >", params$avthrsli, sep="") #green
           ),
           col=c("black","red","orange","green"),
           horiz=FALSE, pch=c(19))
    # legend("topright", inset=c(-0.4,0.1), title="Legend",cex = 0.7,
    #        c("Not significant",
    #          "P.Value<.05", #red
    #          paste("AvgFC >", params$avthrsli, sep=""), #orange,
    #          paste("P.Value<.05 & AvgFC >", params$avthrsli, sep="") #green
    #        ),
    #        col=c("black","red","orange","green"),
    #        horiz=F, pch=c(19))

```{asis, echo=!show_text} Description:

Volcano plots for all the linear fit coefficients showing the relationship between the average log fold change and the p-values.

```r

    vec <- params$channel
    vec <- length(vec)
    su1 <- su[,1:vec]
    vec.nam <- params$finNam
    vec.nam <- vec.nam[params$channel]
    # nchan <- nchans()
    # index<- channels()
    # data_merged_Pvals<-data.frame(su[,index],
    #                               Accession=su$Accession,GeneID=su$GeneID)
    # #data_merged_Pvals<-data_merged_Pvals[GeneID=="NA", GeneID := Accession] substituting NA with Accession
    #
    plot.new()
     #d3heatmap(su1, Colv = FALSE,labRow = as.character(make.names(su$GeneID.x,unique = TRUE)))
    # m<- heatmaply(su,Colv= FALSE,
    #               labRow =  as.character(make.names(su$GeneID.x,unique = TRUE))) %>% layout(margin = list(l = 70, b = 70))

```{asis, echo=show_text}

Sigmoidal Fit Plots

The following plots are relevant when a sigmoidal model is fitted to the data. They show the top protein profiles for each of the model parameters.

```r
shape_for_ggplot_pred<-function(df_ordered,conc,pred.names){
  cols_to_keep_pred<-c(pred.names,"GeneID","Accession")

  forggplot_pred<-vector(mode = "list",length = length(df_ordered$GeneID))

  for(i in 1:length(df_ordered$GeneID)){
    tmp_pred<-df_ordered[,cols_to_keep_pred]
    forggplot_pred[[i]]<-melt(tmp_pred[i,], id = c("GeneID", "Accession"), na.rm = TRUE)
  }

  forggplot_pred_1<-do.call(rbind, forggplot_pred)
  forggplot_pred_1<-data.frame(forggplot_pred_1,"x"=conc)
  #return(forggplot_pred_1)
}

shape_for_ggplot_perc<-function(df_ordered,conc,finalNames){
  cols_to_keep_perc<-c(finalNames, "GeneID","Accession")

  forggplot_perc<- vector(mode = "list",length = length(df_ordered$GeneID))
  for(i in 1:length(df_ordered$GeneID)){
    tmp_perc<-df_ordered[,cols_to_keep_perc]
    forggplot_perc[[i]]<-melt(tmp_perc[i,], id = c("GeneID", "Accession"), na.rm = TRUE)
  }

  forggplot_perc_1<-do.call(rbind, forggplot_perc)
  forggplot_perc_1<-data.frame(forggplot_perc_1,"x"=conc)
  #return(forggplot_perc_1)
}


pie_chart <- function(df, main, labels = NULL, condition = NULL) {

  # convert the data into percentages. group by conditional variable if needed
  df <- group_by_(df, .dots = c(condition, main)) %>%
    summarise(counts = n()) %>%
    mutate(perc = counts / sum(counts)) %>%
    arrange(desc(perc)) %>%
    mutate(label_pos = cumsum(perc) - perc / 2,
           perc_text = paste0(round(perc * 100), "%", "\n","(",counts, ")"))

  # reorder the category factor levels to order the legend
  df[[main]] <- factor(df[[main]], levels = unique(df[[main]]))

  # if labels haven't been specified, use what's already there
  if (is.null(labels)) labels <- as.character(df[[main]])

  p <- ggplot(data = df, aes_string(x = factor(1), y = "perc", fill = main)) +

    # make stacked bar chart with black border
    geom_bar(stat = "identity", color = "black", width = 1) +

    # add the percents to the interior of the chart
    geom_text(aes(x = 1.25, y = label_pos, label = perc_text), size = 4) +

    # add the category labels to the chart
    # increase x / play with label strings if labels aren't pretty
    geom_text(aes(x = 1.82, y = label_pos, label = labels), size = 4) +

    # convert to polar coordinates
    coord_polar(theta = "y") +

    # formatting
    scale_y_continuous(breaks = NULL) +
    scale_fill_discrete(name = "", labels = unique(labels)) +
    theme(text = element_text(size = 12),
          axis.ticks = element_blank(),
          axis.text = element_blank(),
          axis.title = element_blank())

  # facet wrap if that's happening
  if (!is.null(condition)) p <- p + facet_wrap(condition)

  return(p)
}

```{asis, echo=show_text} Figure 8: Plot of the largest differences between the proteins from the lowest and highest concentrations (over 30%)

```r

if(params$sigmodin != 'sigmoid'){
     plot.new()
      legend('topleft', c("Linear fit applied, no sigmoidal plots available"),bty = 'n')
    }else{
    if(params$datype == 'intensity'){
        data_merged_2 <- params$data2
        conc<- params$concen
        pred.names <- paste0('predX',1:(params$chans -1))
        final.Names <- paste0('rep1_C',0:(params$chans - 2))

        topperc<-30 #difference in % between top and bottom
        # data_merged_2 <- dataMerge2()
        diffinter<- data_merged_2[(data_merged_2$predX1 -data_merged_2[,paste("predX",(params$chans-1),sep = "")]) > topperc & data_merged_2$predX1 <= 100, ]


        if(nrow(diffinter)>0){
          Diff_Top_bottom_pred<-shape_for_ggplot_pred(diffinter,log2(conc),pred.names)
          Diff_Top_bottom_perc<-shape_for_ggplot_perc(diffinter,log2(conc),final.Names)
          what<-c("(Top - Bottom) >")
           GeneID <-  factor(Diff_Top_bottom_pred$GeneID)
          Diff_Top_bottom<-ggplot()+
            geom_line(data = Diff_Top_bottom_pred, aes(x=x,y=value, colour=GeneID), size = 1) +
            geom_point(data = Diff_Top_bottom_perc, aes(x=x,y=value, colour=Diff_Top_bottom_perc$GeneID)) +
            labs(title=paste(what,topperc,sep=""))


        }else{
          Diff_Top_bottom<-ggplot()+
            labs(title=paste("No significant Top-Bottom >" ,topperc,"%","\n","has been found", sep=""))
        }

        print(Diff_Top_bottom)
    } else{
        conc<- params$concen
    pred.names <- params$sigPred
    final.Names <- params$finNam

    topperc<-30 #difference in % between top and bottom
    data_merged_2 <- params$data2
    diffinter<- data_merged_2[(data_merged_2$predX1 -data_merged_2[,paste("predX",params$chans,sep = "")]) > topperc & data_merged_2$predX1 <= 100, ]




    if(nrow(diffinter)>0){
      Diff_Top_bottom_pred<-shape_for_ggplot_pred(diffinter,log2(conc),pred.names)
      Diff_Top_bottom_perc<-shape_for_ggplot_perc(diffinter,log2(conc),final.Names)
      what<-c("(Top - Bottom) >")
      GeneID <-  factor(Diff_Top_bottom_pred$GeneID)
      Diff_Top_bottom<-ggplot()+
        geom_line(data = Diff_Top_bottom_pred, aes(x=x,y=value, colour=GeneID), size = 1) +
        geom_point(data = Diff_Top_bottom_perc, aes(x=x,y=value, colour=Diff_Top_bottom_perc$GeneID)) +
        labs(title=paste(what,topperc,sep=""))


    }else{
      Diff_Top_bottom<-ggplot()+
        labs(title=paste("No significant Top-Bottom >" ,topperc,"%","\n","has been found", sep=""))
    }
        print(Diff_Top_bottom)    
      }




    }

```{asis, echo=show_text} Description:

Plot of the proteins profiles whose difference between the top and bottom parameters of the sigmoidal model are greater than 30%.

```{asis, echo=show_text}
**Figure 9: The top proteins with significant Slope Value**

    if(params$sigmodin != 'sigmoid'){
      plot.new()
      legend('topleft', c("Linear fit applied, no sigmoidal plots available"),bty = 'n')
    }else{
      if(params$datype == 'intensity'){
         top<-15 #max prot to plot

        conc<- params$concen
        pred.names <- paste0('predX',1:(params$chans -1))
        final.Names <- paste0('rep1_C',0:(params$chans - 2))

        data_merged_2 <- params$data2

        #Here make the subselections for using the ggplot functions SLOPE
        slope<-na.omit(data_merged_2[data_merged_2$SlopePval<0.05 ,])
        slope_ordered<-na.omit(slope[order(slope$SlopePval, decreasing = FALSE),][1:top,])
        if(nrow(slope_ordered)>0){
          slope_pred<-shape_for_ggplot_pred(slope_ordered,log10(conc),pred.names)
          slope_perc<- shape_for_ggplot_perc(slope_ordered,log10(conc),final.Names)
          what<-c("Slope (p.val) ")
          GeneID <- factor(slope_pred$GeneID)
          Slope_pl<-ggplot()+
            geom_line(data = slope_pred, aes(x=x,y=value, colour=GeneID), size = 1) +
            geom_point(data = slope_perc, aes(x=x,y=value,colour=slope_perc$GeneID))+
            labs(title=paste(what,"Top",top,sep=""))

        }else{Slope_pl<-ggplot()+
          labs(title="No significant Sigmoidal Slope has been found")
        }
        print(Slope_pl)
      }else{
        top<-15 #max prot to plot

    #Here make the subselections for using the ggplot functions SLOPE
    slope<-na.omit(data_merged_2[data_merged_2$SlopePval<0.05 ,])
    slope_ordered<-na.omit(slope[order(slope$SlopePval, decreasing = FALSE),][1:top,])
    if(nrow(slope_ordered)>0){
      slope_pred<-shape_for_ggplot_pred(slope_ordered,log10(conc),pred.names)
      slope_perc<- shape_for_ggplot_perc(slope_ordered,log10(conc),final.Names)
      what<-c("Slope (p.val) ")
      GeneID <- factor(slope_pred$GeneID)
      Slope_pl<-ggplot()+
        geom_line(data = slope_pred, aes(x=x,y=value, colour=GeneID), size = 1) +
        geom_point(data = slope_perc, aes(x=x,y=value,colour=slope_perc$GeneID))+
        labs(title=paste(what,"Top",top,sep=""))

    }else{Slope_pl<-ggplot()+
      labs(title="No significant Sigmoidal Slope has been found")
    }
    print(Slope_pl)

      }


    }

```{asis, echo=show_text} Description:

The top 15 protein profiles with a significant slope parameter of the sigmoidal fit.

```{asis, echo=show_text}
**Figure 10: The top proteins with significant RB50 values**

 # if(params$sigmodin != 'sigmoid'){
 #      plot.new()
 #      legend('topleft', c("Linear fit applied, no sigmoidal plots available"),bty = 'n')
 #    }else{
 #      
 #      if(params$datype == 'intensity'){
 #        
 #        pred.names <- paste0('predX',1:(params$chans -1))
 #        final.Names <- paste0('rep1_C',0:(params$chans - 2))
 #        
 #        top<-15 #max prot to plot
 #        
 #        data_merged_2 <- params$data2
 #        
 #        # RB50<-na.omit(data_merged_2[data_merged_2$RB50Err < as.numeric(summary(data_merged_2$RB50Err)[5]) & data_merged_2$RB50Pval<0.05 & data_merged_2$predX1-data_merged_2$predX9 >0 & data_merged_2$predX1 <= 100,])
 #        RB50 <- data.frame(na.omit(data_merged_2[data_merged_2$RB50Err < as.numeric(summary(data_merged_2$RB50Err)[5]) & data_merged_2$RB50Pval < 0.05
 #                                                 & data_merged_2$predX1-data_merged_2[,paste0('predX',(params$chans - 1))] >0 & data_merged_2$predX1 <= 100,]))
 #        
 #        
 #        RB50_ordered<- na.omit(RB50[order(RB50$RB50Pval, decreasing = FALSE),][1:top,])
 #        
 #        if(nrow(RB50_ordered)>0){
 #          RB50_pred<-shape_for_ggplot_pred(RB50_ordered,log10(conc),pred.names)
 #          RB50_perc<-shape_for_ggplot_perc(RB50_ordered,log10(conc),final.Names)
 #          what<-c("RB50 (p.val) ")
 #          RB50_pl<-ggplot()+
 #            geom_line(data = RB50_pred, aes(x=x,y=value, colour=factor(RB50_pred$GeneID)), size = 1) +
 #            geom_point(data = RB50_perc, aes(x=x,y=value,colour=RB50_perc$GeneID))+
 #            labs(title=paste(what,"Top",top,sep=""))
 #          print(RB50_pl)
 #        }else{
 #          RB50_pl<-ggplot()+
 #            labs(title="No significant RB50 has been found")
 #        print(RB50_pl)
 #        }
 #      }else{
 #        
 #        top<-15 #max prot to plot
 #    
 # 
 #    
 #    # RB50<-na.omit(data_merged_2[data_merged_2$RB50Err < as.numeric(summary(data_merged_2$RB50Err)[5]) & data_merged_2$RB50Pval<0.05 & data_merged_2$predX1-data_merged_2$predX9 >0 & data_merged_2$predX1 <= 100,])
 #    RB50 <- data.frame(na.omit(data_merged_2[data_merged_2$RB50Err < as.numeric(summary(data_merged_2$RB50Err)[5]) & data_merged_2$RB50Pval < 0.05
 #                                             & data_merged_2$predX1-data_merged_2[,paste0('predX',(params$chans-1))] >0 & data_merged_2$predX1 <= 100,]))
 #    
 #    
 #    RB50_ordered<- na.omit(RB50[order(RB50$RB50Pval, decreasing = FALSE),][1:top,])
 #    
 #    if(nrow(RB50_ordered)>0){
 #      RB50_pred<-shape_for_ggplot_pred(RB50_ordered,log10(conc),pred.names)
 #      RB50_perc<-shape_for_ggplot_perc(RB50_ordered,log10(conc),final.Names)
 #      what<-c("RB50 (p.val) ")
 #      RB50_pl<-ggplot()+
 #        geom_line(data = RB50_pred, aes(x=x,y=value, colour=factor(RB50_pred$GeneID)), size = 1) +
 #        geom_point(data = RB50_perc, aes(x=x,y=value,colour=RB50_perc$GeneID))+
 #        labs(title=paste(what,"Top",top,sep=""))
 #    }else{
 #      RB50_pl<-ggplot()+
 #        labs(title="No significant RB50 has been found")
 #    }
 #    print(RB50_pl)
 #      }
 #    
 #    
 #    }

    if(params$sigmodin != 'sigmoid'){
      plot.new()
      legend('topleft', c("Linear fit applied, no sigmoidal plots available"),bty = 'n')
    }else{
      if(params$datype =='intensity'){
        conc<- params$concen

        pred.names <- paste0('predX',1:(params$chans -1))
        final.Names <- paste0('rep1_C',0:(params$chans - 2))

        top<-15 #max prot to plot

        data_merged_2 <- params$data2

        # RB50<-na.omit(data_merged_2[data_merged_2$RB50Err < as.numeric(summary(data_merged_2$RB50Err)[5]) & data_merged_2$RB50Pval<0.05 & data_merged_2$predX1-data_merged_2$predX9 >0 & data_merged_2$predX1 <= 100,])
        RB50 <- data.frame(na.omit(data_merged_2[data_merged_2$RB50Err < as.numeric(summary(data_merged_2$RB50Err)[5]) & data_merged_2$RB50Pval < 0.05
                                                 & data_merged_2$predX1-data_merged_2[,paste0('predX',(params$chans - 1))] >0 & data_merged_2$predX1 <= 100,]))


        RB50_ordered<- na.omit(RB50[order(RB50$RB50Pval, decreasing = FALSE),][1:top,])

        if(nrow(RB50_ordered)>0){
          RB50_pred<-shape_for_ggplot_pred(RB50_ordered,log10(conc),pred.names)
          RB50_perc<-shape_for_ggplot_perc(RB50_ordered,log10(conc),final.Names)
          what<-c("RB50 (p.val) ")
          GeneID <- factor(RB50_pred$GeneID)
          RB50_pl<-ggplot()+
            geom_line(data = RB50_pred, aes(x=x,y=value, colour=GeneID), size = 1) +
            geom_point(data = RB50_perc, aes(x=x,y=value,colour=RB50_perc$GeneID))+
            labs(title=paste(what,"Top",top,sep=""))
          print(RB50_pl)
        }else{
          RB50_pl<-ggplot()+
            labs(title="No significant RB50 has been found")
        print(RB50_pl)
        }

      }else {
        conc<- params$concen
        pred.names <- params$sigPred
        final.Names <- params$finNam
        top<-15 #max prot to plot

        data_merged_2 <- params$data2

        # RB50<-na.omit(data_merged_2[data_merged_2$RB50Err < as.numeric(summary(data_merged_2$RB50Err)[5]) & data_merged_2$RB50Pval<0.05 & data_merged_2$predX1-data_merged_2$predX9 >0 & data_merged_2$predX1 <= 100,])
        RB50 <- data.frame(na.omit(data_merged_2[data_merged_2$RB50Err < as.numeric(summary(data_merged_2$RB50Err)[5]) & data_merged_2$RB50Pval < 0.05
                                                 & data_merged_2$predX1-data_merged_2$predX9 >0 & data_merged_2$predX1 <= 100,]))


        RB50_ordered<- na.omit(RB50[order(RB50$RB50Pval, decreasing = FALSE),][1:top,])

        if(nrow(RB50_ordered)>0){
          RB50_pred<-shape_for_ggplot_pred(RB50_ordered,log10(conc),pred.names)
          RB50_perc<-shape_for_ggplot_perc(RB50_ordered,log10(conc),final.Names)
          what<-c("RB50 (p.val) ")
          GeneID <- factor(RB50_pred$GeneID)
          RB50_pl<-ggplot()+
            geom_line(data = RB50_pred, aes(x=x,y=value, colour=GeneID), size = 1) +
            geom_point(data = RB50_perc, aes(x=x,y=value,colour=RB50_perc$GeneID))+
            labs(title=paste(what,"Top",top,sep=""))
        }else{
          RB50_pl<-ggplot()+
            labs(title="No significant RB50 has been found")
        }
        print(RB50_pl)
      }
    }

```{asis, echo=show_text} Description:

The top 15 protein profiles with significant RB50 parameter of the sigmoidal fit.

##Top Proteins 

Below are the top ten significant proteins for each of the model coeffcients.

```r
a<- params$data2

    if(params$sigmodin == 'sigmoid'){
      if(params$kin == TRUE){
        a<- (data.frame(GeneID = a$GeneID, Slope = a$SlopeCoef, SlopePval = a$SlopePval , RB50 = a$RB50Coef, RB50pval = a$RB50Pval,Topminusbottom = a$Top_minus_min ,correctedRB50 = a$correctedRB50, depletionConst = a$depletionConstant, Kinase = a$Kinase))
      }else{
  a<- data.frame(GeneID = a$GeneID, RB50 = a$RB50Coef, RB50pval = a$RB50Pval,Topminusbottom = a$Top_minus_min , Slope = a$SlopeCoef,
                 SlopePval = a$SlopePval , Kinase = a$Kinase)
      }

    } else{
      a<- data.frame(GeneID = a$GeneID.x, Intercept = signif(a$P.Value), Slope = signif(a$P.Value.x), Quadratic = signif(a$P.Value.y), Kinase = a$Kinase)
    }

if(params$sigmodin == 'sigmoid'){
  inttab <- a[order(a$Topminusbottom,decreasing = TRUE),][1:10,]
   quadtab <- a[order(a$RB50pval,decreasing = FALSE),][1:10,]
    slopetab <- a[order(a$SlopePval,decreasing = FALSE),][1:10,]
}else{
  inttab <-  a[order(a$Intercept,decreasing = FALSE),][1:10,]

   slopetab <- a[order(a$Slope,decreasing = FALSE),][1:10,]

   quadtab <- a[order(a$Quadratic,decreasing = FALSE),][1:10,]
}

```{asis, echo=!show_text} Top 10 proteins sorted by smallest Intercept coefficient p-values

```{asis, echo=show_text}
**Top 10 proteins sorted by the largest top - bottom  coefficient differences**

r kable(inttab)

```{asis, echo=TRUE} Top 10 proteins sorted by smallest Slope coefficient p-values

`r kable(slopetab)`


```{asis, echo=!show_text}
**Top 10 proteins sorted by smallest Quadratic coefficient p-values**

```{asis, echo=show_text} Top 10 proteins sorted by smallest RB50 coefficient p-values

```

r kable(quadtab)

brunocontrino/Doscheda documentation built on Sept. 14, 2020, 4:45 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

brunocontrino/Doscheda
A DownStream Chemo-Proteomics Analysis Pipeline

In brunocontrino/Doscheda: A DownStream Chemo-Proteomics Analysis Pipeline

Linear Model Plots

Sigmoidal Fit Plots

R Package Documentation

Browse R Packages

We want your feedback!

brunocontrino/Doscheda A DownStream Chemo-Proteomics Analysis Pipeline

In brunocontrino/Doscheda: A DownStream Chemo-Proteomics Analysis Pipeline

Linear Model Plots

Sigmoidal Fit Plots

R Package Documentation

Browse R Packages

We want your feedback!

brunocontrino/Doscheda
A DownStream Chemo-Proteomics Analysis Pipeline