R/missingValuesPlots.R

Defines functions hc_mvTypePlot2 wrapper.hc_mvTypePlot2 mvImage wrapper.mvImage mvHisto_HC wrapper.mvHisto_HC mvPerLinesHistoPerCondition_HC wrapper.mvPerLinesHistoPerCondition_HC mvPerLinesHisto_HC wrapper.mvPerLinesHisto_HC

Documented in hc_mvTypePlot2 mvHisto_HC mvImage mvPerLinesHisto_HC mvPerLinesHistoPerCondition_HC wrapper.hc_mvTypePlot2 wrapper.mvHisto_HC wrapper.mvImage wrapper.mvPerLinesHisto_HC wrapper.mvPerLinesHistoPerCondition_HC

#' #' This method is a wrapper to plots from a \code{MSnSet} object a 
#' #' histogram which represents the distribution of the 
#' #' number of missing values (NA) per lines (ie proteins).
#' #' 
#' #' @title Histogram of missing values per lines from an object 
#' #' \code{MSnSet}
#' #' 
#' #' @param obj An object of class \code{MSnSet}.
#' #' 
#' #' @param indLegend The indice of the column name's in \code{pData()} tab .
#' #' 
#' #' @param showValues A logical that indicates wether numeric values should be
#' #' drawn above the bars.
#' #' 
#' #' @return A histogram
#' #' 
#' #' @author Alexia Dorffer
#' #' 
#' #' @examples
#' #' utils::data(Exp1_R25_pept, package='DAPARdata')
#' #' wrapper.mvPerLinesHisto(Exp1_R25_pept)
#' #' 
#' #' @export
#' #' 
#' #' @importFrom Biobase pData exprs fData
#' #' 
#' wrapper.mvPerLinesHisto <- function(obj, indLegend="auto", showValues=FALSE){
#'   qData <- Biobase::exprs(obj)
#'   samplesData <- Biobase::pData(obj)
#'   mvPerLinesHisto(qData, samplesData, indLegend, showValues)
#' }


#' This method is a wrapper to plots from a \code{MSnSet} object a 
#' histogram which represents the distribution of the 
#' number of missing values (NA) per lines (ie proteins).
#' 
#' @title Histogram of missing values per lines from an object using highcharter
#' \code{MSnSet}
#' 
#' @param obj An object of class \code{MSnSet}.
#' 
#' @param indLegend The indice of the column name's in \code{pData()} tab .
#' 
#' @param showValues A logical that indicates wether numeric values should be
#' drawn above the bars.
#' 
#' @return A histogram
#' 
#' @author Alexia Dorffer
#' 
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' wrapper.mvPerLinesHisto_HC(Exp1_R25_pept)
#' 
#' @export
#' 
#' @importFrom Biobase pData exprs fData
#' 
wrapper.mvPerLinesHisto_HC <- function(obj, indLegend="auto", showValues=FALSE){
  if (is.null(obj)){
    warning("The dataset in NULL. Cannot continue.")
    return(NULL)
  }
  
  qData <- Biobase::exprs(obj)
  samplesData <- Biobase::pData(obj)
  hc <- mvPerLinesHisto_HC(qData, samplesData, indLegend, showValues)
  return(hc)
}



#' This method plots a bar plot which represents the distribution of the 
#' number of missing values (NA) per lines (ie proteins).
#' 
#' @title Bar plot of missing values per lines using highcharter
#' @param qData A dataframe that contains the data to plot.
#' @param samplesData A dataframe which contains informations about 
#' the replicates.
#' @param indLegend The indice of the column name's in \code{pData()} tab 
#' @param showValues A logical that indicates wether numeric values should be
#' drawn above the bars.
#' @return A bar plot
#' @author Florence Combes, Samuel Wieczorek
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' qData <- Biobase::exprs(Exp1_R25_pept)
#' samplesData <- Biobase::pData(Exp1_R25_pept)
#' mvPerLinesHisto_HC(qData, samplesData)
#' 
#' @export
#'
mvPerLinesHisto_HC <- function(qData, samplesData, indLegend="auto", showValues=FALSE){
  
  if (identical(indLegend,"auto")) { indLegend <- c(2:length(colnames(samplesData)))}
  
  for (j in 1:length(colnames(qData))){
    noms <- NULL
    for (i in 1:length(indLegend)){
      noms <- paste(noms, samplesData[j,indLegend[i]], sep=" ")
    }
    colnames(qData)[j] <- noms
  }
  
  coeffMax <- .1
  
  NbNAPerCol <- colSums(is.na(qData))
  NbNAPerRow <- rowSums(is.na(qData))
  #par(mar = c(10,3, 3, 3))
  
  nb.col <- dim(qData)[2] 
  nb.na <- NbNAPerRow
  temp <- table(NbNAPerRow)
  nb.na2barplot <- c(temp, rep(0,1+ncol(qData)-length(temp)))
  
  if (sum(NbNAPerRow) == 0){
    nb.na2barplot <- rep(0,1+ncol(qData))
  }
  
  df <- data.frame(y=nb.na2barplot[-1])
  myColors = rep("lightgrey",nrow(df))
  myColors[nrow(df)] <- "red"
  
  #df1 <- df2 <- df
  #df2[1:(nrow(df)-1),] <- 0
  #df1 [nrow(df),] <- 0
  
  
  #, series = list( pointWidth = 50)
  
  h1 <-  highchart() %>% 
    hc_title(text = "#[lines] with X NA values") %>% 
    hc_add_series(data = df, type="column", colorByPoint = TRUE) %>%
    hc_colors(myColors) %>%
    hc_plotOptions( column = list(stacking = "normal"),
                    animation=list(duration = 100)) %>%
    hc_legend(enabled = FALSE) %>%
    hc_xAxis(categories = row.names(df), title = list(text = "#[NA values] per line")) %>%
    my_hc_ExportMenu(filename = "missingValuesPlot1") %>%
    hc_tooltip(enabled = TRUE,
               headerFormat= '',
               pointFormat = "{point.y} ")
  
  return(h1)
  
}



#' This method is a wrapper to plots (using highcharts) from a \code{MSnSet} object a 
#' bar plot which represents the distribution of the 
#' number of missing values (NA) per lines (ie proteins) and per conditions.
#' 
#' @title Bar plot of missing values per lines and per conditions from an 
#' object \code{MSnSet}
#' 
#' @param obj An object of class \code{MSnSet}.
#' 
#' @param ... xxx
#' 
#' @return A bar plot
#' 
#' @author Samuel Wieczorek
#' 
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' wrapper.mvPerLinesHistoPerCondition_HC(Exp1_R25_pept)
#' 
#' @export
#'
#' @importFrom Biobase pData exprs fData
#' 
wrapper.mvPerLinesHistoPerCondition_HC <- function(obj, ...){
  if (is.null(obj)){
    warning("The dataset in NULL. Cannot continue.")
    return(NULL)
  }
  qData <- Biobase::exprs(obj)
  samplesData <- Biobase::pData(obj)
  mvPerLinesHistoPerCondition_HC(qData, samplesData, ...)
}




#' This method plots a bar plot which represents the distribution of the 
#' number of missing values (NA) per lines (ie proteins) and per conditions.
#' 
#' @title Bar plot of missing values per lines and per condition
#' 
#' @param qData A dataframe that contains quantitative data.
#' 
#' @param samplesData A dataframe where lines correspond to samples and 
#' columns to the meta-data for those samples.
#' 
#' @param indLegend The indice of the column name's in \code{pData()} tab 
#' 
#' @param showValues A logical that indicates wether numeric values should be
#' drawn above the bars.
#' 
#' @param palette xxx
#' 
#' @return A bar plot
#' 
#' @author Samuel Wieczorek
#' 
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' qData <- Biobase::exprs(Exp1_R25_pept)
#' samplesData <- Biobase::pData(Exp1_R25_pept)
#' mvPerLinesHistoPerCondition_HC(qData, samplesData, palette=(c('#AAAAAA', '#AAAAAA')))
#' 
#' @export
#'
mvPerLinesHistoPerCondition_HC <- function(qData, 
                                           samplesData, 
                                           indLegend="auto", 
                                           showValues=FALSE, 
                                           palette=NULL){
  
  conds <- samplesData[,"Condition"]
  
  palette <- BuildPalette(conds, palette)
  
  if (identical(indLegend,"auto")) { indLegend <- c(2:length(colnames(samplesData)))}
  
  nbConditions <- length(unique(samplesData[,"Condition"]))
  
  ncolMatrix <- max(unlist(lapply(unique(samplesData[,"Condition"]), function(x){length(which(samplesData[,"Condition"]==x))})))
  m <- matrix(rep(0, nbConditions*(1+ncolMatrix)), 
              ncol = nbConditions, 
              dimnames=list(seq(0:(ncolMatrix)),unique(samplesData[,"Condition"])))
  
  for (i in unique(samplesData[,"Condition"]))
  {
    nSample <- length(which(samplesData[,"Condition"] == i))
    t <- NULL
    if (nSample == 1) {
      t <- table(as.integer(is.na(qData[,which(samplesData[,"Condition"] == i)])))
    } else {t <- table(rowSums(is.na(qData[,which(samplesData[,"Condition"] == i)])))}
    
    m[as.integer(names(t))+1,i] <- t
  }
  m <- as.data.frame(m)
  
  rownames(m) <- 0:(nrow(m)-1)
  
  h1 <-  highchart() %>% 
    hc_title(text = "#[lines] with X NA values (condition-wise)") %>% 
    my_hc_chart(chartType = "column") %>%
    hc_plotOptions( column = list(stacking = ""),
                    dataLabels = list(enabled = FALSE),
                    animation=list(duration = 100)) %>%
    hc_colors(unique(palette)) %>%
    hc_legend(enabled = FALSE) %>%
    hc_xAxis(categories = row.names(m), title = list(text = "#[NA values] per line (condition-wise)")) %>%
    my_hc_ExportMenu(filename = "missingValuesPlot_2") %>%
    hc_tooltip(headerFormat= '',
               pointFormat = "{point.y} ")
  
  for (i in 1:nbConditions){
    h1 <- h1 %>% hc_add_series(data=m[,unique(samplesData[,"Condition"])[i]]) }
  
  
  return(h1)
  
  
  
}



#' This method plots from a \code{MSnSet} object a histogram of 
#' missing values.
#' 
#' @title Histogram of missing values from a \code{MSnSet} object
#' @param obj An object of class \code{MSnSet}.
#' @param indLegend The indices of the column name's in \code{pData()} tab.
#' @param showValues A logical that indicates wether numeric values should be
#' drawn above the bars.
#' @param ... xxx
#' @return A histogram
#' @author Alexia Dorffer
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' wrapper.mvHisto_HC(Exp1_R25_pept, showValues=TRUE)
#' 
#' @export
#'
#' @importFrom Biobase pData exprs fData
#' 
wrapper.mvHisto_HC <- function(obj, indLegend="auto", showValues=FALSE, ...){
  if (is.null(obj)){
    warning("The dataset in NULL. Cannot continue.")
    return(NULL)
  }
  
  qData <- Biobase::exprs(obj)
  samplesData <- Biobase::pData(obj)
  conds <- samplesData[,"Condition"]
  mvHisto_HC(qData, samplesData, conds, indLegend, showValues, ...)
}




#' This method plots a histogram of missing values. Same as the function \code{mvHisto}
#' but uses the package \code{highcharter}
#' 
#' @title Histogram of missing values
#' @param qData A dataframe that contains quantitative data.
#' @param samplesData A dataframe where lines correspond to samples and 
#' columns to the meta-data for those samples.
#' @param conds A vector of the conditions (one condition per sample).
#' @param indLegend The indices of the column name's in \code{pData()} tab
#' @param showValues A logical that indicates wether numeric values should be
#' drawn above the bars.
#' @param base_palette xxx
#' @return A histogram
#' @author Florence Combes, Samuel Wieczorek
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' qData <- Biobase::exprs(Exp1_R25_pept)
#' samplesData <- Biobase::pData(Exp1_R25_pept)
#' conds <- Biobase::pData(Exp1_R25_pept)[,"Condition"]
#' mvHisto_HC(qData, samplesData, conds, indLegend="auto", showValues=TRUE)
#' 
#' @export
#'
mvHisto_HC <- function(qData, 
                       samplesData, 
                       conds, 
                       indLegend="auto", 
                       showValues=FALSE, 
                       base_palette = NULL){
  
  
  palette <- BuildPalette(conds, base_palette)
  print(palette)
  
  if (identical(indLegend,"auto")) { 
    indLegend <- c(2:length(colnames(samplesData)))
  }
  
  
  NbNAPerCol <- colSums(is.na(qData))
  NbNAPerRow <- rowSums(is.na(qData))
  
  df <- data.frame(NbNAPerCol)
  names(df) <- 'y'
  
  
  h1 <-  highchart() %>%
    my_hc_chart(chartType = "column") %>%
    hc_title(text = "#NA by replicate") %>%
    hc_add_series(df,type="column", colorByPoint = TRUE) %>%
    hc_colors(palette) %>%
    hc_plotOptions( column = list(stacking = "normal"),
                    animation=list(duration = 100)) %>%
    hc_legend(enabled = FALSE) %>%
    hc_xAxis(categories = conds, title = list(text = "Replicates")) %>%
    my_hc_ExportMenu(filename = "missingValuesPlot_3") %>%
    hc_tooltip(headerFormat= '',
               pointFormat = "{point.y}")
  
  
  return(h1)

}



#' Plots a heatmap of the quantitative data. Each column represent one of
#' the conditions in the object of class \code{MSnSet} and 
#' the color is proportional to the mean of intensity for each line of
#' the dataset.
#' The lines have been sorted in order to vizualize easily the different
#' number of missing values. A white square is plotted for missing values.
#' 
#' @title Heatmap of missing values from a \code{MSnSet} object
#' @param obj An object of class \code{MSnSet}.
#' @return A heatmap
#' @author Alexia Dorffer
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' obj <- Exp1_R25_pept
#' keepThat <- mvFilterGetIndices(obj, condition='WholeMatrix', threshold=1)
#' obj <- mvFilterFromIndices(obj, keepThat)
#' wrapper.mvImage(obj)
#' 
#' @importFrom Biobase exprs pData fData
#' 
#' 
#' @export
#'
#' @importFrom Biobase pData exprs fData
#' 
wrapper.mvImage <- function(obj){
  
  qData <- Biobase::exprs(obj)
  conds <- Biobase::pData(obj)[,"Condition"]
  originValues <- Biobase::fData(obj)[,obj@experimentData@other$OriginOfValues]
  indices <- which(apply(is.OfType(originValues, "MEC"),1,sum) >0)
  
  if (length(indices)==0){
    warning("The dataset contains no Missing value on Entire Condition. So this plot is not available.")
    return(NULL)
  }else if (length(indices)==1){
    warning("The dataset contains only one Missing value on Entire Condition. Currently, Prostar does not handle such dataset to build the plot. 
          As it has no side-effects on the results, you can continue your imputation.")
    return(NULL)
  }
  
  mvImage(qData[indices,], conds)
  
}



#' Plots a heatmap of the quantitative data. Each column represent one of
#' the conditions in the object of class \code{MSnSet} and 
#' the color is proportional to the mean of intensity for each line of
#' the dataset.
#' The lines have been sorted in order to vizualize easily the different
#' number of missing values. A white square is plotted for missing values.
#' 
#' @title Heatmap of missing values
#' @param qData A dataframe that contains quantitative data.
#' @param conds A vector of the conditions (one condition per sample).
#' @return A heatmap
#' @author Samuel Wieczorek, Thomas Burger
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' qData <- Biobase::exprs(Exp1_R25_pept)
#' conds <- Biobase::pData(Exp1_R25_pept)[,"Condition"]
#' mvImage(qData, conds)
#' 
#' @export
#' 
#' @importFrom stats setNames
#'
mvImage <- function(qData, conds){
  
  ### build indices of conditions
  indCond <- list()
  ConditionNames <- unique(conds)
  for (i in ConditionNames) {
    indCond <- append(indCond, list(which(i == conds)))
  }
  indCond <- setNames(indCond, as.list(c("cond1", "cond2")))
  
  nNA1 = apply(as.matrix(qData[,indCond$cond1]), 1, function(x) sum(is.na(x)))
  nNA2 = apply(as.matrix(qData[,indCond$cond2]), 1, function(x) sum(is.na(x)))
  o <- order(((nNA1 +1)^2) / (nNA2 +1))
  exprso <- qData[o,]
  
  for (i in 1:nrow(exprso)){
    k <- order(exprso[i,indCond$cond1])
    exprso[i,rev(indCond$cond1)] <- exprso[i, k]
    .temp <- mean(exprso[i,rev(indCond$cond1)], na.rm = TRUE)
    exprso[i,which(!is.na(exprso[i,indCond$cond1]))] <- .temp
    
    k <- order(exprso[i,indCond$cond2])
    exprso[i,indCond$cond2] <- exprso[i, k+length(indCond$cond1)]
    .temp <- mean(exprso[i,indCond$cond2], na.rm = TRUE)
    exprso[i,length(indCond$cond1) + 
             which(!is.na(exprso[i,indCond$cond2]))] <- .temp
  }
  
  
  heatmap.DAPAR(exprso,
                col = colorRampPalette(c("yellow", "red"))(100),
                key=TRUE,
                srtCol= 0,
                labCol=conds,
                ylab = "Peptides / proteins",
                main = "MEC heatmap"
  )
  
  #heatmap_HC(exprso,col = colfunc(100),labCol=conds)
  
  
}





#' This method is a wrapper for the function \code{\link{hc_mvTypePlot2}} adapted to objects
#' of class \code{MSnSet}).

#' @title Distribution of observed values with respect to intensity values 
#' from a \code{MSnSet} object
#' @param obj An object of class \code{MSnSet}.
#' @param ... See \code{\link{hc_mvTypePlot2}} 
#' @return A scatter plot
#' @author Florence Combes, Samuel Wieczorek
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' wrapper.hc_mvTypePlot2(Exp1_R25_pept)
#' 
#' @export
#'
#' @importFrom Biobase pData exprs fData
#' 
wrapper.hc_mvTypePlot2 <- function(obj,...){
  qData <- Biobase::exprs(obj)
  conds <- Biobase::pData(obj)[,"Condition"]
  hc_mvTypePlot2(qData, conds = conds,...)
}




#' This method shows density plots which represents the repartition of
#' Partial Observed Values for each replicate in the dataset.
#' The colors correspond to the different conditions (slot Condition in in the
#' dataset of class \code{MSnSet}).
#' The x-axis represent the mean of intensity for one condition and one
#' entity in the dataset (i. e. a protein) 
#' whereas the y-axis count the number of observed values for this entity
#' and the considered condition.
#' 
#' @title Distribution of Observed values with respect to intensity values
#' @param qData A dataframe that contains quantitative data.
#' @param conds A vector of the conditions (one condition per sample).
#' @param palette The different colors for conditions
#' @param typeofMV xxx
#' @param title The title of the plot
#' @return Density plots
#' @author Samuel Wieczorek
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' qData <- Biobase::exprs(Exp1_R25_pept)
#' conds <- Biobase::pData(Exp1_R25_pept)[,"Condition"]
#' hc_mvTypePlot2(qData, conds, title="POV distribution")
#' 
#' @export
#'
hc_mvTypePlot2 <- function(qData, conds, palette = NULL, typeofMV=NULL, title=NULL){
  if (is.null(conds)){return(NULL)}
  
  if (is.null(palette)){
    palette <- grDevices::colorRampPalette(brewer.pal(8, "Dark2"))(length(unique(conds)))
  }else{
    if (length(palette) != length(unique(conds))){
      warning("The color palette has not the same dimension as the number of conditions. Set to default palette.")
      palette <- grDevices::colorRampPalette(brewer.pal(8, "Dark2"))(length(unique(conds)))
      #return(NULL)
    }
  }
  
  conditions <- conds
  mTemp <- nbNA <- nbValues <- matrix(rep(0,nrow(qData)*length(unique(conditions))), nrow=nrow(qData),
                                      dimnames=list(NULL,unique(conditions)))
  dataCond <- data.frame()
  ymax <- 0
  series <- list()
  myColors <- NULL
  j <- 1 
  
  for (iCond in unique(conditions)){
    if (length(which(conditions==iCond)) == 1){
      
      mTemp[,iCond] <- qData[,which(conditions==iCond)]
      nbNA[,iCond] <- as.integer(is.OfType(qData[,which(conditions==iCond)]))
      nbValues[,iCond] <- length(which(conditions==iCond)) - nbNA[,iCond]
    } else {
      mTemp[,iCond] <- apply(qData[,which(conditions==iCond)], 1, mean, na.rm=TRUE)
      nbNA[,iCond] <- apply(qData[,which(conditions==iCond)],1,function(x) length(which(is.na(x) == TRUE)))
      nbValues[,iCond] <- length(which(conditions==iCond)) - nbNA[,iCond]
    }
    
    
    for (i in 1:length(which(conditions==iCond))){
      data <- mTemp[which(nbValues[, iCond] == i), iCond]
      tmp <- NULL    
      if (length(data) >= 2)
      {
        tmp <- density(mTemp[which(nbValues[,iCond]==i),iCond])
        tmp$y <- tmp$y + i
        if (max(tmp$y) > ymax) { ymax <- max(tmp$y)}
      }
      series[[j]] <- tmp
      myColors <- c(myColors, palette[which(unique(conditions)==iCond)])
      j <- j+1
    }
    
  }
  
  
  hc <-  highchart() %>%
    hc_title(text = title) %>%
    my_hc_chart(chartType = "spline", zoomType="xy") %>%
    
    hc_legend(align = "left", verticalAlign = "top",
              layout = "vertical") %>%
    hc_xAxis(title = list(text = "Mean of intensities")) %>%
    hc_yAxis(title = list(text = "Number of quantity values per condition"),
             #categories = c(-1:3)
             #min = 1, 
             # max = ymax,
             tickInterval= 0.5
    ) %>%
    # hc_colors(palette) %>%
    hc_tooltip(headerFormat= '',
               pointFormat = "<b> {series.name} </b>: {point.y} ",
               valueDecimals = 2) %>%
    my_hc_ExportMenu(filename = "POV_distribution") %>%
    hc_plotOptions(
      series=list(
        showInLegend = TRUE,
        animation=list(
          duration = 100
        ),
        connectNulls= TRUE,
        marker=list(
          enabled = FALSE)
        
      )
    )
  
  for (i in 1:length(series)){
    hc <- hc_add_series(hc,
                        data = list_parse(data.frame(cbind(x = series[[i]]$x, 
                                                           y = series[[i]]$y))), 
                        showInLegend=FALSE,
                        color = myColors[i],
                        name=conds[i])
  }
  
  # add three empty series for the legend entries. Change color and marker symbol
  for (c in 1:length(unique(conds))){
    hc <-  hc_add_series(hc,data = data.frame(),
                         name = unique(conds)[c],
                         color = palette[c],
                         marker = list(symbol = "circle"),
                         type = "line")
  }
  
  hc
  return(hc)
}

Try the DAPAR package in your browser

Any scripts or data that you put into this service are public.

DAPAR documentation built on April 11, 2021, 6 p.m.