R/massqc_report.R

Defines functions massqc_report

Documented in massqc_report

#' @title Report data quality
#' @description Report data quality
#' @author Xiaotao Shen
#' \email{shenxt1990@@163.com}
#' @param object tidymass-class object.
#' @param path path
#' @param type type html, pdf or all.
#' @return reporting result
#' @export
#'@examples
#'\dontrun{
#' library(massdataset)
#' data("expression_data")
#' data("sample_info")
#' data("sample_info_note")
#' data("variable_info")
#' data("variable_info_note")
#' object =
#'   create_mass_dataset(
#'     expression_data = expression_data,
#'     sample_info = sample_info,
#'     variable_info = variable_info,
#'     sample_info_note = sample_info_note,
#'     variable_info_note = variable_info_note
#'   )
#' object
#' massqc_report(object, path = "demo_data")
#'}

massqc_report <-
  function(object,
           path = ".",
           type = c("html", "pdf", "all")) {
    dir.create(path, showWarnings = FALSE, recursive = TRUE)
    type = match.arg(type)
    if (!is(object = object, class2 = "mass_dataset")) {
      stop("obejct should be mass_dataset class.\n")
    }
    options(warn = -1)
    ###path
    if (length(grep("Report", dir(path))) > 0) {
      output_path = file.path(path, paste('Report', length(grep(
        "Report", dir(path)
      )) + 1, sep = "_"))
    } else{
      output_path = file.path(path, "Report")
    }
    
    # dir.create(output_path, showWarnings = FALSE)
    
    ####get the template
    message("Get report template.")
    rmarkdown::draft(
      file = output_path,
      template = "massqc",
      package = "massqc",
      create_dir = TRUE,
      edit = FALSE
    )
    
    ###parameters
    message("Parameters.")
    parameters <-
      massdataset::extract_process_info(object) %>%
      lapply(function(x) {
        if (length(x) == 1) {
          massdataset::parse_tidymass_parameter(object = x)
        } else{
          x %>%
            lapply(function(y) {
              massdataset::parse_tidymass_parameter(object = y)
            }) %>%
            dplyr::bind_rows()
        }
      }) %>%
      dplyr::bind_rows() %>%
      dplyr::arrange(time)
    
    save(parameters, file = file.path(output_path, "parameters.rda"))
    
    ####sample information
    message("Sample infromation.")
    save(object, file = file.path(output_path, "object.rda"))
    
    if (nrow(object) >= 1000) {
      hex = TRUE
    } else{
      hex = FALSE
    }
    
    plot <-
      massdataset::show_mz_rt_plot(object = log(object), hex = hex)
    
    ggplot2::ggsave(
      filename = file.path(output_path, "mz_rt_plot.png"),
      plot = plot,
      width = 8,
      height = 6,
      dpi = 600
    )
    
    ggplot2::ggsave(
      filename = file.path(output_path, "mz_rt_plot.pdf"),
      plot = plot,
      width = 8,
      height = 6,
      dpi = 600
    )
    
    ####missing value
    ####in dataset
    message("Missing values in dataset.")
    plot <-
      massdataset::show_missing_values(
        object = object,
        show_row_names =
          ifelse(nrow(object) < 20, TRUE, FALSE),
        show_column_names = ifelse(ncol(object) < 20, TRUE, FALSE),
        percentage = TRUE,
        return_as_ggplot = TRUE
      )
    
    ggplot2::ggsave(
      filename = file.path(output_path, "mv_plot.png"),
      plot = plot,
      width = 8,
      height = 6,
      dpi = 600
    )
    
    ggplot2::ggsave(
      filename = file.path(output_path, "mv_plot.pdf"),
      plot = plot,
      width = 8,
      height = 6,
      dpi = 600
    )
    
    ####in variables
    message("Missing values in all variables.")
    if (nrow(object) > 10000) {
      plot <-
        massdataset::show_variable_missing_values(
          object = object[seq_len(10000), ],
          order_by = "rt",
          show_x_text = ifelse(nrow(object) < 20, TRUE, FALSE),
          show_x_ticks = ifelse(nrow(object) < 20, TRUE, FALSE),
          percentage = TRUE
        ) +
        labs(title = "Only first 10000 variables.")
    } else{
      plot <-
        massdataset::show_variable_missing_values(
          object = object,
          order_by = "rt",
          show_x_text = ifelse(nrow(object) < 20, TRUE, FALSE),
          show_x_ticks = ifelse(nrow(object) < 20, TRUE, FALSE),
          percentage = TRUE
        )
    }
    
    ggplot2::ggsave(
      filename = file.path(output_path, "variable_mv_plot.png"),
      plot = plot,
      width = 8,
      height = 6,
      dpi = 600
    )
    
    ggplot2::ggsave(
      filename = file.path(output_path, "variable_mv_plot.pdf"),
      plot = plot,
      width = 8,
      height = 6,
      dpi = 600
    )
    
    ####in samples
    message("Missing values in all samples.")
    plot <-
      massdataset::show_sample_missing_values(
        object = object,
        color_by = "class",
        order_by = "injection.order",
        percentage = TRUE
      )
    
    ggplot2::ggsave(
      filename = file.path(output_path, "sample_mv_plot.png"),
      plot = plot,
      width = 8,
      height = 6,
      dpi = 600
    )
    
    ggplot2::ggsave(
      filename = file.path(output_path, "sample_mv_plot.pdf"),
      plot = plot,
      width = 8,
      height = 6,
      dpi = 600
    )
    
    ###RSD
    message("RSD for variables.")
    if (sum(object@sample_info$class == "QC") >= 3) {
      plot1 <-
        massqc_rsd_plot(
          object = object %>%
            massdataset::activate_mass_dataset(what = "sample_info") %>%
            dplyr::filter(class == "QC"),
          order_by = "rt",
          show_x_text = ifelse(nrow(object) < 20, TRUE, FALSE),
          show_x_ticks = ifelse(nrow(object) < 20, TRUE, FALSE)
        ) +
        labs(title = "All QC samples")
      
      
      plot2 =
        massqc_cumulative_rsd_plot(
          object =  object %>%
            massdataset::activate_mass_dataset(what = "sample_info") %>%
            dplyr::filter(class == "QC"),
          rsd_cutoff = 30
        ) +
        labs(title = "All QC samples")
      
    } else{
      plot1 <-
        massqc_rsd_plot(
          object = object %>%
            massdataset::activate_mass_dataset(what = "sample_info") %>%
            dplyr::filter(class == "Subject"),
          order_by = "rt",
          show_x_text = ifelse(nrow(object) < 20, TRUE, FALSE),
          show_x_ticks = ifelse(nrow(object) < 20, TRUE, FALSE)
        ) +
        labs(title = "All Subject samples")
      
      plot2 <-
        massqc_cumulative_rsd_plot(
          object =  object %>%
            massdataset::activate_mass_dataset(what = "sample_info") %>%
            dplyr::filter(class == "Subject"),
          rsd_cutoff = 30
        ) +
        labs(title = "All Subject samples")
      
    }
    
    ggplot2::ggsave(
      filename = file.path(output_path, "variable_rsd.pdf"),
      plot = plot1,
      width = 8,
      height = 6,
      dpi = 600
    )
    
    ggplot2::ggsave(
      filename = file.path(output_path, "variable_cumulative_rsd.pdf"),
      plot = plot2,
      width = 8,
      height = 6,
      dpi = 600
    )
    
    plot <-
      plot1 + plot2 + patchwork::plot_layout(nrow = 1)
    
    ggplot2::ggsave(
      filename = file.path(output_path, "rsd.png"),
      plot = plot,
      width = 12,
      height = 6,
      dpi = 600
    )
    
    ####sample box plot
    message("Intensity for all the variables in all samples.")
    if (sum(object@sample_info$class == "QC") >= 3) {
      plot <-
        massqc_sample_boxplot(
          object = log(object) %>%
            massdataset::activate_mass_dataset(what = "sample_info") %>%
            dplyr::filter(class == "QC"),
          color_by = ifelse(any(
            colnames(object@sample_info) == "batch"
          ), "batch", "class")
        )
    } else{
      plot <-
        massqc_sample_boxplot(object = log(object)[, seq_len(30)],
                              color_by = ifelse(any(
                                colnames(object@sample_info) == "batch"
                              ), "batch", "class")) +
        labs(title = ifelse(ncol(object) > 30, "First 30 samples", ""))
    }
    
    if (ncol(object) > 100) {
      plot <-
        plot +
        ggplot2::theme(axis.text.x = element_blank(),
                       axis.ticks.x = element_blank())
    } else{
      plot <-
        plot +
        ggplot2::theme(axis.text.x = ggplot2::element_text(
          angle = 45,
          hjust = 1,
          vjust = 1
        ))
    }
    
    ggplot2::ggsave(
      filename = file.path(output_path, "sample_box.pdf"),
      plot = plot,
      width = 8,
      height = 6,
      dpi = 600
    )
    
    ggplot2::ggsave(
      filename = file.path(output_path, "sample_box.png"),
      plot = plot,
      width = 8,
      height = 6,
      dpi = 600
    )
    
    ####sample correlation
    message("Sample correlation.")
    
    if (sum(object@sample_info$class == "QC") >= 3) {
      plot <-
        massqc_sample_correlation(
          object = object %>%
            massdataset::activate_mass_dataset(what = "sample_info") %>%
            dplyr::filter(class == "QC"),
          method = "square"
        )
    } else{
      plot <-
        massqc_sample_correlation(object = object[seq_len(100)],
                                  method = "square") +
        labs(title = ifelse(ncol(object) > 100, "First 100 samples", ""))
    }
    
    ggplot2::ggsave(
      filename = file.path(output_path, "sample_correlation.png"),
      plot = plot,
      width = 8,
      height = 6,
      dpi = 600
    )
    
    ggplot2::ggsave(
      filename = file.path(output_path, "sample_correlation.pdf"),
      plot = plot,
      width = 8,
      height = 6,
      dpi = 600
    )
    
    ###PCA
    message("PCA score plot.")
    sample_info <-
      extract_sample_info(object)
    if (any(colnames(sample_info) == "batch")) {
      sample_info$batch <-
        as.character(sample_info$batch)
      methods::slot(object, "sample_info") <-
        sample_info
    }
    plot <-
      object %>%
      `+`(1) %>%
      log(2) %>%
      massqc_pca(
        color_by = ifelse(any(
          colnames(object@sample_info) == "batch"
        ),
        "batch", "class"),
        frame = TRUE,
        line = FALSE
      )
    message("PCA done.")
    ggplot2::ggsave(
      filename = file.path(output_path, "pca.pdf"),
      plot = plot,
      width = 8,
      height = 6,
      dpi = 600
    )
    
    ggplot2::ggsave(
      filename = file.path(output_path, "pca.png"),
      plot = plot,
      width = 8,
      height = 6,
      dpi = 600
    )
    
    message("Render report.")
    ##transform rmd to HTML or pdf
    if (type == "html" | type == "all") {
      rmarkdown::render(file.path(output_path, "massqc.template.Rmd"),
                        rmarkdown::html_document())
      file.rename(
        from = file.path(output_path, "massqc.template.html"),
        to = file.path(output_path, "massqc_report.html")
      )
    }
    
    
    ########render rmarkddown to html or pdf
    if (type == "pdf" | type == "all") {
      rmarkdown::render(file.path(output_path, "massqc.template.Rmd"),
                        rmarkdown::pdf_document())
      file.rename(
        from = file.path(output_path, "massqc.template.pdf"),
        to = file.path(output_path, "massqc_report.pdf")
      )
    }
    
    ####remove some files
    message("Remove some files.")
    file = dir(output_path)
    remove_file = grep("png|Rmd|parameters|rda", file, value = TRUE)
    unlink(
      x = file.path(output_path, remove_file),
      recursive = TRUE,
      force = TRUE
    )
  }
tidymass/massqc documentation built on Sept. 13, 2023, 10:31 p.m.