IbdCharacterization: Characterization of Inflammatory Bowel Disease Patient Cohorts

Documented in preMergeDiagnosticsFiles preMergeResultsFiles

# Copyright 2022 Observational Health Data Sciences and Informatics
#
# This file is part of IbdCharacterization
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#' Launch the results explorer Shiny app
#'
#' @description
#' Checks that the packages needed by the app are installed, publishes the
#' settings as \code{shinySettings} in the global environment for the duration
#' of the session, and runs the app bundled with this package under
#' \code{shiny/IbdCharacterizationResultsExplorer}.
#'
#' @param outputFolder   Folder used as the default \code{dataFolder} entry of
#'                       \code{shinySettings}.
#' @param shinySettings  List of settings made available to the app through the
#'                       global variable \code{shinySettings}. It is removed
#'                       from the global environment when this function exits.
#'
#' @export
launchShinyApp <- function(outputFolder,
                           shinySettings = list(storage = "filesystem",
                                                dataFolder = outputFolder,
                                                dataFile = "PreMerged.RData")) {
  # Packages are checked one by one, in this order; the first missing one
  # either gets installed (interactive) or aborts the launch.
  requiredPackages <- c("shiny",
                        "shinydashboard",
                        "shinyWidgets",
                        "DT",
                        "VennDiagram",
                        "htmltools",
                        "rmarkdown")
  for (requiredPackage in requiredPackages) {
    ensure_installed(requiredPackage)
  }

  appDir <- system.file("shiny/IbdCharacterizationResultsExplorer",
                        package = getThisPackageName(),
                        mustWork = TRUE)

  # The app reads its configuration from a global variable; clean it up again
  # once the app stops.
  assign("shinySettings", shinySettings, envir = .GlobalEnv)
  on.exit(rm(list = "shinySettings", envir = .GlobalEnv))

  shiny::runApp(appDir)
}

#' Premerge Shiny diagnostics files
#' 
#' @description 
#' If there are many diagnostics files, starting the Shiny app may take a very long time. This function 
#' already does most of the preprocessing, increasing loading speed.
#' 
#' The merged data will be stored in the same folder, and will automatically be recognized by the Shiny app.
#'
#' @details
#' Every csv file found in the zip files is read into a table in the global environment, named
#' after the camelCase version of the file name. Tables with the same name coming from several
#' zip files are row-bound together. All tables are then saved to \code{PreMerged.RData} in
#' \code{dataFolder}.
#'
#' @param dataFolder  folder where the exported zip files for the diagnostics are stored. Use
#'                         the \code{\link{runCohortDiagnostics}} function to generate these zip files. 
#'                         Zip files containing results from multiple databases can be placed in the same
#'                         folder. An error is raised when the folder contains no zip files.
#'                         
#' @export
preMergeDiagnosticsFiles <- function(dataFolder) {
  # Escaped and anchored: only names that really end in ".zip" qualify.
  zipFiles <- list.files(dataFolder, pattern = "\\.zip$", full.names = TRUE)
  if (length(zipFiles) == 0) {
    stop("No zip files found in ", dataFolder, call. = FALSE)
  }
  
  # Columns that must have a fixed type so tables from different databases can
  # be bound together regardless of what readr guessed for each file.
  columnCoercions <- list(cohortId = as.double,
                          cohortEntries = as.integer,
                          cohortSubjects = as.integer,
                          conceptSetId = as.integer,
                          conceptId = as.integer,
                          sourceConceptId = as.integer,
                          conceptSubjects = as.integer,
                          conceptCount = as.integer,
                          conceptCode = as.character,
                          vocabularyVersionCdm = as.character)
  
  # Reads one csv file into the global table named after it. With
  # overwrite = FALSE the rows are appended to the table already loaded.
  loadFile <- function(file, folder, overwrite) {
    tableName <- gsub("\\.csv$", "", file)
    camelCaseName <- SqlRender::snakeCaseToCamelCase(tableName)
    data <- readr::read_csv(file.path(folder, file), col_types = readr::cols(), guess_max = 1e7, locale = readr::locale(encoding = "UTF-8"))
    colnames(data) <- SqlRender::snakeCaseToCamelCase(colnames(data))
    
    for (columnName in intersect(names(columnCoercions), colnames(data))) {
      data[[columnName]] <- columnCoercions[[columnName]](data[[columnName]])
    }
    
    # inherits = FALSE: only a real global table counts, not e.g. a base
    # function that happens to share the table's name.
    if (!overwrite && exists(camelCaseName, envir = .GlobalEnv, inherits = FALSE)) {
      existingData <- get(camelCaseName, envir = .GlobalEnv, inherits = FALSE)
      if (nrow(existingData) > 0 && nrow(data) > 0) {
        # Use the intersection of names to subset the data
        commonColumns <- intersect(colnames(existingData), colnames(data))
        data <- data[, commonColumns]
      }
      data <- dplyr::bind_rows(existingData, data)
    }
    assign(camelCaseName, data, envir = .GlobalEnv)
    
    invisible(NULL)
  }
  
  # csv file names already loaded during this run. A table is overwritten the
  # first time it is seen and appended to afterwards, so a left-over global
  # from an earlier session is never merged into the result.
  loadedFiles <- character(0)
  for (zipFile in zipFiles) {
    writeLines(paste("Processing", zipFile))
    tempFolder <- tempfile()
    dir.create(tempFolder)
    tryCatch({
      utils::unzip(zipFile, exdir = tempFolder)
      
      csvFiles <- list.files(tempFolder, pattern = "\\.csv$")
      for (csvFile in csvFiles) {
        loadFile(csvFile, folder = tempFolder, overwrite = !(csvFile %in% loadedFiles))
      }
      loadedFiles <- union(loadedFiles, csvFiles)
    }, finally = {
      # Remove the extracted files even when reading one of them failed.
      unlink(tempFolder, recursive = TRUE)
    })
  }
  
  # Remove any duplicate cohort names
  if (exists("cohort", envir = .GlobalEnv, inherits = FALSE)) {
    cohortTable <- get("cohort", envir = .GlobalEnv, inherits = FALSE)
    cohortTable <- unique(cohortTable[, c("cohortName", "cohortId", "json")])
    # Re-assign to the global environment
    assign("cohort", cohortTable, envir = .GlobalEnv)
  }
  
  tableNames <- gsub("\\.csv$", "", loadedFiles)
  tableNames <- SqlRender::snakeCaseToCamelCase(tableNames)
  mergedFile <- file.path(dataFolder, "PreMerged.RData")
  # envir = .GlobalEnv: the tables live there; without it a table whose name
  # equals a local variable of this function would save the local instead.
  save(list = tableNames, file = mergedFile, compress = TRUE, envir = .GlobalEnv)
  ParallelLogger::logInfo("Merged data saved in ", mergedFile)
}


#' Premerge Shiny results files
#' 
#' @description 
#' If there are many results files, starting the Shiny app may take a very long time. This function 
#' already does most of the preprocessing, increasing loading speed.
#' 
#' The merged data will be stored in the same folder, and will automatically be recognized by the Shiny app.
#'
#' @details
#' Every csv file found in the zip files is read into a table in the global environment, named
#' after the camelCase version of the file name. Tables with the same name coming from several
#' zip files must have the same set of columns and are row-bound together; a mismatch raises an
#' error. Names in the \code{covariate} table are lower-cased and de-duplicated, and the age
#' group names for a fixed range of covariate IDs are rewritten. All tables are then saved to
#' \code{PreMerged.RData} in \code{dataFolder}.
#'
#' @param dataFolder  folder where the exported zip files for the results are stored. Use
#'                         the runStudy function to generate these zip files. 
#'                         Zip files containing results from multiple databases can be placed in the same
#'                         folder. An error is raised when the folder contains no zip files.
#'                         
#' @export
preMergeResultsFiles <- function(dataFolder) {
  # Escaped and anchored: only names that really end in ".zip" qualify.
  zipFiles <- list.files(dataFolder, pattern = "\\.zip$", full.names = TRUE)
  if (length(zipFiles) == 0) {
    stop("No zip files found in ", dataFolder, call. = FALSE)
  }
  
  # Reads one csv file into the global table named after it. With
  # overwrite = FALSE the rows are appended to the table already loaded,
  # provided both have exactly the same columns.
  loadFile <- function(file, folder, overwrite) {
    tableName <- gsub("\\.csv$", "", file)
    camelCaseName <- SqlRender::snakeCaseToCamelCase(tableName)
    data <- readr::read_csv(file.path(folder, file), col_types = readr::cols(), guess_max = 1e7, locale = readr::locale(encoding = "UTF-8"))
    colnames(data) <- SqlRender::snakeCaseToCamelCase(colnames(data))
    
    # inherits = FALSE: only a real global table counts, not e.g. a base
    # function that happens to share the table's name.
    if (!overwrite && exists(camelCaseName, envir = .GlobalEnv, inherits = FALSE)) {
      existingData <- get(camelCaseName, envir = .GlobalEnv, inherits = FALSE)
      if (nrow(existingData) > 0 && nrow(data) > 0) {
        if (!setequal(colnames(existingData), colnames(data))) {
          stop("Table columns do no match previously seen columns. Columns in ", 
               file, 
               ":\n", 
               paste(colnames(data), collapse = ", "), 
               "\nPrevious columns:\n",
               paste(colnames(existingData), collapse = ", "))
        }
        # Same columns, possibly in another order: align before binding.
        data <- data[, colnames(existingData)]
      }
      data <- rbind(existingData, data)
    }
    assign(camelCaseName, data, envir = .GlobalEnv)
    
    invisible(NULL)
  }
  
  # csv file names already loaded during this run. A table is overwritten the
  # first time it is seen and appended to afterwards, so a left-over global
  # from an earlier session is never merged into the result.
  loadedFiles <- character(0)
  for (zipFile in zipFiles) {
    writeLines(paste("Processing", zipFile))
    tempFolder <- tempfile()
    dir.create(tempFolder)
    tryCatch({
      utils::unzip(zipFile, exdir = tempFolder)
      
      csvFiles <- list.files(tempFolder, pattern = "\\.csv$")
      for (csvFile in csvFiles) {
        loadFile(csvFile, folder = tempFolder, overwrite = !(csvFile %in% loadedFiles))
      }
      loadedFiles <- union(loadedFiles, csvFiles)
    }, finally = {
      # Remove the extracted files even when reading one of them failed.
      unlink(tempFolder, recursive = TRUE)
    })
  }
  
  # Update the covariate names for the age groups 
  # that are > 100 years since they are truncated to 2
  # digits
  ageCovariateIdsToReformat <- seq(200031, 380031, by = 10000) # This represents the covariate IDs that are used to represent age groups from 100-200
  if (exists("covariate", envir = .GlobalEnv, inherits = FALSE)) {
    covars <- get("covariate", envir = .GlobalEnv, inherits = FALSE)
    
    # Ensure all covariates are unique by making all covariateName fields lower case
    covars$covariateName <- tolower(covars$covariateName)
    covars <- unique(covars)
    
    # Reformat age covariates. %in% never yields NA, so rows with a missing
    # covariateId are simply left alone; each matching row is rewritten from
    # its own name.
    isAgeCovariate <- covars$covariateId %in% ageCovariateIdsToReformat
    if (any(isAgeCovariate)) {
      covars$covariateName[isAgeCovariate] <- vapply(covars$covariateName[isAgeCovariate],
                                                     reformatAgeCovariateDescription,
                                                     character(1),
                                                     USE.NAMES = FALSE)
    }
    
    # Re-assign to the global environment
    assign("covariate", covars, envir = .GlobalEnv)
  }

  tableNames <- gsub("\\.csv$", "", loadedFiles)
  tableNames <- SqlRender::snakeCaseToCamelCase(tableNames)
  mergedFile <- file.path(dataFolder, "PreMerged.RData")
  # envir = .GlobalEnv: the tables live there; without it a table whose name
  # equals a local variable of this function would save the local instead.
  save(list = tableNames, file = mergedFile, compress = TRUE, envir = .GlobalEnv)
  ParallelLogger::logInfo("Merged data saved in ", mergedFile)
}

# Rewrites an age-group covariate name whose bounds lost their leading
# hundreds digit: the third space-separated token of `description` is read as
# "<lower>-<upper>", 100 is added to both bounds, and the result is returned
# as "age group: <lower>-<upper>".
reformatAgeCovariateDescription <- function(description) {
  rangeToken <- strsplit(description, " ")[[1]][3]
  bounds <- strsplit(rangeToken, "-")[[1]]
  shiftedLower <- as.integer(bounds[1]) + 100
  shiftedUpper <- as.integer(bounds[2]) + 100
  paste0("age group: ", shiftedLower, "-", shiftedUpper)
}


# Borrowed from devtools:
# https://github.com/hadley/devtools/blob/ba7a5a4abd8258c52cb156e7b26bb4bf47a79f0b/R/utils.r#L44
#
# TRUE when package `pkg` can be found and its version is at least `version`;
# a package whose version lookup fails counts as not installed.
is_installed <- function(pkg, version = 0) {
  found_version <- tryCatch(
    utils::packageVersion(pkg),
    error = function(err) NA
  )
  version_known <- !is.na(found_version)
  version_known && found_version >= version
}

# Borrowed and adapted from devtools:
# https://github.com/hadley/devtools/blob/ba7a5a4abd8258c52cb156e7b26bb4bf47a79f0b/R/utils.r#L74
#
# Makes sure package `pkg` is available. When it is missing, an interactive
# session is offered the choice to install it; declining, or running
# non-interactively, raises an error.
ensure_installed <- function(pkg) {
  if (is_installed(pkg)) {
    return(invisible(NULL))
  }

  msg <- paste0(sQuote(pkg), " must be installed for this functionality.")
  if (!interactive()) {
    stop(msg, call. = FALSE)
  }

  message(msg, "\nWould you like to install it?")
  if (menu(c("Yes", "No")) != 1) {
    stop(msg, call. = FALSE)
  }
  install.packages(pkg)
}

ohdsi-studies/IbdCharacterization documentation built on July 26, 2024, 11:20 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

ohdsi-studies/IbdCharacterization
Characterization of Inflammatory Bowel Disease Patient Cohorts

R/Shiny.R
In ohdsi-studies/IbdCharacterization: Characterization of Inflammatory Bowel Disease Patient Cohorts

Defines functions ensure_installed is_installed reformatAgeCovariateDescription preMergeResultsFiles preMergeDiagnosticsFiles launchShinyApp

Documented in preMergeDiagnosticsFiles preMergeResultsFiles

R Package Documentation

Browse R Packages

We want your feedback!

ohdsi-studies/IbdCharacterization Characterization of Inflammatory Bowel Disease Patient Cohorts

R/Shiny.R In ohdsi-studies/IbdCharacterization: Characterization of Inflammatory Bowel Disease Patient Cohorts

Defines functions ensure_installed is_installed reformatAgeCovariateDescription preMergeResultsFiles preMergeDiagnosticsFiles launchShinyApp

Documented in preMergeDiagnosticsFiles preMergeResultsFiles

R Package Documentation

Browse R Packages

We want your feedback!

ohdsi-studies/IbdCharacterization
Characterization of Inflammatory Bowel Disease Patient Cohorts

R/Shiny.R
In ohdsi-studies/IbdCharacterization: Characterization of Inflammatory Bowel Disease Patient Cohorts