# Import phenotypes from ATLAS -------------------------------------------------
oldCohortDefinitions <- PhenotypeLibrary::getPhenotypeLog(showHidden = TRUE)
oldCohortDefinitionSet <-
PhenotypeLibrary::getPlCohortDefinitionSet(cohortIds = oldCohortDefinitions$cohortId)
# Delete existing cohorts--------------------------------------------
unlink(
x = "inst/Cohorts.csv",
recursive = TRUE,
force = TRUE
)
unlink(
x = "inst/cohorts",
recursive = TRUE,
force = TRUE
)
unlink(
x = "inst/sql",
recursive = TRUE,
force = TRUE
)
# Fetch approved cohort definition----------------------------------
# from atlas-phenotype.ohdsi.org (note: approved phenotypes do not have '[')
baseUrl <- "https://atlas-phenotype.ohdsi.org/WebAPI"
ROhdsiWebApi::authorizeWebApi(
baseUrl = baseUrl,
authMethod = "db",
webApiUsername = keyring::key_get("ohdsiAtlasPhenotypeUser"),
webApiPassword = keyring::key_get("ohdsiAtlasPhenotypePassword")
)
webApiCohorts <-
ROhdsiWebApi::getCohortDefinitionsMetaData(baseUrl = baseUrl)
exportableCohorts <-
dplyr::bind_rows(
webApiCohorts |>
dplyr::filter(
stringr::str_detect(
string = name,
pattern = stringr::fixed("["),
# Cohorts without prefix
negate = TRUE
)
),
webApiCohorts |>
dplyr::filter(
stringr::str_detect(
string = name,
pattern = stringr::fixed("[D]"),
# Deprecated cohorts, as another cohort definition subsumes the cohort definitions intent
negate = FALSE
)
),
webApiCohorts |>
dplyr::filter(
stringr::str_detect(
string = name,
pattern = stringr::fixed("[P]"),
# Cohorts under consideration for peer review
negate = FALSE
)
),
webApiCohorts |>
dplyr::filter(
stringr::str_detect(
string = name,
pattern = stringr::fixed("[E]"),
# Cohorts that have errors and should not be used
negate = FALSE
)
),
webApiCohorts |>
dplyr::filter(
stringr::str_detect(
string = name,
pattern = stringr::fixed("[W]"),
# Cohorts that have been withdrawn
negate = FALSE
)
)
) |>
dplyr::distinct() |>
dplyr::select(
id,
name,
description,
createdDate,
modifiedDate,
createdBy,
modifiedBy
) |>
dplyr::mutate(
cohortId = id,
atlasId = id,
cohortName = name,
createdDate = as.Date(createdDate),
modifiedDate = as.Date(modifiedDate)
) |>
dplyr::arrange(cohortId) |>
dplyr::select(
cohortId,
atlasId,
cohortName,
description,
createdDate,
modifiedDate,
createdBy,
modifiedBy
)
cohortRecord <- c()
for (i in 1:nrow(exportableCohorts)) {
cohortRecord[[i]] <- exportableCohorts[i, ]
librarian <-
stringr::str_replace(
string = exportableCohorts[i, ]$createdBy[[1]]$name,
pattern = "na\\\\",
replacement = ""
)
cohortRecord[[i]]$librarian <- librarian
librarian <- NULL
cohortRecord[[i]]$cohortNameFormatted <- gsub(
pattern = "_",
replacement = " ",
x = gsub("\\[(.*?)\\]_", "", gsub(" ", "_", cohortRecord[[i]]$cohortName))
) |>
stringr::str_squish() |>
stringr::str_trim()
cohortRecord[[i]]$lastModifiedBy <- NA
if (length(cohortRecord[[i]]$modifiedBy) > 1) {
cohortRecord[[i]]$lastModifiedBy <-
cohortRecord[[i]]$modifiedBy[[1]]$name
}
if (all(
!is.na(cohortRecord[[i]]$description),
nchar(cohortRecord[[i]]$description) > 5
)) {
textInDescription <-
cohortRecord[[i]]$description |>
stringr::str_replace_all(pattern = ";", replacement = "") |>
stringr::str_split(pattern = "\n")
strings <- textInDescription[[1]]
textInDescription <- NULL
strings <-
stringr::str_split(string = strings, pattern = stringr::fixed(":"))
if (all(
!is.na(cohortRecord[[i]]$description[[1]]),
stringr::str_detect(
string = cohortRecord[[i]]$description,
pattern = stringr::fixed(":")
)
)) {
stringValues <- c()
for (j in (1:length(strings))) {
stringValues[[j]] <- dplyr::tibble()
if (length(strings[[j]]) == 2) {
stringValues[[j]] <- dplyr::tibble(
name = strings[[j]][[1]] |> stringr::str_squish() |> stringr::str_trim(),
value = strings[[j]][[2]] |>
stringr::str_squish() |>
stringr::str_trim()
)
}
}
stringValues <- dplyr::bind_rows(stringValues)
if (nrow(stringValues) > 0) {
data <- stringValues |>
tidyr::pivot_wider()
stringValues <- NULL
if (nrow(data) > 0) {
cohortRecord[[i]] <- cohortRecord[[i]] |>
tidyr::crossing(data |>
dplyr::select(dplyr::all_of(
setdiff(
x = colnames(data),
y = colnames(cohortRecord[[i]])
)
)))
}
}
}
}
}
cohortRecord <- dplyr::bind_rows(cohortRecord) |>
dplyr::select(-createdBy, -modifiedBy) |>
dplyr::mutate(
id = cohortId,
name = id
) |>
dplyr::relocate(cohortId, cohortName)
cohortRecord |>
readr::write_excel_csv(
file = "inst/Cohorts.csv",
append = FALSE,
na = "",
quote = "all"
)
saveRDS(
object = cohortRecord,
file = "inst/CohortRecord.rds"
)
cohortRecord <- readRDS("inst/CohortRecord.rds")
try(
ROhdsiWebApi::insertCohortDefinitionSetInPackage(
fileName = "inst/Cohorts.csv",
baseUrl = baseUrl,
jsonFolder = "inst/cohorts",
sqlFolder = "inst/sql/sql_server",
generateStats = TRUE
),
silent = TRUE
)
# generate cohort sql using latest version of circeR
# remotes::install_github("OHDSI/circeR")
circeOptions <- CirceR::createGenerateOptions(generateStats = TRUE)
cohortJsonFiles <-
list.files(path = file.path("inst", "cohorts"), pattern = ".json") |> sort()
for (i in (1:length(cohortJsonFiles))) {
jsonFileName <- cohortJsonFiles[i]
sqlFileName <-
stringr::str_replace_all(
string = jsonFileName,
pattern = stringr::fixed(".json"),
replacement = ".sql"
)
writeLines(paste0(" - Generating ", sqlFileName))
json <-
SqlRender::readSql(sourceFile = file.path("inst", "cohorts", jsonFileName))
sql <-
CirceR::buildCohortQuery(expression = json, options = circeOptions)
writeLines(paste0(" --", sqlFileName))
unlink(
x = file.path("inst", "sql", "sql_server", sqlFileName),
recursive = TRUE,
force = TRUE
)
SqlRender::writeSql(
sql = sql,
targetFile = file.path("inst", "sql", "sql_server", sqlFileName)
)
}
if ("id" %in% colnames(cohortRecord)) {
cohortRecord$id <- NULL
}
if ("atlasId" %in% colnames(cohortRecord)) {
cohortRecord$atlasId <- NULL
}
if ("name" %in% colnames(cohortRecord)) {
cohortRecord$name <- NULL
}
if ("description" %in% colnames(cohortRecord)) {
cohortRecord$description <- NULL
}
cohortRecord <- cohortRecord |>
dplyr::mutate(isCirceJson = 1)
expectedFields <- c(
"cohortId",
"cohortName",
"cohortNameFormatted",
"cohortNameLong",
"librarian",
"status",
"addedVersion",
"logicDescription",
"hashTag",
"isCirceJson",
"contributors",
"contributorOrcIds",
"contributorOrganizations",
"peerReviewers",
"peerReviewerOrcIds",
"recommendedReferentConceptIds",
"cohortNameLong",
"ohdsiForumPost",
"createdDate",
"modifiedDate",
"lastModifiedBy",
"replaces",
"notes"
)
presentInBoth <- intersect(
expectedFields,
colnames(cohortRecord)
)
new <- setdiff(
colnames(cohortRecord),
c(expectedFields, "atlasId")
)
missing <- setdiff(
expectedFields,
colnames(cohortRecord)
)
if (length(new) > 0) {
stop(paste0(
"The following new fields observed please check and update: ",
paste0(new, collapse = ", ")
))
}
if (length(missing) > 0) {
stop(paste0(
"The following fields were missing please check and update ",
paste0(missing, collapse = ", ")
))
}
if (!all(sort(presentInBoth) |> unique() == sort(expectedFields) |> unique())) {
stop("Something is odd. Please check.")
}
cohortRecord <- cohortRecord |>
dplyr::select(dplyr::all_of(presentInBoth)) |>
dplyr::arrange(cohortId)
cohortRecord <- cohortRecord |>
dplyr::mutate(isReferenceCohort = dplyr::if_else(
stringr::str_detect(
string = cohortName,
pattern = stringr::fixed("[R]")
),
1,
0
))
saveRDS(cohortRecord, file = "cohortRecord.rds")
cohortRecord <- readRDS("cohortRecord.rds")
cohortRecordAugmented <- c()
for (i in (1:nrow(cohortRecord))) {
cohortRecordUnit <- cohortRecord[i, ]
if (!file.exists(file.path(
"inst",
"cohorts",
paste0(cohortRecordUnit$cohortId, ".json")
))) {
stop("cant find file")
}
cohortJson <- SqlRender::readSql(sourceFile = file.path(
"inst",
"cohorts",
paste0(cohortRecordUnit$cohortId, ".json")
))
parsed <-
CohortDefinitionReviewer::parseCohortDefinitionSpecifications(cohortDefinition = cohortJson |>
RJSONIO::fromJSON(digits = 23))
if (nrow(parsed) > 0) {
cohortRecordAugmented[[i]] <- cohortRecordUnit |>
tidyr::crossing(parsed)
}
}
cohortRecordAugmented <- dplyr::bind_rows(cohortRecordAugmented)
## correct the url
correctUrl <- function(url) {
# Check if the string likely represents a URL by looking for "http"
if (grepl("http", url, ignore.case = TRUE)) {
# Ensure the URL starts with "https://"
if (startsWith(tolower(url), "https//")) {
corrected_url <- sub("https//", "https://", url, ignore.case = TRUE)
return(corrected_url)
}
return(url)
}
return(url)
}
# Function to transform the URL
transformUrl <- function(url) {
# Check if URL exists
if (is.na(url) || url == "" || is.na(url)) {
return(url)
}
# Check if URL has the correct base
base_url <- "https://forums.ohdsi.org/t/"
if (!stringr::str_starts(url, base_url)) {
return(NA)
}
extract_number1 <- function(url) {
# Use a regular expression to capture the first set of numbers after '/t/' and another '/'
match_data <- stringr::str_match(url, "/t/[^/]*/(\\d+)")
if (!is.na(match_data[1, 2])) {
return(match_data[1, 2])
}
return(NA)
}
# Extract the first set of numbers after the base URL
number1 <- extract_number1(url)
# If number1 is found, construct the new URL, else return NA
if (!is.na(number1)) {
new_url <- paste0(base_url, number1)
return(new_url)
} else {
return(url)
}
}
cohortRecordAugmented <- cohortRecordAugmented |>
dplyr::mutate(ohdsiForumPost = sapply(ohdsiForumPost, FUN = correctUrl)) |>
dplyr::mutate(ohdsiForumPost = sapply(ohdsiForumPost, FUN = transformUrl)) |>
dplyr::arrange(cohortId)
readr::write_excel_csv(
x = cohortRecordAugmented,
file = "inst/Cohorts.csv",
append = FALSE,
na = "",
quote = "all"
)
# Function to get name from ORCID ID
#' @export
getOrcidDetails <- function(orcidId) {
# Create the URL for the API request
url <- paste0("https://pub.orcid.org/v3.0/", orcidId)
# Make the API request
res <-
httr::GET(url, httr::add_headers("Accept" = "application/json"))
# Parse the JSON response
parsedRes <- RJSONIO::fromJSON(httr::content(res, "text"))
details <- c()
# Extract the name
details$givenName <-
paste0(as.character(parsedRes$person$name$`given-names`), "")
details$familyName <-
paste0(as.character(parsedRes$person$name$`family-name`), "")
details$email <- paste0(parsedRes$person$emails$email |> as.character(), "")
return(details)
}
# Function to get log based on OrcId
#' @export
getOrcidFromPhenotypeLog <-
function(log = PhenotypeLibrary::getPhenotypeLog()) {
# Process the data
uniqueOrcIds <- log |>
dplyr::mutate(
contributorOrcIds = stringr::str_replace(
string = contributorOrcIds,
pattern = stringr::fixed("."),
replacement = ","
)
) |>
tidyr::separate_rows(contributorOrcIds, sep = ",") |> # Split by comma
dplyr::mutate(contributorOrcIds = stringr::str_trim(contributorOrcIds, side = "both")) |> # Trim whitespace
dplyr::filter(contributorOrcIds != "") |> # Remove empty strings
dplyr::filter(contributorOrcIds != "''") |> # Remove empty strings
dplyr::filter(contributorOrcIds != "'") |> # Remove empty strings
dplyr::mutate(contributorOrcIds = stringr::str_replace_all(contributorOrcIds, "'", "")) |> # Remove quotations
dplyr::distinct(contributorOrcIds) |> # Get unique ORCIDs
dplyr::arrange(contributorOrcIds) |>
dplyr::pull(contributorOrcIds)
orcidLog <- c()
for (i in (1:length(uniqueOrcIds))) {
orcIdDetails <- NULL
orcIdDetails <- getOrcidDetails(uniqueOrcIds[[i]])
orcidLog[[i]] <- dplyr::tibble(
orcId = uniqueOrcIds[[i]],
givenName = orcIdDetails$givenName,
lastName = orcIdDetails$familyName,
email = orcIdDetails$email
)
}
orcidLog <- dplyr::bind_rows(orcidLog)
orcidLogWithContributions <- c()
for (i in (1:nrow(orcidLog))) {
numberOfCohorts <- log |>
dplyr::filter(
stringr::str_detect(
string = .data$contributorOrcIds,
pattern = orcidLog[i, ]$orcId |> stringr::fixed()
)
) |>
dplyr::pull(cohortId) |>
unique() |>
length()
numberOfCohortsAccepted <- log |>
dplyr::filter(stringr::str_detect(
string = tolower(status),
pattern = "accepted"
)) |>
dplyr::filter(
stringr::str_detect(
string = .data$contributorOrcIds,
pattern = orcidLog[i, ]$orcId |> stringr::fixed()
)
) |>
dplyr::pull(cohortId) |>
unique() |>
length()
orcidLogWithContributions[[i]] <- orcidLog[i, ] |>
dplyr::mutate(
contributions = numberOfCohorts,
accepted = numberOfCohortsAccepted
)
}
return(dplyr::bind_rows(orcidLogWithContributions))
}
orcidFromPhenotypeLog <- getOrcidFromPhenotypeLog(log = cohortRecordAugmented)
readr::write_excel_csv(
x = orcidFromPhenotypeLog,
file = "inst/OrcidLog.csv",
append = FALSE,
na = "",
quote = "all"
)
if (file.exists("cohortRecord.rds")) {
file.remove("cohortRecord.rds")
}
newCohortDefinitionSet <-
CohortGenerator::getCohortDefinitionSet(
settingsFileName = file.path("inst", "Cohorts.csv"),
jsonFolder = file.path("inst", "cohorts"),
sqlFolder = file.path("inst", "sql", "sql_server")
) |>
dplyr::select(
cohortId,
json
) |>
dplyr::tibble() |>
dplyr::arrange(cohortId)
conceptSetsInAllCohortDefinition <- ConceptSetDiagnostics::extractConceptSetsInCohortDefinitionSet(
cohortDefinitionSet = newCohortDefinitionSet
)
saveRDS(
object = conceptSetsInAllCohortDefinition |>
dplyr::arrange(
uniqueConceptSetId,
cohortId,
conceptSetId
),
file = file.path("inst", "ConceptSetsInCohortDefinition.RDS")
)
oldLogFile <- PhenotypeLibrary::getPhenotypeLog(showHidden = TRUE)
needToUpdate <- TRUE
if (identical(x = oldLogFile, y = cohortRecord)) {
needToUpdate <- FALSE
writeLines("No changes to cohort definitions. No update to version needed.")
}
if (needToUpdate) {
# Update description -----------------------------------------------------------
description <- readLines("DESCRIPTION")
# Increment minor version:
versionLineNr <- grep("Version: .*$", description)
version <- sub("Version: ", "", description[versionLineNr])
versionParts <- strsplit(version, "\\.")[[1]]
versionParts[2] <- as.integer(versionParts[2]) + 1
newVersion <- paste(versionParts, collapse = ".")
description[versionLineNr] <- sprintf("Version: %s", newVersion)
# Set date:
dateLineNr <- grep("Date: .*$", description)
description[dateLineNr] <-
sprintf("Date: %s", format(Sys.Date(), "%Y-%m-%d"))
writeLines(description, con = "DESCRIPTION")
# Update news -----------------------------------------------------------
news <- readLines("NEWS.md")
newCohorts <- setdiff(
x = sort(cohortRecord$cohortId),
y = sort(oldLogFile$cohortId)
)
messages <- c("")
if (length(newCohorts) == 0) {
messages <-
c(
messages,
"New Cohorts: No new cohorts were added in this release."
)
} else {
messages <-
c(messages, paste0("New Cohorts: ", length(newCohorts), " were added."))
messages <- c(
messages,
""
)
for (i in (1:length(newCohorts))) {
dataCohorts <- cohortRecord |>
dplyr::filter(cohortId %in% newCohorts[[i]])
messages <-
c(
messages,
paste0(
" ",
dataCohorts$cohortId,
": ",
dataCohorts$cohortName
)
)
}
}
acceptedCohorts <- cohortRecord |>
dplyr::filter(addedVersion == newVersion)
if (nrow(acceptedCohorts) == 0) {
messages <-
c(
"Accepted Cohorts: No cohorts were accepted in this release.",
messages
)
} else {
for (i in (1:nrow(acceptedCohorts))) {
dataCohorts <- cohortRecord |>
dplyr::filter(cohortId %in% acceptedCohorts[i, ]$cohortId)
messages <-
c(
paste0(
" ",
dataCohorts$cohortId,
": ",
dataCohorts$cohortName
),
messages
)
}
messages <-
c(
paste0(
"Accepted Cohorts: ",
nrow(acceptedCohorts),
" were accepted."
),
messages
)
messages <- c(
messages,
""
)
}
news <- c(
paste0("PhenotypeLibrary ", newVersion),
"======================",
messages,
"",
news
)
writeLines(news, con = "NEWS.md")
}
unlink("inst/CohortRecord.rds")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.