# extras/01_importDataAsRDS.R

library(magrittr)
# Accumulator for everything this script imports. Despite its name it holds a
# list of imported objects (not a file name); it is persisted to `rdsLoc`.
rdsFileName <- list()

# Configuration (Achilles) ----
rdsFileName$configuration <- StudyManagement::getSourceKeyConfiguration(
  baseUrl = baseUrl,
  sourceKeys = sourceKeys
)
# Persist the list after this step.
saveRDS(object = rdsFileName, file = rdsLoc)

# Data sources (Achilles) ----
rdsFileName$dataSources <- StudyManagement::getDataSourceReportsDataForSourceKeys(
  baseUrl = baseUrl,
  sourceKeys = sourceKeys
)
saveRDS(object = rdsFileName, file = rdsLoc)

# all <- list()
# for (i in (1:length(rdsFileName$dataSources))) {#i = 1
#   dataSourceName <- names(rdsFileName$dataSources)[i]
#   dataSource <- rdsFileName$dataSources[[i]]
#
#   for (j in (1:length(dataSource))) {#j = 1
#     reportName <- names(dataSource)[j]
#     report <- dataSource[[j]]$parsed



# Concept sets ----
# Reload the persisted list from `rdsLoc` before adding to it.
rdsFileName <- readRDS(file = rdsLoc)
rdsFileName$conceptSet <- StudyManagement::getConceptSets(
  baseUrl = baseUrl,
  conceptSetIds = conceptSetIds
)

# Cohorts ----
rdsFileName$cohort <- StudyManagement::getCohorts(
  baseUrl = baseUrl,
  cohortIds = cohortIds,
  sourceKeys = sourceKeys
)
# Concept sets and cohorts are written to disk together.
saveRDS(object = rdsFileName, file = rdsLoc)

# Cohort characterization ----
# Reload the persisted list, add the characterizations, and write it back.
rdsFileName <- readRDS(file = rdsLoc)
rdsFileName$characterization <- StudyManagement::getCharacterizations(
  baseUrl = baseUrl,
  characterizationIds = characterizationIds
)
saveRDS(object = rdsFileName, file = rdsLoc)


# Incidence rate analysis ----
# Reload the persisted list, add the incidence rate analyses, and write it back.
rdsFileName <- readRDS(file = rdsLoc)
rdsFileName$incidenceRateAnalysis <- StudyManagement::getIncidenceRateAnalyses(
  baseUrl = baseUrl,
  incidenceRateIds = incidenceRateIds
)
saveRDS(object = rdsFileName, file = rdsLoc)


# Estimation ----
# Estimation specification: stored under
# rdsFileName$estimation$id_<estimationId>$specification.
rdsFileName <- readRDS(file = rdsLoc)
rdsFileName[["estimation"]][[paste0("id_", estimationId)]][["specification"]] <-
  StudyManagement::getEstimationSpecification(
    baseUrl = baseUrl,
    estimationId = estimationId
  )
saveRDS(object = rdsFileName, file = rdsLoc)



##### Estimation results from cohort method ###########
# For every entry found under `estimationCohortMethodOutputLoc`, read each RDS
# file in its `shinyData` sub-folder and store it as a tibble (factor columns
# converted to character) under
#   rdsFileName$estimation$id_<estimationId>$results$<folder>$<file name without .rds>
# The updated list is then written back to `rdsLoc`.
rdsFileName <- readRDS(file = rdsLoc)
# get folder names in results location
foldersWithResults <- list.files(estimationCohortMethodOutputLoc)
#foldersWithResults <- foldersWithResults[foldersWithResults == "combined"]
# seq_along() instead of 1:length(): when no folders exist the loop body must
# not run at all (1:0 would iterate over c(1, 0)).
for (i in seq_along(foldersWithResults)) {
  fullPath <- file.path(estimationCohortMethodOutputLoc,
                        foldersWithResults[i],
                        'shinyData')

  # `pattern` is a regular expression: escape the dot and anchor at the end so
  # that only files carrying the .rds extension are selected.
  filesToRead <- list.files(
    path = fullPath,
    pattern = '\\.rds$',
    recursive = FALSE,
    full.names = FALSE
  )
  if (length(filesToRead) > 0) {
    for (j in seq_along(filesToRead)) {
      # List key is the file name with its .rds extension stripped.
      rdsFile <- sub(pattern = '\\.rds$',
                     replacement = '',
                     x = filesToRead[[j]])
      rdsFileName$estimation[[paste0('id_', estimationId)]]$results[[foldersWithResults[i]]][[rdsFile]]  <-
        readRDS(file.path(fullPath, filesToRead[[j]])) %>%
        dplyr::mutate_if(is.factor, as.character) %>%
        tidyr::as_tibble()
    }
  } else {
    print(paste0("No RDS files in ", foldersWithResults[i]))
  }
}
saveRDS(rdsFileName, file = rdsLoc)



############## appending all evidence files ##################
# Collects every RDS file below `estimationCohortMethodOutputLoc`, groups the
# files by their target evidence-data-model table, row-binds each group into a
# single de-duplicated tibble (camelCase column names), and stores the result
# under rdsFileName$estimation$id_<estimationId>$results$all.
# Relies on `foldersWithResults` having been set earlier in this script.

# `pattern` is a regular expression: escape the dot and anchor at the end so
# only files with the .rds extension are listed.
estimationRdsFiles <- data.frame(
  rdsFullPath = list.files(
    path = estimationCohortMethodOutputLoc,
    pattern = '\\.rds$',
    recursive = TRUE,
    full.names = TRUE
  ),
  stringsAsFactors = FALSE
) %>%
  dplyr::mutate(sourceRdsFileName = basename(rdsFullPath)) %>%
  dplyr::mutate(sourceRdsFileName = sub(
    pattern = '\\.rds$',
    replacement = '',
    x = sourceRdsFileName
  ))

# NOTE(review): `parsed` is expected to carry `sourceRdsFileName` plus the
# `targetRdsFileName` and `database_id` columns used below -- confirm against
# StudyManagement::getParsedNamesForEvidenceDataModel.
parsed <-
  StudyManagement::getParsedNamesForEvidenceDataModel(vectorToCompare = estimationRdsFiles$sourceRdsFileName)


# Maps each file path to one of the result folders; the helper's output
# columns are renamed so they can be joined on `rdsFullPath`.
folderSource <-
  StudyManagement::matchTwoVectors(estimationRdsFiles$rdsFullPath, foldersWithResults) %>%
  dplyr::rename(rdsFullPath = vectorToCompare,
                folder = vectorReference)

# From the 'combined' folder keep only rows whose database_id is
# 'Meta-analysis'; rows from every other folder are kept as they are.
estimationRdsFiles <- estimationRdsFiles %>%
  dplyr::left_join(y = parsed,
                   by = c("sourceRdsFileName" = "sourceRdsFileName")) %>%
  dplyr::left_join(folderSource, by = c("rdsFullPath" = "rdsFullPath")) %>%
  dplyr::filter(folder != 'combined' |
                  (folder == 'combined' &
                     database_id == 'Meta-analysis'))


rdsFileName <- readRDS(file = rdsLoc)

evidenceDataModelTables <-
  estimationRdsFiles$targetRdsFileName %>% unique()

# Files without a matching target table (NA after the left join) cannot be
# grouped; previously they made the inner loop call readRDS(NA) and abort.
if (anyNA(evidenceDataModelTables)) {
  warning(
    "Skipping RDS files with no matching evidence data model table: ",
    paste(estimationRdsFiles$rdsFullPath[is.na(estimationRdsFiles$targetRdsFileName)],
          collapse = ", "),
    call. = FALSE
  )
  evidenceDataModelTables <-
    evidenceDataModelTables[!is.na(evidenceDataModelTables)]
}

# NOTE: `all` masks the name of base::all as a variable; the name is kept
# because it mirrors the `results$all` list element written below.
all <- list()
# seq_along()/seq_len() instead of 1:length()/1:nrow() so empty inputs skip
# the loops rather than iterating over c(1, 0).
for (i in seq_along(evidenceDataModelTables)) {
  evidenceTable <- evidenceDataModelTables[[i]]
  estimationRdsFile <- estimationRdsFiles %>%
    dplyr::filter(targetRdsFileName == evidenceTable) %>%
    tidyr::as_tibble() %>%
    unique()
  # Preallocate one slot per source file of this table.
  temp <- vector("list", nrow(estimationRdsFile))
  for (j in seq_len(nrow(estimationRdsFile))) {
    temp[[j]] <- readRDS(estimationRdsFile$rdsFullPath[[j]]) %>%
      dplyr::mutate_if(is.factor, as.character) %>%
      tidyr::as_tibble()
    names(temp[[j]]) <-
      SqlRender::snakeCaseToCamelCase(names(temp[[j]]))
  }
  all[[SqlRender::snakeCaseToCamelCase(evidenceTable)]] <-
    dplyr::bind_rows(temp) %>% tidyr::as_tibble() %>% unique()
}

rdsFileName$estimation[[paste0('id_', estimationId)]]$results$all <-
  all

saveRDS(rdsFileName, file = rdsLoc)

##### ####### TO DO ############
####### prediction ############
# gowthamrao/StudyManagement documentation built on March 9, 2020, 10:48 p.m.