R/LoadSyntheaTables.r

#' @title Load Synthea Tables.
#'
#' @description This function populates all Synthea tables. 
#'
#' @usage LoadSyntheaTables(connectionDetails, syntheaDatabaseSchema, syntheaFileLoc)
#'
#' @details This function assumes \cr\code{createSyntheaTables()} has already been run.  Additionally,
#'              this function assumes the data files used to populate the tables are csv files generated by 
#'              running the basic setup (creating a database with 1000 people):    
#'                  \cr\code{git clone https://github.com/synthetichealth/synthea.git}
#'                  \cr\code{cd synthea}
#'                  \cr\code{./gradlew build check test}          
#'                  \cr\code{./run_synthea -p 1000}
#' 
#'               You can enable csv records in src/main/resources/synthea.properties by setting exporter.csv.export = true.
#'               As of the time of this writing the csv files can be found at synthe/output/csv.
#'               For more details: \href{https://github.com/synthetichealth/synthea/wiki/Basic-Setup-and-Running}{Synthea Basic Setup}
#'
#' @param connectionDetails  An R object of type\cr\code{connectionDetails} created using the
#'                                     function \code{createConnectionDetails} in the
#'                                     \code{DatabaseConnector} package.
#' @param syntheaDatabaseSchema  The name of the database schema that will contain the Synthea
#'                                     instance.  Requires read and write permissions to this database. On SQL
#'                                     Server, this should specifiy both the database and the schema,
#'                                     so for example 'cdm_instance.dbo'.
#' @param syntheaFileLoc     The location of the Synthea csv files created by running the executable run_synthea.
#'
#'@export


LoadSyntheaTables <- function (connectionDetails, syntheaDatabaseSchema, syntheaFileLoc)
{

    csvList <- c("allergies.csv","conditions.csv","imaging_studies.csv","medications.csv","organizations.csv","procedures.csv",
	             "careplans.csv","encounters.csv","immunizations.csv","observations.csv","patients.csv","providers.csv")
	
	conn <- DatabaseConnector::connect(connectionDetails) 
	
    for (csv in csvList) {

	    syntheaTable <- data.table::fread(file = paste0(syntheaFileLoc, "/", csv), stringsAsFactors = FALSE, header = TRUE, sep = ",")    
		
		writeLines(paste0("Loading: ",csv))
	
        # experiencing type conversion errors and need to explicitly case some columns

        if("START"       %in% colnames(syntheaTable))  syntheaTable$START        <- as.Date(syntheaTable$START,format="%Y-%m-%d")
        if("STOP"        %in% colnames(syntheaTable))  syntheaTable$STOP         <- as.Date(syntheaTable$STOP,format="%Y-%m-%d")
        if("DATE"        %in% colnames(syntheaTable))  syntheaTable$DATE         <- as.Date(syntheaTable$DATE,format="%Y-%m-%d")
        if("BIRTHDATE"   %in% colnames(syntheaTable))  syntheaTable$BIRTHDATE    <- as.Date(syntheaTable$BIRTHDATE,format="%Y-%m-%d")
        if("DEATHDATE"   %in% colnames(syntheaTable))  syntheaTable$DEATHDATE    <- as.Date(syntheaTable$DEATHDATE,format="%Y-%m-%d")
		if("CODE"        %in% colnames(syntheaTable))  syntheaTable$CODE         <- as.character(syntheaTable$CODE)
		if("REASONCODE"  %in% colnames(syntheaTable))  syntheaTable$REASONCODE   <- as.character(syntheaTable$REASONCODE)
        if("PHONE"       %in% colnames(syntheaTable))  syntheaTable$PHONE        <- as.character(syntheaTable$PHONE)
        if("UTILIZATION" %in% colnames(syntheaTable))  syntheaTable$UTILIZATION  <- as.numeric(syntheaTable$UTILIZATION)

		
	    DatabaseConnector::insertTable(conn,paste0(syntheaDatabaseSchema,".",strsplit(csv,"[.]")[[1]][1]), data=as.data.frame(syntheaTable), dropTableIfExists = FALSE, createTable = FALSE, progressBar = TRUE)
	}

    on.exit(DatabaseConnector::disconnect(conn)) 
	
}
chrisroederucdenver/ETL-Synthea documentation built on Aug. 1, 2020, 4:21 p.m.