library(RODBC) library(chordsTables) library(dplyr) library(tidyr) library(ggplot2) library(flextable) library(officer) ##Add time_it = TRUE to opts_chunk to time all chunks. #Set opts_chunk error = FALSE to debug. It will force the app to stop on an error. #error can also be overridden at each individual chunk #Be sure error is set to TRUE before pushing to Github knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE, error = TRUE, time_it = TRUE) startTime <- Sys.time() all_times <- list() knitr::knit_hooks$set(time_it = local({ now <- NULL function(before, options) { if (before) { now <<- Sys.time() } else { res <- difftime(Sys.time(), now) all_times[[options$label]] <<- c(options$label, format(round(res, 3))) #Only prints times greater than 5 minutes. if(res > as.difftime("00:05", format="%H:%M")){ paste("Time for the chunk", options$label, "to run:", format(round(res, 3))) } } } })) # supporting look up tables stateCnty <- data.frame(lapply(chordsTables::stateCnty, as.character), stringsAsFactors=FALSE) # ISO language lookup isoLang <- data.frame(lapply(chordsTables::isoLang, as.character), stringsAsFactors=FALSE) # VDW value sets valSets <- data.frame(lapply(chordsTables::valSets, as.character), stringsAsFactors=FALSE) ICD10CM <- data.frame(lapply(chordsTables::ICD10CM, as.character), stringsAsFactors=FALSE) ICD9CM <- data.frame(lapply(chordsTables::ICD9CM, as.character), stringsAsFactors=FALSE) icdLU <- rbind(ICD10CM[c('dxCode','Description')],ICD9CM[c('dxCode','Description')]) maxQryRows = 0
connectionString <- getConnectionString(params)
run_query <- function(query_text, ...){ result <- run_db_query(connectionString, query_text, ... ) return(result) } get_connection <- function(){ connection <- get_new_connection(connectionString) return(connection) }
tbresult <-runTableReplacements(connectionString)
The purpose of the data quality program is to characterize the data in CHORDS VDW 3.1 priority level 1 (P1) tables. The P1 tables include the following: DEMOGRAPHICS, ENCOUNTERS, CENSUS_LOCATION, DIAGNOSES, VITAL_SIGNS, and BENEFITS. The program uses a series of SQL queries operationalized using RStudio to produce this report. These tables provide descriptive information about data stored in a data partner's VDW and can be used to assess data model conformance, data plausibility, and data completeness. This data quality report was generated from CHORDS r params$DBName
.
Data Partner:\
Analyst:\
Query Run Date: r Sys.Date()
This table contains summary counts and date ranges by table. Distinct encounters and patients are shown for applicable tables. Data ranges should be used to compare data time windows between tables.
tab1_dem <- run_query(paste0("select count(*) as nrows, count(distinct person_id) as npats from ", demographics)) tab1_dem <- data.frame(nrows = tab1_dem$nrows, npats = tab1_dem$npats, nencts = NA, fieldname = NA, mindt = NA, maxdt = NA) tab1_enc <- run_query(paste0("select count(*) as nrows, count(distinct person_id) as npats, count(distinct enc_id) as nencts, 'ADATE' as fieldname, min(cast(ADATE as date)) as mindt, max(cast(ADATE as date)) as maxdt from ", encounters)) tab1_dx <- run_query(paste0("select count(*) as nrows, count(distinct person_id) as npats, 'ADATE' as fieldname, count(distinct enc_id) as nencts, min(cast(ADATE as date)) as mindt, max(cast(ADATE as date))as maxdt from ", diagnoses)) tab1_vs <- run_query(paste0("select count(*) as nrows, count(distinct person_id) as npats, 'MEASURE_DATE' as fieldname, count(distinct enc_id) as nencts, min(cast(measure_date as date)) as mindt, max(cast(measure_date as date)) as maxdt from ", vital_signs)) tab1_ben <- run_query(paste0("select count(*) as nrows, count(distinct person_id) as npats, 'BENEFIT_DATE' as fieldname, count(distinct enc_id) as nencts, min(cast(benefit_date as date)) as mindt, max(cast(benefit_date as date)) as maxdt from ", benefit)) tab1_cen <- run_query(paste0(" select count(*) as nrows, count(distinct person_id) as npats, 'LOC_START' as fieldname, count(distinct concat(PERSON_ID,cast(cast(LOC_START as date) as varchar))) as nencts, min(cast(LOC_START as date)) as mindt, max(cast(LOC_START as date)) as maxdt from ", census_location)) emptyDataframe <- data.frame(matrix(ncol = 6, nrow = 0)) cols <- c("nrows", "npats", "fieldname", "nencts", "mindt", "maxdt") colnames(emptyDataframe) <- cols if (is.na(tab1_dem)){ tab1_dem <- data.frame(emptyDataframe) } if(is.na(tab1_enc)){ tab1_enc <- data.frame(emptyDataframe) } if(is.na(tab1_dx)){ tab1_dx <- data.frame(emptyDataframe) } if(is.na(tab1_vs)){ tab1_vs <- data.frame(emptyDataframe) } if(is.na(tab1_ben)){ tab1_ben <- data.frame(emptyDataframe) } if(is.na(tab1_cen)){ tab1_cen <- data.frame(emptyDataframe) } tab1_all <- bind_rows(tab1_dem, tab1_enc, tab1_cen, tab1_dx, tab1_vs, tab1_ben) %>% bind_cols(Table = c("DEMOGRAPHICS", "ENCOUNTERS", "CENSUS_LOCATION", "DIAGNOSES", "VITAL_SIGNS", "BENEFIT"), .)
kable(tab1_all, col.names = c("Table", "Records", "Patients", "Encounters", "Date Field", "Min Date", "Max Date"), align = 'c', format.args = list(big.mark = ","))
This table contains record counts for null and unknown values across P1 tables.
tab2_demo_null <- run_query( paste0("SELECT SUM(CASE WHEN BIRTH_DATE IS NULL THEN 1 ELSE 0 END) as BIRTH_DATE, SUM(CASE WHEN GENDER IS NULL THEN 1 ELSE 0 END) as GENDER, SUM(CASE WHEN PRIMARY_LANGUAGE IS NULL THEN 1 ELSE 0 END) as PRIMARY_LANGUAGE, SUM(CASE WHEN NEEDS_INTERPRETER IS NULL THEN 1 ELSE 0 END) as NEEDS_INTERPRETER, SUM(CASE WHEN RACE1 IS NULL THEN 1 ELSE 0 END) as RACE1, SUM(CASE WHEN RACE2 IS NULL THEN 1 ELSE 0 END) as RACE2, SUM(CASE WHEN RACE3 IS NULL THEN 1 ELSE 0 END) as RACE3, SUM(CASE WHEN RACE4 IS NULL THEN 1 ELSE 0 END) as RACE4, SUM(CASE WHEN RACE5 IS NULL THEN 1 ELSE 0 END) as RACE5, SUM(CASE WHEN HISPANIC IS NULL THEN 1 ELSE 0 END) as HISPANIC, SUM(CASE WHEN SEXUAL_ORIENTATION IS NULL THEN 1 ELSE 0 END) as SEXUAL_ORIENTATION, SUM(CASE WHEN GENDER_IDENTITY IS NULL THEN 1 ELSE 0 END) as GENDER_IDENTITY FROM ", demographics)) tab2_demo_null2 <- data.frame(table = rep("DEMOGRAPHICS", ncol(tab2_demo_null)), var = names(tab2_demo_null), records_null = t(tab2_demo_null)[,1]) row.names(tab2_demo_null2) <- NULL tab2_enc_null <- run_query( paste0("SELECT SUM(CASE WHEN ADATE IS NULL THEN 1 ELSE 0 END) as ADATE, SUM(CASE WHEN DDATE IS NULL THEN 1 ELSE 0 END) as DDATE, SUM(CASE WHEN PROVIDER IS NULL THEN 1 ELSE 0 END) as PROVIDER, SUM(CASE WHEN ENCTYPE IS NULL THEN 1 ELSE 0 END) as ENCOUNTER_TYPE, SUM(CASE WHEN ENCOUNTER_SUBTYPE IS NULL THEN 1 ELSE 0 END) as ENCOUNTER_SUBTYPE, SUM(CASE WHEN FACILITY_CODE IS NULL THEN 1 ELSE 0 END) as FACILITY, SUM(CASE WHEN DEPARTMENT IS NULL THEN 1 ELSE 0 END) as DEPARTMENT FROM ", encounters)) tab2_enc_null2 <- data.frame(table = rep("ENCOUNTERS", ncol(tab2_enc_null)), var = names(tab2_enc_null), records_null = t(tab2_enc_null)[,1]) row.names(tab2_enc_null2) <- NULL tab2_cl_null <- run_query( paste0("SELECT SUM(CASE WHEN GEOCODE IS NULL THEN 1 ELSE 0 END) as GEOCODE, SUM(CASE WHEN CITY_GEOCODE IS NULL THEN 1 ELSE 0 END) as CITY_GEOCODE FROM ", census_location)) tab2_cl_null2 <- data.frame(table = rep("CENSUS_LOCATION", ncol(tab2_cl_null)), var = names(tab2_cl_null), records_null = t(tab2_cl_null)[,1]) row.names(tab2_cl_null2) <- NULL tab2_dx_null <- run_query( paste0("SELECT SUM(CASE WHEN ENC_ID IS NULL THEN 1 ELSE 0 END) as ENC_ID, SUM(CASE WHEN ADATE IS NULL THEN 1 ELSE 0 END) as ADATE, SUM(CASE WHEN DX IS NULL THEN 1 ELSE 0 END) as DX, SUM(CASE WHEN DX_CODETYPE IS NULL THEN 1 ELSE 0 END) as DX_CODETYPE, SUM(CASE WHEN PRINCIPAL_DX IS NULL THEN 1 ELSE 0 END) as PRINCIPAL_DX, SUM(CASE WHEN PRIMARY_DX IS NULL THEN 1 ELSE 0 END) as PRIMARY_DX FROM ", diagnoses)) tab2_dx_null2 <- data.frame(table = rep("DIAGNOSES", ncol(tab2_dx_null)), var = names(tab2_dx_null), records_null = t(tab2_dx_null)[,1]) row.names(tab2_dx_null2) <- NULL tab2_vs_null <- run_query( paste0("SELECT SUM(CASE WHEN ENC_ID IS NULL THEN 1 ELSE 0 END) as ENC_ID, SUM(CASE WHEN ENCTYPE IS NULL THEN 1 ELSE 0 END) as ENCTYPE, SUM(CASE WHEN HT IS NULL THEN 1 ELSE 0 END) as HT, SUM(CASE WHEN WT IS NULL THEN 1 ELSE 0 END) as WT, SUM(CASE WHEN DIASTOLIC IS NULL THEN 1 ELSE 0 END) as DIASTOLIC, SUM(CASE WHEN SYSTOLIC IS NULL THEN 1 ELSE 0 END) as SYSTOLIC, SUM(CASE WHEN HT_RAW IS NULL THEN 1 ELSE 0 END) as HT_RAW, SUM(CASE WHEN WT_RAW IS NULL THEN 1 ELSE 0 END) as WT_RAW, SUM(CASE WHEN BMI_RAW IS NULL THEN 1 ELSE 0 END) as BMI_RAW, SUM(CASE WHEN DIASTOLIC_RAW IS NULL THEN 1 ELSE 0 END) as DIASTOLIC_RAW, SUM(CASE WHEN SYSTOLIC_RAW IS NULL THEN 1 ELSE 0 END) as SYSTOLIC_RAW, SUM(CASE WHEN BP_TYPE IS NULL THEN 1 ELSE 0 END) as BP_TYPE, SUM(CASE WHEN POSITION IS NULL THEN 1 ELSE 0 END) as POSITION, SUM(CASE WHEN HEAD_CIR_RAW IS NULL THEN 1 ELSE 0 END) as HEAD_CIR_RAW, SUM(CASE WHEN RESPIR_RAW IS NULL THEN 1 ELSE 0 END) as RESPIR_RAW, SUM(CASE WHEN TEMP_RAW IS NULL THEN 1 ELSE 0 END) as TEMP_RAW, SUM(CASE WHEN PULSE_RAW IS NULL THEN 1 ELSE 0 END) as PULSE_RAW FROM ", vital_signs)) tab2_vs_null2 <- data.frame(table = rep("VITAL_SIGNS", ncol(tab2_vs_null)), var = names(tab2_vs_null), records_null = t(tab2_vs_null)[,1]) row.names(tab2_vs_null2) <- NULL tab2_benefit_null <- run_query( paste0("SELECT SUM(IIF(LOAD_DATE IS NULL, 1, 0)) LOAD_DATE ,SUM(IIF(REFRESH_DATE IS NULL, 1, 0)) REFRESH_DATE ,SUM(IIF(BENEFIT_TYPE IS NULL, 1, 0)) BENEFIT_TYPE ,SUM(IIF(BENEFIT_CAT IS NULL, 1, 0)) BENEFIT_CAT ,SUM(IIF(BENEFIT_DATE IS NULL, 1, 0)) BENEFIT_DATE ,SUM(IIF(ENC_ID IS NULL, 1, 0)) ENC_ID ,SUM(IIF(START_DATE IS NULL, 1, 0)) START_DATE ,SUM(IIF(END_DATE IS NULL, 1, 0)) END_DATE FROM ", benefit)) tab2_benefit_null2 <- data.frame(table = rep("BENEFIT", ncol(tab2_benefit_null)), var = names(tab2_benefit_null), records_null = t(tab2_benefit_null)[,1]) row.names(tab2_benefit_null2) <- NULL tab2_null <- bind_rows(tab2_demo_null2, tab2_enc_null2, tab2_cl_null2, tab2_dx_null2, tab2_vs_null2, tab2_benefit_null2) %>% mutate( pct_rec = ifelse(table == "DEMOGRAPHICS", (records_null / tab1_dem$nrows * 100), ifelse(table == "ENCOUNTERS", (records_null / tab1_enc$nrows * 100), ifelse(table == "CENSUS_LOCATION", (records_null / tab1_cen$nrows * 100), ifelse(table == "DIAGNOSES", (records_null / tab1_dx$nrows * 100), ifelse(table == "VITAL_SIGNS", (records_null / tab1_vs$nrows * 100), ifelse(table == "BENEFIT", (records_null / tab1_ben$nrows * 100), NA)))))) ) tab2_demo_uk <- run_query( paste0("SELECT SUM(CASE WHEN GENDER = 'U' THEN 1 ELSE 0 END) as GENDER, SUM(CASE WHEN PRIMARY_LANGUAGE = 'UNK' THEN 1 ELSE 0 END) as PRIMARY_LANGUAGE, SUM(CASE WHEN NEEDS_INTERPRETER = 'U' THEN 1 ELSE 0 END) as NEEDS_INTERPRETER, SUM(CASE WHEN RACE1 = 'UN' THEN 1 ELSE 0 END) as RACE1, SUM(CASE WHEN RACE2 = 'UN' THEN 1 ELSE 0 END) as RACE2, SUM(CASE WHEN RACE3 = 'UN' THEN 1 ELSE 0 END) as RACE3, SUM(CASE WHEN RACE4 = 'UN' THEN 1 ELSE 0 END) as RACE4, SUM(CASE WHEN RACE5 = 'UN' THEN 1 ELSE 0 END) as RACE5, SUM(CASE WHEN HISPANIC = 'U' THEN 1 ELSE 0 END) as HISPANIC, SUM(CASE WHEN SEXUAL_ORIENTATION = 'UN' OR SEXUAL_ORIENTATION = 'NI' THEN 1 ELSE 0 END) as SEXUAL_ORIENTATION, SUM(CASE WHEN GENDER_IDENTITY = 'UN' OR GENDER_IDENTITY = 'NI' THEN 1 ELSE 0 END) as GENDER_IDENTITY FROM ", demographics)) tab2_demo_uk2 <- data.frame(table = rep("DEMOGRAPHICS", ncol(tab2_demo_uk)), var = names(tab2_demo_uk), records_uk = t(tab2_demo_uk)[,1]) row.names(tab2_demo_uk2) <- NULL tab2_enc_uk <- run_query( paste0("SELECT SUM(CASE WHEN PROVIDER = 'UNKNOWN' THEN 1 ELSE 0 END) as PROVIDER, SUM(CASE WHEN FACILITY_CODE = 'UNK' THEN 1 ELSE 0 END) as FACILITY, SUM(CASE WHEN DEPARTMENT = 'UNK' THEN 1 ELSE 0 END) as DEPARTMENT FROM ", encounters)) tab2_enc_uk2 <- data.frame(table = rep("ENCOUNTERS", ncol(tab2_enc_uk)), var = names(tab2_enc_uk), records_uk = t(tab2_enc_uk)[,1]) row.names(tab2_enc_uk2) <- NULL tab2_benefit_uk <- run_query( paste0("SELECT SUM(IIF(BENEFIT_TYPE = 'NI', 1, 0)) BENEFIT_TYPE ,SUM(IIF((BENEFIT_CAT = 'NI' OR BENEFIT_CAT = 'UN'), 1, 0)) BENEFIT_CAT FROM ", benefit)) tab2_benefit_uk2 <- data.frame(table = rep("BENEFIT", ncol(tab2_benefit_uk)), var = names(tab2_benefit_uk), records_uk = t(tab2_benefit_uk)[,1]) row.names(tab2_benefit_uk2) <- NULL tab2_uk <- bind_rows(tab2_demo_uk2, tab2_enc_uk2, tab2_benefit_uk2) %>% mutate( pct_rec_uk = ifelse(table == "DEMOGRAPHICS", (records_uk / tab1_dem$nrows * 100), ifelse(table == "ENCOUNTERS", (records_uk / tab1_enc$nrows * 100), ifelse(table == "BENEFIT", (records_uk / tab1_ben$nrows * 100), NA))) ) tab2_all <- left_join(tab2_null, tab2_uk, by = c("table", "var"))
kable(tab2_all, col.names = c("Table", "Variable", "Null Records", "Percent of Records Null", "'Unknown' or 'No Information' Records", "Percent of Records 'Unknown' or 'No Information'"), digits = 2, format.args = list(big.mark = ","))
nDemogRows <- run_query(paste0("select count(*) as nDemogRows FROM ", demographics)) demogSummary <- run_query( paste0(' WITH CTE_nDemogRows AS (SELECT COUNT(*) AS nDemogRows FROM ', demographics, '), CTE_nPatsDemog AS (SELECT COUNT(DISTINCT PERSON_ID) AS nPatsDemog FROM ', demographics, '), CTE_nPatsWithEncs AS (SELECT COUNT(DISTINCT b.PERSON_ID) nPatsWithEncs FROM ', demographics, ' a JOIN ', encounters, ' b ON a.PERSON_ID = b.PERSON_ID), CTE_nPatsWithLoc AS (SELECT COUNT(DISTINCT b.PERSON_ID) nPatsWithLoc FROM ', demographics, ' a JOIN ', census_location, ' b ON a.PERSON_ID = b.PERSON_ID), nPatsWithDiag AS (SELECT COUNT(DISTINCT b.PERSON_ID) nPatsWithDiag FROM ', demographics, ' a JOIN ', diagnoses, ' b ON a.PERSON_ID = b.PERSON_ID), nPatsWithVital AS (SELECT COUNT(DISTINCT b.PERSON_ID) nPatsWithVital FROM ', demographics, ' a JOIN ', vital_signs, ' b ON a.PERSON_ID = b.PERSON_ID) SELECT nDemogRows, nPatsDemog, nPatsWithEncs, nPatsWithLoc, nPatsWithDiag, nPatsWithVital FROM CTE_nDemogRows , CTE_nPatsDemog , CTE_nPatsWithEncs , CTE_nPatsWithLoc , nPatsWithDiag , nPatsWithVital;', sep=" " ), max=maxQryRows ) demogSummary <- cbind(demogSummary,nDemogRows) demogTable <- with(demogSummary, rbind( c('Total rows in the DEMOGRAPHICS table' ,nDemogRows ,round(100*nDemogRows/nPatsDemog,1)), c('Unique patients in the DEMOGRAPHICS table' ,nPatsDemog ,round(100*nPatsDemog/nPatsDemog,1)), c('Unique patients in the ENCOUNTERS table' ,nPatsWithEncs ,round(100*nPatsWithEncs/nPatsDemog,1)), c('Unique patients found in CENSUS_LOCATION' ,nPatsWithLoc ,round(100*nPatsWithLoc/nPatsDemog,1)), c('Unique patients found in DIAGNOSES' ,nPatsWithDiag ,round(100*nPatsWithDiag/nPatsDemog,1)), c('Unique patients found in VITAL_SIGNS' ,nPatsWithVital,round(100*nPatsWithVital/nPatsDemog,1)) ) )
knitr::kable(demogTable, col.names=c("Characteristic","Frequency","Percent of unique patients"), row.names=FALSE, format.args = list(big.mark = ","))
r if(exists("demogSummary")){ifelse(demogSummary$nDemogRows==demogSummary$nPatsDemog," ","The demographics table has duplicate rows, by person_id")}
demogCounts <- run_query( paste('SELECT gender, gender_identity, sexual_orientation, primary_language, race1, race2, race3, race4, race5, hispanic, count(*) as nRows', 'FROM ', demographics, 'group by gender, gender_identity, sexual_orientation, primary_language, race1, race2, race3, race4, race5, hispanic', sep=" " ), as.is=c(TRUE,TRUE,TRUE,TRUE), max=maxQryRows ) demogCounts <- trimChrVars(demogCounts)
primLang <- demogCounts %>% group_by(primary_language) %>% summarise(langCnt = sum(nRows)) %>% ungroup() %>% mutate(langTot = sum(langCnt), langPct = round(100*langCnt/langTot,1) ) primLang <- within(merge(primLang,isoLang[c('code3B','InEnglish')],by.x='primary_language', by.y='code3B', all.x=T) %>% arrange(desc(langPct)) ,{ InEnglish <- ifelse(!is.na(InEnglish),InEnglish, ifelse(is.na(primary_language) , 'Missing', 'Invalid')) })
knitr::kable(primLang[1:5, c('primary_language', 'InEnglish','langCnt','langPct')], col.names= c("ISO Language","Language name ","Frequency","Percent"), row.names=FALSE, format.args = list(big.mark = ","))
unkLang <- subset(primLang, primary_language %in% c("unk","oth", "und")) #Total other/unknown #total row rownames(unkLang) <- unkLang$primary_language unkLang$InEnglish <- NULL unkLang$InEnglish[unkLang$primary_language == "unk"] = "Unknown" unkLang$InEnglish[unkLang$primary_language == "oth"] = "Other" unkLang$InEnglish[unkLang$primary_language == "und"] = "Undetermined" unkLangRowsNames <- as.vector(unkLang$InEnglish) unkLang$primary_language <- unkLang$InEnglish <-NULL rowsum <-colSums(unkLang) unkLang <-rbind(unkLang, rowsum) #row names rownames(unkLang) <- c(unkLangRowsNames, "Total Other, Unknown, or Undetermined Language")
knitr::kable(unkLang[1:3, c('langCnt','langPct')], col.names= c("Frequency","Percent"), row.names= T, format.args = list(big.mark = ","))
gender <- demogCounts %>% group_by(gender) %>% summarise(count = sum(nRows)) %>% ungroup() %>% mutate(totCount = sum(count), countPct = round(100*count/totCount,1) ) gender <- within(merge(gender,subset(valSets,tableName=='DEMOGRAPHICS' & columnName=='GENDER')[c('code','decode')], by.x='gender', by.y='code', all.x=TRUE) %>% arrange(desc(countPct)) ,{ decode <- ifelse(!is.na(decode),decode, ifelse(is.na(gender) , 'Missing', 'Invalid')) })
knitr::kable(gender[ c('gender', 'decode','count','countPct')], col.names = c("Value","Label","Frequency","Percent"), row.names=FALSE, format.args = list(big.mark = ","))
gender_identity <- demogCounts %>% group_by(gender_identity) %>% summarise(count = sum(nRows)) %>% ungroup() %>% mutate(totCount = sum(count), countPct = round(100*count/totCount,1) )
knitr::kable(gender_identity[c('gender_identity','count','countPct')], col.names = c('Gender_Identity',"Frequency", "Percent"), row.names=F, format.args = list(big.mark = ","))
tab_gender_identity<- run_query(gsub("%DEMOGRAPHICS%", demographics, " SET NOCOUNT ON; declare @GENDER TABLE (GI varchar(2), GENDER_IDENTITYDESC varchar(50)); insert into @GENDER values ('DC', 'Declined to answer') ,('F', 'Female') ,('M', 'Male') ,('NI', 'No information') ,('OT', 'Other') ,('TM', 'Transgender Male') ,('TF', 'Transgender Female') ,('UN', 'Unknown'); select GENDER_IDENTITYDESC as [Gender Identity], (select count(*) from %DEMOGRAPHICS% where GI in (gender_identity) and 'M' in (gender)) as 'Male', (select count(*) from %DEMOGRAPHICS% where GI in (gender_identity) and 'F' in (gender)) as 'Female', (select count(*) from %DEMOGRAPHICS% where GI in (gender_identity) and 'U' in (gender)) as 'Unknown Gender' from @GENDER;")) #Row totals rownames(tab_gender_identity) <- tab_gender_identity$'Gender Identity' tab_gender_identity$'Gender Identity' <- NULL tab_gender_identity<- tab_gender_identity [c("Male","Female","Transgender Male","Transgender Female","Other","Declined to answer","No information","Unknown"),] rowsum <-colSums(tab_gender_identity) tab_gender_identity <-rbind(tab_gender_identity, rowsum) #Column totals colsum <-rowSums(tab_gender_identity) tab_gender_identity <- cbind(tab_gender_identity,colsum) #Row percents tab_gender_identity$Percent_Male <- round(tab_gender_identity$Male/(tab_gender_identity$Male+tab_gender_identity$Female)*100, digits=2) tab_gender_identity$Percent_Female <-round(tab_gender_identity$Female/(tab_gender_identity$Male+tab_gender_identity$Female)*100, digits=2) #Renaming for final table colnames(tab_gender_identity) [4] <- "Total Frequency" rownames(tab_gender_identity) [9] <- "Total" category <- rownames(tab_gender_identity) tab_gender_identity <- cbind("Gender Identity" = category, tab_gender_identity)
knitr::kable(tab_gender_identity[c('Gender Identity','Male','Percent_Male', 'Female', 'Percent_Female', 'Unknown Gender', 'Total Frequency')], col.names = c('Gender Identity','Male','Percent Male', 'Female', 'Percent Female', 'Unknown Gender', 'Total Frequency'), row.names=F, format.args = list(big.mark = ","))
sexual_orientation <- demogCounts %>% group_by(sexual_orientation) %>% summarise(count = sum(nRows)) %>% ungroup() %>% mutate(totCount = sum(count), countPct = round(100*count/totCount,1) ) sexual_orientation <- within(merge(sexual_orientation,subset(valSets,tableName=='DEMOGRAPHICS' & columnName=='SEXUAL_ORIENTATION')[c('code','decode')], by.x='sexual_orientation', by.y='code', all.x=TRUE) %>% arrange(desc(countPct)) ,{ decode <- ifelse(!is.na(decode),decode, ifelse(is.na(sexual_orientation) , 'Missing', 'Invalid')) })
knitr::kable(sexual_orientation[c( 'sexual_orientation','count','countPct')], col.names = c('Sexual Orientation',"Frequency", "Percent"), row.names=F, format.args = list(big.mark = ","))
race <- demogCounts %>% group_by(race1) %>% summarise(count = sum(nRows)) %>% ungroup() %>% mutate(totCount = sum(count), countPct = round(100*count/totCount,1) ) race <- within(merge(race,subset(valSets,tableName=='DEMOGRAPHICS' & columnName=='RACE1')[c('code','decode')], by.x='race1', by.y='code', all.x=TRUE) %>% arrange(desc(countPct)) ,{ decode <- ifelse(!is.na(decode),decode, ifelse(is.na(gender) , 'Missing', 'Invalid')) })
knitr::kable(race[ c('race1', 'decode','count','countPct')], col.names = c("Value","Label","Frequency","Percent"), row.names=FALSE, format.args = list(big.mark = ","))
race2 <- demogCounts %>% group_by(race2) %>% summarise(count = sum(nRows)) %>% ungroup() %>% mutate(totCount = sum(count), countPct = round(100*count/totCount,1) ) race2 <- within(merge(race2,subset(valSets,tableName=='DEMOGRAPHICS' & columnName=='RACE2')[c('code','decode')], by.x='race2', by.y='code', all.x=TRUE) %>% arrange(desc(countPct)) ,{ decode <- ifelse(!is.na(decode),decode, ifelse(is.na(gender) , 'Missing', 'Invalid')) })
knitr::kable(race2[ c('race2', 'decode','count','countPct')], col.names = c("Value","Label","Frequency","Percent"), row.names=FALSE, format.args = list(big.mark = ","))
race3 <- demogCounts %>% group_by(race3) %>% summarise(count = sum(nRows)) %>% ungroup() %>% mutate(totCount = sum(count), countPct = round(100*count/totCount,1) ) race3 <- within(merge(race3,subset(valSets,tableName=='DEMOGRAPHICS' & columnName=='RACE3')[c('code','decode')], by.x='race3', by.y='code', all.x=TRUE) %>% arrange(desc(countPct)) ,{ decode <- ifelse(!is.na(decode),decode, ifelse(is.na(gender) , 'Missing', 'Invalid')) })
knitr::kable(race3[ c('race3', 'decode','count','countPct')], col.names = c("Value","Label","Frequency","Percent"), row.names=FALSE, format.args = list(big.mark = ","))
race4 <- demogCounts %>% group_by(race4) %>% summarise(count = sum(nRows)) %>% ungroup() %>% mutate(totCount = sum(count), countPct = round(100*count/totCount,1) ) race4 <- within(merge(race4,subset(valSets,tableName=='DEMOGRAPHICS' & columnName=='RACE4')[c('code','decode')], by.x='race4', by.y='code', all.x=TRUE) %>% arrange(desc(countPct)) ,{ decode <- ifelse(!is.na(decode),decode, ifelse(is.na(gender) , 'Missing', 'Invalid')) })
knitr::kable(race4[ c('race4', 'decode','count','countPct')], col.names = c("Value","Label","Frequency","Percent"), row.names=FALSE, format.args = list(big.mark = ","))
race5 <- demogCounts %>% group_by(race5) %>% summarise(count = sum(nRows)) %>% ungroup() %>% mutate(totCount = sum(count), countPct = round(100*count/totCount,1) ) race5 <- within(merge(race5,subset(valSets,tableName=='DEMOGRAPHICS' & columnName=='RACE5')[c('code','decode')], by.x='race5', by.y='code', all.x=TRUE) %>% arrange(desc(countPct)) ,{ decode <- ifelse(!is.na(decode),decode, ifelse(is.na(gender) , 'Missing', 'Invalid')) })
knitr::kable(race5[ c('race5', 'decode','count','countPct')], col.names = c("Value","Label","Frequency","Percent"), row.names=FALSE, format.args = list(big.mark = ","))
tab_race1race2 <- run_query(gsub("%DEMOGRAPHICS%", demographics, " SET NOCOUNT ON; DECLARE @RACES TABLE (RACE VARCHAR(2), RACEDESC VARCHAR(50) ); INSERT INTO @RACES VALUES ( 'AS', 'Asian'), ( 'BA', 'Black or African American'), ( 'HP', 'Native Hawaiian or Other Pacific Islander'), ( 'IN', 'American Indian/Alaska Native'), ( 'UN', 'Unknown or Not Reported'), ( 'WH', 'White'); WITH CTE_RACE_AS AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE IN(Race1) AND 'AS' IN(Race2) GROUP BY RACEDESC), CTE_RACE_BA AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE IN(Race1) AND 'BA' IN(Race2) GROUP BY RACEDESC), CTE_RACE_HP AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE IN(Race1) AND 'HP' IN(Race2) GROUP BY RACEDESC), CTE_RACE_IN AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE IN(Race1) AND 'IN' IN(Race2) GROUP BY RACEDESC), CTE_RACE_UN AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE (RACE IN(Race1) OR RACE1 IS NULL) AND ('UN' IN (Race2) OR RACE2 IS NULL) GROUP BY RACEDESC), CTE_RACE_WH AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE IN(Race1) AND 'WH' IN(Race2) GROUP BY RACEDESC) SELECT a.RACEDESC AS [Race Group], IIF(b.cnt IS NULL, 0, b.cnt) 'Asian', IIF(c.cnt IS NULL, 0, c.cnt) 'Black or African American', IIF(d.cnt IS NULL, 0, d.cnt) 'Native Hawaiian or Other Pacific Islander', IIF(e.cnt IS NULL, 0, e.cnt) 'American Indian/Alaska Native', IIF(f.cnt IS NULL, 0, f.cnt) 'Unknown or Not Reported', IIF(g.cnt IS NULL, 0, g.cnt) 'White' FROM @RACES a LEFT JOIN CTE_RACE_AS b ON a.RACEDESC = b.RACEDESC LEFT JOIN CTE_RACE_BA c ON a.RACEDESC = c.RACEDESC LEFT JOIN CTE_RACE_HP d ON a.RACEDESC = d.RACEDESC LEFT JOIN CTE_RACE_IN e ON a.RACEDESC = e.RACEDESC LEFT JOIN CTE_RACE_UN f ON a.RACEDESC = f.RACEDESC LEFT JOIN CTE_RACE_WH g ON a.RACEDESC = g.RACEDESC;")) #Row totals rownames(tab_race1race2) <- tab_race1race2$'Race Group' tab_race1race2$'Race Group' <- NULL rowsum <-colSums(tab_race1race2) tab_race1race2 <-rbind(tab_race1race2, rowsum) #Column totals colsum <-rowSums(tab_race1race2) tab_race1race2 <- cbind(tab_race1race2,colsum) #Renaming for final table colnames (tab_race1race2) [7] <- "Total" rownames(tab_race1race2) [7] <- "Total" category <- rownames(tab_race1race2) tab_race1race2 <- cbind("Race Group" = category, tab_race1race2)
#knitr::kable(tab_race1race2, format.args = list(big.mark = ","), row.names = FALSE) tab_race1race2 <- flextable(format(tab_race1race2, big.mark=",")) %>% set_table_properties(layout = "autofit") tab_race1race2 <- align(add_header_row(tab_race1race2, values = c("Race 1", "Race 2", " "), colwidths = c(1,6, 1), top = FALSE), align = "center", part = "header") tab_race1race2 <- flextable::vline(x = tab_race1race2, part = "all", border = fp_border(color = "black", style = "solid")) %>% hline_bottom(border = fp_border(color = "black", style = "solid"), part = "header") knitr::knit_print(tab_race1race2)
tab_race1race3 <- run_query(gsub("%DEMOGRAPHICS%", demographics, " SET NOCOUNT ON; DECLARE @RACES TABLE (RACE VARCHAR(2), RACEDESC VARCHAR(50) ); INSERT INTO @RACES VALUES ( 'AS', 'Asian'), ( 'BA', 'Black or African American'), ( 'HP', 'Native Hawaiian or Other Pacific Islander'), ( 'IN', 'American Indian/Alaska Native'), ( 'UN', 'Unknown or Not Reported'), ( 'WH', 'White'); WITH CTE_RACE_AS AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE IN(Race1) AND 'AS' IN(Race3) GROUP BY RACEDESC), CTE_RACE_BA AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE IN(Race1) AND 'BA' IN(Race3) GROUP BY RACEDESC), CTE_RACE_HP AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE IN(Race1) AND 'HP' IN(Race3) GROUP BY RACEDESC), CTE_RACE_IN AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE IN(Race1) AND 'IN' IN(Race3) GROUP BY RACEDESC), CTE_RACE_UN AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE (RACE IN(Race1) OR RACE1 IS NULL) AND ('UN' IN(Race3) OR RACE3 IS NULL) GROUP BY RACEDESC), CTE_RACE_WH AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE IN(Race1) AND 'WH' IN(Race3) GROUP BY RACEDESC) SELECT a.RACEDESC AS [Race Group], IIF(b.cnt IS NULL, 0, b.cnt) 'Asian', IIF(c.cnt IS NULL, 0, c.cnt) 'Black or African American', IIF(d.cnt IS NULL, 0, d.cnt) 'Native Hawaiian or Other Pacific Islander', IIF(e.cnt IS NULL, 0, e.cnt) 'American Indian/Alaska Native', IIF(f.cnt IS NULL, 0, f.cnt) 'Unknown or Not Reported', IIF(g.cnt IS NULL, 0, g.cnt) 'White' FROM @RACES a LEFT JOIN CTE_RACE_AS b ON a.RACEDESC = b.RACEDESC LEFT JOIN CTE_RACE_BA c ON a.RACEDESC = c.RACEDESC LEFT JOIN CTE_RACE_HP d ON a.RACEDESC = d.RACEDESC LEFT JOIN CTE_RACE_IN e ON a.RACEDESC = e.RACEDESC LEFT JOIN CTE_RACE_UN f ON a.RACEDESC = f.RACEDESC LEFT JOIN CTE_RACE_WH g ON a.RACEDESC = g.RACEDESC;")) #Row totals rownames(tab_race1race3) <- tab_race1race3$'Race Group' tab_race1race3$'Race Group' <- NULL rowsum <-colSums(tab_race1race3) tab_race1race3 <-rbind(tab_race1race3, rowsum) #Column totals colsum <-rowSums(tab_race1race3) tab_race1race3 <- cbind(tab_race1race3,colsum) #Renaming for final table colnames (tab_race1race3) [7] <- "Total" rownames(tab_race1race3) [7] <- "Total" category <- rownames(tab_race1race3) tab_race1race3 <- cbind("Race Group" = category, tab_race1race3)
#knitr::kable(tab_race1race3, format.args = list(big.mark = ","), row.names = FALSE) %>% tab_race1race3 <- flextable(format(tab_race1race3, big.mark=",")) %>% set_table_properties(layout = "autofit") tab_race1race3 <- align(add_header_row(tab_race1race3, values = c("Race 1", "Race 3", " "), colwidths = c(1,6,1), top = FALSE), align = "center", part = "header") tab_race1race3 <- flextable::vline(x = tab_race1race3, part = "all", border = fp_border(color = "black", style = "solid")) %>% hline_bottom(border = fp_border(color = "black", style = "solid"), part = "header") knitr::knit_print(tab_race1race3)
tab_race1race4 <- run_query(gsub("%DEMOGRAPHICS%", demographics, " SET NOCOUNT ON; DECLARE @RACES TABLE (RACE VARCHAR(2), RACEDESC VARCHAR(50) ); INSERT INTO @RACES VALUES ( 'AS', 'Asian'), ( 'BA', 'Black or African American'), ( 'HP', 'Native Hawaiian or Other Pacific Islander'), ( 'IN', 'American Indian/Alaska Native'), ( 'UN', 'Unknown or Not Reported'), ( 'WH', 'White'); WITH CTE_RACE_AS AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE IN(Race1) AND 'AS' IN(Race4) GROUP BY RACEDESC), CTE_RACE_BA AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE IN(Race1) AND 'BA' IN(Race4) GROUP BY RACEDESC), CTE_RACE_HP AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE IN(Race1) AND 'HP' IN(Race4) GROUP BY RACEDESC), CTE_RACE_IN AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE IN(Race1) AND 'IN' IN(Race4) GROUP BY RACEDESC), CTE_RACE_UN AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE (RACE IN (Race1) OR Race1 is NULL) AND ('UN' IN(Race4) OR Race4 is NULL) GROUP BY RACEDESC), CTE_RACE_WH AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE IN(Race1) AND 'WH' IN(Race4) GROUP BY RACEDESC) SELECT a.RACEDESC AS [Race Group], IIF(b.cnt IS NULL, 0, b.cnt) 'Asian', IIF(c.cnt IS NULL, 0, c.cnt) 'Black or African American', IIF(d.cnt IS NULL, 0, d.cnt) 'Native Hawaiian or Other Pacific Islander', IIF(e.cnt IS NULL, 0, e.cnt) 'American Indian/Alaska Native', IIF(f.cnt IS NULL, 0, f.cnt) 'Unknown or Not Reported', IIF(g.cnt IS NULL, 0, g.cnt) 'White' FROM @RACES a LEFT JOIN CTE_RACE_AS b ON a.RACEDESC = b.RACEDESC LEFT JOIN CTE_RACE_BA c ON a.RACEDESC = c.RACEDESC LEFT JOIN CTE_RACE_HP d ON a.RACEDESC = d.RACEDESC LEFT JOIN CTE_RACE_IN e ON a.RACEDESC = e.RACEDESC LEFT JOIN CTE_RACE_UN f ON a.RACEDESC = f.RACEDESC LEFT JOIN CTE_RACE_WH g ON a.RACEDESC = g.RACEDESC;")) #Row totals rownames(tab_race1race4) <- tab_race1race4$'Race Group' tab_race1race4$'Race Group' <- NULL rowsum <-colSums(tab_race1race4) tab_race1race4 <-rbind(tab_race1race4, rowsum) #Column totals colsum <-rowSums(tab_race1race4) tab_race1race4 <- cbind(tab_race1race4,colsum) #Renaming for final table colnames (tab_race1race4) [7] <- "Total" rownames(tab_race1race4) [7] <- "Total" category <- rownames(tab_race1race4) tab_race1race4 <- cbind("Race Group" = category, tab_race1race4)
#knitr::kable(tab_race1race4, format.args = list(big.mark = ","), row.names = FALSE) %>% tab_race1race4 <- flextable(format(tab_race1race4, big.mark=",")) %>% set_table_properties(layout = "autofit") tab_race1race4 <- align(add_header_row(tab_race1race4, values = c("Race 1", "Race 4", " "), colwidths = c(1,6,1), top = FALSE), align = "center", part = "header") tab_race1race4 <- flextable::vline(x = tab_race1race4, part = "all", border = fp_border(color = "black", style = "solid")) %>% hline_bottom(border = fp_border(color = "black", style = "solid"), part = "header") knitr::knit_print(tab_race1race4)
tab_race1race5 <- run_query(gsub("%DEMOGRAPHICS%", demographics, " SET NOCOUNT ON; DECLARE @RACES TABLE (RACE VARCHAR(2), RACEDESC VARCHAR(50) ); INSERT INTO @RACES VALUES ( 'AS', 'Asian'), ( 'BA', 'Black or African American'), ( 'HP', 'Native Hawaiian or Other Pacific Islander'), ( 'IN', 'American Indian/Alaska Native'), ( 'UN', 'Unknown or Not Reported'), ( 'WH', 'White'); WITH CTE_RACE_AS AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE IN(Race1) AND 'AS' IN(Race5) GROUP BY RACEDESC), CTE_RACE_BA AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE IN(Race1) AND 'BA' IN(Race5) GROUP BY RACEDESC), CTE_RACE_HP AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE IN(Race1) AND 'HP' IN(Race5) GROUP BY RACEDESC), CTE_RACE_IN AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE IN(Race1) AND 'IN' IN(Race5) GROUP BY RACEDESC), CTE_RACE_UN AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE (RACE IN (Race1) OR RACE1 IS NULL) AND ('UN' IN(Race5) OR RACE5 IS NULL) GROUP BY RACEDESC), CTE_RACE_WH AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE IN(Race1) AND 'WH' IN(Race5) GROUP BY RACEDESC) SELECT a.RACEDESC AS [Race Group], IIF(b.cnt IS NULL, 0, b.cnt) 'Asian', IIF(c.cnt IS NULL, 0, c.cnt) 'Black or African American', IIF(d.cnt IS NULL, 0, d.cnt) 'Native Hawaiian or Other Pacific Islander', IIF(e.cnt IS NULL, 0, e.cnt) 'American Indian/Alaska Native', IIF(f.cnt IS NULL, 0, f.cnt) 'Unknown or Not Reported', IIF(g.cnt IS NULL, 0, g.cnt) 'White' FROM @RACES a LEFT JOIN CTE_RACE_AS b ON a.RACEDESC = b.RACEDESC LEFT JOIN CTE_RACE_BA c ON a.RACEDESC = c.RACEDESC LEFT JOIN CTE_RACE_HP d ON a.RACEDESC = d.RACEDESC LEFT JOIN CTE_RACE_IN e ON a.RACEDESC = e.RACEDESC LEFT JOIN CTE_RACE_UN f ON a.RACEDESC = f.RACEDESC LEFT JOIN CTE_RACE_WH g ON a.RACEDESC = g.RACEDESC;")) #Row totals rownames(tab_race1race5) <- tab_race1race5$'Race Group' tab_race1race5$'Race Group' <- NULL rowsum <-colSums(tab_race1race5) tab_race1race5 <-rbind(tab_race1race5, rowsum) #Column totals colsum <-rowSums(tab_race1race5) tab_race1race5 <- cbind(tab_race1race5,colsum) #Renaming for final table colnames (tab_race1race5) [7] <- "Total" rownames(tab_race1race5) [7] <- "Total" category <- rownames(tab_race1race5) tab_race1race5 <- cbind("Race Group" = category, tab_race1race5)
#knitr::kable(tab_race1race5, format.args = list(big.mark = ","), row.names = FALSE) %>% tab_race1race5 <- flextable(format(tab_race1race5, big.mark=",")) %>% set_table_properties(layout = "autofit") tab_race1race5 <- align(add_header_row(tab_race1race5, values = c("Race 1", "Race 5", " "), colwidths = c(1,6,1), top = FALSE), align = "center", part = "header") tab_race1race5 <- flextable::vline(x = tab_race1race5, part = "all", border = fp_border(color = "black", style = "solid")) %>% hline_bottom(border = fp_border(color = "black", style = "solid"), part = "header") knitr::knit_print(tab_race1race5)
ethn <- run_query( paste0("SELECT SUM(CASE WHEN HISPANIC = 'Y' THEN 1 ELSE 0 END) as Yes, SUM(CASE WHEN HISPANIC = 'N' THEN 1 ELSE 0 END) as No, SUM(CASE WHEN HISPANIC = 'U' THEN 1 ELSE 0 END) as Unknown FROM ", demographics)) ethn2 <- data.frame(Value = c("Y", "N", "U"), Label = names(ethn), Frequency = c(ethn$Yes, ethn$No, ethn$Unknown), Percent = c((ethn$Yes/tab1_dem$npats * 100), (ethn$No/tab1_dem$npats * 100), (ethn$Unknown/tab1_dem$npats * 100)))
knitr::kable(ethn2, digits = 2, format.args = list(big.mark = ","))
tab_race1hispanic <- run_query(gsub("%DEMOGRAPHICS%", demographics, " SET NOCOUNT ON; DECLARE @RACES TABLE (RACE VARCHAR(2), RACEDESC VARCHAR(50) ); INSERT INTO @RACES VALUES ( 'AS', 'Asian'), ( 'BA', 'Black or African American'), ( 'HP', 'Native Hawaiian or Other Pacific Islander'), ( 'IN', 'American Indian/Alaska Native'), ( 'UN', 'Unknown or Not Reported'), ( 'WH', 'White'); WITH CTE_HISP_Y AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE IN(Race1) AND 'Y' in (Hispanic) GROUP BY RACEDESC), CTE_HISP_N AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE IN(Race1) AND 'N' in (Hispanic) GROUP BY RACEDESC), CTE_HISP_U AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE IN(Race1) AND 'U' in (Hispanic) GROUP BY RACEDESC) SELECT a.RACEDESC AS [Race Group], IIF(b.cnt IS NULL, 0, b.cnt) 'Hispanic', IIF(c.cnt IS NULL, 0, c.cnt) 'Non-Hispanic', IIF(d.cnt IS NULL, 0, d.cnt) 'Unknown Hispanic' FROM @RACES a LEFT JOIN CTE_HISP_Y b ON a.RACEDESC = b.RACEDESC LEFT JOIN CTE_HISP_N c ON a.RACEDESC = c.RACEDESC LEFT JOIN CTE_HISP_U d ON a.RACEDESC = d.RACEDESC ORDER BY a.RACEDESC;")) #Row totals rownames(tab_race1hispanic) <- tab_race1hispanic$'Race Group' tab_race1hispanic$'Race Group' <- NULL rowsum <-colSums(tab_race1hispanic) tab_race1hispanic <-rbind(tab_race1hispanic, rowsum) #Column totals colsum <-rowSums(tab_race1hispanic) tab_race1hispanic <- cbind(tab_race1hispanic,colsum) #Renaming for final table colnames (tab_race1hispanic) [4] <- "Total" rownames(tab_race1hispanic) [7] <- "Total" category <- rownames(tab_race1hispanic) tab_race1hispanic <- cbind("Race Group" = category, tab_race1hispanic)
knitr::kable(tab_race1hispanic, format.args = list(big.mark = ","), row.names = FALSE)
tab_race2hispanic <- run_query(gsub("%DEMOGRAPHICS%", demographics, " SET NOCOUNT ON; DECLARE @RACES TABLE (RACE VARCHAR(2), RACEDESC VARCHAR(50) ); INSERT INTO @RACES VALUES ( 'AS', 'Asian'), ( 'BA', 'Black or African American'), ( 'HP', 'Native Hawaiian or Other Pacific Islander'), ( 'IN', 'American Indian/Alaska Native'), ( 'UN', 'Unknown or Not Reported'), ( 'WH', 'White'); WITH CTE_HISP_Y AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE1 = 'UN' AND RACE IN(Race2) AND 'Y' in (Hispanic) GROUP BY RACEDESC), CTE_HISP_N AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE1 = 'UN' AND RACE IN(Race2) AND 'N' in (Hispanic) GROUP BY RACEDESC), CTE_HISP_U AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE1 = 'UN' AND RACE IN(Race2) AND 'U' in (Hispanic) GROUP BY RACEDESC) SELECT a.RACEDESC AS [Race Group], IIF(b.cnt IS NULL, 0, b.cnt) 'Hispanic', IIF(c.cnt IS NULL, 0, c.cnt) 'Non-Hispanic', IIF(d.cnt IS NULL, 0, d.cnt) 'Unknown Hispanic' FROM @RACES a LEFT JOIN CTE_HISP_Y b ON a.RACEDESC = b.RACEDESC LEFT JOIN CTE_HISP_N c ON a.RACEDESC = c.RACEDESC LEFT JOIN CTE_HISP_U d ON a.RACEDESC = d.RACEDESC ORDER BY a.RACEDESC;")) #Row totals rownames(tab_race2hispanic) <- tab_race2hispanic$'Race Group' tab_race2hispanic$'Race Group' <- NULL rowsum <-colSums(tab_race2hispanic) tab_race2hispanic <-rbind(tab_race2hispanic, rowsum) #Column totals colsum <-rowSums(tab_race2hispanic) tab_race2hispanic <- cbind(tab_race2hispanic,colsum) #Renaming for final table colnames (tab_race2hispanic) [4] <- "Total" rownames(tab_race2hispanic) [7] <- "Total" category <- rownames(tab_race2hispanic) tab_race2hispanic <- cbind("Race Group" = category, tab_race2hispanic)
knitr::kable(tab_race2hispanic, format.args = list(big.mark = ","), row.names = FALSE)
tab_race3hispanic <- run_query(gsub("%DEMOGRAPHICS%", demographics, " SET NOCOUNT ON; DECLARE @RACES TABLE (RACE VARCHAR(2), RACEDESC VARCHAR(50) ); INSERT INTO @RACES VALUES ( 'AS', 'Asian'), ( 'BA', 'Black or African American'), ( 'HP', 'Native Hawaiian or Other Pacific Islander'), ( 'IN', 'American Indian/Alaska Native'), ( 'UN', 'Unknown or Not Reported'), ( 'WH', 'White'); WITH CTE_HISP_Y AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE1 = 'UN' AND RACE IN(Race3) AND 'Y' in (Hispanic) GROUP BY RACEDESC), CTE_HISP_N AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE1 = 'UN' AND RACE IN(Race3) AND 'N' in (Hispanic) GROUP BY RACEDESC), CTE_HISP_U AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE1 = 'UN' AND RACE IN(Race3) AND 'U' in (Hispanic) GROUP BY RACEDESC) SELECT a.RACEDESC AS [Race Group], IIF(b.cnt IS NULL, 0, b.cnt) 'Hispanic', IIF(c.cnt IS NULL, 0, c.cnt) 'Non-Hispanic', IIF(d.cnt IS NULL, 0, d.cnt) 'Unknown Hispanic' FROM @RACES a LEFT JOIN CTE_HISP_Y b ON a.RACEDESC = b.RACEDESC LEFT JOIN CTE_HISP_N c ON a.RACEDESC = c.RACEDESC LEFT JOIN CTE_HISP_U d ON a.RACEDESC = d.RACEDESC ORDER BY a.RACEDESC;")) #Row totals rownames(tab_race3hispanic) <- tab_race3hispanic$'Race Group' tab_race3hispanic$'Race Group' <- NULL rowsum <-colSums(tab_race3hispanic) tab_race3hispanic <-rbind(tab_race3hispanic, rowsum) #Column totals colsum <-rowSums(tab_race3hispanic) tab_race3hispanic <- cbind(tab_race3hispanic,colsum) #Renaming for final table colnames (tab_race3hispanic) [4] <- "Total" rownames(tab_race3hispanic) [7] <- "Total" category <- rownames(tab_race3hispanic) tab_race3hispanic <- cbind("Race Group" = category, tab_race3hispanic)
knitr::kable(tab_race3hispanic, format.args = list(big.mark = ","), row.names = FALSE)
tab_race4hispanic <- run_query(gsub("%DEMOGRAPHICS%", demographics, " SET NOCOUNT ON; DECLARE @RACES TABLE (RACE VARCHAR(2), RACEDESC VARCHAR(50) ); INSERT INTO @RACES VALUES ( 'AS', 'Asian'), ( 'BA', 'Black or African American'), ( 'HP', 'Native Hawaiian or Other Pacific Islander'), ( 'IN', 'American Indian/Alaska Native'), ( 'UN', 'Unknown or Not Reported'), ( 'WH', 'White'); WITH CTE_HISP_Y AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE1 = 'UN' AND RACE IN(Race4) AND 'Y' in (Hispanic) GROUP BY RACEDESC), CTE_HISP_N AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE1 = 'UN' AND RACE IN(Race4) AND 'N' in (Hispanic) GROUP BY RACEDESC), CTE_HISP_U AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE1 = 'UN' AND RACE IN(Race4) AND 'U' in (Hispanic) GROUP BY RACEDESC) SELECT a.RACEDESC AS [Race Group], IIF(b.cnt IS NULL, 0, b.cnt) 'Hispanic', IIF(c.cnt IS NULL, 0, c.cnt) 'Non-Hispanic', IIF(d.cnt IS NULL, 0, d.cnt) 'Unknown Hispanic' FROM @RACES a LEFT JOIN CTE_HISP_Y b ON a.RACEDESC = b.RACEDESC LEFT JOIN CTE_HISP_N c ON a.RACEDESC = c.RACEDESC LEFT JOIN CTE_HISP_U d ON a.RACEDESC = d.RACEDESC ORDER BY a.RACEDESC;")) #Row totals rownames(tab_race4hispanic) <- tab_race4hispanic$'Race Group' tab_race4hispanic$'Race Group' <- NULL rowsum <-colSums(tab_race4hispanic) tab_race4hispanic <-rbind(tab_race4hispanic, rowsum) #Column totals colsum <-rowSums(tab_race4hispanic) tab_race4hispanic <- cbind(tab_race4hispanic,colsum) #Renaming for final table colnames (tab_race4hispanic) [4] <- "Total" rownames(tab_race4hispanic) [7] <- "Total" category <- rownames(tab_race4hispanic) tab_race4hispanic <- cbind("Race Group" = category, tab_race4hispanic)
knitr::kable(tab_race4hispanic, format.args = list(big.mark = ","), row.names = FALSE)
tab_race5hispanic <- run_query(gsub("%DEMOGRAPHICS%", demographics, " SET NOCOUNT ON; DECLARE @RACES TABLE (RACE VARCHAR(2), RACEDESC VARCHAR(50) ); INSERT INTO @RACES VALUES ( 'AS', 'Asian'), ( 'BA', 'Black or African American'), ( 'HP', 'Native Hawaiian or Other Pacific Islander'), ( 'IN', 'American Indian/Alaska Native'), ( 'UN', 'Unknown or Not Reported'), ( 'WH', 'White'); WITH CTE_HISP_Y AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE1 = 'UN' AND RACE IN(Race5) AND 'Y' in (Hispanic) GROUP BY RACEDESC), CTE_HISP_N AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE1 = 'UN' AND RACE IN(Race5) AND 'N' in (Hispanic) GROUP BY RACEDESC), CTE_HISP_U AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE RACE1 = 'UN' AND RACE IN(Race5) AND 'U' in (Hispanic) GROUP BY RACEDESC) SELECT a.RACEDESC AS [Race Group], IIF(b.cnt IS NULL, 0, b.cnt) 'Hispanic', IIF(c.cnt IS NULL, 0, c.cnt) 'Non-Hispanic', IIF(d.cnt IS NULL, 0, d.cnt) 'Unknown Hispanic' FROM @RACES a LEFT JOIN CTE_HISP_Y b ON a.RACEDESC = b.RACEDESC LEFT JOIN CTE_HISP_N c ON a.RACEDESC = c.RACEDESC LEFT JOIN CTE_HISP_U d ON a.RACEDESC = d.RACEDESC ORDER BY a.RACEDESC;")) #Row totals rownames(tab_race5hispanic) <- tab_race5hispanic$'Race Group' tab_race5hispanic$'Race Group' <- NULL rowsum <-colSums(tab_race5hispanic) tab_race5hispanic <-rbind(tab_race5hispanic, rowsum) #Column totals colsum <-rowSums(tab_race5hispanic) tab_race5hispanic <- cbind(tab_race5hispanic,colsum) #Renaming for final table colnames (tab_race5hispanic) [4] <- "Total" rownames(tab_race5hispanic) [7] <- "Total" category <- rownames(tab_race5hispanic) tab_race5hispanic <- cbind("Race Group" = category, tab_race5hispanic)
knitr::kable(tab_race5hispanic, format.args = list(big.mark = ","), row.names = FALSE)
tab_race1lang <- run_query(gsub("%DEMOGRAPHICS%", demographics, " SET NOCOUNT ON; DECLARE @RACES TABLE (RACE VARCHAR(2), RACEDESC VARCHAR(50) ); INSERT INTO @RACES VALUES ( 'AS', 'Asian'), ( 'BA', 'Black or African American'), ( 'HP', 'Native Hawaiian or Other Pacific Islander'), ( 'IN', 'American Indian/Alaska Native'), ( 'UN', 'Unknown or Not Reported'), ( 'WH', 'White'); WITH CTE_HISP_Y AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE Primary_Language='spa' and RACE IN(Race1) AND 'Y' in (Hispanic) GROUP BY RACEDESC), CTE_HISP_N AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE Primary_Language='spa' and RACE IN(Race1) AND 'N' in (Hispanic) GROUP BY RACEDESC), CTE_HISP_U AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE Primary_Language='spa' and RACE IN(Race1) AND 'U' in (Hispanic) GROUP BY RACEDESC) SELECT a.RACEDESC AS [Race Group], IIF(b.cnt IS NULL, 0, b.cnt) 'Hispanic', IIF(c.cnt IS NULL, 0, c.cnt) 'Non-Hispanic', IIF(d.cnt IS NULL, 0, d.cnt) 'Unknown Hispanic' FROM @RACES a LEFT JOIN CTE_HISP_Y b ON a.RACEDESC = b.RACEDESC LEFT JOIN CTE_HISP_N c ON a.RACEDESC = c.RACEDESC LEFT JOIN CTE_HISP_U d ON a.RACEDESC = d.RACEDESC ORDER BY a.RACEDESC;")) #Row totals rownames(tab_race1lang) <- tab_race1lang$'Race Group' tab_race1lang$'Race Group' <- NULL rowsum <-colSums(tab_race1lang) tab_race1lang <-rbind(tab_race1lang, rowsum) #Column totals colsum <-rowSums(tab_race1lang) tab_race1lang <- cbind(tab_race1lang,colsum) #Renaming for final table colnames (tab_race1lang) [4] <- "Total" rownames(tab_race1lang) [7] <- "Total" category <- rownames(tab_race1lang) tab_race1lang <- cbind("Race Group" = category, tab_race1lang)
knitr::kable(tab_race1lang, format.args = list(big.mark = ","), row.names = FALSE)
tab_race2lang <- run_query(gsub("%DEMOGRAPHICS%", demographics, " SET NOCOUNT ON; DECLARE @RACES TABLE (RACE VARCHAR(2), RACEDESC VARCHAR(50) ); INSERT INTO @RACES VALUES ( 'AS', 'Asian'), ( 'BA', 'Black or African American'), ( 'HP', 'Native Hawaiian or Other Pacific Islander'), ( 'IN', 'American Indian/Alaska Native'), ( 'UN', 'Unknown or Not Reported'), ( 'WH', 'White'); WITH CTE_HISP_Y AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE Primary_Language='spa' and RACE IN(Race2) AND 'Y' in (Hispanic) GROUP BY RACEDESC), CTE_HISP_N AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE Primary_Language='spa' and RACE IN(Race2) AND 'N' in (Hispanic) GROUP BY RACEDESC), CTE_HISP_U AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE Primary_Language='spa' and RACE IN(Race2) AND 'U' in (Hispanic) GROUP BY RACEDESC) SELECT a.RACEDESC AS [Race Group], IIF(b.cnt IS NULL, 0, b.cnt) 'Hispanic', IIF(c.cnt IS NULL, 0, c.cnt) 'Non-Hispanic', IIF(d.cnt IS NULL, 0, d.cnt) 'Unknown Hispanic' FROM @RACES a LEFT JOIN CTE_HISP_Y b ON a.RACEDESC = b.RACEDESC LEFT JOIN CTE_HISP_N c ON a.RACEDESC = c.RACEDESC LEFT JOIN CTE_HISP_U d ON a.RACEDESC = d.RACEDESC ORDER BY a.RACEDESC;;")) #Row totals rownames(tab_race2lang) <- tab_race2lang$'Race Group' tab_race2lang$'Race Group' <- NULL rowsum <-colSums(tab_race2lang) tab_race2lang <-rbind(tab_race2lang, rowsum) #Column totals colsum <-rowSums(tab_race2lang) tab_race2lang <- cbind(tab_race2lang,colsum) #Renaming for final table colnames (tab_race2lang) [4] <- "Total" rownames(tab_race2lang) [7] <- "Total" category <- rownames(tab_race2lang) tab_race2lang <- cbind("Race Group" = category, tab_race2lang)
knitr::kable(tab_race2lang, format.args = list(big.mark = ","), row.names = FALSE)
tab_race3lang <- run_query(gsub("%DEMOGRAPHICS%", demographics, " SET NOCOUNT ON; DECLARE @RACES TABLE (RACE VARCHAR(2), RACEDESC VARCHAR(50) ); INSERT INTO @RACES VALUES ( 'AS', 'Asian'), ( 'BA', 'Black or African American'), ( 'HP', 'Native Hawaiian or Other Pacific Islander'), ( 'IN', 'American Indian/Alaska Native'), ( 'UN', 'Unknown or Not Reported'), ( 'WH', 'White'); WITH CTE_HISP_Y AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE Primary_Language='spa' and RACE IN(Race3) AND 'Y' in (Hispanic) GROUP BY RACEDESC), CTE_HISP_N AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE Primary_Language='spa' and RACE IN(Race3) AND 'N' in (Hispanic) GROUP BY RACEDESC), CTE_HISP_U AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE Primary_Language='spa' and RACE IN(Race3) AND 'U' in (Hispanic) GROUP BY RACEDESC) SELECT a.RACEDESC AS [Race Group], IIF(b.cnt IS NULL, 0, b.cnt) 'Hispanic', IIF(c.cnt IS NULL, 0, c.cnt) 'Non-Hispanic', IIF(d.cnt IS NULL, 0, d.cnt) 'Unknown Hispanic' FROM @RACES a LEFT JOIN CTE_HISP_Y b ON a.RACEDESC = b.RACEDESC LEFT JOIN CTE_HISP_N c ON a.RACEDESC = c.RACEDESC LEFT JOIN CTE_HISP_U d ON a.RACEDESC = d.RACEDESC ORDER BY a.RACEDESC;")) #Row totals rownames(tab_race3lang) <- tab_race3lang$'Race Group' tab_race3lang$'Race Group' <- NULL rowsum <-colSums(tab_race3lang) tab_race3lang <-rbind(tab_race3lang, rowsum) #Column totals colsum <-rowSums(tab_race3lang) tab_race3lang <- cbind(tab_race3lang,colsum) #Renaming for final table colnames (tab_race3lang) [4] <- "Total" rownames(tab_race3lang) [7] <- "Total" category <- rownames(tab_race3lang) tab_race3lang <- cbind("Race Group" = category, tab_race3lang)
knitr::kable(tab_race3lang, format.args = list(big.mark = ","), row.names = FALSE)
tab_race4lang <- run_query(gsub("%DEMOGRAPHICS%", demographics, " SET NOCOUNT ON; DECLARE @RACES TABLE (RACE VARCHAR(2), RACEDESC VARCHAR(50) ); INSERT INTO @RACES VALUES ( 'AS', 'Asian'), ( 'BA', 'Black or African American'), ( 'HP', 'Native Hawaiian or Other Pacific Islander'), ( 'IN', 'American Indian/Alaska Native'), ( 'UN', 'Unknown or Not Reported'), ( 'WH', 'White'); WITH CTE_HISP_Y AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE Primary_Language='spa' and RACE IN(Race4) AND 'Y' in (Hispanic) GROUP BY RACEDESC), CTE_HISP_N AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE Primary_Language='spa' and RACE IN(Race4) AND 'N' in (Hispanic) GROUP BY RACEDESC), CTE_HISP_U AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE Primary_Language='spa' and RACE IN(Race4) AND 'U' in (Hispanic) GROUP BY RACEDESC) SELECT a.RACEDESC AS [Race Group], IIF(b.cnt IS NULL, 0, b.cnt) 'Hispanic', IIF(c.cnt IS NULL, 0, c.cnt) 'Non-Hispanic', IIF(d.cnt IS NULL, 0, d.cnt) 'Unknown Hispanic' FROM @RACES a LEFT JOIN CTE_HISP_Y b ON a.RACEDESC = b.RACEDESC LEFT JOIN CTE_HISP_N c ON a.RACEDESC = c.RACEDESC LEFT JOIN CTE_HISP_U d ON a.RACEDESC = d.RACEDESC ORDER BY a.RACEDESC;")) #Row totals rownames(tab_race4lang) <- tab_race4lang$'Race Group' tab_race4lang$'Race Group' <- NULL rowsum <-colSums(tab_race4lang) tab_race4lang <-rbind(tab_race4lang, rowsum) #Column totals colsum <-rowSums(tab_race4lang) tab_race4lang <- cbind(tab_race4lang,colsum) #Renaming for final table colnames (tab_race4lang) [4] <- "Total" rownames(tab_race4lang) [7] <- "Total" category <- rownames(tab_race4lang) tab_race4lang <- cbind("Race Group" = category, tab_race4lang)
knitr::kable(tab_race4lang, format.args = list(big.mark = ","), row.names = FALSE)
tab_race5lang <- run_query(gsub("%DEMOGRAPHICS%", demographics, " SET NOCOUNT ON; DECLARE @RACES TABLE (RACE VARCHAR(2), RACEDESC VARCHAR(50) ); INSERT INTO @RACES VALUES ( 'AS', 'Asian'), ( 'BA', 'Black or African American'), ( 'HP', 'Native Hawaiian or Other Pacific Islander'), ( 'IN', 'American Indian/Alaska Native'), ( 'UN', 'Unknown or Not Reported'), ( 'WH', 'White'); WITH CTE_HISP_Y AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE Primary_Language='spa' and RACE IN(Race5) AND 'Y' in (Hispanic) GROUP BY RACEDESC), CTE_HISP_N AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE Primary_Language='spa' and RACE IN(Race5) AND 'N' in (Hispanic) GROUP BY RACEDESC), CTE_HISP_U AS (SELECT RACEDESC, COUNT(*) cnt FROM %DEMOGRAPHICS%, @RACES WHERE Primary_Language='spa' and RACE IN(Race5) AND 'U' in (Hispanic) GROUP BY RACEDESC) SELECT a.RACEDESC AS [Race Group], IIF(b.cnt IS NULL, 0, b.cnt) 'Hispanic', IIF(c.cnt IS NULL, 0, c.cnt) 'Non-Hispanic', IIF(d.cnt IS NULL, 0, d.cnt) 'Unknown Hispanic' FROM @RACES a LEFT JOIN CTE_HISP_Y b ON a.RACEDESC = b.RACEDESC LEFT JOIN CTE_HISP_N c ON a.RACEDESC = c.RACEDESC LEFT JOIN CTE_HISP_U d ON a.RACEDESC = d.RACEDESC ORDER BY a.RACEDESC;")) #Row totals rownames(tab_race5lang) <- tab_race5lang$'Race Group' tab_race5lang$'Race Group' <- NULL rowsum <-colSums(tab_race5lang) tab_race5lang <-rbind(tab_race5lang, rowsum) #Column totals colsum <-rowSums(tab_race5lang) tab_race5lang <- cbind(tab_race5lang,colsum) #Renaming for final table colnames (tab_race5lang) [4] <- "Total" rownames(tab_race5lang) [7] <- "Total" category <- rownames(tab_race5lang) tab_race5lang <- cbind("Race Group" = category, tab_race5lang)
knitr::kable(tab_race5lang, format.args = list(big.mark = ","), row.names = FALSE)
# create a temp table in the SQL DB temp drive, this will not be pulled into the R workspace.This pulls in the most recent encounter date and combines it with race and hispanic data from demographics tempCon1 <- get_connection() encTmp1 <- sqlQuery(tempCon1, paste('SET NOCOUNT ON; SELECT d.*, demo.RACE1, demo.HISPANIC ', 'INTO #encTmp1 ', 'FROM (', 'SELECT a.PERSON_ID, max(adate) as lastEnc', 'FROM ',encounters, ' as a inner join (select distinct person_id from ',demographics,') as b on a.person_id = b.person_id group by a.PERSON_ID) as d', 'inner join ',demographics,' demo on demo.PERSON_ID = d.PERSON_ID', sep=" " ) ) tab7.11_rowCnt <-sqlQuery(tempCon1, paste("SELECT lastEnc.year, ISNULL(missingRaceHispanic.rowCnt, 0) rowCnt FROM ( SELECT YEAR(lastEnc) AS year, COUNT(*) AS rowCnt FROM #encTmp1 GROUP BY YEAR(lastEnc) ) lastEnc LEFT JOIN ( SELECT YEAR(lastEnc) AS year, COUNT(*) AS rowCnt FROM #encTmp1 WHERE Race1 IN('UN') AND Hispanic IN('U') GROUP BY YEAR(lastEnc) ) missingRaceHispanic ON missingRaceHispanic.year = lastEnc.year ORDER BY lastEnc.year", sep=" " ) ) tab7.11_totalCnt <-sqlQuery(tempCon1, paste("SELECT year(lastEnc) as year, count(*) as rowCnt", "FROM #encTmp1 GROUP BY year(lastEnc)", sep=" " ) ) RODBC::odbcClose(tempCon1) tab7.11_combined <- data.frame(cbind(tab7.11_rowCnt, tab7.11_totalCnt$rowCnt)) tab7.11_combined$rowPct <- round(100*tab7.11_combined$rowCnt/tab7.11_combined$tab7.11_totalCnt.rowCnt,1) tab7.11_combined$year <- as.character(tab7.11_combined$year)
knitr::kable(tab7.11_combined[ c('year', 'rowCnt','tab7.11_totalCnt.rowCnt','rowPct')], col.names = c("Year","Frequency Unknown Race1 and Hispanic","Total Frequency","Percent Missing Race1 and Hispanic"), row.names=FALSE, format.args = list(big.mark = ","), align = 'c')
ggplot(data = tab7.11_combined, aes(x = year, y = rowPct, group=1)) + geom_line() + geom_point() + ylab("Percent") + theme(axis.text.x = element_text(angle = 45, hjust = 1))
gender_re <- demogCounts[ which (demogCounts$race1=='UN' & demogCounts$hispanic=='U'),] gender_re <- demogCounts %>% group_by(gender) %>% summarise(count = sum(nRows)) %>% ungroup() %>% mutate(totCount = sum(count), countPct = round(100*count/totCount,1) ) gender_re <- within(merge(gender_re,subset(valSets,columnName=='GENDER')[c('code','decode')], by.x='gender', by.y='code', all.x=TRUE) %>% arrange(desc(countPct)) ,{ decode <- ifelse(!is.na(decode),decode, ifelse(is.na(gender) , 'Missing', 'Invalid')) })
knitr::kable(gender_re[ c('gender', 'decode','count','countPct')], col.names = c("Value","Label","Frequency","Percent"), row.names=FALSE, format.args = list(big.mark = ","))
tempCon2 <- get_connection() encTmp2 <- sqlQuery(tempCon2, paste('SET NOCOUNT ON; SELECT D.*, demo.RACE1, demo.HISPANIC, demo.birth_date ', 'INTO #encTmp10 ', 'FROM (', 'SELECT a.PERSON_ID, max(adate) as lastEnc, floor(DATEDIFF(day, b.BIRTH_DATE, A.adate)/365.25) as age', 'FROM ',encounters, ' as a inner join (select distinct person_id, birth_date from ',demographics,') as b on a.person_id = b.person_id group by a.PERSON_ID, a.adate, b.birth_date) as d', 'inner join ',demographics,' demo on demo.PERSON_ID = d.PERSON_ID', sep=" " ) ) tab7.13_rowCnt <- sqlQuery(tempCon2, paste("SELECT ageBase.age, CASE WHEN ageSubGroup.rowCnt IS NULL THEN 0 ELSE ageSubGroup.rowCnt END AS rowCnt FROM (SELECT age, COUNT(*) AS rowCnt FROM #encTmp10 GROUP BY age) ageBase LEFT JOIN (SELECT age, COUNT(*) AS rowCnt FROM #encTmp10 WHERE Race1 IN('UN') AND Hispanic IN('U') GROUP BY age) ageSubGroup ON ageSubGroup.age = ageBase.age ORDER BY age;", sep=" " ) ) tab7.13_rowCnt$ageCat <- ageCatCalc(as.numeric(tab7.13_rowCnt$age)) tab7.13_rowCnt <- tab7.13_rowCnt %>% group_by(ageCat) %>% summarise(count = sum(rowCnt)) %>% ungroup() %>% mutate(totCount = sum(count), columnPct = round(100*count/totCount,1) ) tab7.13_total <- sqlQuery(tempCon2, paste("SELECT age, count(*) as rowCnt", "FROM #encTmp10 GROUP BY age ", sep=" " ) ) RODBC::odbcClose(tempCon2) tab7.13_total$ageCat <- ageCatCalc(as.numeric(tab7.13_total$age)) tab7.13_total <- tab7.13_total %>% group_by(ageCat) %>% summarise(count = sum(rowCnt)) %>% ungroup() %>% mutate(totCount = sum(count), countPct = round(100*count/totCount,1) ) tab7.13_combined <- data.frame(cbind(tab7.13_rowCnt, tab7.13_total$count)) tab7.13_combined$rowPct <- round(100*tab7.13_combined$count/tab7.13_combined$tab7.13_total.count,1)
knitr::kable(tab7.13_combined[ c('ageCat', 'count','tab7.13_total.count','columnPct', 'rowPct')], col.names = c("Age Group","Frequency Missing Race1 & Hispanic","Total Frequency","Column Percent", "Row Percent"), row.names=FALSE, format.args = list(big.mark = ","))
tab7.14a <- run_query(paste0(" SELECT ENCTYPE_TE_EM_TV_TO, Y as POS2Y, N as POS2N FROM ( SELECT CASE WHEN e.ENCTYPE in ('TE', 'EM', 'TV', 'TO') THEN 'Y' ELSE 'N' END AS ENCTYPE_TE_EM_TV_TO, CASE WHEN e.POS = '02' THEN 'Y' ELSE 'N' END AS POS2 FROM ", encounters," e ) ENCTYPEPOS PIVOT(COUNT(POS2) FOR POS2 IN( Y, N)) AS pvt;")) tab7.14a <- flextable(data = data.frame(format(tab7.14a, big.mark=",", scientific = F))) %>% set_table_properties(layout = "autofit") %>% set_header_labels(ENCTYPE_TE_EM_TV_TO="ENCTYPE = 'TE', 'EM', 'TV', 'TO'", POS2Y = "Y", POS2N = "N") %>% add_header_row(values = c("", "POS = 02"), colwidths=c(1,2) ,top = TRUE )%>% flextable::fontsize(size = 9, part = "all") %>% flextable::align(align = "center", part = "all") %>% border(part = "all", border = fp_border(color = "black", style = "solid")) knitr::knit_print(tab7.14a)
tab7.14b <- run_query(paste0(" SELECT CPTMOD, Y AS POS2Y, N AS POS2N FROM ( SELECT CASE WHEN p.CPTMOD1 IN('GT', '95') OR p.CPTMOD2 IN('GT', '95') OR p.CPTMOD3 IN('GT', '95') THEN 'Y' ELSE 'N' END AS CPTMOD, CASE WHEN e.POS = '02' THEN 'Y' ELSE 'N' END AS POS2 FROM ", procedures," p JOIN ", encounters," e ON p.ENC_ID = e.ENC_ID ) CPTPOS PIVOT(COUNT(POS2) FOR POS2 IN( Y, N)) AS pvt;")) tab7.14b <- flextable(data = data.frame(format(tab7.14b, big.mark=",", scientific = F))) %>% set_table_properties(layout = "autofit") %>% set_header_labels(CPTMOD="CPTMOD = 'GT' or '95'", POS2Y = "Y", POS2N = "N") %>% add_header_row(values = c("", "POS = 02"), colwidths=c(1,2) ,top = TRUE )%>% flextable::fontsize(size = 9, part = "all") %>% flextable::align(align = "center", part = "all") %>% border(part = "all", border = fp_border(color = "black", style = "solid")) knitr::knit_print(tab7.14b)
tab7.14c <- run_query(paste0(" SELECT CPTMOD, Y AS ENCTYPE_TE_EM_TV_TO_Y, N AS ENCTYPE_TE_EM_TV_TO_N FROM ( SELECT CASE WHEN p.CPTMOD1 IN('GT', '95') OR p.CPTMOD2 IN('GT', '95') OR p.CPTMOD3 IN('GT', '95') THEN 'Y' ELSE 'N' END AS CPTMOD, CASE WHEN e.ENCTYPE in ('TE', 'EM', 'TV', 'TO') THEN 'Y' ELSE 'N' END AS ENCTYPE_TE_EM_TV_TO FROM ", procedures," p JOIN ", encounters," e ON p.ENC_ID = e.ENC_ID ) CPTENCTYPE PIVOT(COUNT(ENCTYPE_TE_EM_TV_TO) FOR ENCTYPE_TE_EM_TV_TO IN( Y, N)) AS pvt;")) tab7.14c <- flextable(data = data.frame(format(tab7.14c, big.mark=",", scientific = F))) %>% set_table_properties(layout = "autofit") %>% set_header_labels(CPTMOD="CPTMOD = 'GT' or '95'", ENCTYPE_TE_EM_TV_TO_Y = "Y", ENCTYPE_TE_EM_TV_TO_N = "N") %>% add_header_row(values = c("", "ENCTYPE = 'TE', 'EM', 'TV', 'TO'"), colwidths=c(1,2) ,top = TRUE )%>% flextable::fontsize(size = 9, part = "all") %>% flextable::align(align = "center", part = "all") %>% border(part = "all", border = fp_border(color = "black", style = "solid")) knitr::knit_print(tab7.14c)
tempCon3 <- get_connection() # create a temp table in the SQL DB temp drive, this will not be pulled into the R workspace. nada_ <- sqlQuery(tempCon3, paste0("SET NOCOUNT ON; SELECT a.*, floor(DATEDIFF(day, b.birth_date, a.ADATE)/365.25) as age, b.birth_date, month(a.adate) as aMon, year(a.adate) as aYear, month(b.birth_date) as dobMon, year(b.birth_date) as dobYear INTO #encTmp FROM ", encounters," as a left join (select distinct person_id, birth_date from ", demographics, ") as b on a.person_id = b.person_id", sep=" " ) ) # short labels for encounter type encLabels <- data.frame(c('AV','ED','EM','IP','IS','LO','OE','RO', 'TE', 'TV', 'TO'), c('Ambulatory Visit','Emergency Department', 'E-mail', 'Acute Inpatient','Institutional Stay','Lab only','Other','Radiology only', 'Telephone', 'Telehealth', 'Other Synchronous Telehealth' ) , stringsAsFactors = F ) names(encLabels)<-c('code','decode') # overall enc summary # read encounter table encSumm <- sqlQuery(tempCon3, paste0('SELECT count(*) as rowCnt, count(distinct enc_id) as encCnt, count(distinct person_id) as personCnt, min(adate) as firstEnc, max(adate) as lastEnc FROM #encTmp', sep=" " ), as.is=c(TRUE,TRUE,TRUE,TRUE,TRUE), max=maxQryRows ) encRows <- encSumm$rowCnt encPrimKeyRows <- encSumm$encCnt nEncPats <- encSumm$personCnt
Total rows in the dataset: r format(encRows, big.mark=",")
\
Total unique combinations of the primary key: r format(encPrimKeyRows, big.mark=",")
\
Total patients in the encounter table: r format(nEncPats, big.mark=",")
\
Month and year of first encounter: r format(as.Date(encSumm$firstEnc),"%B %Y")
\
Month and year of last encounter: r format(as.Date(encSumm$lastEnc),"%B %Y")
\
r ifelse(encRows== encPrimKeyRows," ","*The encounter table has duplicate rows, by enc_id")
encPersonYear <- sqlQuery(tempCon3, paste('SELECT aMon, aYear, age, count(*) as personCnt', 'FROM (SELECT distinct person_id, aYear, min(age) as age, min(aMon) as aMon ', ' FROM #encTmp', ' GROUP BY person_id, aYear) as qry', 'GROUP BY aMon, aYear, age', sep=" " ), as.is=c(TRUE,TRUE,TRUE,TRUE,TRUE), max=maxQryRows ) encPersonAgeYear <- within(encPersonYear,{ adate_YM <- as.Date(paste(as.character(aYear),as.character(aMon),'01',sep='-')) #dob_YM <- as.Date(paste(as.character(dobYear),as.character(dobMon),'01',sep='-')) #age <- as.numeric(floor((difftime(adate_YM, dob_YM, units='days'))/365.25)) ageCat <- ageCatCalc(as.numeric(age)) }) %>% group_by(aYear, ageCat) %>% summarise(Freq = sum(personCnt)) %>% group_by(aYear) %>% mutate(yearTot = sum(Freq), pct = round(100*Freq/yearTot,1), toDisplay = paste0(Freq,' (',pct,')') ) encPersonAgeYear <- within(encPersonYear,{ adate_YM <- as.Date(paste(as.character(aYear),as.character(aMon),'01',sep='-')) #dob_YM <- as.Date(paste(as.character(dobYear),as.character(dobMon),'01',sep='-')) #age <- as.numeric(floor((difftime(adate_YM, dob_YM, units='days'))/365.25)) ageCat <- ageCatCalc(as.numeric(age)) }) %>% group_by(aYear, ageCat) %>% summarise(Freq = sum(personCnt)) %>% group_by(aYear) %>% mutate(yearTot = sum(Freq), pct = round(100*Freq/yearTot,1)) encPersonAgeYear$toDisplay = paste0(format(encPersonAgeYear$Freq, big.mark = ","),' (',encPersonAgeYear$pct,')') # transpose to square table encPersonAgeYear_tx <- spread(encPersonAgeYear[c('ageCat','aYear','toDisplay')],aYear,toDisplay)
knitr::kable(encPersonAgeYear_tx, col.names=c("Age at First Encounter in CY, N(%)",names(encPersonAgeYear_tx)[-1]), row.names=FALSE, align = "c", format.args = list(big.mark = ","))
Percentages are calculated with all patients within the calendar year as the denominator
ggplot(data = encPersonAgeYear, aes(x =aYear, y = Freq, fill = ageCat)) + geom_bar(stat = "identity") + labs(y='Count', x='Year', fill='Age at encounter')+theme(axis.text.x = element_text(angle = 45, hjust = 1)) + scale_y_continuous(labels = function(x) format(x, big.mark = ",", scientific = FALSE))
tab9 <- run_query( paste0("select year(adate) as year, count(*) as nrows, count(distinct person_id) as npats from ", encounters, " group by year(adate) order by year(adate)")) tab9_avg_pats <- run_query( paste0("select year(adate) as year, count(*) as nrows from ", encounters, " group by year(adate), person_id order by year(adate)")) %>% arrange(year) %>% group_by(year) %>% summarise_all(list(mean = mean, median = median, min = min, max = max))%>% mutate( pats_val = paste0(median, "(", min, ", ", max, ")") ) tab9_all <- left_join(tab9, tab9_avg_pats, by = "year") %>% mutate( ptc_rec = if_else(is.na(nrows / dplyr::lag(nrows)), "--", paste0(round(nrows / dplyr::lag(nrows) * 100, 0), "%")) ) %>% select(year, nrows, ptc_rec, npats, pats_val) tab9_all$year <- as.character(tab9_all$year)
kable(tab9_all, col.names = c("Year", "Total Number of Recorded Encounters", "Percent Change from Previous Year", "Total Number of Patients with at Least One Encounter", "Median Number of Encounters per Patient (Min, Max)"), format.args = list(big.mark = ","))
encEncType_YM <- sqlQuery(tempCon3, paste0("SELECT aMon, aYear, encType, count(*) as rowCnt FROM #encTmp GROUP BY aMon, aYear, encType", sep=" " ), as.is=c(TRUE,TRUE,TRUE,TRUE,TRUE), max=maxQryRows ) encEncType_YM <- within(encEncType_YM,{ adate_YM <- as.Date(paste(as.character(aYear),as.character(aMon),'01',sep='-')) })
encEncType <- encEncType_YM %>% group_by(encType) %>% summarize(Freq = sum(rowCnt)) encEncType <- merge(encEncType,subset(valSets,tableName=='ENCOUNTERS' & columnName=='ENCTYPE')[c('code','decode')], by.x='encType', by.y='code', all.x=TRUE) encEncType <- within(encEncType, { pct <- round(100*Freq/sum(Freq),1) # validVal <- ifelse(is.na(decode),'No','Yes') decode <- ifelse(!is.na(decode),decode, ifelse(is.na(encType), 'Missing', 'Invalid')) })[c('encType','decode','Freq','pct')] encEncType <- encEncType %>% select(-encType)
knitr::kable(encEncType, col.names=c("Encounter Type",'Number of Encounters','Percent of Total Encounters'), row.names=FALSE, format.args = list(big.mark = ","))
encByYear <- encEncType_YM %>% group_by(aYear ) %>% summarize(Freq = sum(rowCnt)) encByYear <- within(encByYear,{ pct <- round(100*Freq/sum(Freq),1) }) encByYear$pctChng <- NA for(i in 2:nrow(encByYear)){ encByYear$pctChng[i]<- round(100*(encByYear$Freq[i]-encByYear$Freq[i-1])/encByYear$Freq[i-1],1) } encByYear$aYear <- as.character(encByYear$aYear)
uniqPatByYear <- sqlQuery(tempCon3, paste0("SELECT aYear, count(distinct person_id) as Freq FROM #encTmp GROUP BY aYear", sep=" " ), as.is=c(TRUE,TRUE,TRUE,TRUE,TRUE), max=maxQryRows ) uniqPatByYear <- within(uniqPatByYear,{ pct <- round(100*Freq/sum(Freq),1) }) uniqPatByYear$pctChng<-NA for(i in 2:nrow(uniqPatByYear)){ uniqPatByYear$pctChng[i]<- round(100*(uniqPatByYear$Freq[i]-uniqPatByYear$Freq[i-1])/uniqPatByYear$Freq[i-1],1) } uniqPatByYear$aYear <- as.character(uniqPatByYear$aYear) odbcClose(tempCon3)
encTypeBymonYr <- merge(encEncType_YM,encLabels, by.x='encType', by.y='code', all.x=TRUE) encTypeBymonYr <- within(encTypeBymonYr,{decode <- ifelse(!is.na(decode),decode, ifelse(is.na(encType), 'Missing', 'Invalid'))})
fig3_timeTest <- timeTest(time=as.Date(encTypeBymonYr$adate_YM), Freq=encTypeBymonYr$rowCnt, outcome=encTypeBymonYr$encType ) fig3_timeTest <- merge(fig3_timeTest,encLabels, by.x='outcome', by.y='code', all.x=TRUE) fig3_timeTest <- within(fig3_timeTest,{decode <- ifelse(!is.na(decode),decode, ifelse(is.na(outcome), 'Missing', 'Invalid'))})
ggplot(subset(encTypeBymonYr,encType == 'AV'), aes(adate_YM, rowCnt)) + geom_line() + scale_x_date(date_labels = "%b %y",date_breaks='1 year') + xlab("Month of encounter") + ylab("Frequency")+theme(axis.text.x = element_text(angle = 45, hjust = 1)) + geom_point(data=subset(fig3_timeTest, iqrTest==T & outcome=='AV') %>% rename(adate_YM=testMonth, rowCnt=Freq2), aes(adate_YM, rowCnt))
ggplot(subset(encTypeBymonYr,encType != 'AV'), aes(adate_YM, rowCnt, colour=decode)) + geom_line() + scale_x_date(date_labels = "%b %y",date_breaks='1 year') + xlab("Month of encounter") + ylab("Frequency")+theme(axis.text.x = element_text(angle = 45, hjust = 1))+labs(color='Encounter type')+ geom_point(data=subset(fig3_timeTest, iqrTest==T & outcome !='AV') %>% rename(adate_YM=testMonth, rowCnt=Freq2), aes(adate_YM, rowCnt, colour=decode)) #,legend.position="bottom" "
# # Encounter types by year # encTypeByYr <- encEncType_YM %>% group_by(encType, aYear ) %>% summarize(Freq = sum(rowCnt)) encTypeByYr <- merge(encTypeByYr,encLabels, by.x='encType', by.y='code', all.x=TRUE) encTypeByYr <- within(encTypeByYr,{decode <- ifelse(!is.na(decode),decode, ifelse(is.na(encType), 'Missing', 'Invalid'))})
locSumm <- run_query( paste0('SELECT count(*) as rowCnt, COUNT(DISTINCT person_id+CAST(loc_start AS varchar(10)) ) as primKeyCnt, count(distinct person_id) as personCnt, cast(min(loc_start) as date) as firstDT, cast(max(loc_start) as date) as lastDT FROM ',census_location, sep="" ) ) locSumm$rowCnt <- format(locSumm$rowCnt, big.mark = ",") locSumm$primKeyCnt <- format(locSumm$primKeyCnt, big.mark = ",") locSumm$personCnt <- format(locSumm$personCnt, big.mark = ",") tempCon4 <- get_connection() tmpLastLoc_ <- sqlQuery(tempCon4, paste0("SELECT distinct a.* INTO #tmpLastLoc FROM ",census_location," AS a INNER JOIN (SELECT person_id, max(loc_start) as loc_start FROM ", census_location, " group by person_id) AS b ON a.person_id = b.person_id and a.loc_start=b.loc_start" ) ) lastLocation <- sqlQuery(tempCon4, paste0("SELECT stateCnty, count(*) as Freq FROM (select distinct substring(geocode,1,5) as stateCnty, person_id FROM #tmpLastLoc) as q GROUP BY stateCnty", sep=" ") ) lastLocation <- trimChrVars(lastLocation) multLastLocs <- sqlQuery(tempCon4, paste0("SELECT count(distinct person_id) as patsMultLoc FROM (select person_id FROM #tmpLastLoc GROUP BY person_id HAVING count(*)>1) as q", sep=" ")) odbcClose(tempCon4)
Total rows in the dataset: r format(locSumm$rowCnt, big.mark=",")
\
Total unique combinations of the primary key: r format(locSumm$primKeyCnt, big.mark=",")
\
Total patients in the location table: r format(locSumm$personCnt, big.mark=",")
\
Month and year of first location: r format( as.Date(locSumm$firstDT),"%B %Y")
\
Month and year of last location: r format(as.Date(locSumm$lastDT),"%B %Y")
\
r ifelse(locSumm$rowCnt==locSumm$primKeyCnt," ","*The Census Location table has duplicate rows, by person_id and loc_start")
\
r format(multLastLocs$patsMultLoc, big.mark=",")
patients have more than one most recent location record\
lastLocation <- merge(lastLocation, stateCnty[c('stateCnty','stCntyNm')], by = 'stateCnty', all.x=T) %>% arrange(desc(Freq)) %>% mutate(pct = round(100*Freq/sum(Freq),1), stCntyNm = ifelse(!is.na(stCntyNm), stCntyNm, ifelse( is.na(stateCnty),'Missing','Invalid')) )
coloradoLoc <- subset(lastLocation, stCntyNm != "Invalid" & stCntyNm != "Missing") nonColoradoLoc <- subset(lastLocation, stCntyNm == "Invalid") missingLoc <- subset(lastLocation, stCntyNm == "Missing") knitr::kable(coloradoLoc[c('stateCnty','stCntyNm','Freq','pct')], col.names=c('State/county code','State: county','Frequency','Percent'), row.names=FALSE, format.args = list(big.mark = ","))
Count of Non-Colorado Locations: r format(sum(nonColoradoLoc$Freq), big.mark=",")
\
Percentage of Non-Colorado Locations: r sum(nonColoradoLoc$pct)
\
Count of Locations Missing: r format(sum(missingLoc$Freq), big.mark=",")
\
Percentage of Locations Missing: r format(sum(missingLoc$pct), big.mark=",")
\
diagSumm <- run_query( paste('SELECT count(*) as rowCnt, COUNT(DISTINCT enc_id+dx+CAST(adate AS varchar(10))+diagprovider ) as primKeyCnt, count(distinct person_id) as personCnt, min(adate) as firstDT, max(adate) as lastDT', 'FROM ',diagnoses, sep=" " ), as.is=c(TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE), max=maxQryRows )
Total rows in the dataset: r format(diagSumm$rowCnt, big.mark=",")
\
Total unique combinations of the primary key: r format(diagSumm$primKeyCnt, big.mark=",")
\
Total patients in the table: r format(diagSumm$personCnt, big.mark=",")
\
Month and year of first diagnosis: r format( as.Date(diagSumm$firstDT),"%B %Y")
\
Month and year of last diagnosis: r format( as.Date(diagSumm$lastDT),"%B %Y")
\
r ifelse( diagSumm$rowCnt==diagSumm$primKeyCnt," ","*The table has duplicate rows, by enc_id dx adate diagprovider")
\
The following tables explore the contents of the DIAGNOSES table with a focus on trends over time and most common types of diagnoses.
This table examines the volume of diagnoses over time and the number of patients with encounters represented in the DIAGNOSES table. Average number of diagnoses per encounter, and per patient, are calculated to compare diagnosis volume trends over time.
tab12 <- run_query( paste0("select year(adate) as year, count(*) as nrows, count(distinct enc_id) as nencts, count(distinct person_id) as npats from ", diagnoses, " where year(adate) >= 2005 AND DX_ORIGIN in ('BI') group by year(adate) order by year(adate)")) tab12_avg_encts <- run_query( paste0("select year(adate) as year, count(*) as nrows from ", diagnoses, " where year(adate) >= 2005 AND DX_ORIGIN in ('BI') group by year(adate), enc_id order by year(adate)")) %>% arrange(year) %>% group_by(year) %>% summarise_all(list(~mean(.), ~median(.), ~min(.), ~max(.))) %>% mutate( enct_val = paste0(median, "(", min, ", ", max, ")") ) %>% select(enct_val) tab12_avg_pats <- run_query( paste0("select year(adate) as year, count(*) as nrows from ", diagnoses, " where year(adate) >= 2005 AND DX_ORIGIN in ('BI') group by year(adate), person_id order by year(adate)")) %>% arrange(year) %>% group_by(year) %>% summarise_all(list(~mean(.), ~median(.), ~min(.), ~max(.)))%>% mutate( pats_val = paste0(median, "(", min, ", ", max, ")") ) %>% select(pats_val) tab12_add <- tab12 %>% bind_cols(., tab12_avg_encts, tab12_avg_pats) %>% mutate( ptc_rec = if_else(is.na(nrows / dplyr::lag(nrows)), "--", paste0(round(nrows / dplyr::lag(nrows) * 100, 0), "%")) ) %>% select(year, nrows, ptc_rec, nencts, npats, enct_val, pats_val) tab12_add$year <- as.character(tab12_add$year)
kable(tab12_add, col.names = c("Year", "Total Number of Recorded Diagnoses", "Percent Change from Previous Year", "Total Number of Encounters with 1+ Diagnosis", "Total Number of Unique Patients with 1+ Diagnosis", "Median Number of Diagnoses per Encounter (Min, Max)", "Median Number of Diagnoses per Patient (Min, Max)"), format.args = list(big.mark = ","))
tab_2_Last_Recent_Years_f <- function(){ current_year <- as.numeric(format(Sys.Date() ,"%Y")) y1 <- as.character(current_year - 1) y2 <- as.character(current_year - 2) return (c(y1, y2)) } tab_2_Last_Recent_Years <- tab_2_Last_Recent_Years_f()
r paste0(tab_2_Last_Recent_Years[[1]], " & ", tab_2_Last_Recent_Years[[2]])
)These tables include information on the ten most common diagnoses recorded in your DIAGNOSES table. Data on diagnosis codes, types, counts and patients are presented for all encounters and for ambulatory only encounters (ENCTYPE=AV).
tab13_dxall <- run_query( paste0("select dx, dx_codetype, count(*) as nrows_all, count(distinct person_id) as npats from ", diagnoses, " where YEAR(ADATE) IN (", tab_2_Last_Recent_Years[[1]],", ",tab_2_Last_Recent_Years[[2]], " ) AND DX_ORIGIN in ('BI') group by dx, dx_codetype")) %>% arrange(desc(nrows_all)) %>% head(., 10) %>% mutate( dxCode = paste(dx_codetype,'-',gsub('.','',dx, fixed=T), sep='') ) %>% left_join(., icdLU, by = "dxCode") %>% select(dx, dx_codetype, Description, nrows_all, npats) tab13_dxav <- run_query( paste0("select dx as dx_av, dx_codetype as dx_codetype_av, count(*) as nrows_av, count(distinct person_id) as npats_av from ", diagnoses, " where ENCTYPE = 'AV' AND DX_ORIGIN in ('BI') AND YEAR(ADATE) IN (", tab_2_Last_Recent_Years[[1]],", ",tab_2_Last_Recent_Years[[2]], " ) group by dx, dx_codetype")) %>% arrange(desc(nrows_av)) %>% head(., 10) %>% mutate( dxCode = paste(dx_codetype_av,'-',gsub('.','',dx_av, fixed=T), sep='') ) %>% left_join(., icdLU, by = "dxCode") %>% select(dx_av, dx_codetype_av, Description, nrows_av, npats_av) tab13_all <- bind_cols(tab13_dxall, tab13_dxav)
kable(tab13_dxall, col.names = c("Diagnosis Code", "Code Type", "Diagnosis Description", "Number of Recorded Diagnoses", "Number of Patients"), format.args = list(big.mark = ","))
\newline
kable(tab13_dxav, col.names = c("Diagnosis Code for AV", "Diagnosis Type for AV", "Diagnosis Description", "Number of Recorded Diagnosis for AV", "Number of Patients for AV"), format.args = list(big.mark = ","))
This table includes information on the ten most common diagnoses recorded in your DIAGNOSES table. Data on diagnosis codes, counts and patients are presented for ICD-9 and ICD-10.
tab14_09 <- run_query( paste0("select top 10 dx, cast(dx_codetype as varchar) as dx_codetype, count(*) as nrows_all, count(distinct person_id) as npats from ", diagnoses, " where dx_codetype = '09' AND DX_ORIGIN in ('BI') group by dx, dx_codetype order by nrows_all DESC"), as.is = c(TRUE, TRUE, FALSE, FALSE)) tab14_10 <- run_query( paste0("select top 10 dx, cast(dx_codetype as varchar) as dx_codetype, count(*) as nrows_all, count(distinct person_id) as npats from ", diagnoses, " where dx_codetype = '10' AND DX_ORIGIN in ('BI') group by dx, dx_codetype order by nrows_all DESC"), as.is = c(TRUE, TRUE, FALSE, FALSE))
#if and else statements are used to determine if the respective table has any #rows. If not, then an error message is written indicating so. if (nrow(tab14_09) != 0){ tab14_09 <- mutate(.data=tab14_09, dxCode = paste(dx_codetype,'-',gsub('.','',dx, fixed=T), sep='') ) tab14_09 <- left_join(x = tab14_09, y=icdLU, by = "dxCode") tab14_09 <-select(.data = tab14_09, dx, Description, nrows_all, npats) kable(tab14_09, col.names = c("Diagnosis","Diagnosis Description", "Records", "Patients"), caption = "Diagnosis Code Type = 09", format.args = list(big.mark = ",")) } else { knitr::knit_print("Table had zero rows. DX_CODETYPE = '09' likely isn't a category for this variable.") } if (nrow(tab14_10) != 0){ tab14_10 <- mutate(.data = tab14_10, dxCode = paste(dx_codetype,'-',gsub('.','',dx, fixed=T), sep='') ) tab14_10 <- left_join(x=tab14_10, y=icdLU, by = "dxCode") tab14_10 <- select(.data = tab14_10, dx, Description, nrows_all, npats) kable(tab14_10, col.names = c("Diagnosis", "Diagnosis Description", "Records", "Patients"), caption = "Diagnosis Code Type = 10", format.args = list(big.mark = ",")) } else { knitr::knit_print("Table had zero rows. DX_CODETYPE = '10' likely isn't a category for this variable.") }
dxByMonEncTp <- run_query( paste0("SELECT YEAR(adate) as year, MONTH(adate) as month, encType, count(*) as Freq from ", diagnoses, " WHERE YEAR(adate) >= 2005 AND DX_ORIGIN in ('BI') GROUP BY YEAR(adate), MONTH(adate), encType", sep=" "), max=maxQryRows ) dxByMonEncTp <- within(dxByMonEncTp,{ monYr <- as.Date(paste(as.character(year),as.character(month),'01',sep="-")) }) dxByMonEncTp <- merge(dxByMonEncTp,encLabels, by.x='encType', by.y='code', all.x=TRUE) dxByMonEncTp <- within(dxByMonEncTp,{decode <- ifelse(!is.na(decode),decode, ifelse(is.na(encType), 'Missing', 'Invalid'))})
fig4_timeTest <- timeTest(time=as.Date(dxByMonEncTp$monYr), Freq=dxByMonEncTp$Freq, outcome=dxByMonEncTp$encType ) fig4_timeTest <- merge(fig4_timeTest,encLabels, by.x='outcome', by.y='code', all.x=TRUE) fig4_timeTest <- within(fig4_timeTest,{decode <- ifelse(!is.na(decode),decode, ifelse(is.na(outcome), 'Missing', 'Invalid'))})
ggplot(subset(dxByMonEncTp,encType == 'AV'), aes(monYr, Freq)) + geom_line(size=.75) + scale_x_date(date_labels = "%b %y",date_breaks='1 year') + xlab("Month of encounter") + ylab("Frequency")+theme(axis.text.x = element_text(angle = 45, hjust = 1)) + scale_color_brewer(palette ="Paired") + geom_point(data=subset(fig4_timeTest, iqrTest==T & outcome=='AV') %>% rename(monYr=testMonth, rowCnt=Freq2), aes(monYr, rowCnt))
ggplot(subset(dxByMonEncTp,encType != 'AV'), aes(monYr, Freq, colour=decode)) + geom_line(size=.75) + scale_x_date(date_labels = "%b %y",date_breaks='1 year') + xlab("Month of encounter") + ylab("Frequency")+theme(axis.text.x = element_text(angle = 45, hjust = 1),legend.position="bottom")+labs(color='Encounter type') + scale_color_brewer(palette ="Paired") + geom_point(data=subset(fig4_timeTest, iqrTest==T & outcome !='AV') %>% rename(monYr=testMonth, rowCnt=Freq2), aes(monYr, rowCnt, colour=decode))
vitalSumm <- run_query( paste('SELECT count(*) as rowCnt, COUNT(DISTINCT person_id+CAST(measure_date AS varchar(10))+CAST(measure_time AS varchar(20)) ) as primKeyCnt, count(distinct person_id) as personCnt, min(measure_date) as firstDT, max(measure_date) as lastDT', 'FROM ',vital_signs, sep=" " ), max=maxQryRows )
Total rows in the dataset: r format(vitalSumm$rowCnt, big.mark=",")
\
Total unique combinations of the primary key: r format(vitalSumm$primKeyCnt, big.mark=",")
\
Total patients in the table: r format(vitalSumm$personCnt, big.mark=",")
\
Month and year of first record: r format(as.Date(vitalSumm$firstDT),"%B %Y")
\
Month and year of last record: r format(as.Date(vitalSumm$lastDT),"%B %Y")
\
r ifelse(vitalSumm$rowCnt==vitalSumm$primKeyCnt," ","*The table has duplicate rows, by person_id, measure_date, measure_time")
\
# be sure to include space breaks in any re-design vitalVarSumm <- function(var,label){ vitalCon <- get_connection() vitalResult <- sqlQuery(vitalCon, paste0('WITH ranks AS (SELECT NULL AS PERSON_ID, min(',var,') AS ', var,', 0 AS qRanking FROM ', vital_signs,' WHERE ', var,' > 0 UNION ALL SELECT PERSON_ID, ', var,', NTILE(4) OVER( ORDER BY ', var,') qRanking FROM ', vital_signs,' WHERE ', var,' > 0) SELECT variable, (SELECT COUNT(1) FROM ranks WHERE qRanking > 0) as nobs, (SELECT COUNT(distinct PERSON_ID) FROM ranks WHERE qRanking > 0) as npats, minimum, q1, median, q3, maximum FROM ( SELECT \'', label, '\' AS Variable, ', var,', CASE WHEN qRanking = \'0\' THEN \'Minimum\' WHEN qRanking = \'1\' THEN \'q1\' WHEN qRanking = \'2\' THEN \'Median\' WHEN qRanking = \'3\' THEN \'q3\' WHEN qRanking = \'4\' THEN \'Maximum\' END AS qRanked FROM ranks ) qRanks PIVOT(MAX(',var,') FOR qRanked IN( minimum, q1, median, q3, maximum)) AS pv;', sep="" ), max=maxQryRows, as.is=c(TRUE) ) odbcClose(vitalCon) return (vitalResult) } vlst <- do.call('rbind', list(vitalVarSumm('ht','Height'), vitalVarSumm('wt','Weight'), vitalVarSumm('systolic','Systolic_BP'), vitalVarSumm('diastolic','Diastolic_BP'))) vlst <- within(vlst,{ minimum <- round(as.numeric(minimum),1) q1 <- round(as.numeric(q1),1) median <- round(as.numeric(median),1) q3 <- round(as.numeric(q3),1) maximum <- round(as.numeric(maximum),1) }) knitr::kable(vlst[c('variable','nobs','npats','minimum','q1','median','q3', 'maximum')], col.names=c('Vital record','Greater Than 0 Obs','Unique patients with Greater Than 0 Obs','Minimum','Q1','Median','Q3', 'Maximum'), row.names=FALSE, format.args = list(big.mark = ","))
This table examines the volume of vital signs over time and the number of patients with encounters represented in the VITAL_SIGNS table. Average number of vital signs per encounter, and per patient, are calculated to compare vital signs volume trends over time; and also to indicate whether or not vital signs are captured in one row or multiple rows for each encounter.
tab16 <- run_query( paste0("select year(measure_date) as year, count(*) as nrows, count(distinct enc_id) as nencts, count(distinct person_id) as npats from ", vital_signs, " where year(measure_date) > 2005 group by year(measure_date) order by year(measure_date)")) tab16_avg_encts <- run_query( paste0("select year(measure_date) as year, count(*) as nrows from ", vital_signs, " where year(measure_date) > 2005 group by year(measure_date), enc_id order by year(measure_date)")) %>% arrange(year) %>% group_by(year) %>% summarise_all(list(~mean(.), ~median(.), ~min(.), ~max(.))) %>% mutate( enct_val = paste0(median, "(", min, ", ", max, ")") ) %>% select(enct_val) tab16_avg_pats <- run_query( paste0( "select year(measure_date) as year, count(*) as nrows from ", vital_signs, " where year(measure_date) > 2005 group by year(measure_date), person_id order by year(measure_date)")) %>% arrange(year) %>% group_by(year) %>% summarise_all(list(~mean(.), ~median(.), ~min(.), ~max(.)))%>% mutate( pats_val = paste0(median, "(", min, ", ", max, ")") ) %>% select(pats_val) tab16_add <- tab16 %>% bind_cols(., tab16_avg_encts, tab16_avg_pats) %>% mutate( ptc_rec = if_else(is.na(nrows / dplyr::lag(nrows)), "--", paste0(round(nrows / dplyr::lag(nrows) * 100, 0), "%")) ) %>% select(year, nrows, ptc_rec, nencts, npats, enct_val, pats_val) tab16_add$year <- as.character(tab16_add$year)
kable(tab16_add, col.names = c("Year", "Total Number of Recorded Vitals", "Percent Change from Previous Year", "Total Number of Encounters with Vital_Signs", "Total Number of Patients with Vitals", "Median Number of Vitals per Encounter (Min, Max)", "Median Number of Vitals per Patient (Min, Max)"), format.args = list(big.mark = ","))
\newpage
vitalsByMonYr <- run_query( paste0("SELECT year(measure_date) as year ,month(measure_date) as month ,sum(case when ht>0 then 1 else 0 end) as height ,sum(case when wt>0 then 1 else 0 end) as weight ,sum(case when systolic>0 then 1 else 0 end) as systolic ,sum(case when diastolic>0 then 1 else 0 end) as diastolic FROM ",vital_signs, " GROUP BY year(measure_date), month(measure_date)", sep=" " ), max=maxQryRows ) vitalsByMonYr <- within(vitalsByMonYr,{ monYr <- as.Date(paste(as.character(year),as.character(month),'01', sep='-')) }) %>% subset(select=-c(month,year)) vitalsByMonYrTx <- gather(vitalsByMonYr, 'Vital_record','Count', 1:4) # time test fig5_timeTest <- timeTest(time=as.Date(vitalsByMonYrTx$monYr), Freq=vitalsByMonYrTx$Count, outcome=vitalsByMonYrTx$Vital_record) ggplot(subset(vitalsByMonYrTx, Vital_record=="height"), aes(monYr, Count, colour=Vital_record)) + geom_line() + ggtitle("Height") + scale_x_date(date_labels = "%b %y",date_breaks='1 year') + xlab("Month of encounter") + ylab("Frequency") + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + labs(color='Vital type') + theme(legend.position='none') + geom_point(data=subset(fig5_timeTest, iqrTest==T & outcome=="height" ) %>% rename(monYr=testMonth, Count=Freq2, Vital_record = outcome), aes(monYr, Count, colour=Vital_record)) ggplot(subset(vitalsByMonYrTx, Vital_record=="weight"), aes(monYr, Count, colour=Vital_record)) + geom_line() + ggtitle("Weight") + scale_x_date(date_labels = "%b %y",date_breaks='1 year') + xlab("Month of encounter") + ylab("Frequency")+theme(axis.text.x = element_text(angle = 45, hjust = 1)) + labs(color='Vital type') + theme(legend.position='none') + geom_point(data=subset(fig5_timeTest, iqrTest==T & outcome=="weight" ) %>% rename(monYr=testMonth, Count=Freq2, Vital_record = outcome), aes(monYr, Count, colour=Vital_record)) ggplot(subset(vitalsByMonYrTx, Vital_record=="systolic"), aes(monYr, Count, colour=Vital_record)) + geom_line() + ggtitle("Systolic") + scale_x_date(date_labels = "%b %y",date_breaks='1 year') + xlab("Month of encounter") + ylab("Frequency")+theme(axis.text.x = element_text(angle = 45, hjust = 1)) + labs(color='Vital type') + theme(legend.position='none') + geom_point(data=subset(fig5_timeTest, iqrTest==T & outcome=="systolic" ) %>% rename(monYr=testMonth, Count=Freq2, Vital_record = outcome), aes(monYr, Count, colour=Vital_record)) ggplot(subset(vitalsByMonYrTx, Vital_record=="diastolic"), aes(monYr, Count, colour=Vital_record)) + geom_line() + ggtitle("Diastolic") + scale_x_date(date_labels = "%b %y",date_breaks='1 year') + xlab("Month of encounter") + ylab("Frequency") + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + labs(color='Vital type') + theme(legend.position='none') + geom_point(data=subset(fig5_timeTest, iqrTest==T & outcome=="diastolic" ) %>% rename(monYr=testMonth, Count=Freq2, Vital_record = outcome), aes(monYr, Count, colour=Vital_record))
tab17 <- run_query( paste0("select year(benefit_date) as year, count(*) as num_rec, SUM(CASE WHEN BENEFIT_CAT = 'CC' THEN 1 ELSE 0 END) as correctional_care, SUM(CASE WHEN benefit_cat = 'CO' THEN 1 ELSE 0 END) as comm_private_insurance, SUM(CASE WHEN benefit_cat = 'CP' THEN 1 ELSE 0 END) as CHIP, SUM(CASE WHEN benefit_cat = 'MC' THEN 1 ELSE 0 END) as medicare, SUM(CASE WHEN benefit_cat = 'MD' THEN 1 ELSE 0 END) as medicaid, SUM(CASE WHEN benefit_cat = 'OG' THEN 1 ELSE 0 END) as other_gov, SUM(CASE WHEN benefit_cat = 'NC' THEN 1 ELSE 0 END) as no_coverage, SUM(CASE WHEN benefit_cat = 'OT' THEN 1 ELSE 0 END) as other_ins, SUM(CASE WHEN benefit_cat = 'UN' THEN 1 ELSE 0 END) as unknown_ins, SUM(CASE WHEN benefit_cat = 'WC' THEN 1 ELSE 0 END) as work_comp, SUM(CASE WHEN benefit_cat = 'NI' THEN 1 ELSE 0 END) as na, SUM(CASE WHEN (benefit_cat NOT IN ('CC', 'CO', 'CP', 'MC', 'MD', 'OG', 'NC', 'OT', 'UN', 'WC', 'NI') OR benefit_cat is null) THEN 1 ELSE 0 END ) as not_chords_categorized from ", benefit, " group by year(benefit_date) order by year(benefit_date)")) #Final display tab17$correctional_care = paste0(format(tab17$correctional_care,big.mark = ",") ,' (',round(tab17$correctional_care/tab17$num_rec*100, digits=1),'%)') tab17$comm_private_insurance = paste0(format(tab17$comm_private_insurance,big.mark = ",") ,' (',round(tab17$comm_private_insurance/tab17$num_rec*100, digits=1),'%)') tab17$CHIP = paste0(format(tab17$CHIP,big.mark = ",") ,' (',round(tab17$CHIP/tab17$num_rec*100, digits=1),'%)') tab17$medicare = paste0(format(tab17$medicare,big.mark = ",") ,' (',round(tab17$medicare/tab17$num_rec*100, digits=1),'%)') tab17$medicaid = paste0(format(tab17$medicaid,big.mark = ",") ,' (',round(tab17$medicaid/tab17$num_rec*100, digits=1),'%)') tab17$other_gov = paste0(format(tab17$other_gov,big.mark = ",") ,' (',round(tab17$other_gov/tab17$num_rec*100, digits=1),'%)') tab17$no_coverage = paste0(format(tab17$no_coverage,big.mark = ",") ,' (',round(tab17$no_coverage/tab17$num_rec*100, digits=1),'%)') tab17$other_ins = paste0(format(tab17$other_ins,big.mark = ",") ,'\n(',round(tab17$other_ins/tab17$num_rec*100, digits=1),'%)') tab17$unknown_ins = paste0(format(tab17$unknown_ins,big.mark = ",") ,' (',round(tab17$unknown_ins/tab17$num_rec*100, digits=1),'%)') tab17$work_comp = paste0(format(tab17$work_comp,big.mark = ",") ,' (',round(tab17$work_comp/tab17$num_rec*100, digits=1),'%)') tab17$na = paste0(format(tab17$na,big.mark = ",") ,' (',round(tab17$na/tab17$num_rec*100, digits=1),'%)') tab17$not_chords_categorized = paste0(format(tab17$not_chords_categorized,big.mark = ",") ,' (',round(tab17$not_chords_categorized/tab17$num_rec*100, digits=1),'%)') tab17$year <- as.character(tab17$year)
#kable(tab17, col.names = c("Year", "Records", "Correctional Care", "Commercial/Private Insurance", "CHIP", "Medicare", #"Medicaid", "Other Govt. Insurance", "No Coverage", "Other Insurance", "Unknown", "Workers Comp", "NA", "Not CHORDS #Categorized"), align = 'c', format.args = list(big.mark = ",")) tab17 <- flextable(format(tab17, big.mark=",")) %>% set_table_properties(layout = "autofit") %>% set_header_labels(year="Year", num_rec="Records", correctional_care = "Correctional Care", comm_private_insurance = "Commercial or Private Insurance", CHIP= "CHIP", medicare= "Medicare", medicaid ="Medicaid", other_gov="Other Govt. Insurance", no_coverage="No Coverage", other_ins="Other Insurance", unknown_ins="Unknown", work_comp="Workers Comp", na="NA", not_chords_categorized="Not CHORDS Category") %>% flextable::fontsize(size = 9, part = "all") %>% flextable::align(align = "center", part = "all") %>% border_inner(part = "all", border = fp_border(color = "black", style = "solid")) knitr::knit_print(tab17)
tabBenefit_years_f <- function(){ current_year <- as.numeric(format(Sys.Date() ,"%Y")) y1 <- as.character(current_year - 4) y2 <- as.character(current_year - 3) y3 <- as.character(current_year - 2) y4 <- as.character(current_year - 1) y5 <- as.character(current_year) return (c(y1, y2, y3, y4, y5)) } tabBenefit_years <- tabBenefit_years_f()
tab17_1 <- run_query(paste0( "WITH CTE_BENEFITS_TOTAL as ( SELECT BENEFIT_CAT, COUNT(*) Total FROM ", benefit, " b GROUP BY BENEFIT_CAT ), CTE_BENEFIT_DATA as ( SELECT BENEFIT_CAT ,CU ,EN ,PR ,PA ,SR ,NI FROM ( SELECT b.BENEFIT_ID ,b.BENEFIT_TYPE ,b.BENEFIT_CAT FROM ", benefit, " b ) data pivot(COUNT(BENEFIT_ID) FOR BENEFIT_TYPE IN (CU, EN, PR, PA, SR, NI) ) AS pivot_table ) SELECT CASE WHEN d.BENEFIT_CAT = 'CC' THEN 'CC (Correctional Care)' WHEN d.BENEFIT_CAT = 'CO' THEN 'CO (Commercial/Private Insurance)' WHEN d.BENEFIT_CAT = 'CP' THEN 'CP (Children’s Health Insurance Program)' WHEN d.BENEFIT_CAT = 'MC' THEN 'MC (Medicare)' WHEN d.BENEFIT_CAT = 'MD' THEN 'MD (Medicaid)' WHEN d.BENEFIT_CAT = 'OG' THEN 'OG (Other government)' WHEN d.BENEFIT_CAT = 'NC' THEN 'NC (No coverage)' WHEN d.BENEFIT_CAT = 'OT' THEN 'OT (Other insurance)' WHEN d.BENEFIT_CAT = 'UN' THEN 'UN (Unknown insurance)' WHEN d.BENEFIT_CAT = 'WC' THEN 'WC (Workers compensation)' WHEN d.BENEFIT_CAT = 'NI' THEN 'NI (No Benefit information)' END AS 'Benefit Category' ,FORMAT(CU, 'N0') + ' (' + cast(ROUND(cast(CU as float)/cast(t.total as float), 4) * 100 as varchar) + '%)' AS 'CU (Current insurance)' ,FORMAT(EN, 'N0') + ' (' + cast(ROUND(cast(EN as float)/cast(t.total as float), 4) * 100 as varchar) + '%)' AS 'EN (Enrollment insurance)' ,FORMAT(PR, 'N0') + ' (' + cast(ROUND(cast(PR as float)/cast(t.total as float), 4) * 100 as varchar) + '%)' AS 'PR (Primary payer)' ,FORMAT(PA, 'N0') + ' (' + cast(ROUND(cast(PA as float)/cast(t.total as float), 4) * 100 as varchar) + '%)' AS 'PA (payer unknown rank)' ,FORMAT(SR, 'N0') + ' (' + cast(ROUND(cast(SR as float)/cast(t.total as float), 4) * 100 as varchar) + '%)' AS 'SR (Self-reported insurance)' ,FORMAT(NI, 'N0') + ' (' + cast(ROUND(cast(NI as float)/cast(t.total as float), 4) * 100 as varchar) + '%)' AS 'NI (No Benefit information)' FROM CTE_BENEFIT_DATA d JOIN CTE_BENEFITS_TOTAL t on t.BENEFIT_CAT = d.BENEFIT_CAT;" , sep = '')) kable(tab17_1)
r paste0(tabBenefit_years[[1]], " - ", tabBenefit_years[[5]])
)tab17_2 <- run_query(paste0( "SET NOCOUNT ON; DROP TABLE IF EXISTS #benefit_counts; SELECT * INTO #benefit_counts FROM ( SELECT b.PERSON_ID ,YEAR(b.BENEFIT_DATE) BENEFIT_YEAR ,cast(count(b.PERSON_ID) AS FLOAT) CountB FROM ", benefit, " b WHERE YEAR(b.BENEFIT_DATE) IN ('", tabBenefit_years[[1]],"', ", "'",tabBenefit_years[[2]], "', ", "'", tabBenefit_years[[3]], "', ", "'", tabBenefit_years[[4]], "', ", "'", tabBenefit_years[[5]], "') GROUP BY b.PERSON_ID ,YEAR(b.BENEFIT_DATE) ) data; DROP TABLE IF EXISTS #benefit_records_person; SELECT * INTO #benefit_records_person FROM ( SELECT BENEFIT_YEAR ,SUM(IIF(b.CountB > 1, 1, 0)) total_gt_1 ,count(b.CountB) total_rec ,round(AVG(cast(b.CountB AS FLOAT)), 2) avg_per_person ,max(b.CountB) AS max FROM #benefit_counts b GROUP BY BENEFIT_YEAR ) data; DROP TABLE IF EXISTS #benefit_records_person_median; SELECT * INTO #benefit_records_person_median FROM ( SELECT DISTINCT BENEFIT_YEAR ,PERCENTILE_CONT(0.5) WITHIN GROUP ( ORDER BY CountB ) OVER (PARTITION BY BENEFIT_YEAR) AS MedianCont FROM #benefit_counts AS d ) data; SELECT a.BENEFIT_YEAR ,format(total_gt_1, 'N0') + ' (' + cast(IIF(total_rec > 0, round(cast(total_gt_1 AS FLOAT) / cast(total_rec AS FLOAT), 4) * 100.0, 0) AS VARCHAR) + '%) ' AS '# (%) patients with >1 benefit records per year' ,a.avg_per_person AS 'Average # of records per person' ,b.MedianCont AS 'Median # of records per person' ,a.max AS 'Maximum # of records per person' FROM #benefit_records_person a JOIN #benefit_records_person_median b ON a.BENEFIT_YEAR = b.BENEFIT_YEAR ORDER BY BENEFIT_YEAR asc;", sep = '')) kable(tab17_2)
r paste0(tabBenefit_years[[1]], " - ", tabBenefit_years[[5]])
)tab17_3 <- run_query(paste0( "SET NOCOUNT ON; DROP TABLE IF EXISTS #benefit_counts; SELECT * INTO #benefit_counts FROM ( SELECT b.ENC_ID ,YEAR(b.BENEFIT_DATE) BENEFIT_YEAR ,cast(count(b.ENC_ID) AS FLOAT) CountB FROM ", benefit, " b WHERE YEAR(b.BENEFIT_DATE) IN ('", tabBenefit_years[[1]],"', ", "'",tabBenefit_years[[2]], "', ", "'", tabBenefit_years[[3]], "', ", "'", tabBenefit_years[[4]], "', ", "'", tabBenefit_years[[5]], "') GROUP BY b.ENC_ID ,YEAR(b.BENEFIT_DATE) ) data; DROP TABLE IF EXISTS #benefit_records_encounter; SELECT * INTO #benefit_records_encounter FROM ( SELECT BENEFIT_YEAR ,SUM(IIF(b.CountB > 1, 1, 0)) total_gt_1 ,count(b.CountB) total_rec ,round(AVG(cast(b.CountB AS FLOAT)), 2) avg_per_encounter ,max(b.CountB) AS max FROM #benefit_counts b GROUP BY BENEFIT_YEAR ) data; DROP TABLE IF EXISTS #benefit_records_encounter_median; SELECT * INTO #benefit_records_encounter_median FROM ( SELECT DISTINCT BENEFIT_YEAR ,PERCENTILE_CONT(0.5) WITHIN GROUP ( ORDER BY CountB ) OVER (PARTITION BY BENEFIT_YEAR) AS MedianCont FROM #benefit_counts AS d ) data; SELECT a.BENEFIT_YEAR ,format(total_gt_1, 'N0') + ' (' + cast(IIF(total_rec > 0, round(cast(total_gt_1 AS FLOAT) / cast(total_rec AS FLOAT), 4) * 100.0, 0) AS VARCHAR) + '%) ' AS '# (%) patients with >1 benefit records per year' ,a.avg_per_encounter AS 'Average # of records per encounter' ,b.MedianCont AS 'Median # of records per encounter' ,a.max AS 'Maximum # of records per encounter' FROM #benefit_records_encounter a JOIN #benefit_records_encounter_median b ON a.BENEFIT_YEAR = b.BENEFIT_YEAR ORDER BY BENEFIT_YEAR asc;", sep = '')) kable(tab17_3)
r paste0(tabBenefit_years[[1]], " - ", tabBenefit_years[[5]])
)tab17_4 <- run_query(paste0( "SET NOCOUNT ON; DROP TABLE IF EXISTS #benefit_counts; SELECT * INTO #benefit_counts FROM ( SELECT b.PERSON_ID ,YEAR(b.BENEFIT_DATE) BENEFIT_YEAR ,cast(count(DISTINCT b.BENEFIT_CAT) AS FLOAT) CountB FROM ", benefit ," b WHERE YEAR(b.BENEFIT_DATE) IN ('", tabBenefit_years[[1]],"', ", "'",tabBenefit_years[[2]], "', ", "'", tabBenefit_years[[3]], "', ", "'", tabBenefit_years[[4]], "', ", "'", tabBenefit_years[[5]], "') GROUP BY b.PERSON_ID ,YEAR(b.BENEFIT_DATE) ) data; DROP TABLE IF EXISTS #benefit_records_person; SELECT * INTO #benefit_records_person FROM ( SELECT BENEFIT_YEAR ,SUM(IIF(b.CountB > 1, 1, 0)) total_gt_1 ,count(b.CountB) total_rec ,round(AVG(cast(b.CountB AS FLOAT)), 2) avg_per_person ,max(b.CountB) AS max FROM #benefit_counts b GROUP BY BENEFIT_YEAR ) data; DROP TABLE IF EXISTS #benefit_records_person_median; SELECT * INTO #benefit_records_person_median FROM ( SELECT DISTINCT BENEFIT_YEAR ,PERCENTILE_CONT(0.5) WITHIN GROUP ( ORDER BY CountB ) OVER (PARTITION BY BENEFIT_YEAR) AS MedianCont FROM #benefit_counts AS d ) data; SELECT a.BENEFIT_YEAR ,format(total_gt_1, 'N0') + ' (' + cast(IIF(total_rec > 0, round(cast(total_gt_1 AS FLOAT) / cast(total_rec AS FLOAT), 4) * 100.0, 0) AS VARCHAR) + '%) ' AS '# (%) patients with >1 benefit category per year' ,a.avg_per_person AS 'Average # of benefit categories per person' ,b.MedianCont AS 'Median # of benefit categories per person' ,a.max AS 'Maximum # of benefit categories per person' FROM #benefit_records_person a JOIN #benefit_records_person_median b ON a.BENEFIT_YEAR = b.BENEFIT_YEAR ORDER BY BENEFIT_YEAR asc; " , sep ='')) kable(tab17_4)
tab18 <- run_query( paste0("SELECT COUNT(DISTINCT PERSON_ID) as npats, COUNT(DISTINCT CID) as nlink FROM ", linkage)) knitr::kable(tab18, col.names = c("Number of unique patient IDs", "Number of unique linkage IDs"), format.args = list(big.mark = ","))
# close odbc odbcCloseAll() endTime <- (Sys.time() - startTime)
r format(round(endTime, 3))
title: "P1 QA March 2021"\
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.