`sqliteSGP` <-
function(sgp_object,
state=NULL,
years=NULL,
content_areas=NULL,
other.student.groups,
text.output=TRUE,
json.output=TRUE,
null.output.string=NULL,
projection.years.for.target=3,
output.directory=file.path("Data", "SchoolView")) {
started.at <- proc.time()
message(paste("\tStarted sqliteSGP in outputSGP", prettyDate()))
YEAR <- DISTRICT_NUMBER <- SCHOOL_NUMBER <- CONTENT_AREA <- DISTRICT_ENROLLMENT_STATUS <- GRADE <- ETHNICITY <- STUDENTGROUP <- SCHOOL_ENROLLMENT_STATUS <- EMH_LEVEL <- MEDIAN_SGP <- NULL
INSTRUCTOR_NUMBER <- INSTRUCTOR_ENROLLMENT_STATUS <- TMP_ID <- NULL
## Create state (if NULL) from sgp_object (if possible)
if (is.null(state)) {
tmp.name <- toupper(gsub("_", " ", deparse(substitute(sgp_object))))
state <- getStateAbbreviation(tmp.name, "sqliteSGP")
}
## Create group variable names
if (!is.null(SGP::SGPstateData[[state]][["SGP_Configuration"]][["output.groups"]])) {
output.groups <- SGP::SGPstateData[[state]][["SGP_Configuration"]][["output.groups"]]
} else {
output.groups <- c("DISTRICT", "SCHOOL")
}
group.number <- paste(output.groups, "NUMBER", sep="_")
group.enroll.status <- paste(output.groups, "ENROLLMENT_STATUS", sep="_")
group.enroll.status.label <- paste0("Enrolled ", sapply(output.groups, capwords), ": Yes")
## Create/Set database
if (state %in% c(datasets::state.abb, "DEMO")) {
tmp.state <- gsub(" ", "_", c(datasets::state.name, "Demonstration")[state==c(datasets::state.abb, "DEMO")])
} else {
tmp.state <- gsub(" ", "_", state)
}
sqlite.output.directory <- file.path(output.directory, "SQLITE")
dir.create(sqlite.output.directory, recursive=TRUE, showWarnings=FALSE)
if (file.exists(file.path(sqlite.output.directory, paste0(tmp.state, "_Data_SQLITE.sqlite")))) file.remove(file.path(sqlite.output.directory, paste0(tmp.state, "_Data_SQLITE.sqlite")))
db <- dbConnect(SQLite(), dbname=file.path(sqlite.output.directory, paste0(tmp.state, "_Data_SQLITE.sqlite")))
if (text.output) {
text.output.directory <- file.path(output.directory, "TEXT")
dir.create(text.output.directory, recursive=TRUE, showWarnings=FALSE)
}
if (json.output) {
json.output.directory <- file.path(output.directory, "JSON")
dir.create(json.output.directory, recursive=TRUE, showWarnings=FALSE)
}
## Utility functions
strtail <- function(s, n=1) {
if (n < 0) substring(s, 1-n)
else substring(s, nchar(s)-n+1)
}
strhead <- function(s,n=1) {
if (n < 0) substr(s, 1, nchar(s)+n)
else substr(s, 1, n)
}
sqlite.create.table <- function(table.name, field.types, primary.key) {
tmp.sql <- paste0("CREATE TABLE ", table.name, " (", paste(field.types, collapse=", "),
", PRIMARY KEY (", paste(primary.key, collapse=", "), "))")
return(tmp.sql)
}
"%w/o%" <- function(x, y) x[!x %in% y]
convert.variables <- function(tmp.df, factor.variables=NULL) {
if (length(grep("_", tmp.df$YEAR)) > 0) {
tmp.df$YEAR <- sapply(strsplit(tmp.df$YEAR, "_"), '[', 2)
}
if (is.character(tmp.df$CONTENT_AREA)) {
tmp.df$CONTENT_AREA <- as.factor(tmp.df$CONTENT_AREA)
}
tmp.factor.names <- c(factor.variables, names(tmp.df)[sapply(tmp.df, class)=="factor"] %w/o% c(group.number[2], group.number[1], "INSTRUCTOR_NUMBER"))
for (i in tmp.factor.names) {
tmp.df[[i]] <- unclass(as.factor(tmp.df[[i]]))
}
tmp.df[sapply(tmp.df, is.nan)] <- NA
return(tmp.df)
}
get.grade <- function(grade) {
if (SGP::SGPstateData[[state]][["Assessment_Program_Information"]][["Test_Season"]]=="Fall") grade-1 else grade
}
get.year <- function(year) {
if (SGP::SGPstateData[[state]][["Assessment_Program_Information"]][["Test_Season"]]=="Fall") {
yearIncrement(year, -1)
} else {
return(year)
}
}
convert.names <- function(my.data) {
names(my.data)[names(my.data)=="PERCENT_CATCHING_UP_KEEPING_UP"] <- "PERCENT_AT_ABOVE_TARGET"
names(my.data)[names(my.data)==paste("MEDIAN_SGP_TARGET", projection.years.for.target, "YEAR", sep="_")] <- "MEDIAN_SGP_TARGET"
if ("EMH_LEVEL" %in% names(my.data) && is.numeric(my.data[['EMH_LEVEL']])) {
my.data[['EMH_LEVEL']] <- as.character(factor(my.data[['EMH_LEVEL']], levels=1:3, labels=c("E", "H", "M")))
}
if ("EMH_LEVEL" %in% names(my.data) && is.character(my.data[['EMH_LEVEL']])) {
my.data[['EMH_LEVEL']] <- substr(my.data[['EMH_LEVEL']],1,1)
}
if ("GENDER" %in% names(my.data)) {
my.data[['STUDENTGROUP']][my.data[['STUDENTGROUP']]=="Female"] <- "F"
my.data[['STUDENTGROUP']][my.data[['STUDENTGROUP']]=="Male"] <- "M"
}
if ("INSTRUCTOR_NUMBER" %in% names(my.data)) names(my.data)[names(my.data)=="INSTRUCTOR_NUMBER"] <- "TEACHER_USID"
if (group.number[1]!="DISTRICT") names(my.data)[names(my.data)==group.number[1]] <- "DISTRICT_NUMBER"
if (group.number[2]!="SCHOOL") names(my.data)[names(my.data)==group.number[2]] <- "SCHOOL_NUMBER"
return(my.data)
}
## Create relevant variables
if (is.null(years)) years <- unique(sgp_object@Data[['YEAR']]) %w/o% NA
if (is.null(content_areas)) content_areas <- unique(sgp_object@Data[['CONTENT_AREA']]) %w/o% NA
if (!is.null(SGP::SGPstateData[[state]][["SGP_Configuration"]][["null.output.string"]])) {
my.null.string <- SGP::SGPstateData[[state]][["SGP_Configuration"]][["null.output.string"]]
} else {
my.null.string <- "NULL"
}
## Create tmp.school.and.district.by.year table
setkeyv(sgp_object@Data, c("YEAR", group.number[1], group.number[2]))
tmp.school.and.district.by.year <- as.data.frame(convert.variables(unique(sgp_object@Data, by=key(sgp_object@Data))[, c("YEAR", group.number[1], group.number[2]), with=FALSE]))
###
### Construct tables
###
### Table 1. DISTRICT
field.types <- c(
"DISTRICT_NUMBER TEXT NOT NULL",
"CONTENT_AREA TEXT NOT NULL",
"YEAR INTEGER NOT NULL",
"MEDIAN_SGP REAL",
"MEDIAN_SGP_TARGET REAL",
"PERCENT_AT_ABOVE_TARGET REAL",
"PERCENT_AT_ABOVE_PROFICIENT REAL",
"MEDIAN_SGP_COUNT INTEGER",
"PERCENT_AT_ABOVE_PROFICIENT_COUNT INTEGER")
tmp <- as.data.frame(convert.variables(subset(sgp_object@Summary[[group.number[1]]][[paste(group.number[1], "CONTENT_AREA__YEAR", group.enroll.status[1], sep="__")]],
!is.na(get(group.number[1])) & CONTENT_AREA %in% content_areas & YEAR %in% years & get(group.enroll.status[1])==group.enroll.status.label[1] & !is.na(MEDIAN_SGP))))
tmp <- convert.names(tmp)
tmp <- tmp[, sapply(strsplit(field.types, " "), function(x) head(x,1))]
dbGetQuery(db, sqlite.create.table("DISTRICT", field.types, c("YEAR", "DISTRICT_NUMBER", "CONTENT_AREA")))
dbWriteTable(db, "DISTRICT", tmp, row.names=FALSE, append=TRUE)
if (text.output) write.table(tmp, file=file.path(text.output.directory, "DISTRICT.dat"), row.names=FALSE, na=my.null.string, quote=FALSE, sep="|")
if (json.output) cat(toJSON(tmp), file=file.path(json.output.directory, "DISTRICT.json"))
### Table 2. DISTRICT_GRADE
field.types <- c(
"DISTRICT_NUMBER TEXT NOT NULL",
"CONTENT_AREA TEXT NOT NULL",
"YEAR INTEGER NOT NULL",
"GRADE INTEGER NOT NULL",
"MEDIAN_SGP REAL",
"MEDIAN_SGP_TARGET REAL",
"PERCENT_AT_ABOVE_TARGET REAL",
"PERCENT_AT_ABOVE_PROFICIENT REAL",
"MEDIAN_SGP_COUNT INTEGER",
"PERCENT_AT_ABOVE_PROFICIENT_COUNT INTEGER")
tmp <- as.data.frame(convert.variables(subset(sgp_object@Summary[[group.number[1]]][[paste(group.number[1], "CONTENT_AREA__YEAR__GRADE", group.enroll.status[1], sep="__")]],
!is.na(get(group.number[1])) & CONTENT_AREA %in% content_areas & YEAR %in% years & !is.na(GRADE) & get(group.enroll.status[1])==group.enroll.status.label[1] & !is.na(MEDIAN_SGP))))
tmp <- convert.names(tmp)
tmp <- tmp[, sapply(strsplit(field.types, " "), function(x) head(x,1))]
dbGetQuery(db, sqlite.create.table("DISTRICT_GRADE", field.types, c("YEAR", "DISTRICT_NUMBER", "CONTENT_AREA", "GRADE")))
dbWriteTable(db, "DISTRICT_GRADE", tmp[, sapply(strsplit(field.types, " "), function(x) head(x,1))], row.names=FALSE, append=TRUE)
if (text.output) write.table(tmp, file=file.path(text.output.directory, "DISTRICT_GRADE.dat"), row.names=FALSE, na=my.null.string, quote=FALSE, sep="|")
if (json.output) cat(toJSON(tmp), file=file.path(json.output.directory, "DISTRICT_GRADE.json"))
### Table 3. DISTRICT_ETHNICITY
field.types <- c(
"DISTRICT_NUMBER TEXT NOT NULL",
"CONTENT_AREA TEXT NOT NULL",
"YEAR INTEGER NOT NULL",
"ETHNICITY INTEGER NOT NULL",
"MEDIAN_SGP REAL",
"MEDIAN_SGP_TARGET REAL",
"PERCENT_AT_ABOVE_TARGET REAL",
"PERCENT_AT_ABOVE_PROFICIENT REAL",
"MEDIAN_SGP_COUNT INTEGER",
"PERCENT_AT_ABOVE_PROFICIENT_COUNT INTEGER",
"ENROLLMENT_PERCENTAGE REAL")
tmp <- as.data.frame(convert.variables(subset(sgp_object@Summary[[group.number[1]]][[paste(group.number[1], "CONTENT_AREA__YEAR__ETHNICITY", group.enroll.status[1], sep="__")]],
!is.na(get(group.number[1])) & CONTENT_AREA %in% content_areas & YEAR %in% years & !is.na(ETHNICITY) & get(group.enroll.status[1])==group.enroll.status.label[1] &
!is.na(MEDIAN_SGP)), factor.variables="ETHNICITY"))
tmp <- convert.names(tmp)
tmp$ENROLLMENT_PERCENTAGE <- NA
tmp <- tmp[, sapply(strsplit(field.types, " "), function(x) head(x,1))]
dbGetQuery(db, sqlite.create.table("DISTRICT_ETHNICITY", field.types, c("YEAR", "DISTRICT_NUMBER", "CONTENT_AREA", "ETHNICITY")))
dbWriteTable(db, "DISTRICT_ETHNICITY", tmp, row.names=FALSE, append=TRUE)
if (text.output) write.table(tmp, file=file.path(text.output.directory, "DISTRICT_ETHNICITY.dat"), row.names=FALSE, na=my.null.string, quote=FALSE, sep="|")
if (json.output) cat(toJSON(tmp), file=file.path(json.output.directory, "DISTRICT_ETHNICITY.json"))
### Table 4. DISTRICT_GRADE_ETHNICITY
field.types <- c(
"DISTRICT_NUMBER TEXT NOT NULL",
"CONTENT_AREA TEXT NOT NULL",
"YEAR INTEGER NOT NULL",
"GRADE TEXT NOT NULL",
"ETHNICITY TEXT NOT NULL",
"MEDIAN_SGP REAL",
"MEDIAN_SGP_TARGET REAL",
"PERCENT_AT_ABOVE_TARGET REAL",
"PERCENT_AT_ABOVE_PROFICIENT REAL",
"MEDIAN_SGP_COUNT INTEGER",
"PERCENT_AT_ABOVE_PROFICIENT_COUNT INTEGER")
tmp <- as.data.frame(convert.variables(subset(sgp_object@Summary[[group.number[1]]][[paste(group.number[1], "CONTENT_AREA__YEAR__GRADE__ETHNICITY", group.enroll.status[1], sep="__")]],
!is.na(get(group.number[1])) & CONTENT_AREA %in% content_areas & YEAR %in% years & !is.na(GRADE) & !is.na(ETHNICITY) &
get(group.enroll.status[1])==group.enroll.status.label[1] & !is.na(MEDIAN_SGP)), factor.variables="ETHNICITY"))
tmp <- convert.names(tmp)
tmp <- tmp[, sapply(strsplit(field.types, " "), function(x) head(x,1))]
dbGetQuery(db, sqlite.create.table("DISTRICT_GRADE_ETHNICITY", field.types, c("YEAR", "DISTRICT_NUMBER", "CONTENT_AREA", "GRADE", "ETHNICITY")))
dbWriteTable(db, "DISTRICT_GRADE_ETHNICITY", tmp, row.names=FALSE, append=TRUE)
if (text.output) write.table(tmp, file=file.path(text.output.directory, "DISTRICT_GRADE_ETHNICITY.dat"), row.names=FALSE, na=my.null.string, quote=FALSE, sep="|")
if (json.output) cat(toJSON(tmp), file=file.path(json.output.directory, "DISTRICT_GRADE_ETHNICITY.json"))
### Table 5. DISTRICT_STUDENTGROUP
field.types <- c(
"DISTRICT_NUMBER TEXT NOT NULL",
"CONTENT_AREA TEXT NOT NULL",
"YEAR INTEGER NOT NULL",
"STUDENTGROUP TEXT NOT NULL",
"MEDIAN_SGP REAL",
"MEDIAN_SGP_TARGET REAL",
"PERCENT_AT_ABOVE_TARGET REAL",
"PERCENT_AT_ABOVE_PROFICIENT REAL",
"MEDIAN_SGP_COUNT INTEGER",
"PERCENT_AT_ABOVE_PROFICIENT_COUNT INTEGER",
"ENROLLMENT_PERCENTAGE REAL")
tmp.list <- list()
for (i in other.student.groups %w/o% grep("ETHNICITY", other.student.groups, value=TRUE)) {
tmp.list[[i]] <- sgp_object@Summary[[group.number[1]]][[paste(group.number[1], "CONTENT_AREA__YEAR", i, group.enroll.status[1], sep="__")]]
}
for (i in seq_along(tmp.list)) {
setnames(tmp.list[[i]], 4, "STUDENTGROUP")
}
tmp <- as.data.frame(convert.variables(subset(rbindlist(tmp.list, fill=TRUE),
!is.na(get(group.number[1])) & CONTENT_AREA %in% content_areas & YEAR %in% years & !is.na(STUDENTGROUP) & get(group.enroll.status[1])==group.enroll.status.label[1] &
!is.na(MEDIAN_SGP)), factor.variables="STUDENTGROUP"))
tmp <- convert.names(tmp)
tmp$ENROLLMENT_PERCENTAGE <- NA
tmp <- data.table(tmp[, sapply(strsplit(field.types, " "), function(x) head(x,1))], key=c("YEAR", "DISTRICT_NUMBER", "CONTENT_AREA", "STUDENTGROUP"))
tmp <- as.data.frame(data.table(tmp[!duplicated(tmp, by=key(tmp))]))
dbGetQuery(db, sqlite.create.table("DISTRICT_STUDENTGROUP", field.types, c("YEAR", "DISTRICT_NUMBER", "CONTENT_AREA", "STUDENTGROUP")))
dbWriteTable(db, "DISTRICT_STUDENTGROUP", tmp, row.names=FALSE, append=TRUE)
if (text.output) write.table(tmp, file=file.path(text.output.directory, "DISTRICT_STUDENTGROUP.dat"), row.names=FALSE, na=my.null.string, quote=FALSE, sep="|")
if (json.output) cat(toJSON(tmp), file=file.path(json.output.directory, "DISTRICT_STUDENTGROUP.json"))
### Table 6. DISTRICT_GRADE_STUDENTGROUP
field.types <- c(
"DISTRICT_NUMBER TEXT NOT NULL",
"CONTENT_AREA TEXT NOT NULL",
"YEAR INTEGER NOT NULL",
"GRADE TEXT NOT NULL",
"STUDENTGROUP TEXT NOT NULL",
"MEDIAN_SGP REAL",
"MEDIAN_SGP_TARGET REAL",
"PERCENT_AT_ABOVE_TARGET REAL",
"PERCENT_AT_ABOVE_PROFICIENT REAL",
"MEDIAN_SGP_COUNT INTEGER",
"PERCENT_AT_ABOVE_PROFICIENT_COUNT INTEGER")
tmp.list <- list()
for (i in other.student.groups %w/o% grep("ETHNICITY", other.student.groups, value=TRUE)) {
tmp.list[[i]] <- sgp_object@Summary[[group.number[1]]][[paste(group.number[1], "CONTENT_AREA__YEAR__GRADE", i, group.enroll.status[1], sep="__")]]
}
for (i in seq_along(tmp.list)) {
setnames(tmp.list[[i]], 5, "STUDENTGROUP")
}
tmp <- as.data.frame(convert.variables(subset(rbindlist(tmp.list, fill=TRUE),
!is.na(get(group.number[1])) & YEAR %in% years & CONTENT_AREA %in% content_areas & !is.na(STUDENTGROUP) & get(group.enroll.status[1])==group.enroll.status.label[1] &
!is.na(MEDIAN_SGP)), factor.variables="STUDENTGROUP"))
tmp <- convert.names(tmp)
tmp <- data.table(tmp[, sapply(strsplit(field.types, " "), function(x) head(x,1))], key=c("YEAR", "DISTRICT_NUMBER", "CONTENT_AREA", "GRADE", "STUDENTGROUP"))
tmp <- as.data.frame(tmp[!duplicated(tmp, by=key(tmp))])
dbGetQuery(db, sqlite.create.table("DISTRICT_GRADE_STUDENTGROUP", field.types, c("YEAR", "DISTRICT_NUMBER", "CONTENT_AREA", "GRADE", "STUDENTGROUP")))
dbWriteTable(db, "DISTRICT_GRADE_STUDENTGROUP", tmp, row.names=FALSE, append=TRUE)
if (text.output) write.table(tmp, file=file.path(text.output.directory, "DISTRICT_GRADE_STUDENTGROUP.dat"), row.names=FALSE, na=my.null.string, quote=FALSE, sep="|")
if (json.output) cat(toJSON(tmp), file=file.path(json.output.directory, "DISTRICT_GRADE_STUDENTGROUP.json"))
## Table 7. SCHOOL
field.types <- c(
"DISTRICT_NUMBER TEXT NOT NULL",
"SCHOOL_NUMBER TEXT NOT NULL",
"EMH_LEVEL TEXT NOT NULL",
"CONTENT_AREA TEXT NOT NULL",
"YEAR INTEGER NOT NULL",
"MEDIAN_SGP REAL",
"MEDIAN_SGP_TARGET REAL",
"PERCENT_AT_ABOVE_TARGET REAL",
"PERCENT_AT_ABOVE_PROFICIENT REAL",
"MEDIAN_SGP_COUNT INTEGER",
"PERCENT_AT_ABOVE_PROFICIENT_COUNT INTEGER")
tmp <- as.data.frame(convert.variables(subset(sgp_object@Summary[[group.number[2]]][[paste(group.number[2], "EMH_LEVEL__CONTENT_AREA__YEAR", group.enroll.status[2], sep="__")]],
!is.na(get(group.enroll.status[2])) & !is.na(get(group.number[2])) & !is.na(EMH_LEVEL) & CONTENT_AREA %in% content_areas & YEAR %in% years & get(group.enroll.status[2])==group.enroll.status.label[2] &
!is.na(MEDIAN_SGP))))
tmp <- as.data.frame(merge(tmp, as.data.frame(tmp.school.and.district.by.year), all.x=TRUE))
tmp <- convert.names(tmp)
tmp <- tmp[, sapply(strsplit(field.types, " "), function(x) head(x,1))]
dbGetQuery(db, sqlite.create.table("SCHOOL", field.types, c("YEAR", "DISTRICT_NUMBER", "SCHOOL_NUMBER", "EMH_LEVEL", "CONTENT_AREA")))
dbWriteTable(db, "SCHOOL", tmp, row.names=FALSE, append=TRUE)
if (text.output) write.table(tmp, file=file.path(text.output.directory, "SCHOOL.dat"), row.names=FALSE, na=my.null.string, quote=FALSE, sep="|")
if (json.output) cat(toJSON(tmp), file=file.path(json.output.directory, "SCHOOL.json"))
## Table 8. SCHOOL_GRADE
field.types <- c(
"DISTRICT_NUMBER TEXT NOT NULL",
"SCHOOL_NUMBER TEXT NOT NULL",
"EMH_LEVEL TEXT NOT NULL",
"CONTENT_AREA TEXT NOT NULL",
"YEAR INTEGER NOT NULL",
"GRADE TEXT NOT NULL",
"MEDIAN_SGP REAL",
"MEDIAN_SGP_TARGET REAL",
"PERCENT_AT_ABOVE_TARGET REAL",
"PERCENT_AT_ABOVE_PROFICIENT REAL",
"MEDIAN_SGP_COUNT INTEGER",
"PERCENT_AT_ABOVE_PROFICIENT_COUNT INTEGER")
tmp <- as.data.frame(convert.variables(subset(sgp_object@Summary[[group.number[2]]][[paste(group.number[2], "EMH_LEVEL__CONTENT_AREA__YEAR__GRADE", group.enroll.status[2], sep="__")]],
!is.na(get(group.number[2])) & !is.na(EMH_LEVEL) & CONTENT_AREA %in% content_areas & YEAR %in% years & !is.na(GRADE) & get(group.enroll.status[2])==group.enroll.status.label[2] &
!is.na(MEDIAN_SGP))))
tmp <- data.frame(merge(tmp, as.data.frame(tmp.school.and.district.by.year), all.x=TRUE))
tmp <- convert.names(tmp)
tmp <- tmp[, sapply(strsplit(field.types, " "), function(x) head(x,1))]
dbGetQuery(db, sqlite.create.table("SCHOOL_GRADE", field.types, c("YEAR", "DISTRICT_NUMBER", "SCHOOL_NUMBER", "EMH_LEVEL", "GRADE", "CONTENT_AREA")))
dbWriteTable(db, "SCHOOL_GRADE", tmp, row.names=FALSE, append=TRUE)
if (text.output) write.table(tmp, file=file.path(text.output.directory, "SCHOOL_GRADE.dat"), row.names=FALSE, na=my.null.string, quote=FALSE, sep="|")
if (json.output) cat(toJSON(tmp), file=file.path(json.output.directory, "SCHOOL_GRADE.json"))
## Table 9. SCHOOL_ETHNICITY
field.types <- c(
"DISTRICT_NUMBER TEXT NOT NULL",
"SCHOOL_NUMBER TEXT NOT NULL",
"EMH_LEVEL TEXT NOT NULL",
"CONTENT_AREA TEXT NOT NULL",
"YEAR INTEGER NOT NULL",
"ETHNICITY TEXT NOT NULL",
"MEDIAN_SGP REAL",
"MEDIAN_SGP_TARGET REAL",
"PERCENT_AT_ABOVE_TARGET REAL",
"PERCENT_AT_ABOVE_PROFICIENT REAL",
"MEDIAN_SGP_COUNT INTEGER",
"PERCENT_AT_ABOVE_PROFICIENT_COUNT INTEGER",
"ENROLLMENT_PERCENTAGE REAL")
tmp <- as.data.frame(convert.variables(subset(sgp_object@Summary[[group.number[2]]][[paste(group.number[2], "EMH_LEVEL__CONTENT_AREA__YEAR__ETHNICITY", group.enroll.status[2], sep="__")]],
!is.na(get(group.number[2])) & !is.na(EMH_LEVEL) & CONTENT_AREA %in% content_areas & YEAR %in% years & !is.na(ETHNICITY) & get(group.enroll.status[2])==group.enroll.status.label[2] &
!is.na(MEDIAN_SGP)), factor.variables="ETHNICITY"))
tmp <- data.frame(merge(tmp, as.data.frame(tmp.school.and.district.by.year), all.x=TRUE))
tmp <- convert.names(tmp)
tmp$ENROLLMENT_PERCENTAGE <- NA
tmp <- tmp[, sapply(strsplit(field.types, " "), function(x) head(x,1))]
dbGetQuery(db, sqlite.create.table("SCHOOL_ETHNICITY", field.types, c("YEAR", "DISTRICT_NUMBER", "SCHOOL_NUMBER", "EMH_LEVEL", "CONTENT_AREA", "ETHNICITY")))
dbWriteTable(db, "SCHOOL_ETHNICITY", tmp, row.names=FALSE, append=TRUE)
if (text.output) write.table(tmp, file=file.path(text.output.directory, "SCHOOL_ETHNICITY.dat"), row.names=FALSE, na=my.null.string, quote=FALSE, sep="|")
if (json.output) cat(toJSON(tmp), file=file.path(json.output.directory, "SCHOOL_ETHNICITY.json"))
## Table 10. SCHOOL_STUDENTGROUP
field.types <- c(
"DISTRICT_NUMBER TEXT NOT NULL",
"SCHOOL_NUMBER TEXT NOT NULL",
"EMH_LEVEL TEXT NOT NULL",
"CONTENT_AREA TEXT NOT NULL",
"YEAR INTEGER NOT NULL",
"STUDENTGROUP TEXT NOT NULL",
"MEDIAN_SGP REAL",
"MEDIAN_SGP_TARGET REAL",
"PERCENT_AT_ABOVE_TARGET REAL",
"PERCENT_AT_ABOVE_PROFICIENT REAL",
"MEDIAN_SGP_COUNT INTEGER",
"PERCENT_AT_ABOVE_PROFICIENT_COUNT INTEGER",
"ENROLLMENT_PERCENTAGE REAL")
tmp.list <- list()
for (i in other.student.groups %w/o% grep("ETHNICITY", other.student.groups, value=TRUE)) {
tmp.list[[i]] <- sgp_object@Summary[[group.number[2]]][[paste(group.number[2], "EMH_LEVEL__CONTENT_AREA__YEAR", i, group.enroll.status[2], sep="__")]]
}
for (i in seq_along(tmp.list)) {
setnames(tmp.list[[i]], 5, "STUDENTGROUP")
}
tmp <- as.data.frame(convert.variables(subset(rbindlist(tmp.list, fill=TRUE),
!is.na(get(group.number[2])) & !is.na(EMH_LEVEL) & CONTENT_AREA %in% content_areas & YEAR %in% years & !is.na(STUDENTGROUP) & get(group.enroll.status[2])==group.enroll.status.label[2] &
!is.na(MEDIAN_SGP)), factor.variables="STUDENTGROUP"))
tmp <- as.data.frame(merge(tmp, as.data.frame(tmp.school.and.district.by.year), all.x=TRUE))
tmp <- convert.names(tmp)
tmp$ENROLLMENT_PERCENTAGE <- NA
tmp <- data.table(tmp[, sapply(strsplit(field.types, " "), function(x) head(x,1))], key=c("YEAR", "DISTRICT_NUMBER", "SCHOOL_NUMBER", "EMH_LEVEL", "CONTENT_AREA", "STUDENTGROUP"))
tmp <- as.data.frame(tmp[!duplicated(tmp, by=key(tmp))])
dbGetQuery(db, sqlite.create.table("SCHOOL_STUDENTGROUP", field.types,
c("YEAR", "DISTRICT_NUMBER", "SCHOOL_NUMBER", "EMH_LEVEL", "CONTENT_AREA", "STUDENTGROUP")))
dbWriteTable(db, "SCHOOL_STUDENTGROUP", tmp, row.names=FALSE, append=TRUE)
if (text.output) write.table(tmp, file=file.path(text.output.directory, "SCHOOL_STUDENTGROUP.dat"), row.names=FALSE, na=my.null.string, quote=FALSE, sep="|")
if (json.output) cat(toJSON(tmp), file=file.path(json.output.directory, "SCHOOL_STUDENTGROUP.json"))
## Table 11. SCHOOL_TEACHER
if (any(c(paste(group.number[2], "INSTRUCTOR_NUMBER__EMH_LEVEL__CONTENT_AREA__YEAR", sep="__"),
paste(group.number[2], "INSTRUCTOR_NUMBER__EMH_LEVEL__CONTENT_AREA__YEAR__INSTRUCTOR_ENROLLMENT_STATUS", sep="__")) %in% names(sgp_object@Summary[[group.number[2]]]))) {
field.types <- c(
"DISTRICT_NUMBER TEXT NOT NULL",
"SCHOOL_NUMBER TEXT NOT NULL",
"EMH_LEVEL TEXT NOT NULL",
"TEACHER_USID TEXT NOT NULL",
"CONTENT_AREA TEXT NOT NULL",
"YEAR INTEGER NOT NULL",
"MEDIAN_SGP REAL",
"MEDIAN_SGP_TARGET REAL",
"PERCENT_AT_ABOVE_TARGET REAL",
"PERCENT_AT_ABOVE_PROFICIENT REAL",
"MEDIAN_SGP_COUNT INTEGER",
"PERCENT_AT_ABOVE_PROFICIENT_COUNT INTEGER")
if (paste(group.number[2], "INSTRUCTOR_NUMBER__EMH_LEVEL__CONTENT_AREA__YEAR__INSTRUCTOR_ENROLLMENT_STATUS", sep="__") %in% names(sgp_object@Summary[[group.number[2]]])) {
tmp.table.name <- paste(group.number[2], "INSTRUCTOR_NUMBER__EMH_LEVEL__CONTENT_AREA__YEAR__INSTRUCTOR_ENROLLMENT_STATUS", sep="__")
tmp <- as.data.frame(convert.variables(subset(sgp_object@Summary[[group.number[2]]][[tmp.table.name]],
!is.na(get(group.number[2])) & !is.na(INSTRUCTOR_NUMBER) & !is.na(EMH_LEVEL) & CONTENT_AREA %in% content_areas & YEAR %in% years &
INSTRUCTOR_ENROLLMENT_STATUS=="Enrolled Instructor: Yes" & !is.na(MEDIAN_SGP))))
} else {
tmp.table.name <- paste(group.number[2], "INSTRUCTOR_NUMBER__EMH_LEVEL__CONTENT_AREA__YEAR", sep="__")
tmp <- as.data.frame(convert.variables(subset(sgp_object@Summary[[group.number[2]]][[tmp.table.name]],
!is.na(get(group.number[2])) & !is.na(INSTRUCTOR_NUMBER) & !is.na(EMH_LEVEL) & CONTENT_AREA %in% content_areas & YEAR %in% years &
!is.na(MEDIAN_SGP))))
}
tmp <- data.frame(merge(tmp, as.data.frame(tmp.school.and.district.by.year), all.x=TRUE))
tmp <- convert.names(tmp)
tmp <- tmp[, sapply(strsplit(field.types, " "), function(x) head(x,1))]
dbGetQuery(db, sqlite.create.table("SCHOOL_TEACHER", field.types, c("YEAR", "DISTRICT_NUMBER", "SCHOOL_NUMBER", "TEACHER_USID", "EMH_LEVEL", "CONTENT_AREA")))
dbWriteTable(db, "SCHOOL_TEACHER", tmp, row.names=FALSE, append=TRUE)
if (text.output) write.table(tmp, file=file.path(text.output.directory, "SCHOOL_TEACHER.dat"), row.names=FALSE, na=my.null.string, quote=FALSE, sep="|")
if (json.output) cat(toJSON(tmp), file=file.path(json.output.directory, "SCHOOL_TEACHER.json"))
} ### END SCHOOL_TEACHER table
## Table 12. KEY_VALUE_LOOKUP (ADD CONTENT_AREA and YEAR)
field.types <- c(
"KEY_VALUE_ID INTEGER NOT NULL",
"KEY_VALUE_KEY TEXT",
"KEY_VALUE_CODE TEXT",
"KEY_VALUE_TEXT TEXT")
# CONTENT_AREA
tmp <- subset(sgp_object@Summary[[group.number[1]]][[paste(group.number[1], "CONTENT_AREA__YEAR", group.enroll.status[1], sep="__")]],
!is.na(get(group.number[1])) & CONTENT_AREA %in% content_areas & YEAR %in% years & get(group.enroll.status[1])==group.enroll.status.label[1])
tmp.CONTENT_AREA <- data.frame(
KEY_VALUE_KEY="CONTENT_AREA",
KEY_VALUE_CODE=seq_along(unique(tmp[['CONTENT_AREA']])),
KEY_VALUE_TEXT=sapply(sort(unique(tmp[['CONTENT_AREA']])), capwords))
# YEAR
tmp <- convert.variables(subset(sgp_object@Summary[[group.number[1]]][[paste(group.number[1], "CONTENT_AREA__YEAR", group.enroll.status[1], sep="__")]],
!is.na(get(group.number[1])) & CONTENT_AREA %in% content_areas & YEAR %in% years & get(group.enroll.status[1])==group.enroll.status.label[1]))
tmp.YEAR <- data.frame(
KEY_VALUE_KEY="YEAR",
KEY_VALUE_CODE=sort(unique(tmp[['YEAR']])),
KEY_VALUE_TEXT=paste0(as.numeric(sapply(sort(unique(tmp[['YEAR']])), get.year))-1, "-", sapply(sort(unique(tmp[['YEAR']])), get.year)))
# GRADE
tmp <- subset(as.data.frame(sgp_object@Summary[[group.number[1]]][[paste(group.number[1], "CONTENT_AREA__YEAR__GRADE", group.enroll.status[1], sep="__")]]),
!is.na(get(group.number[1])) & CONTENT_AREA %in% content_areas & YEAR %in% years & !is.na(GRADE) & get(group.enroll.status[1])==group.enroll.status.label[1])
tmp.GRADE <- data.frame(
KEY_VALUE_KEY="GRADE",
KEY_VALUE_CODE=sort(unique(as.integer(tmp[['GRADE']]))),
KEY_VALUE_TEXT=paste("Grade", get.grade(sort(unique(as.integer(tmp[['GRADE']]))))))
# EMH_LEVEL
tmp <- subset(sgp_object@Summary[[group.number[2]]][[paste(group.number[2], "EMH_LEVEL__CONTENT_AREA__YEAR", group.enroll.status[2], sep="__")]],
!is.na(get(group.number[2])) & !is.na(EMH_LEVEL) & CONTENT_AREA %in% content_areas & YEAR %in% years & get(group.enroll.status[2])==group.enroll.status.label[2])
if (!is.factor(tmp$EMH_LEVEL)) tmp[['EMH_LEVEL']] <- as.factor(tmp[['EMH_LEVEL']])
tmp.EMH <- data.frame(
KEY_VALUE_KEY="EMH_LEVEL",
KEY_VALUE_CODE=strhead(levels(as.factor(tmp$EMH_LEVEL))[sort(unique(as.integer(as.factor(tmp[['EMH_LEVEL']]))))], 1), ## TEMP fix until EMH_LEVEL is fixed up
KEY_VALUE_TEXT= levels(as.factor(tmp$EMH_LEVEL))[sort(unique(as.integer(as.factor(tmp[['EMH_LEVEL']]))))])
# ETHNICITY
tmp <- subset(as.data.frame(sgp_object@Summary[[group.number[1]]][[paste(group.number[1], "CONTENT_AREA__YEAR__ETHNICITY", group.enroll.status[1], sep="__")]]),
!is.na(get(group.number[1])) & CONTENT_AREA %in% content_areas & YEAR %in% years & !is.na(ETHNICITY) & get(group.enroll.status[1])==group.enroll.status.label[1])
tmp.ETHNICITY <- data.frame(
KEY_VALUE_KEY="ETHNICITY",
KEY_VALUE_CODE=sort(unique(as.integer(as.factor(tmp[['ETHNICITY']])))),
KEY_VALUE_TEXT=levels(as.factor(tmp$ETHNICITY))[sort(unique(as.integer(as.factor(tmp[['ETHNICITY']]))))])
# STUDENTGROUP
tmp.list <- list()
for (i in other.student.groups %w/o% grep("ETHNICITY", other.student.groups, value=TRUE)) {
tmp.list[[i]] <- sgp_object@Summary[[group.number[1]]][[paste(group.number[1], "CONTENT_AREA__YEAR", i, group.enroll.status[1], sep="__")]]
}
for (i in seq_along(tmp.list)) {
setnames(tmp.list[[i]], 4, "STUDENTGROUP")
}
tmp <- data.table(convert.names(convert.variables(subset(rbindlist(tmp.list, fill=TRUE),
!is.na(get(group.number[1])) & !is.na(STUDENTGROUP) & get(group.enroll.status[1])==group.enroll.status.label[1]))),
key=c("YEAR", "DISTRICT_NUMBER", "CONTENT_AREA", "STUDENTGROUP"))
tmp <- as.data.frame(data.table(tmp[!duplicated(tmp, by=key(tmp))]))
tmp.STUDENTGROUP <- data.frame(
KEY_VALUE_KEY="STUDENT_GROUP", ### NOTE: Must have underscore. It's an older version of the table
KEY_VALUE_CODE=sort(unique(as.integer(as.factor(tmp[['STUDENTGROUP']])))),
KEY_VALUE_TEXT=levels(as.factor(tmp$STUDENTGROUP))[sort(unique(as.integer(as.factor(tmp[['STUDENTGROUP']]))))])
tmp <- rbind(tmp.CONTENT_AREA, tmp.YEAR, tmp.GRADE, tmp.EMH, tmp.ETHNICITY, tmp.STUDENTGROUP)
tmp <- data.frame(KEY_VALUE_ID=1:dim(tmp)[1], tmp)
dbGetQuery(db, sqlite.create.table("KEY_VALUE_LOOKUP", field.types, "KEY_VALUE_ID"))
dbWriteTable(db, "KEY_VALUE_LOOKUP", tmp, row.names=FALSE, append=TRUE)
if (text.output) write.table(tmp, file=file.path(text.output.directory, "KEY_VALUE_LOOKUP.dat"), row.names=FALSE, na=my.null.string, quote=FALSE, sep="|")
if (json.output) cat(toJSON(tmp), file=file.path(json.output.directory, "KEY_VALUE_LOOKUP.json"))
###
### Disconnect database
###
dbDisconnect(db)
message(paste("\tFinished sqliteSGP in outputSGP", prettyDate(), "in", convertTime(timetakenSGP(started.at)), "\n"))
} ### END sqliteSGP
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.