# depricated/label_schools.R In svenhalvorson/SvenR: Sven's R Jiggering

```#we're gonna write a script to give standardized names and numbers
#to the schools at SFPS
#going to call the output 'vec'

label_schools <- function(df, school = "school", to = "abbr", from = NA, current = FALSE){

library("dplyr")

if(!is.data.frame(df)){
stop("df must be a data frame")
}

#check to see that parameters specified correctly
if(!(from %in% c("num","abbr","name", NA))){
stop("from argument must be num, abbr, name, or left blank")
}

if(!(to %in% c("num","abbr","name"))){
stop("to argument must be in num, abbr, name")
}

if(!is.na(from)){
if((from == "num" & to == "num") | (from == "abbr" & to == "abrr")){
stop("from and to arguments should be different or both 'name'")
}
}

#check if there exactly one column matching the school argument
if(sum(colnames(df) == school) == 0){
}

if(sum(colnames(df) == school) > 1){
stop("multiple columns matching school argument")
}

# This is pretty chintzy but I realized that something about the dplyr tibbles makes
# it so taking the school vector out is not so easy. I'm gonna make a copy of df
# and coerce it to data frame to find the school vec
ww = as.data.frame(df)

#okay we're gonna try a different approach
#let's making a copy of the school variable and use it as our working vector
#and take only the non missing
vec = as.character(ww[!is.na(ww[,school]) & ww[,school] != "",school])

#now try to detect the type of the from argument
if(is.na(from)){
#maybe it's clearly numeric, I guess we could accept some null values
#but maybe if <20% are NA when coerced we would say it's probably numeric
numtest = suppressWarnings(as.numeric(vec))
likelynum = sum(is.na(numtest))/length(numtest) < .2
if(likelynum){
from = "num"
}
}

#detect 4 character code
if(is.na(from)){
#let's try to work off the mean number of characters being between 3 and 4
numchar = nchar(vec)

if((mean(numchar)>3) & (mean(numchar)<=4)){
from = "abbr"
}

}

#detect long name form
if(is.na(from)){
#probably the name average name should be longer than 4 characters
numchar = nchar(vec)
if(mean(numchar)>4){
from = "name"
}
}

#if it's still NA let's stop
if(is.na(from)){
stop("Unable to detect unspecified 'from' argument")
}

#load the conversion table
ref <- convtable

#okay let's divide the work into sections based on the from argument because it's hardest

# NUM ---------------------------------------------------------------------

#assuming we have from == "num"
if(from == "num"){

#make sure argument is numeric and the columns match for merging
df\$num = df[,school]
if(is.character(df\$num)){
df\$num = as.numeric(df\$num)
}
if(is.factor(df\$num)){
df\$num = as.numeric(as.character(df\$num))
}

#cut out that old school code
df\$num[which(df\$num == 174)] = 11

if(to == "abbr"){
ref = select(ref,num,abbr)
ref = rename(ref,school.abbr = abbr)
}

if(to == "name"){
ref = select(ref,num,name)
ref = rename(ref,school.name = name)
}

#now join to the reference table
df = suppressWarnings(left_join(df, ref, by = "num"))
df = select(df,-num)
}

# ABBR --------------------------------------------------------------------

if(from == "abbr"){

df\$abbr = as.character(df[,school])

#cut out that old school code
df[which(df[,"abbr"] %in% c("ECRA","AFES")),"abbr"] = "ECCS"

if(to == "num"){
ref = select(ref,abbr,num)
ref = rename(ref,school.num = num)
}

if(to == "name"){
ref = select(ref,abbr,name)
ref = rename(ref,school.name = name)
}

#now join to the reference table
df = suppressWarnings(left_join(df, ref, by = "abbr"))
df = select(df,-abbr)
}

#progress! Now we just gotta fix this part
#okay now for the hard part. We gotta figure out how to do this framgenting match

# NAME --------------------------------------------------------------------

#let's write a function to do the name matching
infrags2 <- function(nam,type){

#loop through the rows of convtable
for(i in 1:nrow(convtable)){

#loop through the fragments
for(j in c("frag1","frag2","frag3","frag4")){

#check if the input or the fragment are subsets of eachother
if(double.regex(nam,convtable[i,j],ignore.case = TRUE)){
return(convtable[i,type])
}
}
}
#if we don't hit return a NA
return(NA)

}

if(from == "name"){
#make a blank  vector to store the output
df\$tmp = ""

#run through all the values in the schol variable, run the infrags function
#store in tmp
for(i in 1:nrow(df)){
df\$tmp[i] = infrags2(vec[i],type = to)

}

#rename based on the 'to' argument
if(to == "num"){
df = rename(df,school.num = tmp)
}
if(to == "abbr"){
df = rename(df,school.abbr = tmp)
}
if(to == "name"){
df = rename(df,school.name = tmp)
}
}

#Now we're gonna tack on another option to return a vector saying if the school is one of our 'normal'
#schools. If current = TRUE, we'll return an extra vector

active.schools.num = c(141 ,54 ,12 ,8 ,24 ,33 ,20 ,188 ,5 ,22 ,146 ,9 ,7 ,
186 ,11 ,166 ,135 ,99 ,169 ,57 ,70 ,173 ,145 ,170 ,
168 ,110 ,100 ,23 ,52 ,143 ,165 ,130 ,34 ,160 ,78 ,
176 ,53,7)

active.schools.abbr = c("ABCS","ACCS","ALHS","AMES","ATC","ATES","CAHS",
"CAMS","CCES","CGES","CHES","DC","DRS","DVMS",
"ECCS","ECO","EDCS","EJES","ENHS","GOCS","KEES",
"MIMS","NAES","NOCS","NYEB","ORMS","PIES","RTES",
"RTH","SAES","SFHS","SWES","SWPK","TEES","TEP",
"WGES","ZBS")

if(current){
browser()
if(to == "abbr"){
df\$current.school = df\$school.abbr %in% active.schools.abbr
}
if(to == "num"){
df\$current.school = df\$school.num %in% active.schools.num
}

if(to == "name"){
if(from == "abbr"){
df\$current.school = df[,school] %in% active.schools.abbr
}
if(from == "num"){
df\$current.school = df[,school] %in% active.schools.num
}
}
}

return(df)

}
```
svenhalvorson/SvenR documentation built on June 24, 2018, 9:25 a.m.