# inst/tests.R

library(HGTools)

# load('../HGmiscTools/.RData')

# Scratch test driver: fits neural-network and random-forest models on a
# random subsample of `B`, then saves the full data set as the package's
# training data.
#
# NOTE(review): `B`, `grids` and `testing` are not created anywhere in this
# script; presumably they come from the workspace referenced by the
# commented-out load() call above (or from HGTools) -- confirm before running.

# Draw 10,000 rows of `B` without replacement to keep the fits fast.
# NOTE(review): no seed is set, so the subsample differs on every run; call
# set.seed() first if reproducible results are needed.
training <- B[sample(nrow(B), 10000, replace = FALSE), ]

# Tuning grid for the neural network: two hidden-layer sizes, one threshold.
grids[["neuralnet"]] <- expand.grid(.hidden = c(2, 3), .threshold = 100)

# Predictor columns shared by every model fit below. Defined once so the
# three train() calls cannot drift apart. The spellings (e.g.
# "geo_ind_accomodation", "geo_ind_proffessional") are kept verbatim because
# they are passed to train() as column names.
predictors <- c(
  "demo_gender_f", "demo_age_00t04", "demo_age_05t09",
  "demo_age_10t14", "demo_age_15t17", "demo_age_18t20",
  "demo_age_21t24", "demo_age_25t34", "demo_age_35t44",
  "demo_age_45t54", "demo_age_55t64", "demo_age_65t74",
  "demo_age_75t84", "demo_age_85t", "demo_marital_m",
  "demo_education_college", "demo_education_graduate", "demo_hh_adults_2",
  "demo_hh_adults_3t", "demo_hh_children_1", "demo_hh_children_2t3",
  "demo_hh_children_4t", "demo_hh_income_1t2", "demo_hh_income_3t4",
  "demo_hh_income_5t6", "demo_hh_income_7t9", "demo_hh_income_atc",
  "demo_hh_income_dt", "demo_addr_timeshare", "demo_addr_mobile",
  "demo_addr_apartment", "demo_addr_condo", "demo_addr_lor_05t09y",
  "demo_addr_lor_10t19y", "demo_addr_lor_20ty", "demo_addr_owner",
  "geo_hh_vacant", "geo_ms_married_present", "geo_ms_married_absent",
  "geo_ms_divorced", "geo_ms_widowed", "geo_hh_renter",
  "geo_hh_owner", "geo_hh_children", "geo_hh_no_children",
  "geo_hh_mobile", "geo_hh_units", "geo_hh_poverty",
  "geo_ed_dropout", "geo_ed_college", "geo_ed_grad",
  "geo_emp_employed", "geo_emp_unemployed", "geo_emp_military",
  "geo_emp_notlabor", "geo_tran_home", "geo_tran_walk",
  "geo_tran_bike", "geo_tran_carpool", "geo_tran_drive",
  "geo_tran_public", "geo_emp_profit", "geo_emp_nonprofit",
  "geo_emp_govt", "geo_emp_self", "geo_ind_accomodation",
  "geo_ind_admin", "geo_ind_agriculture", "geo_ind_recreation",
  "geo_ind_construction", "geo_ind_education", "geo_ind_finance",
  "geo_ind_health", "geo_ind_information", "geo_ind_management",
  "geo_ind_manufacturing", "geo_ind_service_oth", "geo_ind_proffessional",
  "geo_ind_public", "geo_ind_retail", "geo_ind_transportation",
  "geo_ind_wholesale", "geo_occ_engineer", "geo_occ_entertainment",
  "geo_occ_maintenance", "geo_occ_business", "geo_occ_social",
  "geo_occ_computer", "geo_occ_construction", "geo_occ_education",
  "geo_occ_farming", "geo_occ_food", "geo_occ_practitioner",
  "geo_occ_health", "geo_occ_repair", "geo_occ_legal",
  "geo_occ_science", "geo_occ_management", "geo_occ_office",
  "geo_occ_production", "geo_occ_protective", "geo_occ_sales",
  "geo_occ_care", "geo_occ_transportation", "geo_occ_white",
  "geo_occ_blue", "geo_occ_farm", "geo_pop_high_growth",
  "geo_hh_high_value", "geo_hh_large_size", "geo_hh_new_structure",
  "geo_hh_long_residence", "geo_hh_many_vehicles", "geo_hh_high_income",
  "geo_tran_long_commute", "geo_pop_low_growth", "geo_hh_low_value",
  "geo_hh_small_size", "geo_hh_old_structure", "geo_hh_short_residence",
  "geo_hh_few_vehicles", "geo_hh_low_income", "geo_tran_short_commute",
  "geo_re_hispanic", "geo_re_black"
)

# Neural network, 2 folds, models retained, verbose binary CV control.
# NOTE(review): testData is a logical here but a data object (`testing`) in
# the last call of this script -- confirm which form train() expects.
fit_nn <- train(
  X = predictors,
  Y = "has_adhd",
  data = training,
  testData = TRUE,
  method = "neuralnet",
  grid = grids[["neuralnet"]],
  k = 2,
  metric = "AUC",
  save_models = TRUE,
  cvControl = cvControl(verbose = TRUE, model_type = "binary")
)

# Random forest, 3 folds, same predictors and outcome.
fit_rf <- train(
  X = predictors,
  Y = "has_adhd",
  data = training,
  testData = TRUE,
  method = "rf",
  grid = grids[["rf"]],
  k = 3,
  metric = "AUC",
  save_models = TRUE,
  cvControl = cvControl(verbose = TRUE, model_type = "binary")
)

# Neural network again, this time with `testing` as testData and with
# train()'s defaults for save_models and cvControl.
# NOTE: this overwrites the `fit_nn` object created above.
fit_nn <- train(
  X = predictors,
  Y = "has_adhd",
  data = training,
  testData = testing,
  method = "neuralnet",
  grid = grids[["neuralnet"]],
  k = 2,
  metric = "AUC"
)

# Save the full data set (not the subsample) as the packaged training data.
# The path is built with file.path() instead of setwd("data") so the
# session's working directory is left untouched.
training <- B
save(training, file = file.path("data", "adhd_train.rda"))
# cdeterman/HGTools documentation built on May 13, 2019, 2:34 p.m.