inst/extdata/test_df.md

Test Dataset Classifiers

mabadgeley, `r Sys.Date()`

Test Dataset

# Load the example dataset shipped with ProjUtils and print its structure
data("test_df", package = "ProjUtils")
str(test_df)
#> Classes 'tbl_df', 'tbl' and 'data.frame':    180 obs. of  14 variables:
#>  $ id      : chr  "7862738" "7863668" "7872384" "7873551" ...
#>  $ cnn_41  : num  0.992 1 1 1 1 ...
#>  $ cnn_42  : num  0.8449 0.0296 0.8085 0.0559 0.9171 ...
#>  $ cnn_43  : num  0.1781 0.0314 0.2015 0.5291 0.3774 ...
#>  $ cnn_8   : num  0.25347 0.65845 0.40168 0.76503 0.00696 ...
#>  $ cnn_10  : num  0.196 0.705 0.113 0.117 0.106 ...
#>  $ nlp_4   : logi  TRUE TRUE TRUE TRUE TRUE TRUE ...
#>  $ nlp_8   : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
#>  $ nlp_10  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
#>  $ attg    : logi  FALSE FALSE TRUE TRUE TRUE TRUE ...
#>  $ javin   : logi  FALSE FALSE FALSE TRUE TRUE TRUE ...
#>  $ eko     : logi  TRUE FALSE TRUE TRUE TRUE TRUE ...
#>  $ mike    : logi  TRUE FALSE FALSE TRUE TRUE TRUE ...
#>  $ priority: logi  FALSE FALSE FALSE FALSE FALSE FALSE ...

Craft S4 Classifier lists

# Column whose values are passed as `Y` to every classifier constructor below
kGOLD_STD <- "attg"

# Build one ClassifierPoint per logical column, leaving out the kGOLD_STD
# column itself; the intermediates stay inside local() so only cPs is created.
cPs <- local({
    lgl_cols <- keep(test_df, is.logical)
    graders <- lgl_cols[names(lgl_cols) %ni% kGOLD_STD]
    map(graders, function(calls) {
        ClassifierPoint(Y_ = calls, Y = test_df[[kGOLD_STD]], id = test_df[["id"]])
    })
})
# Build one ClassifierCurve per numeric column
cCs <- local({
    score_cols <- keep(test_df, is.numeric)
    map(score_cols, function(scores) {
        ClassifierCurve(pY = scores, Y = test_df[[kGOLD_STD]], id = test_df[["id"]])
    })
})

Tabulate

glance() Binary Classifier Performance metrics

# Row-bind the glance summaries of the binary classifiers into one table.
# The list names become row names, which are then moved into a `grader`
# column; rows are ranked by descending accuracy and printed to 2 digits.
# do.call() is the base-R equivalent of purrr::lift_dl(), which is deprecated
# as of purrr 1.0.0.
do.call(rbind, map(cPs, glance_ClassifierPoint)) %>%
    tibble::rownames_to_column(var = "grader") %>%
    arrange(desc(acc)) %>%
    knitr::kable(digits = 2)

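| grader   |  acc | sens | spec |  ppv |  npv |  fpr |
|:---------|-----:|-----:|-----:|-----:|-----:|-----:|
| javin    | 0.90 | 0.80 | 0.94 | 0.87 | 0.91 | 0.06 |
| eko      | 0.80 | 0.82 | 0.79 | 0.64 | 0.91 | 0.21 |
| mike     | 0.79 | 0.73 | 0.82 | 0.65 | 0.87 | 0.18 |
| nlp_10   | 0.71 | 0.18 | 0.95 | 0.62 | 0.72 | 0.05 |
| nlp_8    | 0.64 | 0.29 | 0.80 | 0.39 | 0.71 | 0.20 |
| nlp_4    | 0.60 | 0.66 | 0.57 | 0.41 | 0.79 | 0.43 |
| priority | 0.55 | 0.16 | 0.73 | 0.21 | 0.66 | 0.27 |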

glance() Continuous Classifier Performance metrics

# Row-bind the glance summaries of the continuous classifiers into one table.
# The list names become row names, which are then moved into a `grader`
# column; rows are ranked by descending AUC and printed to 2 digits.
# do.call() is the base-R equivalent of purrr::lift_dl(), which is deprecated
# as of purrr 1.0.0.
do.call(rbind, map(cCs, glance_ClassifierCurve)) %>%
    tibble::rownames_to_column(var = "grader") %>%
    arrange(desc(auc)) %>%
    knitr::kable(digits = 2)

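| grader |  auc |  acc | sens | spec |  ppv |  npv |  fpr |
|:-------|-----:|-----:|-----:|-----:|-----:|-----:|-----:|
| cnn_41 | 0.66 | 0.53 | 0.79 | 0.42 | 0.38 | 0.81 | 0.58 |
| cnn_10 | 0.65 | 0.54 | 0.79 | 0.44 | 0.39 | 0.82 | 0.56 |
| cnn_8  | 0.56 | 0.41 | 0.79 | 0.23 | 0.32 | 0.71 | 0.77 |
| cnn_42 | 0.47 | 0.33 | 0.79 | 0.12 | 0.29 | 0.56 | 0.88 |
| cnn_43 | 0.38 | 0.31 | 0.79 | 0.09 | 0.28 | 0.48 | 0.91 |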

Pick a single cnn_4

# Overlay the ROC curve of every continuous classifier, one colour per cnn
ggplot(map_dfr(cCs, roc, .id = "cnn"), aes(x = x, y = y, col = cnn)) +
    geom_line() +
    AnalysisToolkit:::gg_roc_layers
# Keep cnn_41 under the name cnn_4 (appended to the end of the list), then
# drop the three cnn_4x entries
cCs[["cnn_4"]] <- cCs[["cnn_41"]]
cCs <- cCs[!names(cCs) %in% c("cnn_41", "cnn_42", "cnn_43")]

Re-rank continuous classifiers (cnns)

# Rebuild the continuous-classifier table from the reduced cCs list.
# The list names become row names, which are then moved into the default
# `rowname` column; rows are ranked by descending AUC and printed to 2 digits.
# do.call() is the base-R equivalent of purrr::lift_dl(), which is deprecated
# as of purrr 1.0.0.
do.call(rbind, map(cCs, glance_ClassifierCurve)) %>%
    tibble::rownames_to_column() %>%
    arrange(desc(auc)) %>%
    knitr::kable(digits = 2)

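| rowname |  auc |  acc | sens | spec |  ppv |  npv |  fpr |
|:--------|-----:|-----:|-----:|-----:|-----:|-----:|-----:|
| cnn_4   | 0.66 | 0.53 | 0.79 | 0.42 | 0.38 | 0.81 | 0.58 |
| cnn_10  | 0.65 | 0.54 | 0.79 | 0.44 | 0.39 | 0.82 | 0.56 |
| cnn_8   | 0.56 | 0.41 | 0.79 | 0.23 | 0.32 | 0.71 | 0.77 |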

Save Classifier Lists As Package Data

# Bundle both classifier lists into one named list and print its structure
classifiers <- list(classifier_points = cPs, classifier_curves = cCs)
str(classifiers)
#> List of 2
#>  $ classifier_points:List of 7
#>   ..$ nlp_4   :Formal class 'ClassifierPoint' [package "AnalysisToolkit"]..
#>   .. .. ..@ Y_: logi [1:180] TRUE TRUE TRUE TRUE TRUE TRUE ...
#>   .. .. ..@ Y : logi [1:180] FALSE FALSE TRUE TRUE TRUE TRUE ...
#>   .. .. ..@ id: chr [1:180] "7862738" "7863668" "7872384" "7873551" ...
#>   ..$ nlp_8   :Formal class 'ClassifierPoint' [package "AnalysisToolkit"]..
#>   .. .. ..@ Y_: logi [1:180] FALSE FALSE FALSE FALSE FALSE FALSE ...
#>   .. .. ..@ Y : logi [1:180] FALSE FALSE TRUE TRUE TRUE TRUE ...
#>   .. .. ..@ id: chr [1:180] "7862738" "7863668" "7872384" "7873551" ...
#>   ..$ nlp_10  :Formal class 'ClassifierPoint' [package "AnalysisToolkit"]..
#>   .. .. ..@ Y_: logi [1:180] FALSE FALSE FALSE FALSE FALSE FALSE ...
#>   .. .. ..@ Y : logi [1:180] FALSE FALSE TRUE TRUE TRUE TRUE ...
#>   .. .. ..@ id: chr [1:180] "7862738" "7863668" "7872384" "7873551" ...
#>   ..$ javin   :Formal class 'ClassifierPoint' [package "AnalysisToolkit"]..
#>   .. .. ..@ Y_: logi [1:180] FALSE FALSE FALSE TRUE TRUE TRUE ...
#>   .. .. ..@ Y : logi [1:180] FALSE FALSE TRUE TRUE TRUE TRUE ...
#>   .. .. ..@ id: chr [1:180] "7862738" "7863668" "7872384" "7873551" ...
#>   ..$ eko     :Formal class 'ClassifierPoint' [package "AnalysisToolkit"]..
#>   .. .. ..@ Y_: logi [1:180] TRUE FALSE TRUE TRUE TRUE TRUE ...
#>   .. .. ..@ Y : logi [1:180] FALSE FALSE TRUE TRUE TRUE TRUE ...
#>   .. .. ..@ id: chr [1:180] "7862738" "7863668" "7872384" "7873551" ...
#>   ..$ mike    :Formal class 'ClassifierPoint' [package "AnalysisToolkit"]..
#>   .. .. ..@ Y_: logi [1:180] TRUE FALSE FALSE TRUE TRUE TRUE ...
#>   .. .. ..@ Y : logi [1:180] FALSE FALSE TRUE TRUE TRUE TRUE ...
#>   .. .. ..@ id: chr [1:180] "7862738" "7863668" "7872384" "7873551" ...
#>   ..$ priority:Formal class 'ClassifierPoint' [package "AnalysisToolkit"]..
#>   .. .. ..@ Y_: logi [1:180] FALSE FALSE FALSE FALSE FALSE FALSE ...
#>   .. .. ..@ Y : logi [1:180] FALSE FALSE TRUE TRUE TRUE TRUE ...
#>   .. .. ..@ id: chr [1:180] "7862738" "7863668" "7872384" "7873551" ...
#>  $ classifier_curves:List of 3
#>   ..$ cnn_8 :Formal class 'ClassifierCurve' [package "AnalysisToolkit"] w..
#>   .. .. ..@ pY: num [1:180] 0.25347 0.65845 0.40168 0.76503 0.00696 ...
#>   .. .. ..@ Y : logi [1:180] FALSE FALSE TRUE TRUE TRUE TRUE ...
#>   .. .. ..@ id: chr [1:180] "7862738" "7863668" "7872384" "7873551" ...
#>   ..$ cnn_10:Formal class 'ClassifierCurve' [package "AnalysisToolkit"] w..
#>   .. .. ..@ pY: num [1:180] 0.196 0.705 0.113 0.117 0.106 ...
#>   .. .. ..@ Y : logi [1:180] FALSE FALSE TRUE TRUE TRUE TRUE ...
#>   .. .. ..@ id: chr [1:180] "7862738" "7863668" "7872384" "7873551" ...
#>   ..$ cnn_4 :Formal class 'ClassifierCurve' [package "AnalysisToolkit"] w..
#>   .. .. ..@ pY: num [1:180] 0.992 1 1 1 1 ...
#>   .. .. ..@ Y : logi [1:180] FALSE FALSE TRUE TRUE TRUE TRUE ...
#>   .. .. ..@ id: chr [1:180] "7862738" "7863668" "7872384" "7873551" ...
# usethis::use_data(classifiers)  # formerly devtools::use_data(), which has moved to usethis

Sys Info

# Record the operating system, machine and user details of the session that produced this page
Sys.info()
#>         sysname         release         version        nodename 
#>       "Windows"        "10 x64"   "build 16299" "MARCUSDESKTOP" 
#>         machine           login            user  effective_user 
#>        "x86-64"   "Marcus User"   "Marcus User"   "Marcus User"
# List the attached packages and environments in search-path order
search()
#>  [1] ".GlobalEnv"              "package:bindrcpp"       
#>  [3] "package:pander"          "package:knitr"          
#>  [5] "package:vizR"            "package:cowplot"        
#>  [7] "package:AnalysisToolkit" "package:broom"          
#>  [9] "package:ProjUtils"       "package:MyUtils"        
#> [11] "package:forcats"         "package:purrr"          
#> [13] "package:shiny"           "package:magrittr"       
#> [15] "package:rebus"           "package:glue"           
#> [17] "package:stringr"         "package:tidyr"          
#> [19] "package:dplyr"           "package:plyr"           
#> [21] "package:readr"           "package:beepr"          
#> [23] "package:ggplot2"         "package:stats"          
#> [25] "package:graphics"        "package:grDevices"      
#> [27] "package:utils"           "package:datasets"       
#> [29] "package:testthat"        "package:assertive"      
#> [31] "package:assertthat"      "package:methods"        
#> [33] "Autoloads"               "package:base"


mbadge/AnalysisToolkitR documentation built on May 27, 2019, 1:08 p.m.