Applying the Predictive Power Score (PPS) to the Titanic dataset

# Default knitr chunk options for every code chunk in this document.
knitr::opts_chunk$set(
  collapse = TRUE,      # show source and its output in a single block
  comment = "#>",       # prefix put in front of printed output lines
  fig.ext = "png",      # file extension used for generated figures
  fig.align = "center"  # centre figures in the rendered document
)
library(tidyverse)
library(corrplot)

library(ppscore)
# Load the `titanic_train` data set into the workspace.
# NOTE(review): no package is named here, so the data set must come from one of
# the attached packages (presumably ppscore) -- confirm it is actually shipped.
data("titanic_train")
#' Draw a heatmap of a Predictive Power Score (PPS) matrix
#'
#' Reshapes the matrix to long format (one row per cell) and draws every cell
#' as a coloured tile labelled with its score, rounded to two decimals.
#'
#' @param pps_mat A PPS matrix, or any object that `as.matrix()` can turn into
#'   one. Rows go on the y axis (labelled "target") and columns on the x axis
#'   (labelled "feature").
#'   NOTE(review): assumes every cell is a numeric score in [0, 1] and that
#'   rows are targets / columns are features -- confirm against the object
#'   returned by `ppscore::matrix()`.
#' @return A ggplot object; print it to render the heatmap.
pps_heatmap <- function(pps_mat) {
  # Base-R reshape (as.table + as.data.frame) gives one row per matrix cell,
  # so no additional reshaping package has to be loaded.
  long_pps <- as.data.frame(
    as.table(as.matrix(pps_mat)),
    responseName = "value",
    stringsAsFactors = TRUE
  )
  # Fix the column names explicitly: named dimnames would otherwise leak into
  # the data frame and break the aesthetics below.
  names(long_pps) <- c("Var1", "Var2", "value")

  ggplot(long_pps, aes(Var2, Var1, fill = value)) +
    geom_tile(color = "white") +
    # PPS is bounded by 0 and 1, so a sequential white-to-red scale is used
    # instead of a diverging -1..1 correlation scale.
    scale_fill_gradient(
      low = "white", high = "red",
      limits = c(0, 1), name = "PPS Score"
    ) +
    geom_text(aes(label = round(value, 2)), color = "black", size = 4) +
    labs(x = "feature", y = "target") +
    theme_minimal() +
    theme(
      axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1),
      panel.grid.major = element_blank(),
      panel.border = element_blank(),
      panel.background = element_blank(),
      axis.ticks = element_blank()
    ) +
    # Square tiles.
    coord_fixed()
}
#' Plot a correlation matrix as a colour-coded upper triangle
#'
#' @param cor_mat Correlation matrix handed to `corrplot()` as its first
#'   argument.
#' @return The value produced by `corrplot()`, returned visibly.
corr_heatmap <- function(cor_mat) {
  # Five-stop palette running from red tones through white to blue tones.
  palette_fn <- colorRampPalette(
    c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA")
  )

  drawn <- corrplot(
    cor_mat,
    method = "color",
    col = palette_fn(200),
    diag = FALSE,            # leave the principal diagonal empty
    type = "upper",          # draw the upper triangle only
    order = "hclust",        # order variables by hierarchical clustering
    title = "Correlation Matrix",
    addCoef.col = "black",   # print each coefficient in black
    # NOTE(review): `insig` concerns insignificant coefficients; no p-value
    # matrix is passed in this call, so confirm it has any effect here.
    insig = "blank",
    # Top margin of 1; presumably keeps the title from being clipped, see
    # http://stackoverflow.com/a/14754408/54964
    mar = c(0, 0, 1, 0)
  )

  drawn
}

Preparation of the Titanic dataset

# Keep only the columns used in this walkthrough and give some of them more
# descriptive names.
# rename() takes `new_name = old_name`, and its result has to be assigned back;
# otherwise the renamed data frame is only printed and then discarded.
titanic_train <- titanic_train %>%
  select(Survived, Pclass, Sex, Age, Ticket, Fare, Embarked) %>%
  rename(
    Class = Pclass,
    TicketID = Ticket,
    TicketPrice = Fare,
    Port = Embarked
  )

Single Predictive Power Score

# Compute a single score for the column pair "Sex" / "Survived".
# NOTE(review): presumably the second argument is the feature and the third the
# target -- confirm against the signature of ppscore::score().
ppscore::score(titanic_train, "Sex", "Survived")

PPS Matrix

# Compute the PPS matrix for the data set, print it, and plot it as a heatmap.
# The result is stored as `pps_matrix` (not `matrix`) so that it does not
# shadow base::matrix() in the global environment.
pps_matrix <- ppscore::matrix(titanic_train)
pps_matrix
pps_heatmap(pps_matrix)

Correlation matrix

# cor() only accepts numeric input, so restrict the data frame to its numeric
# columns before computing the correlation matrix.
numeric_cols <- vapply(titanic_train, is.numeric, logical(1))

# Pairwise-complete observations keep missing values in any one column from
# turning the affected correlations into NA (which would also break the
# hierarchical-clustering ordering used by corr_heatmap()).
cor_mat <- cor(titanic_train[numeric_cols], use = "pairwise.complete.obs")

corr_heatmap(round(cor_mat, 2))


8080labs/ppscoreR documentation built on June 13, 2020, 1:26 a.m.