# Set the default knitr chunk option so that chunk source code is echoed in
# the rendered output.
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse) library(ggplot2) library(magrittr) library(dplyr)
# Load the raw data set, then discard the `id` and `X` columns.
data <- read.csv("~/Documents/Project/stats-computing-1/data/data.csv")
data <- dplyr::select(data, -c(id, X))
# Hold out part of the data for testing: 80% of the rows go to the training
# set and the remaining 20% to the test set.
n <- nrow(data)
train_size <- floor(0.8 * n)

# Sample the training row indices without replacement. The draw is random, so
# call set.seed() beforehand if a reproducible split is needed.
train_ind <- sample(seq_len(n), size = train_size, replace = FALSE)
train <- data[train_ind, ]

# Select the complement explicitly: negative indexing with an empty
# `train_ind` would return zero rows instead of every row.
test <- data[setdiff(seq_len(n), train_ind), ]
# Separate the response from the predictors for both partitions.
y <- train$diagnosis
y_test <- test$diagnosis
X <- train %>% select(-diagnosis) %>% as.matrix()
X_test <- test %>% select(-diagnosis) %>% as.matrix()

# Prepend a column of ones to each predictor matrix and drop the dimnames.
X <- unname(cbind(1, X))
X_test <- unname(cbind(1, X_test))

# Recode the labels as one-column numeric matrices: the first class in sorted
# order becomes 0 and the second 1 (B = 0, M = 1).
# read.csv() returns `diagnosis` as character on R >= 4.0 and as a factor on
# older versions. Going through factor() handles both, whereas assigning
# `levels<-` to a character vector and coercing to numeric yields only NA.
# The levels come from the full data set so that both partitions share the
# same coding even if one of them is missing a class.
class_levels <- levels(factor(data$diagnosis))
encode_labels <- function(labels) {
  codes <- as.integer(factor(labels, levels = class_levels)) - 1L
  matrix(as.numeric(codes))
}
y <- encode_labels(y)
y_test <- encode_labels(y_test)
Calculate the mean and standard deviation of every feature for each class
Class probabilities
# Estimate the class prior probabilities from a set of labels.
#
# Args:
#   y: class labels, either a plain vector or a one-column matrix.
#
# Returns:
#   An unnamed numeric vector holding the relative frequency of each distinct
#   label, ordered by sorted label value. Empty input gives numeric(0).
prior <- function(y) {
  # Flatten first so plain vectors and one-column matrices behave the same.
  labels <- as.vector(y)
  classes <- sort(unique(labels))
  # vapply() iterates zero times for empty input, unlike 1:length(classes).
  vapply(
    classes,
    function(cls) sum(labels == cls) / length(labels),
    numeric(1),
    USE.NAMES = FALSE
  )
}
# Print the class priors estimated from the training labels.
prior(y)
# Compute the per-class mean and standard deviation of every feature.
#
# Args:
#   X: numeric matrix with one row per observation and one column per feature.
#   y: class labels coded 0, 1, ..., k - 1, aligned with the rows of X.
#
# Returns:
#   A k x d x 2 array: [i, j, 1] is the mean and [i, j, 2] the standard
#   deviation of feature j over the observations labelled i - 1.
summaries <- function(X, y) {
  d <- dim(X)[2]
  k <- length(unique(y))
  classes <- array(1, dim = c(k, d, 2))
  for (i in seq_len(k)) {
    # drop = FALSE keeps the subset a matrix when the class has a single row
    # or X has a single column; otherwise apply() fails on the dropped vector.
    X_k <- X[which(y == (i - 1)), , drop = FALSE]
    classes[i, , 1] <- apply(X_k, 2, mean)
    classes[i, , 2] <- apply(X_k, 2, sd)
  }
  classes
}
Conditional probabilities p(x | class)
# Classify observations with a Gaussian naive Bayes rule.
#
# Args:
#   X_new: numeric matrix with one row per observation and one column per
#     feature.
#   summaries: k x d x 2 array with the per-class feature means in [, , 1] and
#     standard deviations in [, , 2].
#   priors: numeric vector with the k class prior probabilities.
#
# Returns:
#   A one-column matrix with the predicted class of each observation, coded
#   0, ..., k - 1. The predicted class maximises the log prior plus the sum of
#   the per-feature Gaussian log densities.
cond <- function(X_new, summaries, priors) {
  n_obs <- dim(X_new)[1]
  n_classes <- dim(summaries)[1]
  # Rebuild k x d matrices explicitly so that k = 1 or d = 1 does not drop
  # the array slice to a plain vector.
  mu <- matrix(summaries[, , 1], nrow = n_classes)
  sigma <- matrix(summaries[, , 2], nrow = n_classes)

  # A feature with zero spread and the same mean in every class (for example
  # a column of ones) cannot separate the classes. Worse, dnorm() with sd = 0
  # returns Inf at the mean, which makes every class score Inf and collapses
  # all predictions onto the first class. Leave such features out.
  uninformative <- vapply(
    seq_len(ncol(mu)),
    function(j) {
      isTRUE(all(sigma[, j] == 0) && max(mu[, j]) == min(mu[, j]))
    },
    logical(1)
  )
  keep <- which(!uninformative)

  # Features run along the rows here so that the length-d mean and sd vectors
  # recycle down each column, i.e. once per observation.
  features <- t(X_new[, keep, drop = FALSE])
  log_post <- matrix(0, nrow = n_obs, ncol = n_classes)
  for (k in seq_len(n_classes)) {
    log_lik <- matrix(
      dnorm(features, mu[k, keep], sigma[k, keep], log = TRUE),
      nrow = length(keep),
      ncol = n_obs
    )
    log_post[, k] <- log(priors[k]) + colSums(log_lik)
  }

  pred <- apply(log_post, 1, which.max)
  as.matrix(pred - 1)
}
# Cross-tabulate predicted against true labels into a long data frame with
# one row per (predictions, y_test) combination and its count in `Freq`.
confusion_matrix <- table(predictions, y_test) %>%
  as.data.frame()

# Draw the counts as a heatmap with each count printed inside its tile.
ggplot(data = confusion_matrix, mapping = aes(x = y_test, y = predictions)) +
  geom_tile(mapping = aes(fill = Freq)) +
  geom_text(
    mapping = aes(label = sprintf("%1.0f", Freq)),
    color = "white",
    fontface = "bold"
  ) +
  labs(x = "True class", y = "Predicted class") +
  theme_minimal()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.