# Set `echo = TRUE` as the default knitr chunk option, so that chunk source
# code is shown alongside its output in the rendered document.
knitr::opts_chunk$set(echo = TRUE)

Build the numpy matrix class:

library(reticulate)
# Attach Matrix before any method is registered: the operator methods defined
# later in this file name its dgeMatrix and dgCMatrix classes in their
# signatures, and setMethod() reports "no definition for class" when a
# signature class does not exist yet.
library(Matrix)
use_condaenv("bis620")
# convert = FALSE keeps results of numpy calls as Python objects instead of
# converting them back to R, so they can be stored in the `pymat` slot.
np <- import("numpy", convert = FALSE)
# Register the S3 class name carried by numpy arrays so that it can be used as
# the type of an S4 slot.
setOldClass("numpy.ndarray")
# S4 wrapper class holding a single numpy array in its `pymat` slot.
# The assigned value is the generator function, e.g. bis620_pymatrix(pymat = a).
bis620_pymatrix <- setClass(
  Class = "bis620_pymatrix",
  slots = c(pymat = "numpy.ndarray")
)

Overload the "+" operator for the numpy matrix class:

# `+`: bis620_pymatrix + bis620_pymatrix.
# Reads the numpy array from each operand's `pymat` slot and passes both to
# numpy's `add`. The value returned is the result of `np$add` itself; it is
# not wrapped back into a bis620_pymatrix.
setMethod(
  f = "+",
  signature = signature(e1 = "bis620_pymatrix", e2 = "bis620_pymatrix"),
  definition = function(e1, e2) {
    lhs <- e1@pymat
    rhs <- e2@pymat
    np$add(lhs, rhs)
  }
)
# `+`: bis620_pymatrix + base R matrix.
# The R matrix on the right is converted to a Python object with r_to_py()
# and added to the left operand's numpy array via numpy's `add`.
setMethod(
  f = "+",
  signature = signature(e1 = "bis620_pymatrix", e2 = "matrix"),
  definition = function(e1, e2) {
    np$add(e1@pymat, r_to_py(e2))
  }
)
# `+`: dgeMatrix + bis620_pymatrix.
# The dgeMatrix on the left is first turned into a base R matrix with
# as.matrix(), then into a Python object with r_to_py(), and finally added to
# the right operand's numpy array via numpy's `add`.
setMethod(
  f = "+",
  signature = signature(e1 = "dgeMatrix", e2 = "bis620_pymatrix"),
  definition = function(e1, e2) {
    lhs <- r_to_py(as.matrix(e1))
    np$add(lhs, e2@pymat)
  }
)
# `+`: bis620_pymatrix + dgCMatrix.
# The dgCMatrix on the right is expanded to a base R matrix with as.matrix(),
# converted to a Python object with r_to_py(), and added to the left operand's
# numpy array via numpy's `add`.
setMethod(
  f = "+",
  signature = signature(e1 = "bis620_pymatrix", e2 = "dgCMatrix"),
  definition = function(e1, e2) {
    rhs <- r_to_py(as.matrix(e2))
    np$add(e1@pymat, rhs)
  }
)

Overload the "-" operator for the numpy matrix class:

# `-`: bis620_pymatrix minus bis620_pymatrix.
# Reads the numpy array from each operand's `pymat` slot and passes both to
# numpy's `subtract` (left operand first). The value returned is the result of
# `np$subtract` itself; it is not wrapped back into a bis620_pymatrix.
setMethod(
  f = "-",
  signature = signature(e1 = "bis620_pymatrix", e2 = "bis620_pymatrix"),
  definition = function(e1, e2) {
    lhs <- e1@pymat
    rhs <- e2@pymat
    np$subtract(lhs, rhs)
  }
)
# `-`: bis620_pymatrix minus base R matrix.
# The R matrix on the right is converted to a Python object with r_to_py()
# and subtracted from the left operand's numpy array via numpy's `subtract`.
setMethod(
  f = "-",
  signature = signature(e1 = "bis620_pymatrix", e2 = "matrix"),
  definition = function(e1, e2) {
    np$subtract(e1@pymat, r_to_py(e2))
  }
)
# `-`: dgeMatrix minus bis620_pymatrix.
# The dgeMatrix on the left is first turned into a base R matrix with
# as.matrix(), then into a Python object with r_to_py(); the right operand's
# numpy array is then subtracted from it via numpy's `subtract`.
setMethod(
  f = "-",
  signature = signature(e1 = "dgeMatrix", e2 = "bis620_pymatrix"),
  definition = function(e1, e2) {
    lhs <- r_to_py(as.matrix(e1))
    np$subtract(lhs, e2@pymat)
  }
)
# `-`: bis620_pymatrix minus dgCMatrix.
# The dgCMatrix on the right is expanded to a base R matrix with as.matrix(),
# converted to a Python object with r_to_py(), and subtracted from the left
# operand's numpy array via numpy's `subtract`.
setMethod(
  f = "-",
  signature = signature(e1 = "bis620_pymatrix", e2 = "dgCMatrix"),
  definition = function(e1, e2) {
    rhs <- r_to_py(as.matrix(e2))
    np$subtract(e1@pymat, rhs)
  }
)

Overload the "*" (element-wise multiplication) operator for the numpy matrix class:

# `*`: bis620_pymatrix times bis620_pymatrix (numpy `multiply`, not `dot`).
# Reads the numpy array from each operand's `pymat` slot and passes both to
# numpy's `multiply`. The value returned is the result of `np$multiply`
# itself; it is not wrapped back into a bis620_pymatrix.
setMethod(
  f = "*",
  signature = signature(e1 = "bis620_pymatrix", e2 = "bis620_pymatrix"),
  definition = function(e1, e2) {
    lhs <- e1@pymat
    rhs <- e2@pymat
    np$multiply(lhs, rhs)
  }
)
# `*`: bis620_pymatrix times base R matrix (numpy `multiply`, not `dot`).
# The R matrix on the right is converted to a Python object with r_to_py()
# and multiplied with the left operand's numpy array via numpy's `multiply`.
setMethod(
  f = "*",
  signature = signature(e1 = "bis620_pymatrix", e2 = "matrix"),
  definition = function(e1, e2) {
    np$multiply(e1@pymat, r_to_py(e2))
  }
)
# `*`: dgeMatrix times bis620_pymatrix (numpy `multiply`, not `dot`).
# The dgeMatrix on the left is first turned into a base R matrix with
# as.matrix(), then into a Python object with r_to_py(), and multiplied with
# the right operand's numpy array via numpy's `multiply`.
setMethod(
  f = "*",
  signature = signature(e1 = "dgeMatrix", e2 = "bis620_pymatrix"),
  definition = function(e1, e2) {
    lhs <- r_to_py(as.matrix(e1))
    np$multiply(lhs, e2@pymat)
  }
)
# `*`: bis620_pymatrix times dgCMatrix (numpy `multiply`, not `dot`).
# The dgCMatrix on the right is expanded to a base R matrix with as.matrix(),
# converted to a Python object with r_to_py(), and multiplied with the left
# operand's numpy array via numpy's `multiply`.
setMethod(
  f = "*",
  signature = signature(e1 = "bis620_pymatrix", e2 = "dgCMatrix"),
  definition = function(e1, e2) {
    rhs <- r_to_py(as.matrix(e2))
    np$multiply(e1@pymat, rhs)
  }
)

Overload the "/" operator for the numpy matrix class:

# `/`: bis620_pymatrix divided by bis620_pymatrix.
# Reads the numpy array from each operand's `pymat` slot and passes both to
# numpy's `divide` (left operand is the numerator). The value returned is the
# result of `np$divide` itself; it is not wrapped back into a bis620_pymatrix.
setMethod(
  f = "/",
  signature = signature(e1 = "bis620_pymatrix", e2 = "bis620_pymatrix"),
  definition = function(e1, e2) {
    numerator <- e1@pymat
    denominator <- e2@pymat
    np$divide(numerator, denominator)
  }
)
# `/`: bis620_pymatrix divided by base R matrix.
# The R matrix on the right is converted to a Python object with r_to_py()
# and used as the divisor of the left operand's numpy array via numpy's
# `divide`.
setMethod(
  f = "/",
  signature = signature(e1 = "bis620_pymatrix", e2 = "matrix"),
  definition = function(e1, e2) {
    np$divide(e1@pymat, r_to_py(e2))
  }
)
# `/`: dgeMatrix divided by bis620_pymatrix.
# The dgeMatrix on the left is first turned into a base R matrix with
# as.matrix(), then into a Python object with r_to_py(); it is then divided by
# the right operand's numpy array via numpy's `divide`.
setMethod(
  f = "/",
  signature = signature(e1 = "dgeMatrix", e2 = "bis620_pymatrix"),
  definition = function(e1, e2) {
    numerator <- r_to_py(as.matrix(e1))
    np$divide(numerator, e2@pymat)
  }
)
# `/`: bis620_pymatrix divided by dgCMatrix.
# The dgCMatrix on the right is expanded to a base R matrix with as.matrix(),
# converted to a Python object with r_to_py(), and used as the divisor of the
# left operand's numpy array via numpy's `divide`.
setMethod(
  f = "/",
  signature = signature(e1 = "bis620_pymatrix", e2 = "dgCMatrix"),
  definition = function(e1, e2) {
    denominator <- r_to_py(as.matrix(e2))
    np$divide(e1@pymat, denominator)
  }
)

Overload the "%*%" (matrix multiplication) operator for the numpy matrix class:

# `%*%`: bis620_pymatrix multiplied (numpy `dot`) by bis620_pymatrix.
# Reads the numpy array from each operand's `pymat` slot and passes both to
# numpy's `dot` (x on the left, y on the right). The value returned is the
# result of `np$dot` itself; it is not wrapped back into a bis620_pymatrix.
setMethod(
  f = "%*%",
  signature = signature(x = "bis620_pymatrix", y = "bis620_pymatrix"),
  definition = function(x, y) {
    left <- x@pymat
    right <- y@pymat
    np$dot(left, right)
  }
)
# `%*%`: bis620_pymatrix multiplied (numpy `dot`) by base R matrix.
# The R matrix on the right is converted to a Python object with r_to_py()
# and used as the right-hand factor of numpy's `dot`.
setMethod(
  f = "%*%",
  signature = signature(x = "bis620_pymatrix", y = "matrix"),
  definition = function(x, y) {
    np$dot(x@pymat, r_to_py(y))
  }
)
# `%*%`: dgeMatrix multiplied (numpy `dot`) by bis620_pymatrix.
# The dgeMatrix on the left is first turned into a base R matrix with
# as.matrix(), then into a Python object with r_to_py(), and used as the
# left-hand factor of numpy's `dot`.
setMethod(
  f = "%*%",
  signature = signature(x = "dgeMatrix", y = "bis620_pymatrix"),
  definition = function(x, y) {
    left <- r_to_py(as.matrix(x))
    np$dot(left, y@pymat)
  }
)
# `%*%`: bis620_pymatrix multiplied (numpy `dot`) by dgCMatrix.
# The dgCMatrix on the right is expanded to a base R matrix with as.matrix(),
# converted to a Python object with r_to_py(), and used as the right-hand
# factor of numpy's `dot`.
setMethod(
  f = "%*%",
  signature = signature(x = "bis620_pymatrix", y = "dgCMatrix"),
  definition = function(x, y) {
    right <- r_to_py(as.matrix(y))
    np$dot(x@pymat, right)
  }
)

Define the transpose method for the numpy matrix class:

# `t`: transpose of a bis620_pymatrix.
# Calls the `transpose()` method of the numpy array stored in the `pymat` slot
# and wraps the result in a new bis620_pymatrix, so the return value can be
# passed straight to the other bis620_pymatrix methods.
setMethod(
  "t",
  c(x = "bis620_pymatrix"),
  function(x) {
    # Return the constructor call directly. Ending the body on an assignment
    # makes R return the value invisibly, so a bare `t(x)` at the console or
    # in a knitted chunk would print nothing.
    bis620_pymatrix(pymat = x@pymat$transpose())
  }
)

Tests for the methods defined above:

# Tests
# Matrix supplies Matrix() and sparseMatrix() for the fixtures below.
library(Matrix)

# x1: bis620_pymatrix wrapping a numpy array built from six copies of 1:6.
x1 <- new(
  "bis620_pymatrix",
  pymat = np$array(rep(list(1:6), 6))
)

# x2: base R matrix with 6 columns filled from 36 normal draws (seed 1).
set.seed(1)
x2 <- matrix(rnorm(36), ncol = 6)

# x3: the same draws (seed reset to 1) stored through Matrix::Matrix().
set.seed(1)
x3 <- Matrix(rnorm(36), ncol = 6)

# x4: 6 x 6 sparse matrix with four non-zero entries.
x4 <- sparseMatrix(
  i = c(1, 1, 3, 6),
  j = c(2, 3, 5, 1),
  x = c(4.3, 5.6, 7, 10),
  dims = c(6, 6)
)

# Test that the following are correct:
# pymatrix with pymatrix
x1 + x1
t(x1) %*% x1
x1 %*% x1
# pymatrix with base R matrix
x1 + t(x2)
x1 %*% x2
# Matrix-package object on the left
x3 + x1
x3 %*% x3
# pymatrix with sparse matrix
x1 + x4
x1 %*% x4

Final project proposal:

About 6.2 million adults in the United States have heart failure. In 2018, heart failure was mentioned on 379,800 death certificates (13.4%). Certain medical conditions can increase the risk for heart failure, including coronary artery disease, diabetes, high blood pressure, and obesity.

Our research question is therefore: how do age, a decrease of red blood cells or hemoglobin, the level of the CPK enzyme in the blood, diabetes, the percentage of blood leaving the heart at each contraction, hypertension, platelets in the blood, the level of serum creatinine in the blood, the level of serum sodium in the blood, smoking, and gender influence mortality from heart failure? Moreover, how can we predict mortality from heart failure based on these eleven variables?

We will proceed with our study by analyzing a dataset of 299 patients with heart failure. The data, which have 13 columns, were collected at the Faisalabad Institute of Cardiology and at the Allied Hospital in Faisalabad in 2015. We obtained the openly available CSV file from Kaggle. We will first conduct exploratory data analysis to gain insights into the demographic distribution of the subjects. In addition to the techniques introduced in the lectures, we will also explore other means of visualizing the data and add interactive features to our visualizations (for example, with Shiny). Furthermore, we will use various prediction models, including logistic regression, lasso, and other machine learning models, to answer the research question.

References: Chicco, D., Jurman, G. Machine learning can predict survival of patients with heart failure from serum creatinine and ejection fraction alone. BMC Med Inform Decis Mak 20, 16 (2020). https://doi.org/10.1186/s12911-020-1023-5



Violettttta/bis620 documentation built on Oct. 9, 2022, 10:28 a.m.