knitr::opts_chunk$set(echo = TRUE)
As mentioned in class, a sparse matrix represents a matrix by the non-zero elements. For example, a sparse matrix whose non-zero elements having values at 1, 1 and 2, 1 respectively can be represented in R as:
bsm1 <- data.frame(i = c(1, 2), j = c(1, 1), x = c(4.4, 1.2))
You can then add two of these sparse matrices with the following code:
bis620_sparse_add <- function(a, b) { c <- merge(a, b, by = c("i", "j"), all = TRUE, suffixes = c("", "2")) c$x[is.na(c$x)] <- 0 c$x2[is.na(c$x2)] <- 0 c$x <- c$x + c$x2 c[, c("i", "j", "x")] }
and then you could verify that the code is correct with:
bsm2 <- data.frame(i = c(3, 1), j = c(1, 3), x = c(4.4, 1.2)) bis620_sparse_add(bsm1, bsm2)
However, there is a lot of room for improvement with this implementation. In this homework, you may work in groups of up to 3. Please do the following:
data.frame
representation above or is a data.frame
with the above representation. (1 points)+
, -
, /
, and *
along with %*%
so that the matrix behaves like a regular R matrix. (2 points)Matrix::sparseMatrix
print method if it's helpful. (1 point)bis620
package. (2 points)library(bis620) library(Matrix)
bis620_sparse_matrix <- setClass(Class = "bis620_sparse_matrix", representation(i = "numeric", j = "numeric", x = "numeric"))
############################## `+` ############################## # `+` for bis620 sparse matrix and bis620 sparse matrix setMethod( "+", c(e1="bis620_sparse_matrix",e2="bis620_sparse_matrix"), function(e1, e2) { e1 <- data.frame(i=e1@i,j=e1@j,x=e1@x) e2 <- data.frame(i=e2@i,j=e2@j,x=e2@x) colnames(e1) <- c("i","j","x") colnames(e2) <- c("i","j","x") c <- merge(e1, e2, by = c("i", "j"), all = TRUE, suffixes = c("", "2")) c$x[is.na(c$x)] <- 0 c$x2[is.na(c$x2)] <- 0 c$x <- c$x + c$x2 a <- bis620_sparse_matrix( i = c$i, j = c$j, x = c$x) a } ) # change a dense matrix to sparse matrix dense_to_sparse <- function(dense){ i <- c() j <- c() x <- c() for (col in 1:ncol(dense)){ for (row in 1:nrow(dense)){ if(dense[row,col]!=0){ i <- c(i,row) j <- c(j,col) x <- c(x,dense[row,col]) } } } data.frame(i=i,j=j,x=x) } # `+` for bis620 sparse matrix and dense matrix setMethod( "+", c(e1="bis620_sparse_matrix",e2="matrix"), function(e1, e2) { e1 <- data.frame(i=e1@i,j=e1@j,x=e1@x) e2 <- dense_to_sparse(e2) colnames(e1) <- c("i","j","x") colnames(e2) <- c("i","j","x") c <- merge(e1, e2, by = c("i", "j"), all = TRUE, suffixes = c("", "2")) c$x[is.na(c$x)] <- 0 c$x2[is.na(c$x2)] <- 0 c$x <- c$x + c$x2 a <- bis620_sparse_matrix( i = c$i, j = c$j, x = c$x) a } ) # `+` for dgeMatrix and bis620 sparse matrix setMethod( "+", c(e1="dgeMatrix",e2="bis620_sparse_matrix"), function(e1, e2) { e1 <- dense_to_sparse(e1) e2 <- data.frame(i=e2@i,j=e2@j,x=e2@x) colnames(e1) <- c("i","j","x") colnames(e2) <- c("i","j","x") c <- merge(e1, e2, by = c("i", "j"), all = TRUE, suffixes = c("", "2")) c$x[is.na(c$x)] <- 0 c$x2[is.na(c$x2)] <- 0 c$x <- c$x + c$x2 a <- bis620_sparse_matrix( i = c$i, j = c$j, x = c$x) a } ) # `+` for bis620 sparse matrix and dgCMatrix setMethod( "+", c(e1="bis620_sparse_matrix",e2="dgCMatrix"), function(e1, e2) { e1 <- data.frame(i=e1@i,j=e1@j,x=e1@x) e2 <- dense_to_sparse(e2) colnames(e1) <- c("i","j","x") colnames(e2) <- c("i","j","x") c <- merge(e1, e2, by = c("i", "j"), all = TRUE, suffixes = c("", "2")) c$x[is.na(c$x)] <- 0 c$x2[is.na(c$x2)] <- 0 c$x <- c$x + c$x2 a <- bis620_sparse_matrix( i = c$i, j = c$j, x = c$x) a } ) ############################## `-` ############################## # `-` for bis620 sparse matrix and bis620 sparse matrix setMethod( "-", c(e1="bis620_sparse_matrix",e2="bis620_sparse_matrix"), function(e1, e2) { e1 <- data.frame(i=e1@i,j=e1@j,x=e1@x) e2 <- data.frame(i=e2@i,j=e2@j,x=e2@x) colnames(e1) <- c("i","j","x") colnames(e2) <- c("i","j","x") c <- merge(e1, e2, by = c("i", "j"), all = TRUE, suffixes = c("", "2")) c$x[is.na(c$x)] <- 0 c$x2[is.na(c$x2)] <- 0 c$x <- c$x - c$x2 a <- bis620_sparse_matrix( i = c$i, j = c$j, x = c$x) a } ) # `-` for bis620 sparse matrix and dense matrix setMethod( "-", c(e1="bis620_sparse_matrix",e2="matrix"), function(e1, e2) { e1 <- data.frame(i=e1@i,j=e1@j,x=e1@x) e2 <- dense_to_sparse(e2) colnames(e1) <- c("i","j","x") colnames(e2) <- c("i","j","x") c <- merge(e1, e2, by = c("i", "j"), all = TRUE, suffixes = c("", "2")) c$x[is.na(c$x)] <- 0 c$x2[is.na(c$x2)] <- 0 c$x <- c$x - c$x2 a <- bis620_sparse_matrix( i = c$i, j = c$j, x = c$x) a } ) # `-` for bis620 sparse matrix and dgeMatrix setMethod( "-", c(e1="bis620_sparse_matrix",e2="dgeMatrix"), function(e1, e2) { e1 <- data.frame(i=e1@i,j=e1@j,x=e1@x) e2 <- dense_to_sparse(e2) colnames(e1) <- c("i","j","x") colnames(e2) <- c("i","j","x") c <- merge(e1, e2, by = c("i", "j"), all = TRUE, suffixes = c("", "2")) c$x[is.na(c$x)] <- 0 c$x2[is.na(c$x2)] <- 0 c$x <- c$x - c$x2 a <- bis620_sparse_matrix( i = c$i, j = c$j, x = c$x) a } ) # `-` for bis620 sparse matrix and dgCMatrix setMethod( "-", c(e1="bis620_sparse_matrix",e2="dgCMatrix"), function(e1, e2) { e1 <- data.frame(i=e1@i,j=e1@j,x=e1@x) e2 <- dense_to_sparse(e2) colnames(e1) <- c("i","j","x") colnames(e2) <- c("i","j","x") c <- merge(e1, e2, by = c("i", "j"), all = TRUE, suffixes = c("", "2")) c$x[is.na(c$x)] <- 0 c$x2[is.na(c$x2)] <- 0 c$x <- c$x - c$x2 a <- bis620_sparse_matrix( i = c$i, j = c$j, x = c$x) a } ) ############################## `*` ############################## # `*` for bis620 sparse matrix and bis620 sparse matrix setMethod( "*", c(e1="bis620_sparse_matrix",e2="bis620_sparse_matrix"), function(e1, e2) { e1 <- data.frame(i=e1@i,j=e1@j,x=e1@x) e2 <- data.frame(i=e2@i,j=e2@j,x=e2@x) colnames(e1) <- c("i","j","x") colnames(e2) <- c("i","j","x") c <- merge(e1, e2, by = c("i", "j"), all = TRUE, suffixes = c("", "2")) c$x[is.na(c$x)] <- 0 c$x2[is.na(c$x2)] <- 0 c$x <- c$x * c$x2 a <- bis620_sparse_matrix( i = c$i, j = c$j, x = c$x) a } ) # `*` for bis620 sparse matrix and dense matrix setMethod( "*", c(e1="bis620_sparse_matrix",e2="matrix"), function(e1, e2) { e1 <- data.frame(i=e1@i,j=e1@j,x=e1@x) e2 <- dense_to_sparse(e2) colnames(e1) <- c("i","j","x") colnames(e2) <- c("i","j","x") c <- merge(e1, e2, by = c("i", "j"), all = TRUE, suffixes = c("", "2")) c$x[is.na(c$x)] <- 0 c$x2[is.na(c$x2)] <- 0 c$x <- c$x * c$x2 a <- bis620_sparse_matrix( i = c$i, j = c$j, x = c$x) a } ) # `*` for bis620 sparse matrix and dgeMatrix setMethod( "*", c(e1="bis620_sparse_matrix",e2="dgeMatrix"), function(e1, e2) { e1 <- data.frame(i=e1@i,j=e1@j,x=e1@x) e2 <- dense_to_sparse(e2) colnames(e1) <- c("i","j","x") colnames(e2) <- c("i","j","x") c <- merge(e1, e2, by = c("i", "j"), all = TRUE, suffixes = c("", "2")) c$x[is.na(c$x)] <- 0 c$x2[is.na(c$x2)] <- 0 c$x <- c$x * c$x2 a <- bis620_sparse_matrix( i = c$i, j = c$j, x = c$x) a } ) # `*` for bis620 sparse matrix and dgCMatrix setMethod( "*", c(e1="bis620_sparse_matrix",e2="dgCMatrix"), function(e1, e2) { e1 <- data.frame(i=e1@i,j=e1@j,x=e1@x) e2 <- dense_to_sparse(e2) colnames(e1) <- c("i","j","x") colnames(e2) <- c("i","j","x") c <- merge(e1, e2, by = c("i", "j"), all = TRUE, suffixes = c("", "2")) c$x[is.na(c$x)] <- 0 c$x2[is.na(c$x2)] <- 0 c$x <- c$x * c$x2 a <- bis620_sparse_matrix( i = c$i, j = c$j, x = c$x) a } ) ############################## `/` ############################## # `/` for bis620 sparse matrix and bis620 sparse matrix setMethod( "/", c(e1="bis620_sparse_matrix",e2="bis620_sparse_matrix"), function(e1, e2) { e1 <- data.frame(i=e1@i,j=e1@j,x=e1@x) e2 <- data.frame(i=e2@i,j=e2@j,x=e2@x) colnames(e1) <- c("i","j","x") colnames(e2) <- c("i","j","x") c <- merge(e1, e2, by = c("i", "j"), all = TRUE, suffixes = c("", "2")) c$x[is.na(c$x)] <- 0 c$x2[is.na(c$x2)] <- 0 c$x <- c$x / c$x2 a <- bis620_sparse_matrix( i = c$i, j = c$j, x = c$x) a } ) # `/` for bis620 sparse matrix and dense matrix setMethod( "/", c(e1="bis620_sparse_matrix",e2="matrix"), function(e1, e2) { e1 <- data.frame(i=e1@i,j=e1@j,x=e1@x) e2 <- dense_to_sparse(e2) colnames(e1) <- c("i","j","x") colnames(e2) <- c("i","j","x") c <- merge(e1, e2, by = c("i", "j"), all = TRUE, suffixes = c("", "2")) c$x[is.na(c$x)] <- 0 c$x2[is.na(c$x2)] <- 0 c$x <- c$x / c$x2 a <- bis620_sparse_matrix( i = c$i, j = c$j, x = c$x) a } ) # `/` for bis620 sparse matrix and dgeMatrix setMethod( "/", c(e1="bis620_sparse_matrix",e2="dgeMatrix"), function(e1, e2) { e1 <- data.frame(i=e1@i,j=e1@j,x=e1@x) e2 <- dense_to_sparse(e2) colnames(e1) <- c("i","j","x") colnames(e2) <- c("i","j","x") c <- merge(e1, e2, by = c("i", "j"), all = TRUE, suffixes = c("", "2")) c$x[is.na(c$x)] <- 0 c$x2[is.na(c$x2)] <- 0 c$x <- c$x / c$x2 a <- bis620_sparse_matrix( i = c$i, j = c$j, x = c$x) a } ) # `/` for bis620 sparse matrix and dgCMatrix setMethod( "/", c(e1="bis620_sparse_matrix",e2="dgCMatrix"), function(e1, e2) { e1 <- data.frame(i=e1@i,j=e1@j,x=e1@x) e2 <- dense_to_sparse(e2) colnames(e1) <- c("i","j","x") colnames(e2) <- c("i","j","x") c <- merge(e1, e2, by = c("i", "j"), all = TRUE, suffixes = c("", "2")) c$x[is.na(c$x)] <- 0 c$x2[is.na(c$x2)] <- 0 c$x <- c$x / c$x2 a <- bis620_sparse_matrix( i = c$i, j = c$j, x = c$x) a } ) # `%*%` for bis620 sparse matrix and bis620 sparse matrix setMethod( "%*%", c(x="bis620_sparse_matrix",y="bis620_sparse_matrix"), function(x, y) { x <- data.frame(i=x@i,j=x@j,x=x@x) y <- data.frame(i=y@i,j=y@j,x=y@x) w <- c() e <- c() r <- c() for (row in unique(x$i)){ for (col in unique(y$j)){ p <- x[x$i == row,] q <- y[y$j == col,] full <- merge(p,q,by.x="j",by.y="i") full$x <- full$x.x*full$x.y w <- c(w, row) e <- c(e, col) r <- c(r, sum(full$x)) } } a <- data.frame(i = w, j = e, x = r) a <- a[a$x != 0,] b <- bis620_sparse_matrix(i = a$i, j = a$j, x = a$x) } ) # `%*%` for bis620 sparse matrix and dense matrix setMethod( "%*%", c(x="bis620_sparse_matrix",y="matrix"), function(x, y) { x <- data.frame(i=x@i,j=x@j,x=x@x) y <- dense_to_sparse(y) w <- c() e <- c() r <- c() for (row in unique(x$i)){ for (col in unique(y$j)){ p <- x[x$i == row,] q <- y[y$j == col,] full <- merge(p,q,by.x="j",by.y="i") full$x <- full$x.x*full$x.y w <- c(w, row) e <- c(e, col) r <- c(r, sum(full$x)) } } a <- data.frame(i = w, j = e, x = r) a <- a[a$x != 0,] b <- bis620_sparse_matrix(i = a$i, j = a$j, x = a$x) } ) # `%*%` for bis620 sparse matrix and dgeMatrix setMethod( "%*%", c(x="bis620_sparse_matrix",y="dgeMatrix"), function(x, y) { x <- data.frame(i=x@i,j=x@j,x=x@x) y <- dense_to_sparse(y) w <- c() e <- c() r <- c() for (row in unique(x$i)){ for (col in unique(y$j)){ p <- x[x$i == row,] q <- y[y$j == col,] full <- merge(p,q,by.x="j",by.y="i") full$x <- full$x.x*full$x.y w <- c(w, row) e <- c(e, col) r <- c(r, sum(full$x)) } } a <- data.frame(i = w, j = e, x = r) a <- a[a$x != 0,] b <- bis620_sparse_matrix(i = a$i, j = a$j, x = a$x) } ) # `%*%` for bis620 sparse matrix and dgCMatrix setMethod( "%*%", c(x="bis620_sparse_matrix",y="dgCMatrix"), function(x, y) { x <- data.frame(i=x@i,j=x@j,x=x@x) y <- dense_to_sparse(y) w <- c() e <- c() r <- c() for (row in unique(x$i)){ for (col in unique(y$j)){ p <- x[x$i == row,] q <- y[y$j == col,] full <- merge(p,q,by.x="j",by.y="i") full$x <- full$x.x*full$x.y w <- c(w, row) e <- c(e, col) r <- c(r, sum(full$x)) } } a <- data.frame(i = w, j = e, x = r) a <- a[a$x != 0,] b <- bis620_sparse_matrix(i = a$i, j = a$j, x = a$x) } ) # `%*%` for dgCMatrix and bis620 sparse matrix setMethod( "%*%", c(x="dgCMatrix",y="bis620_sparse_matrix"), function(x, y) { x <- dense_to_sparse(x) y <- data.frame(i=y@i,j=y@j,x=y@x) w <- c() e <- c() r <- c() for (row in unique(x$i)){ for (col in unique(y$j)){ p <- x[x$i == row,] q <- y[y$j == col,] full <- merge(p,q,by.x="j",by.y="i") full$x <- full$x.x*full$x.y w <- c(w, row) e <- c(e, col) r <- c(r, sum(full$x)) } } a <- data.frame(i = w, j = e, x = r) a <- a[a$x != 0,] b <- bis620_sparse_matrix(i = a$i, j = a$j, x = a$x) } ) ############################## Transpose ############################## setMethod( "t", c(x="bis620_sparse_matrix"), function(x) { b <- bis620_sparse_matrix(i = x@j, j = x@i, x = x@x) b } )
setMethod( f="print", signature(x="bis620_sparse_matrix"), function(x) { print(sparseMatrix( i = x@i, j = x@j, x = x@x) ) } )
# Tests x1 <- bis620_sparse_matrix( i = c(1, 2, 5, 6), j = c(2, 2, 6, 1), x = c(4.3, 5.6, 7, 10) ) set.seed(1) x2 <- matrix(rnorm(36), ncol = 6) library(Matrix) set.seed(1) x3 <- Matrix(rnorm(36), ncol = 6) x4 <- sparseMatrix( i = c(1, 1, 3, 6), j = c(2, 3, 5, 1), x = c(4.3, 5.6, 7, 10), dims = c(6, 6) ) # Test that the following are correct: x1 + x1 t(x1) %*% x1 x1 %*% x1 x1 + t(x2) x1 %*% x2 x3 + x1 x3 %*% x3 x1 + x4 x1 %*% x4
Proposal: We would like to conduct a clinical analysis that includes data cleaning, survival analysis, and longitudinal analysis using data from Project Data Sphere. We might use Python and R. The title of data set we will use is "Study on Prolonging Bone Metastasis-Free Survival in Men With Hormone Refractory Prostate Cancer", and its NCT number is NCT00286091.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.