Nothing
MIz.test <- function(feature, outcome) {
# 'feature' is the vector of X
# 'outcome' is the vector of Y
# 'k1' and 'k2' are the corresponding number of categories
# X and Y must have the same sample size
# Considering NA in the feature
XYtable <- table(feature, outcome)
n <- sum(XYtable)
k1 = length(table(feature)) # feature categories
k2 = length(table(outcome)) # outcome categories
test.stat = 2 * n * MI.z(XYtable) + (k1 - 1) * (k2 - 1)
p.value = pchisq(test.stat, (k1 - 1) * (k2 - 1), lower.tail = F)
return(p.value)
}
g_function <- function(x1, x2, x3) {
# Return a 3*1 matrix
vec = c(1 / x2, -(x1 - x3) / x2 ^ 2, -1 / x2)
return(as.matrix(vec, nrow = 3, ncol = 1))
}
E.CI.smi <- function(XYtable, alpha) {
# Input the frequency table between the feature and the outcome
K1 = nrow(XYtable)
K2 = ncol(XYtable)
K = K1 * K2
n = sum(XYtable)
# Make sure XYtable[K1,K2] is not zero. If zero, switch.
if (XYtable[K1, K2] == 0) {
nonZeroIndex = which(XYtable != 0, arr.ind = T)
i = nonZeroIndex[1, 1]
j = nonZeroIndex[1, 2]
tmp = XYtable[i, ]
XYtable[i, ] = XYtable[K1, ]
XYtable[K1, ] = tmp
tmp = row.names(XYtable)[i]
row.names(XYtable)[i] = row.names(XYtable)[K1]
row.names(XYtable)[K1] = tmp
tmp = XYtable[, j]
XYtable[, j] = XYtable[, K2]
XYtable[, K2] = tmp
tmp = colnames(XYtable)[j]
colnames(XYtable)[j] = colnames(XYtable)[K2]
colnames(XYtable)[K2] = tmp
}
Xmargin = rowSums(XYtable)
Ymargin = colSums(XYtable)
# First row of matrix A
pK1. = Xmargin[K1] / sum(Xmargin)
derX = vector()
for (i in 1:(K1 - 1)) {
pi. = Xmargin[i] / sum(Xmargin)
derX[i] = log(pK1.) - log(pi.)
}
A1 = vector()
for (i in 1:(K1 - 1)) {
for (j in 1:K2) {
A1 = c(A1, derX[i])
}
}
for (i in ((K1 - 1) * K2 + 1):(K - 1)) {
A1[i] = 0
}
# Second row of matrix A
p.K2 = Ymargin[K2] / sum(Ymargin)
derY = vector()
for (i in 1:(K2 - 1)) {
p.j = Ymargin[i] / sum(Ymargin)
derY[i] = log(p.K2) - log(p.j)
}
A2 = vector()
for (i in 1:K1) {
for (j in 1:K2) {
if (j == K2) {
A2 = c(A2, 0)
} else{
A2 = c(A2, derY[j])
}
}
}
A2 = A2[-length(A2)]
# Third row of matrix A
pK = XYtable[K1, K2] / n
A3 = vector()
for (i in 1:K1) {
for (j in 1:K2) {
pk = XYtable[i, j] / n
if (pk == 0) {
A3 = c(A3, 0)
} else{
A3 = c(A3, log(pK) - log(pk))
}
}
}
A3 = A3[-length(A3)]
A = as.matrix(rbind(A1, A2, A3))
# The Sigma matrix
p = c(t(XYtable)) / n
Sigma = matrix(
data = NA,
nrow = K - 1,
ncol = K - 1,
byrow = TRUE
)
for (i in 1:(K - 1)) {
for (j in 1:(K - 1)) {
if (i == j) {
Sigma[i, i] = p[i] * (1 - p[i])
} else{
Sigma[i, j] = -p[i] * p[j]
}
}
}
# Sigma hat
HXhat = entropy.plugin(Xmargin)
HYhat = entropy.plugin(Ymargin)
HXYhat = entropy.plugin(XYtable)
g = g_function(HXhat, HYhat, HXYhat)
sigmaHat = sqrt(t(g) %*% A %*% Sigma %*% t(A) %*% g)
# E in CI
Zstat = qnorm(alpha / 2, lower.tail = FALSE) # 95% confidence level
E = Zstat * (sigmaHat / sqrt(n))
return(E)
}
ftable2xytable <- function(ftable) {
# Convert an ftable to a regular XYtable where X are joint and Y is the outcome
return(ftable[rowSums(ftable) != 0, ])
}
Kappas <- function(data, idxFeatures) {
# Input data, index(es) of feature(s); outcome must be in the last column
# ftable automatically handles NA values
tmpIndexFnO = c(idxFeatures, ncol(data)) # Index of features and outcome
ftable = ftable(table(data[tmpIndexFnO]))
Xmargin <- rowSums(ftable)
Ymargin <- colSums(ftable)
n <- sum(ftable)
kappa.z = MI.z(ftable) / Entropy.z(Ymargin) # Same as smi.z
kappaStar = kappa.z * (1 - sum(Xmargin == 1L) / n)
return(list(kappa.z = kappa.z, kappa.star = kappaStar))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.