Nothing
h_exact_mca <- function(data1, data2, current_rank, nchunk, disk = TRUE) {
## data1 the starting matrix; data2 the upcoming data
## is.update: states if there are one or more updates
## nchunk: number of chunks to split data2
## This is equivalent to MCA on the indicator matrix
## mjca(data,lambda="indicator")
if(disk==TRUE){
suppressWarnings(dir.create("./PCstory"))
}else{
allCoords=list()
allCoordsU=list()
allctr=list()
allctrU=list()
allcor=list()
allcorU=list()
}
data1 <- data.frame(lapply(data.frame(data1), factor))
data2 <- data.frame(lapply(data.frame(data2), factor))
mods1 = apply(data1, 2, unique)
if(is.null(dim(mods1))== FALSE){
mods1=split(t(mods1),colnames(mods1))
}
n.mods1 = sapply(mods1, length)
bigMat = rbind(data1, data2)
fn <- rep(names(bigMat), unlist(lapply(bigMat, nlevels)))
ln <- unlist(lapply(bigMat, levels))
labs <- paste(fn, ln, sep = ":")
n1 = nrow(data1)
n = nrow(bigMat)
Q = ncol(bigMat)
bigMatFac=apply(bigMat,2,factor)
bigMatTab=apply(bigMatFac,2,table)
if(is.null(dim(bigMatTab))== F){
bigMatTab=split(t(bigMatTab),colnames(bigMatTab))
}
c=bigMatTab[[1]]
for(j in 2:Q){
c=c(c,bigMatTab[[j]])
}
c = c/(n*Q)
r = rep(1/n,n)
tZ1 = transform_z(data1, is.weight = T,is.exact=T, r = r, c = c)
sZ1 = tZ1$SZ
if (missing("current_rank")) {
#current_rank = full rank
current_rank = ncol(tZ1$SZ) - Q
}
dims=current_rank
if ((length(nchunk) > 1 ) & (sum(nchunk) != nrow(data2))) {
stop("\n The overall size of given 'nchunk' blocks does not match the size of 'data2'")
}
eg1 = do_es(sZ1)
eg1$u = eg1$u[,1:current_rank]
eg1$d = eg1$d[1:current_rank]
eg1$v = eg1$v[,1:current_rank]
r1 = r[1:n1]
# PC1 = t(t(eg1$v / sqrt(c)) * as.vector((eg1$d)))
PCu1 = t(t(eg1$u / sqrt(r1)) * as.vector((eg1$d)))
PC1 = (eg1$v / sqrt(c)) %*% diag(eg1$d)
PC1.ctr=eg1$v^2
PC1.cor=(PC1^2)/apply(PC1^2,1,sum)
PCu1.ctr = t(((1/n)*t(PCu1^2))*as.vector(1/(eg1$d)^2) )
PCu1.cor = (PCu1^2)/apply(PCu1^2,1,sum)
if(disk==TRUE){
fnameA=paste("./PCstory/PCstart",1,".txt",sep="")
fnameB=paste("./PCstory/PCEnd",1,".txt",sep="")
fnameC=paste("./PCstory/PCstartUnit",1,".txt",sep="")
fnameD=paste("./PCstory/PCendUnit",1,".txt",sep="")
fnameE=paste("./PCstory/PCctr",1,".txt",sep="")
fnameF=paste("./PCstory/PCctrUnit",1,".txt",sep="")
fnameG=paste("./PCstory/PCcor",1,".txt",sep="")
fnameH=paste("./PCstory/PCcorUnit",1,".txt",sep="")
# write.table(file=fnameA, matrix(0,dim(PC1[,1:dims])[1],dims))
write.table(file=fnameA, PC1[,1:dims])
write.table(file=fnameB, PC1[,1:dims])
# write.table(file=fnameC, matrix(0,dim(PCu1[,1:dims])[1],dims))
write.table(file=fnameC, PCu1[,1:dims])
write.table(file=fnameD, PCu1[,1:dims])
write.table(file=fnameE, PC1.ctr[,1:dims])
write.table(file=fnameF, PCu1.ctr[,1:dims])
write.table(file=fnameG, PC1.cor[,1:dims])
write.table(file=fnameH, PCu1.cor[,1:dims])
}
if(disk==FALSE){
allCoords[[1]] = PC1[,c(1:dims)]
allCoordsU[[1]] = PCu1[,c(1:dims)]
allctr[[1]] = PC1.ctr[,c(1:dims)]
allctrU[[1]] = PCu1.ctr[,c(1:dims)]
allcor[[1]] = PC1.cor[,c(1:dims)]
allcorU[[1]] = PCu1.cor[,c(1:dims)]
}
out.split = mat_split(data2, (nchunk))
mat.story = out.split$splitMat
#if block sizes are given, switch back to number
if (length(nchunk) > 1) {
nchunk = length(nchunk)
}
for (q in 1:length(mat.story)) {
mat.chu = data.matrix(mat.story[[q]])
n.chu = nrow(mat.chu)
mods.up = apply(mat.chu, 2, unique)
if(is.null(dim(mods.up))== F){
mods.up=split(t(mods.up),colnames(mods.up))
}
n.mods.up = sapply(mods.up, length)
catDiff = n.mods1 - n.mods.up
if (any(catDiff) != 0) {
fake.row = fake_row_make(mods1, mods.up, n.mods1, n.mods.up)
mat.chu = rbind(mat.chu, fake.row)
tZ2 = transform_z(mat.chu, is.weight = T,is.exact=T, r = r, c = c)
sZ2 = tZ2$SZ[1:n.chu, ]
} else {
tZ2 = transform_z(mat.chu, is.weight = T, is.exact=T,r = r, c = c)
sZ2 = tZ2$SZ
}
n2 = n.chu
eg2 = do_es(sZ2)
eg12 = add_es(eg1, eg2, current_rank, method="esm")
if (q > 1) {
#######################
PCu1 = PCuall
PC1 = PCall
#######################
}
SCall = eg12$v / sqrt(c)
PCall = SCall %*% diag(eg12$d)
r2 = r[1:(n1+n2)]
SRall = eg12$u / sqrt(r2)
PCuall = t(t(eg12$u / sqrt(r2)) * as.vector((eg12$d)))
PCall = sign_match(PC1, PCall)
PCuall = sign_match(PCu1, PCuall)
PCall.ctr = eg12$v^2
PCall.cor = PCall^2 / apply(PCall^2, 1, sum)
PCuall.ctr = t(((1/n)*t(PCuall^2))*as.vector(1/(eg12$d)^2) )
PCuall.cor = (PCuall^2)/apply(PCuall^2,1,sum)
if(disk==FALSE){
allCoords[[q+1]]=PCall[,c(1:dims)]
allCoordsU[[q+1]]=PCuall[,c(1:dims)]
allctr[[q+1]] = PCall.ctr[,c(1:dims)]
allctrU[[q+1]] = PCuall.ctr[,c(1:dims)]
allcor[[q+1]] = PCall.cor[,c(1:dims)]
allcorU[[q+1]] = PCuall.cor[,c(1:dims)]
}
eg1 = eg12
n1 = n1 + n2
r1 = r2
if(disk==TRUE){
fnameA=paste("./PCstory/PCstart",q,".txt",sep="")
fnameB=paste("./PCstory/PCEnd",q+1,".txt",sep="")
fnameC=paste("./PCstory/PCstartUnit",q,".txt",sep="")
fnameD=paste("./PCstory/PCendUnit",q+1,".txt",sep="")
fnameE=paste("./PCstory/PCctr",q+1,".txt",sep="")
fnameF=paste("./PCstory/PCctrUnit",q+1,".txt",sep="")
fnameG=paste("./PCstory/PCcor",q+1,".txt",sep="")
fnameH=paste("./PCstory/PCcorUnit",q+1,".txt",sep="")
write.table(file=fnameA, PC1[,1:dims])
write.table(file=fnameB, PCall[,1:dims])
write.table(file=fnameC, PCu1[,1:dims])
write.table(file=fnameD, PCuall[,1:dims])
write.table(file=fnameE, PCall.ctr[,1:dims])
write.table(file=fnameF, PCuall.ctr[,1:dims])
write.table(file=fnameG, PCall.cor[,1:dims])
write.table(file=fnameH, PCuall.cor[,1:dims])
}
}
nd.max = ncol(tZ1$SZ)-Q
if (current_rank == nd.max) {
inertia0 <- eg12$d[1:nd.max]^2
inertia.t <- sum(inertia0)
inertia.e <- inertia0 / inertia.t
# adjusted inertia
# J = ncol(tZ1$SZ)
# inertia0 = eg12$d[1:(J-Q)]^4
# alldim <- sum(sqrt(inertia0) >= 1/Q)
# inertia.adj <- ((Q/(Q-1))^2 * (sqrt(inertia0)[1:alldim] - 1/Q)^2)
# inertia.t <- (Q/(Q-1)) * (sum(inertia0) - ((J - Q) / Q^2))
} else {
inertia0 <- eg12$d[1:current_rank]^2
inertia.t <- nd.max/Q
inertia.e <- inertia0 / inertia.t
}
out = list()
out$rowpcoord = PCuall[,c(1:dims)]
out$colpcoord = PCall[,c(1:dims)]
out$rowcoord = SRall[,c(1:dims)]
out$colcoord = SCall[,c(1:dims)]
out$sv = eg12$d[c(1:nd.max)]
out$inertia.e = inertia.e #inertia.adj / inertia.t
out$levelnames = labs
out$rowctr = PCuall.ctr[,c(1:dims)]
out$colctr = PCall.ctr[,c(1:dims)]
out$rowcor = PCuall.cor[,c(1:dims)]
out$colcor = PCall.cor[,c(1:dims)]
out$rowmass = r
out$colmass = c
# out$sZ1 = sZ1
out$nchunk = nchunk
out$disk = disk
out$ff = 0
if(disk==FALSE){
out$allrowcoord=allCoordsU
out$allcolcoord=allCoords
out$allrowctr=allctrU
out$allcolctr=allctr
out$allrowcor=allcorU
out$allcolcor=allcor
}
out
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.