Nothing
clusters1D_4lm_help <- function(S, betas_with_intercept, X, y, clust.method, lam){
X <- X[, S==1, drop = FALSE]
betas_with_intercept <- betas_with_intercept[betas_with_intercept>0]
betas <- betas_with_intercept[-1]
n <- nrow(X)
nn <- sapply(1:ncol(X), function(i) class(X[,i]))
names(nn) <- colnames(X)
nn[nn == "integer"] <- "numeric"
x.full <- stats::model.matrix(y~., data = data.frame(y=y, X, check.names = TRUE))
p <- ncol(x.full)
#m <- stats::lm.fit(x.full, y)
faki <- which(nn == "factor")
n.factors <- length(faki)
if (n.factors > 0){
n.levels <- sapply(1:n.factors, function(i) length(levels(X[,faki[i]])))
p.fac <- sum(n.levels - 1)
} else{
p.fac <- 0
}
continous_columns <- which(nn == "numeric")
n.cont <- length(continous_columns)
namCont <- names(nn)[continous_columns]
#QR decompostion of the model matrix
# qX <- qr.Q(m$qr)
# rX <- qr.R(m$qr)
# Ro <- solve(rX)
# z <- t(qX)%*%y
# sigma <- as.numeric((t(m$res)%*%m$res)/(n - p))
#dissimilarity measures - matrices of squared t-statistics for each factor
if (n.factors > 0){
points <- lapply(1:n.factors, function(i) {
i1 <- ifelse(i == 1, 1, sum(n.levels[1:(i - 1)]-1) +1)
i2 <- sum(n.levels[1:i]-1)
out <- c(0,betas[i1:i2]) #appending 0 as a beta for the constrained level. Each factor has one level constrained to have beta==0
names(out) <- levels(X[,faki[i]])
return(out)
})
#cutting dendrograms
if (clust.method == "variable_selection") {
models <- lapply(points, function(x) artificial_clustering(x))
} else {
models <- lapply(points, function(x) hclust1d::hclust1d(x, method = clust.method))
}
heig <- lapply(1:n.factors, function(x){
out <- models[[x]]$he
names(out)<- rep(x, length(out))
out
})
heig <- unlist(heig)
} else {
heig <- c()
models <- list()
}
heig <- c(0,heig)
names(heig)[1] = "full"
if ((p.fac + 1) < p){ #continous columns handling
# if((p.fac + 2) == p){
# heig.add <- ((Ro[(p.fac + 2):p,]%*%z)^2)/(sigma*sum(Ro[(p.fac + 2):p,]^2))
# } else {
# heig.add <- ((Ro[(p.fac + 2):p,]%*%z)^2)/(sigma*(apply(Ro[(p.fac + 2):p, ], 1, function(y) t(y)%*%y)))
# }
heig.add <- betas_with_intercept[(p.fac + 2):p] #heights for continuous columns are just the betas (NOT squared!)
names(heig.add) <- colnames(x.full)[(p.fac + 2):p]
heig <- c(heig, heig.add)
}
heig <- sort(heig)
len <- length(heig)
#fitting models on the path
sp <- list()
form <- c()
nl <- 0
if (n.factors > 0){
for (i in 1:n.factors){
sp[[i]] <- 1:n.levels[i]
sp[[i]][sp[[i]] != 1] <- sp[[i]][sp[[i]] != 1] + nl
nl <- nl + length(unique(sp[[i]])) - 1
}
}
b <- 1:p
names(b) <- colnames(x.full)
A <- c()
form <- namCont
if (len >= 2){
for (i in 2:(len)){
a <- rep(0,p)
kt <- names(heig)[i]
if(length(intersect(kt, namCont)) > 0){
jj <- which(form == kt)
form <- form[-which(form == kt)]
jj <- which(namCont == kt)
a[p.fac + jj + 1] <- 1
} else { #this routine fails for equal heights, however it is fairly complex...
kt <- as.numeric(kt) #kt = number of a factor
spold <- sp[[kt]] #spold = clusters on the previous height for this factor
sp[[kt]] <- stats::cutree(models[[kt]], h = heig[i])
if(length(sp[[kt]][sp[[kt]] != 1]) > 0){
sp[[kt]][sp[[kt]] != 1] <- sp[[kt]][sp[[kt]] != 1] + min(spold[spold != 1]) - min(sp[[kt]][sp[[kt]] != 1])
}
for (ii in min(which(spold != sp[[kt]]))) {
suma <- ifelse(kt == 1, 0, sum(n.levels[1:(kt-1)] - 1))
if(sp[[kt]][ii] == 1){ #adding this to intercept
a[suma + ii ] <- 1
} else {
a[suma + ii] <- 1
a[suma + min(which(sp[[kt]] == sp[[kt]][ii]))] <- -1
}
if (kt < length(sp))
for( x in (kt+1):length(sp)) {
if (length(sp[[x]][sp[[x]]!=1]) > 0 )
sp[[x]][sp[[x]] != 1] <- sp[[x]][sp[[x]] !=1 ] - 1
}
nl <- nl - 1
}
}
A <- cbind(A, a)
be <- c(0, 2:(p-i+2))
bb <- c()
if(n.factors > 0){
bb <- unlist(sapply(1:length(sp), function(j) sp[[j]][-1]))
}
bb2 <- rep(1, n.cont)
names(bb2) <- namCont
if(length(form) > 0){
bb2[form] <- (nl + 2):(nl + 1 + length(form))
}
bb <- c(bb, bb2)
b=cbind(b, c(1, be[bb]))
}
}
A[1,] <- rep(0, p-1)
m <- stats::lm.fit(x.full, y) #it is in top of the related DMRnet function, in GLAMER moved here close to the end
#######################REGULARIZING THE RESULT########################################
#SzN there were cases (e.g. in Insurance dataset)
#when the original columns
#are lineary dependant
#(case not excluded even after grpreg was run for execution paths from DMRnet)
qX <- qr.Q(m$qr, complete=FALSE) #SzN: explicitly stating that we want partial results (https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/QR.Auxiliaries)
rX <- qr.R(m$qr) + diag(rep(lam, ncol(qX))) #SzN to solve the abovementioned matrix singularity we introduce the regularization of rX with a diagonal matrix
z <- t(qX)%*%y #moved here from top
S <- forwardsolve(t(rX), A)
QRs <- qr(S)
W <- qr.Q(QRs)
wyn <- (t(W)%*%z)^2
len <- nrow(wyn)
Tr <- round(lower.tri(matrix(1, len, len))) + diag(rep(1, len))
r22 <- Tr%*%wyn
RSS <- (sum(y^2) - sum(z^2))
RSS2 <- c(RSS, as.vector(RSS + r22))
return(list(b = b, rss = RSS2))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.