Nothing
jumpoints <-function(y, x, k=min(30,round(length(y)/10)), output="2",
psi=NULL, round=TRUE, control = fit.control(), selection=sel.control(), ...) {
#jump-point models
#y: the response; x the explanatory (if missing index integers are assumed)
#psi: the starting values for the breakpoints. If NULL k quantiles are used.
#k: the number of breakpoints to be estimated. This argument is ignored when psi is specified
#output: "1" restituisce tutti i breakpoint individuati. fit.control() puo essere usato per modificare
# it.max. Aumentandolo puo ridurre il n. di "putative" (candidate) psi e migliorare la performance
# della selezione dei psi attraverso un qualche criterio.
#output: "2" applica il criterio in selection per selezionare i jumpoints "significativi"
#output: "3" ri-applica l'algoritmo segmented assumendo come punti di partenza quelli selezionati; in genere
# se ci sono psi spuri questi possono essere ulteriormente eliminati
#------------
#--- seg.lm.fit0
#funzioni interne
seg.lm.fit0<-NULL
seg.lm.fit0<- function(y, Z, PSI, control, round=FALSE, ...){
#Questa e una versione semplificata di seg.lm.fit() per evitare calcoli inutili
#y: la risposta
#Z: matrice di variabile segmented
#control: lista che controlla il processo di stima
#round: approssimare le soluzioni del punto di svolta?
#----------------
it.max <- old.it.max <- control$it.max
toll <- control$toll
visual <- control$visual
last <- control$last
stop.if.error<-control$stop.if.error
h <- min(abs(control$h), 1)
if (h < 1)
it.max <- it.max + round(it.max/2)
it <- 1
epsilon<-10
k <- ncol(PSI)
psi.values <- NULL
H <- 1
psi<-PSI[1,]
#NB Poiche Z contiene ripetizioni della stessa variabile e sufficiente prendere Z[,1]
#if (intercept) XREG <- cbind(1,Z[,1],Xlinear) else XREG <- cbind(Z[,1],Xlinear)
#se AR
# n<-length(y)
# XREG<-cbind(c(y[1],y[1:(n-1)]),Z[,1])
#
XREG<-cbind(Z[,1])
#obj sotto serve solo per la stampare la dev
#obj<-lm.fit(y=y, x=XREG)
obj<-list(residuals=rep(10,3))
while (abs(epsilon) > toll) {
U <- pmax((Z - PSI), 0)
V <- ifelse((Z > PSI), -1, 0)
X<-cbind(XREG, U, V)
dev.old <- sum(obj$residuals^2)
rownames(X) <- NULL
if (ncol(V) == 1) {
#colnames(X)[ (ncol(XREG) + 1):ncol(X)] <- c("U", "V")
colnames(X)[ (ncol(XREG) ):ncol(X)] <- c("firstSlope","U", "V")
} else {
colnames(X)<-rev(c(paste("V", ncol(V):1, sep = ""),
paste("U",ncol(U):1, sep = ""),rep("firstSlope",ncol(X)-ncol(U)-ncol(V))))
}
obj <- lm.fit(x = X, y = y) #drop(solve(crossprod(X),crossprod(X,y)))
dev.new <- sum(obj$residuals^2)
if (visual) {
if (it == 1) cat(0, " ", formatC(dev.old, 3, format = "f"),"", "(No breakpoint(s))", "\n")
spp <- if (it < 10) "" else NULL
cat(it, spp, "", formatC(dev.new, 3, format = "f"), "---",ncol(V),"breakpoints","\n")
}
epsilon <- (dev.new - dev.old)/dev.old
obj$epsilon <- epsilon
it <- it + 1
obj$it <- it
class(obj) <- c("segmented", class(obj))
if (k == 1) {
beta.c <- coef(obj)["U"]
gamma.c <- coef(obj)["V"]
} else {
#se ci sono contrasti i beta.c quali sono?
beta.c <- coef(obj)[paste("U", 1:ncol(U), sep = "")]
gamma.c <- coef(obj)[paste("V", 1:ncol(V), sep = "")]
}
if (it > it.max) break
psi.values[[length(psi.values) + 1]] <- psi.old <- psi
if (it >= old.it.max && h < 1) H <- h
psi <- round(psi.old + H * gamma.c/beta.c,0)
PSI <- matrix(rep(psi, rep(nrow(Z), ncol(Z))), ncol = ncol(Z))
#check if psi is admissible..
a <- apply((Z <= PSI), 2, all)
b <- apply((Z >= PSI), 2, all)
if(stop.if.error) {
if(sum(a + b) != 0 || is.na(sum(a + b))) stop("(Some) estimated psi out of its range")
} else {
id.psi.ok<-!is.na((a+b)<=0)&(a+b)<=0
Z <- Z[,id.psi.ok,drop=FALSE]
psi <- psi[id.psi.ok]
PSI <- PSI[,id.psi.ok,drop=FALSE]
#id.psi.ok<-!a|b #indici di psi validi
# Z <- Z[,!is.na(id.psi.ok),drop=FALSE]
# psi <- psi[!is.na(id.psi.ok)]
# PSI <- PSI[,!is.na(id.psi.ok),drop=FALSE]
}
if(ncol(PSI)<=0) {
warning("No breakpoint estimated", call. = FALSE)
obj<-lm.fit(x = XREG, y = y)
obj$fitted.values<-rep(obj$coef,length(y))
obj$est.means<-obj$coef
return(obj)
}
} # end while
if(round) {
psi<-round(psi,0)
PSI <- matrix(rep(psi, rep(nrow(Z), ncol(Z))), ncol = ncol(Z))
V <- ifelse((Z > PSI), -1, 0)
#V serve per i fitted...si puo evitare di crearla?
}
#obj$psi <- if(round) round(sort(psi),2) else sort(psi)
obj$psi <- sort(psi)
obj$beta.c <- beta.c[order(psi)]
obj$gamma.c <- gamma.c[order(psi)]
obj$epsilon <- epsilon
obj$V<- V[,order(psi)]
#un'ultima verifica..
obj$psi<-obj$psi[!is.na(obj$beta.c)]
obj$V<-as.matrix(as.matrix(obj$V)[,!is.na(obj$beta.c)])
obj$beta.c<-obj$beta.c[!is.na(obj$beta.c)]
return(obj)
}
#
#-- pen.MDL
pen.MDL<-function(id,n){
#restituisce un vettore (di dim=length(id)) che rappresenta la penalita 2*sum\log n_j per
#ogni partizione (active set)
#length(id) e il num (max) di breakpoints ed n e' il vettore delle numerosita della
#partizione.
do.m<-function(id,n.col){
blockdiag <- function(...) {
args <- list(...)
nc <- sapply(args,ncol)
cumnc <- cumsum(nc)
## nr <- sapply(args,nrow)
## NR <- sum(nr)
NC <- sum(nc)
rowfun <- function(m,zbefore,zafter) {
cbind(matrix(0,ncol=zbefore,nrow=nrow(m)),m,
matrix(0,ncol=zafter,nrow=nrow(m)))
}
ret <- rowfun(args[[1]],0,NC-ncol(args[[1]]))
for (i in 2:length(args)) {
ret <- rbind(ret,rowfun(args[[i]],cumnc[i-1],NC-cumnc[i]))
}
ret
} #end blockgiag
id<-sort(id) #sort(unlist(id))
if(length(id)==1) {
m<-t(rep(1,id))} else {
m<-do.call(blockdiag,lapply(c(id[1],diff(id)),function(xx)t(rep(1,xx))))
}
m<-blockdiag(m,t(rep(1,n.col-ncol(m))))
m
} #end do.m
#inizio codici veri
if(length(n)!=(length(id)+1)) stop("Errore in 'id' o 'n'")
A<-matrix(rev(id),length(id),length(id),byrow=FALSE)
A[col(A)>row(A)]<-NA
r<-rev(apply(A,2,function(x)(x[!is.na(x)])))
lista.m<-lapply(r,do.m,n.col=length(n))
#sapply(lista.m,function(xx)drop(xx%*%n))
ris<-sapply(lista.m,function(xx)2*sum(log(drop(xx%*%n))))
ris<-c(2*sum(log(n)),ris)
ris
}
#-----------
#------------
n <- length(y)
# if(any(is.na(y))) {
# id<-complete.cases(y,x)
# x<-x[id]
# y<-y[id]
# }
if(missing(x)) {
x<-1:n
Y<-cumsum(y)
miss.x<-TRUE
} else {
miss.x<-FALSE
if(length(x)!=n) stop("Lengths of x and y differ")
y<-y[order(x)]
x<-sort(x)
diffx <- c(x[1],diff(x))
# DD <- matrix(diffx, ncol=n, nrow=n, byrow=TRUE)
# DD[col(DD)>row(DD)]<-0 # matrice di trasformazione di y in Y
# Y <- drop(DD%*%y)
Y<-cumsum(y*diffx)
}
rangeX<-range(x)
if(is.null(psi)) psi<-quantile(x, prob= seq(0,1,l=k+2)[-c(1,k+2)], names=FALSE)
k<-length(psi)
Z <- matrix(rep(x, k), nrow = n)
PSI <- matrix(rep(psi, rep(n, k)), ncol = k)
it.max <- old.it.max <- control$it.max
if (it.max == 0) U <- pmax((Z - PSI), 0)
Xlin<- NULL
obj<-seg.lm.fit0(y=Y, Z=Z, PSI=PSI, control=control, round=round, ...)
obj$y<-y
if(!is.null(obj$psi)){
obj$fitted.values<-drop(abs(obj$V)%*%obj$beta.c)
if("firstSlope"%in%names(coef(obj))) obj$fitted.values<- obj$fitted.values + obj$coef["firstSlope"]
obj$id.group<- -rowSums(obj$V)
}
obj$est.means<-cumsum(c(obj$coef["firstSlope"], obj$beta.c))
obj$n.psi<-length(obj$psi)
##---------------------------------------
##--- primo output
if(output=="1") {
class(obj)<-"aCGHsegmented"
obj$rangeX<-rangeX
return(obj)
}
#se vuoi selezionare anche i psi significativi...
psi0<-obj$psi
est.means0<-obj$est.means
display1<-selection$display
edf.psi<-selection$edf.psi
type<-selection$type
##nome<-deparse(substitute(type))
#plot.it<-selection$plot.it
Cn<-eval(parse(text=selection$Cn))
S<-selection$S
tipoAlg<-selection$alg
#require(lars)
if(is.null(obj$psi)) stop("No estimated breakpoint in obj")
#-------------------
#pesi2<-1/abs(diff(c(obj$psi,n))*obj$beta.c)
#pesi3<-1/sqrt(diff(c(obj$psi,n))^2+obj$beta.c^2)
#s1<-scale(diff(c(obj$psi,n)))
#b1<-scale(obj$beta.c)
#pesi3<-1/sqrt(b1^2+(s1/2)^2)
#olars<-lars(t(pesi3*t(abs(obj$V))), y=y, type="lasso", normalize=FALSE, intercept=TRUE, trace=display1)
#
##se bic mdl non considerare penaliz.:
#if(type!="rss") tipoAlg<-"stepwise"
olars<-lars(abs(obj$V), y=y, type=tipoAlg, normalize=FALSE, intercept=TRUE, trace=display1)
#type="stepwise" ,"lasso"
id.var.entry<-(1:ncol(obj$V))[order(olars$entry)]
edf<- if(edf.psi) (olars$df-1)*2+1 else olars$df
RSS<-olars$RSS
#######ottieni RSS unpenalized
# if(RSS.unp){
# RSS<-vector(length=length(RSS))
# RSS[1]<-olars$RSS[1]
# for(i in 1:(length(RSS)-1)){
# X.ok<- cbind(1,obj$V[,id.var.entry[1:i]])
# RSS[i+1]<-sum(lm.fit(y=y,x=X.ok)$residuals^2)
# }
# }
#######################################################
max.rss<-function(RSS){
var.mod<-(RSS/n)
ll<- -(log(2*pi*var.mod)+1)*n/2
new.r<-((ll[length(ll)]-ll[-1])/(ll[length(ll)]-ll[2]))*(length(ll)-1)+1
diff2<-diff(new.r,diff=2)>S
if(!any(diff2)) return(0)
maxll=max(which(diff2))+1
return(maxll)
}
min.r<-switch(type,
bic = which.min(log(RSS/n)+log(n)*edf*Cn/n),
mdl = which.min(n*log(RSS/(n-edf))+Cn*pen.MDL(id.var.entry,as.numeric(table(-rowSums(obj$V))))),
rss = max.rss(RSS)
)
crit<-switch(type,
bic = (log(RSS/n)+log(n)*edf*Cn/n),
mdl = (n*log(RSS/(n-edf))+Cn*pen.MDL(id.var.entry,as.numeric(table(-rowSums(obj$V))))),
rss = (RSS)
)
# rss = {
# diff.rss<-diff(RSS,diff=2)>.75
# if(any(diff.rss)) max(which(diff.rss)) else 0 #olars$RSS
# }
#new.r<-((RSS[length(RSS)]-RSS[-1])/(RSS[length(RSS)]-rss[2]))*(length(RSS)-1)+1
#max(which(c(10,diff(new.r,diff=2))>.75))
id<-sort(c(0,id.var.entry)[1:min.r])
if(length(id)<=1) {
o<-list(y=y,est.means=mean(y),id.var.entry=id.var.entry,n.psi=0,criterion=crit)
o$rangeX<-rangeX
if(!miss.x) o$x<-x
class(o)<-"aCGHsegmented"
return(o)
}
id<-id[-1] #escludi l'intercetta
psi1<- obj$psi[id]
if(output=="2"){
V.ok<- cbind(1,abs(obj$V[,id]))
if(tipoAlg!="stepwise"){
hat.b<-drop(solve(crossprod(V.ok),crossprod(V.ok,y))) #better than hat.b<-lm.fit(x=V.ok,y=y)$coef
} else {
hat.b<-olars$beta[min.r,id]
hat.b<-c(olars$mu-sum(colMeans(as.matrix(V.ok[,-1]))*hat.b),hat.b)
}
fittedvalues<-drop(V.ok%*%hat.b)
#gruppi<-rowSums(V.ok[,-1])
#fittedvalues<-rep(cumsum(hat.b),tapply(gruppi,gruppi,length))
ris<-list(id.var.entry=id.var.entry,psi=psi1,n.psi=length(id), psi.order.entry=psi0[id.var.entry],
psi0=psi0,est.means0=est.means0,est.means=cumsum(hat.b),criterion=crit,
fitted.values=fittedvalues)
ris$y<-y
ris$rangeX <- rangeX
if(!miss.x) ris$x<-x
class(ris)<-"aCGHsegmented"
return(ris)
}
k<-length(psi1)
Z <- matrix(rep(x, k), nrow = n)
PSI <- matrix(rep(psi1, rep(n, k)), ncol = k)
obj<-seg.lm.fit0(y=Y, Z=Z, PSI=PSI, control=fit.control(toll = 1e-04, it.max = 10, stop.if.error=FALSE),
round=round, ...)
# obj$y<-y
if(!is.null(obj$est.means)){
obj$n.psi<-0
obj$psi0<-psi0
obj$psi1<-psi1
obj$criterion<-crit
return(obj)
}
fitted.v<-drop(abs(obj$V)%*%obj$beta.c)
if("firstSlope"%in%names(coef(obj))) fitted.v<- fitted.v + obj$coef["firstSlope"]
obj$id.group<- -rowSums(obj$V)
est.means<-cumsum(c(obj$coef["firstSlope"], obj$beta.c)) #unique(fitted.v)
ris<-list(id.var.entry=id.var.entry,psi=obj$psi,n.psi=length(obj$psi), psi.order.entry=psi0[id.var.entry],
psi0=psi0,psi1=psi1,est.means0=est.means0, est.means=est.means,criterion=crit) #type=r)
ris$fitted.values<-fitted.v
ris$y<-y
ris$rangeX<-rangeX
if(!miss.x) ris$x<-x
class(ris)<-"aCGHsegmented"
return(ris)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.