# R/clusterMethods.R

# Select one representative asset per correlation cluster, using a
# Sharpe-style ratio (annualised mean over annualised standard deviation)
# as the ranking score.
#
# Arguments:
#   data     Numeric matrix with one named column per asset and one row per
#            period. Values must be positive because log() is applied.
#            NOTE(review): the original inline note said "data <- returns";
#            confirm whether callers pass prices or gross returns.
#   Cno      Number of clusters handed to cutree(k = Cno).
#   lookback Number of trailing periods (beyond the last row) used for the
#            score. Defaults to 3 * 52, the window the function has always
#            used; shorter histories now fall back to all available rows
#            instead of failing on an invalid negative row index.
#
# Returns:
#   Character vector of length max(cluster id) holding the chosen asset name
#   for each cluster, in cluster-id order.
#
# Depends on cor.shrink() being in scope (presumably corpcor::cor.shrink).
Feature.Sharpe <- function(data, Cno, lookback = 3 * 52) {
  shrunk.cor <- cor.shrink(data, verbose = FALSE)

  # 1 - correlation serves as the dissimilarity; complete linkage is used.
  hc <- hclust(as.dist(1 - shrunk.cor), method = "complete")
  c.tree <- cutree(hc, k = Cno)
  ass.nam <- colnames(data)
  n.clusters <- max(c.tree)
  assets <- rep(NA_character_, n.clusters)

  # Clamp the window start at row 1 so short histories are usable.
  n.obs <- nrow(data)
  first.row <- max(1, n.obs - lookback)
  if (n.obs - first.row < 2) {
    stop("Feature.Sharpe needs at least 3 observations in the scoring window",
         call. = FALSE)
  }
  window <- data[first.row:n.obs, , drop = FALSE]

  # Cumulative product of (1 + row-to-row change in log(data)).
  data.q <- apply(diff(log(window)) + 1, 2, cumprod)
  colnames(data.q) <- ass.nam

  for (i in seq_len(n.clusters)) {
    members <- which(c.tree == i)
    if (length(members) > 1) {
      block <- data.q[, members]
      sharpe <- (apply(block, 2, mean) * 52) / (apply(block, 2, sd) * sqrt(52))
      # which.max() returns a single index: the first maximum, ignoring NA.
      # Comparing against max() with == could return several indices on ties
      # or none at all when any score was NA.
      best <- which.max(sharpe)
      if (length(best) == 0) {
        stop("Feature.Sharpe: no finite score available in cluster ", i,
             call. = FALSE)
      }
      assets[i] <- names(c.tree)[members][best]
    } else if (length(members) == 1) {
      # A singleton cluster is represented by its only member.
      assets[i] <- ass.nam[members]
    }
  }
  assets
}

# Select one representative asset per correlation cluster, scoring each
# member by the lower quartile (25% quantile) of its per-period log changes.
#
# Arguments:
#   data  Numeric matrix with one named column per asset and one row per
#         period. The columns are compounded with cumprod(), so they are
#         presumably gross returns (1 + r) -- confirm against callers.
#   Cno   Number of clusters handed to cutree(k = Cno).
#
# Returns:
#   Character vector of length max(cluster id) holding the chosen asset name
#   for each cluster, in cluster-id order.
#
# Depends on cor.shrink() being in scope (presumably corpcor::cor.shrink).
Feature.AR <- function(data, Cno) {
  shrunk.cor <- cor.shrink(data, verbose = FALSE)

  # 1 - correlation serves as the dissimilarity; Ward linkage ("ward.D").
  hc <- hclust(as.dist(1 - shrunk.cor), method = "ward.D")
  c.tree <- cutree(hc, k = Cno)
  n.clusters <- max(c.tree)
  assets <- rep(NA_character_, n.clusters)

  # Compounded series per asset.
  data.q <- apply(data, 2, cumprod)
  colnames(data.q) <- colnames(data)

  for (i in seq_len(n.clusters)) {
    members <- which(c.tree == i)
    if (length(members) > 1) {
      subset <- data.q[, members]

      # Score = 25% quantile of the differenced log of the compounded series.
      # Alternative scorers based on an AR forecast were left disabled:
      #model <- ar(subset[,j])
      #exp.ret[j] <- predict(model,a.head=26)$pred/subset[dim(subset)[1],j]
      #exp.ret[j] <- (predict(model,a.head=26)$pred/subset[dim(subset)[1],j])-(quantile(model$resid[which(model$resid>-199)],prob=c(0.05))+1)
      exp.ret <- vapply(
        seq_len(ncol(subset)),
        function(j) unname(quantile(diff(log(subset[, j])), probs = 0.25)),
        numeric(1)
      )

      # which.max() yields exactly one index (the first maximum), so tied
      # scores no longer produce a multi-element replacement.
      best <- which.max(exp.ret)
      if (length(best) == 0) {
        stop("Feature.AR: no finite score available in cluster ", i,
             call. = FALSE)
      }
      assets[i] <- names(c.tree)[members][best]
    } else if (length(members) == 1) {
      # A singleton cluster is represented by its only member.
      assets[i] <- colnames(data)[members]
    }
  }
  assets
}

# Select one representative asset per correlation cluster, scoring each
# member by the slope of a linear trend fitted to its compounded series.
#
# Despite the name, the score is the raw slope: the risk adjustment
# (dividing by the residual standard deviation or a residual quantile) is
# not applied.
#
# Arguments:
#   data     Numeric matrix with one named column per asset and one row per
#            period. The original inline note describes it as
#            diff(log(prices)) + 1.
#   Cno      Number of clusters handed to cutree(k = Cno).
#   lookback Number of trailing periods (beyond the last row) used for the
#            trend fit. Defaults to 3 * 52, the window the function has
#            always used; shorter histories now fall back to all available
#            rows instead of failing on an invalid negative row index.
#
# Returns:
#   Character vector of length max(cluster id) holding the chosen asset name
#   for each cluster, in cluster-id order.
#
# Depends on cor.shrink() being in scope (presumably corpcor::cor.shrink).
Feature.LM_risk.adj <- function(data, Cno, lookback = 3 * 52) {
  shrunk.cor <- cor.shrink(data, verbose = FALSE)

  # 1 - correlation serves as the dissimilarity; Ward linkage ("ward.D").
  hc <- hclust(as.dist(1 - shrunk.cor), method = "ward.D")
  c.tree <- cutree(hc, k = Cno)
  ass.nam <- colnames(data)
  n.clusters <- max(c.tree)
  assets <- rep(NA_character_, n.clusters)

  # Clamp the window start at row 1 so short histories are usable.
  n.obs <- nrow(data)
  rows <- max(1, n.obs - lookback):n.obs

  for (i in seq_len(n.clusters)) {
    members <- which(c.tree == i)
    if (length(members) > 1) {
      # Compounded series of the cluster members over the window.
      i.data <- apply(data[rows, members, drop = FALSE], 2, cumprod)
      colnames(i.data) <- ass.nam[members]

      time.idx <- seq_len(nrow(i.data))
      trends <- vapply(
        seq_len(ncol(i.data)),
        function(j) {
          y <- i.data[, j]
          # Second coefficient = slope of the series against time.
          unname(lm(y ~ time.idx)$coefficients[2])
        },
        numeric(1)
      )

      # which.max() yields exactly one index (the first maximum, ignoring
      # NA), so tied slopes no longer produce a multi-element replacement.
      best <- which.max(trends)
      if (length(best) == 0) {
        stop("Feature.LM_risk.adj: no finite trend available in cluster ", i,
             call. = FALSE)
      }
      assets[i] <- colnames(i.data)[best]
    } else if (length(members) == 1) {
      # A singleton cluster is represented by its only member.
      assets[i] <- ass.nam[members]
    }
  }
  assets
}
# Bjerring/RISpackage2 documentation built on May 6, 2019, 7:56 a.m.