R/ICC.R

Defines functions ICC

Documented in ICC

ICC<-function(outcome,group,method="unbiased",var_n="n")
{
    # From Mostly Harmless Econometrics, D. Angrist
    
    # First, get the ICC from simple linear regression:
    
    
    group=as.factor(group)
    
    OK = !is.na(group) & !is.nan(group) & !is.na(outcome) & !is.nan(outcome)
    
    # Check for intragroup variability, if this is zero
    
    group=group[OK]
    outcome=outcome[OK]
    
    # Test for the trivial case of completely homogeneous groups first:
    
    # For this, all groups should have length larger than 1
    
    group_length=aggregate(outcome ~ group, FUN=length)
    
    if(all(group_length$outcome==1)) { return (1) }
    
    # In R, var(x) gives the sample variance with n-1 degrees of liberty, this is a convenience function for the sum of squares sum(xi-xbar)^2
    sum_squares=function(x)
    {
        return (sum((x-mean(x))^2))
    }
    
    
    # For the unbiased method
    intraclass_sum=function(x,x_mean)
    {
        # The original formula is (xi-x_mean)*(xj-x_mean), i and j being non equal. x_mean is the general means and thus externally imposed (from Mostly harmless econometrics, Angrist and Pischke)
        # One can develop this:
        # Sum xi*xj- n_g*x_mean*Sum xi - ng*x_mean*Sum xj + ng^2 sum x_mean^2 - sum (xi-x_mean)^2 where now the sum is over all combinations possible including i=j
        # Then, sum(xi) becomes ng*xbar where ng is the group size and xbar is the group mean
        # Sum xi*xj - 2*n_g^2*x_mean*x_bar + n_g^2*x_mean^2 - sum(xi^2) + 2*x_mean*sum(xi) - ng*x_mean^2
        # Sum xi*xj - n_g^2*x_mean*(x_bar+x_bar-x_mean)-sum(xi^2) + 2*ng*x_mean*x_bar - ng*x_mean^2
        # Sum xi* sum xj - n_g^2*x_mean*(x_bar+x_bar-x_mean)-sum(xi^2) - n_g*x_mean*(x_mean - x_bar - x_bar)
        # n_g^2*(x_mean-x_bar)^2 - sum(xi-x_bar)^2-ng*(x_mean-x_bar)^2
        # n_g*(n_g-1)*(x_mean-x_bar)^2-sum(xi-x_bar)^2
        
        n_g = length(x)
        x_bar = mean(x)
         
        return(n_g*(n_g-1)*(x_mean-x_bar)^2-sum_squares(x))
        
        
        
        
        
    }
    
    # The outcome is numerical, apply the standard formulae
    
    if(!is.factor(outcome))
    {
    
    x_mean = mean(outcome)
    
    if(method=="unbiased" | method=="Fisher") #from Mostly harmless econometrics, Angrist and Pischke, eq. 8.2.5, transformed for more compact evaluation
    {
    
        ng=aggregate(outcome~group,FUN=length)$outcome
        zg=aggregate(outcome~group,FUN=mean)$outcome
        n=length(outcome)
        if(var_n=="n-1"){ n=n-1  }
        return((sum(ng^2*(zg-mean(outcome))^2)/(sum((outcome-mean(outcome))^2)/n)-n)/sum(ng*(ng-1)))
    
    } else if(method=="unbiased_unequal_cluster_size") {
        # We find the formula in Mostly harmless econometric to not give exactly 1 for homogeneous cluster (no intra-cluster variability) when the clusters
        # are not of equal size. To address this problem, the formula here can be used. It is engineered to give approximatly 0 for non-clustered variables,
        # and exactly 1 for perfect clustering
        ng=aggregate(outcome~group,FUN=length)$outcome
        N=length(ng)
        zg=aggregate(outcome~group,FUN=mean)$outcome
        n=length(outcome)
        nv = n
        if(var_n=="n-1"){ nv=n-1  }
        return((n-3)/n/(n-N-2)*(sum(ng*(zg-mean(outcome))^2)/(sum((outcome-mean(outcome))^2)/nv)-n*(N-1)/(n-3)))
        
        #return((sum(ng*(zg-mean(z))^2)/(sum((z-mean(z))^2)/n)))
        
    
    } else if(method=="ANOVA")
    { # Group variance over total residual variance, general method, for example eq. 8.2.3 in Mostly harmless econometrics, Angrist and Pischke
    
    ag=aggregate( outcome ~group,FUN=function(x){return(all(duplicated(x)[-1]))})
    
    
    # Are all groups homogeneous
    if(all(ag$outcome)) {return(1)}
    
    outcome=as.numeric(outcome)
    
    
    linmod = lm(outcome~group)
    
    Var_intragroup = anova(linmod)["Residuals","Sum Sq"]
    Var_intergroup = anova(linmod)["group","Sum Sq"]
    
    ICC_m=Var_intergroup/(Var_intergroup+Var_intragroup)
    
    return(ICC_m)
    
    } else {
        
        stop(paste("Method ", method," not implemented", sep=""))
    }
    
    

    } else { # If the outcome is a factor, we assume this is non-ordered. We decompose this to 0-1 variables for each level and sum up
        
        sum_ICC=0
        n_ICC=0
        for(theLevel in levels(outcome))
        {
            levelOutcome = as.numeric(outcome==theLevel)
            sum_ICC=sum_ICC+ICC(levelOutcome,group,method=method)
            n_ICC=n_ICC+1
        }
    
        return (sum_ICC/n_ICC)
    
        
    }
    
	
}
tbgitoo/moultonTools documentation built on Nov. 8, 2021, 6:15 p.m.