R/association.R

Defines functions cramerv ICC_test column_identification cor_test correlation_calculation classify_columns association association_reduction_algorithm association_table_reduction

library(psych)
library(ICC)
library(tidyverse)
library(magrittr)
# Calculates correlation matrix, and provides groups of variables that are correlation. Assumes linear relationships.
# Performs reduction based upon picking the variable with the highest in group correlation and lowest out of group correlation

# Cont vs Cont & Linear - Check normality then 'Pearson'
# Cont vs Integer - Spearman
# Cont|Integer vs Nominal (<10 else integer) - ICC & Anova
# Cont|Integer|Ordinal vs Ordinal (Uniques < 10 else integer) - Kendall
# Nominal vs Nominal|Ordinal - Cramer's V & Chi Square


# Significance tests:
#   Continuous vs. Nominal: run an ANOVA. In R, you can use ?aov.
# Nominal vs. Nominal: run a chi-squared test. In R, you use ?chisq.test.
# Effect size (strength of association):
#   Continuous vs. Nominal: calculate the intraclass correlation. In R, you can use ?ICC in the psych package; there is also an ICC package.
# Nominal vs. Nominal: calculate Cramer's V.


options <- c("continuous","continuous (non-normal)","integer","nominal","ordinal")

association_parameters <- t(combn(options,2)) %>%
  as_tibble %>%
  set_colnames(c("X","Y"))%>%
  bind_rows(tibble("X"=options,"Y"=options))%>%
  mutate("Association"=c(
    "Spearman",
    "Spearman",
    "ICC",
    "Kendall",
    "Spearman",
    "ICC",
    "Kendall",
    "ICC",
    "Kendall",
    "Cramer's V",
    "Pearson",
    "Spearman",
    "Spearman",
    "Cramer's V",
    "Kendall"
  ),
  "method"=
    ifelse(Association=="Cramer's V",
           'cramer',
           ifelse(Association!='ICC',
                  tolower(Association),
                  'ICC')
    )
  )%>%
  (function(x){
    bind_rows(x,x%>%select(Y,X,Association,method)%>%set_colnames(colnames(x)))
  })%>%
  unite('vs',X:Y,sep=' vs ')%>%
  distinct(vs,Association,method)

cramerv <- function(x,y){
  chisq <- chisq.test(x, y)
  V <- sqrt(as.numeric(chisq$statistic)/(length(x) * (min(length(unique(x)),length(unique(y))) - 1)))
  p <- chisq$p.value
  out <- tibble('Value'=V,'p.value'=p)
  return(out)
}

ICC_test <- function(x,y){
  if((is.factor(x)|is.character(x))&(is.factor(y)|is.character(y))){
    stop('For two factor variables "cramerv" should be used')
  }else{
    if(is.factor(x)|is.character(x)){
      nominal_variable <- x
      numeric_variable <- y
    }else{
      nominal_variable <- y
      numeric_variable <- x
    }
  }

  ICC_output <- ICCest(nominal_variable,numeric_variable)
  value <- ICC_output$ICC
  anova <- aov(numeric_variable~nominal_variable)
  p <- summary(anova)[[1]][["Pr(>F)"]][1]
  out <- tibble('Value'=value,'p.value'=p)
  return(out)
}

column_identification <- function(x,nominal_cutoff=10,alpha=0.05){
  if(is.numeric(x)){
    if(sum(round(na.omit(x))==na.omit(x))==length(na.omit(x))){
      if(length(unique(x))<=nominal_cutoff){
        return('nominal')
      }else{
        return('integer')
      }
    }else{
      if(shapiro.test(na.omit(x))$p.value>alpha){
        return('continuous')
      }else{
        return('continuous (non-normal)')
      }
    }
  }else{
    return('categorical')
  }
}

cor_test <- function(x,y,method,...){
  results <- cor.test(as.numeric(x),as.numeric(y),method=method,...)
  value <- unlist(results$estimate)
  p <- results$p.value
  out <- tibble('Value'=value,'p.value'=p)
  return(out)
}

correlation_calculation <- function(x,y,method,alpha=0.05){
  if(method=='ICC'){
    return(ICC_test(x,y))
  }else if(method=='cramer'){
    return(cramerv(x,y))
  }else{
    return(cor_test(x,y,method=method))
  }
}

classify_columns <- function(data,nominal_cutoff=10,alpha=0.05){
  column_classification_vector <- unlist(lapply(data,column_identification,nominal_cutoff=nominal_cutoff))
  column_classification <- tibble('column'=names(column_classification_vector),'classification'=column_classification_vector)
  return(column_classification)
}

association <- function(data,column_classification=NULL,alpha=0.05,categorical_as='nominal',nominal_cutoff=10,FDR=FALSE,FDR_method="none"){
  if(is.null(column_classification)){
    column_classification <- classify_columns(data,nominal_cutoff)
  }

  if(categorical_as=='nominal'){
    column_classification <- column_classification%>%
      mutate(classification=ifelse(classification=='categorical',
                                   'nominal',
                                   classification))
    data <- data %>%
      mutate_if(is.character,as.factor)

  }else if(categorical_as=='ordinal'){
    factor_check <- lapply(
      data %>% select(column_classification%>%filter(classification=='categorical')%>%.[['column']]),
      is.factor
    ) %>% unlist

    if(!is.null(factor_check)){
      if(any(!factor_check)){
        stop('To treat character as ordinal variables they must be ordered factors. The following columns are characters: ',paste(names(!factor_check),sep=', '))
      }
    }

    column_classification <- column_classification%>%
      mutate(classification=ifelse(classification=='categorical',
                                   'ordinal',
                                   classification))

    data <- data %>%
      mutate_if(is.factor,as.numeric)

  }else{
    column_classification <- column_classification%>%
      filter(classification!='categorical')
  }


  data <- data %>%
    mutate_at(filter(column_classification,classification%in%c('nominal','ordinal')) %>% .[['column']],as.factor)

  initial <- t(combn(colnames(data),2)) %>% as_tibble %>%
    set_colnames(c("X","Y"))%>%
    left_join(column_classification,by=c('X'='column'))%>%
    left_join(column_classification,by=c('Y'='column'))%>%
    unite('vs',classification.x:classification.y,sep = ' vs ')%>%
    filter(!grepl('NA',vs))%>%
    left_join(association_parameters)%>%
    group_by(X,Y,vs)%>%
    mutate('Hope'=list(correlation_calculation(data[[X]],data[[Y]],method)))%>%
    unnest%>%
    select(-method)%>%
    ungroup%>%
    mutate('Significant'=p.value<=alpha)

  if(FDR){
    FDR_method <- match.arg(FDR_method,c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY","fdr", "none"))
    initial$p.value <- p.adjust(initial$p.value,method=FDR_method)
  }
  out <- initial %>%
    mutate('Interpretation'=
             ifelse(Significant,
                    ifelse(
                      Association%in%c('ICC',"Cramer's V"),
                      ifelse(
                        Value<0.05,
                        'Extremely weak association',
                        ifelse(Value<=0.2,
                               'Very weak association',
                               ifelse(Value<=0.4,
                                      'Weak association',
                                      ifelse(Value<=0.6,
                                             'Moderate association',
                                             ifelse(Value<=0.8,
                                                    'Strong association',
                                                    'Very strong association'
                                             )
                                      )
                               )
                        )

                      ),
                      ifelse(
                        abs(Value)<0.05,
                        sprintf('Extremely weak %s association',ifelse(Value<0,'negative','positive')),
                        ifelse(abs(Value)<=0.2,
                               sprintf('Very weak %s association',ifelse(Value<0,'negative','positive')),
                               ifelse(abs(Value)<=0.4,
                                      sprintf('Weak %s association',ifelse(Value<0,'negative','positive')),
                                      ifelse(abs(Value)<=0.6,
                                             sprintf('Moderate %s association',ifelse(Value<0,'negative','positive')),
                                             ifelse(abs(Value)<=0.8,
                                                    sprintf('Strong %s association',ifelse(Value<0,'negative','positive')),
                                                    sprintf('Very strong %s association',ifelse(Value<0,'negative','positive'))
                                             )
                                      )
                               )
                        )

                      )
                    ),
                    'No significant association'
             )
    )%>%
    ungroup%>%
    rename('A'='Value')

  factor_order <- sapply(unique(c(out$X,out$Y)),function(x){
    sum(grepl(x,out$X))
  })%>%
    sort(.,decreasing = TRUE)%>%
    names

  out <- out %>%
    mutate(X=factor(X,levels=(factor_order)),
           Y=factor(Y,levels=rev(factor_order)),
           Association=factor(Association,levels=c("Cramer's V","ICC","Kendall","Pearson","Spearman")))


  attr(out,'plot_correlation_labelled') <- out %>%
    filter(Association%in%c('Spearman','Kendall','Pearson'))%>%
    arrange(X,Y)%>%
    group_by(X)%>%
    mutate(yLoc=row_number())%>%
    ungroup%>%
    mutate(xLoc=cumsum(yLoc==1))%>%
    mutate('Significant'=ifelse(p.value<alpha,'*',''))%>%
    ggplot()+
    geom_tile(aes(x=X,y=Y,fill=A),width=1,colour='black')+
    geom_text(aes(x=xLoc,y=yLoc+0.1,label=sprintf('%0.2f%s',A,Significant)),colour='black')+
    geom_point(aes(x=xLoc,y=yLoc-0.1,shape=Association))+
    scale_shape_discrete(drop=FALSE)+
    scale_fill_gradient2(low='orange',mid='white',high='blue',limits=c(-1,1))+
    labs(title='Association Plot',
         subtitle = 'Correlation Only',
         caption=sprintf("* denotes significance at %0.1f%% %s",(1-alpha)*100,ifelse(FDR,'(FDR corrected)','')))

  attr(out,'plot_correlation') <- out %>%
    filter(Association%in%c('Spearman','Kendall','Pearson'))%>%
    arrange(X,Y)%>%
    group_by(X)%>%
    mutate(yLoc=row_number())%>%
    ungroup%>%
    mutate(xLoc=cumsum(yLoc==1))%>%
    mutate('Significant'=ifelse(p.value<alpha,'*',''))%>%
    ggplot()+
    geom_tile(aes(x=X,y=Y,fill=A),width=1,colour='black')+
    scale_fill_gradient2(low='orange',mid='white',high='blue',limits=c(-1,1))+
    labs(title='Association Plot',
         subtitle = 'Correlation Only')

   attr(out,'plot_association_labelled') <- out %>%
     arrange(X,Y)%>%
     group_by(X)%>%
     mutate(yLoc=row_number())%>%
     ungroup%>%
     mutate(xLoc=cumsum(yLoc==1))%>%
     mutate('Significant'=ifelse(p.value<alpha,'*',''),
            '|A|'=abs(A))%>%
     ggplot()+
     geom_tile(aes(x=X,y=Y,fill=`|A|`),width=1,colour='black')+
     geom_text(aes(x=xLoc,y=yLoc+0.1,label=sprintf('%0.2f%s%s',abs(A),Significant,ifelse(A<0,"'",''))),colour='black')+
     geom_point(aes(x=xLoc,y=yLoc-0.1,shape=Association))+
     scale_shape_discrete(drop=FALSE)+
     scale_fill_gradient(low='grey',high='darkgreen',limits=c(0,1))+
     labs(title='Association Plot',
          caption=sprintf("' denotes negative correlation, * denotes significance at %0.1f%% %s",(1-alpha)*100,ifelse(FDR,'(FDR corrected)','')))+
     labs(fill='|A|')

   attr(out,'plot_association') <- out %>%
     arrange(X,Y)%>%
     group_by(X)%>%
     mutate(yLoc=row_number())%>%
     ungroup%>%
     mutate(xLoc=cumsum(yLoc==1))%>%
     mutate('Significant'=ifelse(p.value<alpha,'*',''),
            '|A|'=abs(A))%>%
     ggplot()+
     geom_tile(aes(x=X,y=Y,fill=`|A|`),width=1,colour='black')+
     scale_fill_gradient(low='grey',high='darkgreen',limits=c(0,1))+
     labs(title='Association Plot')+
     labs(fill='|A|')


  return(out)
}

association_reduction_algorithm <- function(useable_association_table,focus='min_outside',iteration=0,max_iterations=10){
  needed <- useable_association_table%>%
    mutate('AbsA'=abs(A))%>%
    select(X,Y,AbsA)%>%
    arrange(desc(AbsA))

  average_association <- needed %>%
    gather('Remove','Variable',X:Y)%>%
    group_by(Variable)%>%
    summarise('MeanAbsA'=mean(AbsA))%>%
    arrange(desc(MeanAbsA))

  # Extract current reduced variables
  if(is.null(attr(useable_association_table,'Reduced'))){
    message('And so it begins...')
    grouping <- needed %>%
      gather('Remove','Variable',X:Y)%>%
      distinct(Variable)%>%
      mutate('Group'=0)

    # No Grouping Done. Start by placing the variable with the highest average association as a group.
    grouping <- grouping %>%
      mutate(Group=ifelse(Variable==average_association[['Variable']][1],max(Group)+1,Group))

    # Then go down in order and assign the next highest association to either the same group or a new one if the highest association is not with the previou
    for(variable in grouping %>% filter(Group==0) %>% .[['Variable']]){
      highest <- needed %>%
        filter(X==variable|Y==variable) %>%
        mutate('Other'=ifelse(X!=variable,as.character(X),as.character(Y)),
               'X'=variable,
               'Y'=Other)%>%
        select(-Other)%>%
        arrange(desc(AbsA))%>%
        .[['Y']]%>%
        .[1]

      highestGroup <- grouping%>%filter(Variable==highest)%>%.[['Group']]

      grouping <- grouping %>%
        mutate(Group=ifelse(
          Variable==variable,
          ifelse(highestGroup!=0,
                 highestGroup,
                 max(Group)+1),
          Group)
        )
    }

    # For each variable calculate average within and outside group AbsA. Select the variable with the highest Within^2/Outside Ratio. High within group correlation and low outside group correlation.
    # We mutliply by Within group again to place higher importance on the within group correlation. This could be made an option in future versions.

    representatives <- lapply(average_association$Variable, function(variable){
      needed %>%
        filter(X==variable|Y==variable) %>%
        mutate('Other'=ifelse(X!=variable,as.character(X),as.character(Y)),
               'X'=variable,
               'Y'=Other)%>%
        select(-Other)%>%
        left_join(grouping,by=c('X'='Variable'))%>%
        rename('Variable Group'='Group')%>%
        left_join(grouping,by=c('Y'='Variable'))%>%
        rename('Other Group'='Group')%>%
        mutate('Calculation'=factor(ifelse(`Variable Group`==`Other Group`,'Within','Outside'),levels=c('Within','Outside')))%>%
        group_by(X,`Variable Group`,Calculation,.drop=FALSE)%>%
        summarise('MeanAbsA'=mean(AbsA))%>%
        mutate('MeanAbsA'=ifelse(is.nan(MeanAbsA),1,MeanAbsA))%>%
        rename('Variable'='X')%>%
        spread(Calculation,MeanAbsA)%>%
        mutate('Ratio'=ifelse(focus=='max_within',Within^2/Outside,
                              ifelse(focus=='min_outside',Within/Outside^2,Within/Outside)))
    })%>%
      bind_rows%>%
      arrange(`Variable Group`,desc(Ratio))%>%
      group_by(`Variable Group`,add=FALSE)%>%
      filter(Ratio==max(Ratio))%>%
      select(Variable,`Variable Group`)

    attr(useable_association_table,'Reduced') <- representatives
    association_reduction_algorithm(useable_association_table,focus,iteration=iteration+1,max_iterations)

  }else{

    representatives <- old_representatives <- attr(useable_association_table,'Reduced')

    grouping <- needed %>%
      gather('Remove','Variable',X:Y)%>%
      distinct(Variable)%>%
      mutate('Group'=0)%>%
      left_join(representatives)%>%
      mutate('Group'=ifelse(!is.na(`Variable Group`),`Variable Group`,Group))%>%
      select(-`Variable Group`)

    # Assign to current groups with the highest correlation first
    # Assign each variable to the group representative that it has the highest correlation with

    for(variable in grouping %>% filter(Group==0) %>% .[['Variable']]){
      potential <- needed %>%
        filter(X==variable|Y==variable) %>%
        mutate('Other'=ifelse(X!=variable,as.character(X),as.character(Y)),
               'X'=variable,
               'Y'=Other)

      current <- potential %>%
        filter(Y%in%representatives$Variable)%>%
        select(-Other)%>%
        arrange(desc(AbsA))%>%
        .[['Y']]%>%
        .[1]

      if(is.na(current)){
        currentGroup <- 0
      }else{
        currentGroup <- grouping%>%filter(Variable==current)%>%.[['Group']]
      }


      grouping <- grouping %>%
        mutate(Group=ifelse(
          Variable==variable,
          ifelse(currentGroup!=0,
                 currentGroup,
                 max(Group)+1),
          Group)
        )
    }

    ordered <- lapply(average_association$Variable, function(variable){
      needed %>%
        filter(X==variable|Y==variable) %>%
        mutate('Other'=ifelse(X!=variable,as.character(X),as.character(Y)),
               'X'=variable,
               'Y'=Other)%>%
        select(-Other)%>%
        left_join(grouping,by=c('X'='Variable'))%>%
        rename('Variable Group'='Group')%>%
        left_join(grouping,by=c('Y'='Variable'))%>%
        rename('Other Group'='Group')%>%
        mutate('Calculation'=factor(ifelse(`Variable Group`==`Other Group`,'Within','Outside'),levels=c('Within','Outside')))%>%
        group_by(X,`Variable Group`,Calculation,.drop=FALSE)%>%
        summarise('MeanAbsA'=mean(AbsA))%>%
        mutate('MeanAbsA'=ifelse(is.nan(MeanAbsA),1,MeanAbsA))%>%
        rename('Variable'='X')%>%
        spread(Calculation,MeanAbsA)%>%
        mutate('Ratio'=ifelse(focus=='max_within',Within^2/Outside,
                              ifelse(focus=='min_outside',Within/Outside^2,Within/Outside)))
    })%>%
      bind_rows%>%
      arrange(`Variable Group`,desc(Ratio))

    representatives <- ordered %>%
      group_by(`Variable Group`,add=FALSE)%>%
      filter(Ratio==max(Ratio))%>%
      select(Variable,`Variable Group`)

    if(identical(representatives,old_representatives)){
      attr(useable_association_table,'Reduced') <- representatives
      attr(useable_association_table,'Grouping') <- grouping
      attr(useable_association_table,'Iterations') <- iteration
      attr(useable_association_table,'Ratio') <- ordered
      message('Done')
      return(useable_association_table)
    }else{
      attr(useable_association_table,'Reduced') <- representatives
      message('Go again')
      if((iteration+1)<max_iterations){
        return(association_reduction_algorithm(useable_association_table,focus,iteration=iteration+1,max_iterations))

      }else{
        warning('Maximum Iterations Reached')
        attr(useable_association_table,'Reduced') <- representatives
        attr(useable_association_table,'Grouping') <- grouping
        attr(useable_association_table,'Iterations') <- iteration
        attr(useable_association_table,'Ratio') <- ordered
        return(useable_association_table)
      }
    }
  }
}

association_table_reduction <- function(association_table,min_abs_association=0.4,focus='max_within',max_iterations=10){

  focus <- match.arg(focus,c("max_within", "min_outside", "neither"))

  not_significant <- association_table %>%
    filter(Significant)

  low_association <- association_table %>%
    filter(Significant & abs(A)<min_abs_association)

  useable_association_table <- association_table %>%
    filter(Significant & abs(A)>=min_abs_association)

  if(nrow(useable_association_table)>0){
    reduction <- association_reduction_algorithm(useable_association_table,focus=focus,max_iterations=max_iterations)

    reduced_variables <- attr(reduction,'Reduced')%>%
      rename('Group'='Variable Group')
    groupings <- attr(reduction,'Grouping')%>%
      arrange(Group)

    order <- attr(reduction,'Ratio')$Variable

    all_variables <- sort(unique(c(as.character(association_table$X),as.character(association_table$Y))))
    grouped_variables <- sort(groupings$Variable)

    # If we haven't assigned all groups due to low association or not significant then assign a new group

    if(!identical(all_variables,grouped_variables)){
      groupings <- tibble('Variable'=all_variables)%>%
        left_join(attr(reduction,'Grouping'))%>%
        mutate('Needed'=is.na(Group))%>%
        group_by(Needed)%>%
        mutate('Group'=ifelse(is.na(Group),row_number()+max(attr(reduction,'Grouping')$Group),Group))%>%
        arrange(Group)%>%
        ungroup

      reduced_variables <- groupings %>%
        ungroup%>%
        filter(Needed)%>%
        select(-Needed)%>%
        set_colnames(colnames(reduced_variables))%>%
        bind_rows(reduced_variables,.)

      groupings <- groupings %>%
        select(-Needed)

      order <- c(order,reduced_variables$Variable[!reduced_variables$Variable%in%order])
    }

    reduced_table <- association_table %>%
      triangular('X','Y',diag=FALSE,full=FALSE)%>%
      filter(X%in%reduced_variables$Variable&Y%in%reduced_variables$Variable)

    return(list('reduced_variables'=reduced_variables,'groupings'=groupings))
  }else{
    stop('No reduction to be made')
  }
}

association_reduction <- function(data, column_classification = NULL, alpha = 0.05, categorical_as = "nominal",
                                 nominal_cutoff = 10, FDR = FALSE, FDR_method = "none", min_abs_association = 0.4,
                                 focus = "min_outside", max_iterations = 10){

  association_table <- association(data, column_classification = column_classification,
                                   alpha = alpha, categorical_as = categorical_as,
                                   nominal_cutoff = nominal_cutoff, FDR = FDR, FDR_method = FDR_method)

  reduction <- association_table_reduction(association_table=association_table,
                                           min_abs_association = min_abs_association, focus = focus,
                                           max_iterations = max_iterations)

  reduced_table <- association_table %>%
    filter(X%in%reduction$reduced_variables$Variable&Y%in%reduction$reduced_variables$Variable)

  out <- data %>%
    select(reduction$reduced_variables$Variable)

  attr(out,'assocation') <- reduced_table
  attr(out,'alpha') <- alpha
  attr(out,'FDR') <- ifelse(FDR,FDR_method,'None')

  attr(out,'plot_association_labelled') <- reduced_table %>%
    arrange(X,Y)%>%
    group_by(X)%>%
    mutate(yLoc=row_number())%>%
    ungroup%>%
    mutate(xLoc=cumsum(yLoc==1))%>%
    mutate('Significant'=ifelse(p.value<alpha,'*',''),
           '|A|'=abs(A))%>%
    ggplot()+
    geom_tile(aes(x=X,y=Y,fill=`|A|`),width=1,colour='black')+
    geom_text(aes(x=xLoc,y=yLoc+0.1,label=sprintf('%0.2f%s%s',abs(A),Significant,ifelse(A<0,"'",''))),colour='black')+
    geom_point(aes(x=xLoc,y=yLoc-0.1,shape=Association))+
    scale_shape_discrete(drop=FALSE)+
    scale_fill_gradient(low='grey',high='darkgreen',limits=c(0,1))+
    labs(title='Association Plot',
         subtitle=sprintf('Variables selected to %s',
                          ifelse(focus=='min_outside',
                                 'minimise variation with other selected group representatives',
                                 ifelse(focus=='max_within',
                                        'maximise variation within grouped variables',
                                        'find group representatives that have high within and low outside group association')
                                 )
                          ),
         caption=sprintf("' denotes negative correlation, * denotes significance at %0.1f%% %s",(1-alpha)*100,ifelse(FDR,'(FDR corrected)','')))+
    labs(fill='|A|')

  attr(out,'plot_association') <- reduced_table %>%
    arrange(X,Y)%>%
    group_by(X)%>%
    mutate(yLoc=row_number())%>%
    ungroup%>%
    mutate(xLoc=cumsum(yLoc==1))%>%
    mutate('Significant'=ifelse(p.value<alpha,'*',''),
           '|A|'=abs(A))%>%
    ggplot()+
    geom_tile(aes(x=X,y=Y,fill=`|A|`),width=1,colour='black')+
    scale_fill_gradient(low='grey',high='darkgreen',limits=c(0,1))+
    labs(title='Association Plot',
         subtitle=sprintf('Variables selected to %s',
                          ifelse(focus=='min_outside',
                                 'minimise variation with other selected group representatives',
                                 ifelse(focus=='max_within',
                                        'maximise variation within grouped variables',
                                        'find group representatives that have high within and low outside group association')
                          )
         ),
         caption=sprintf("' denotes negative correlation, * denotes significance at %0.1f%% %s",(1-alpha)*100,ifelse(FDR,'(FDR corrected)','')))+
    labs(fill='|A|')


  return(reduction)
}

# column_classification = NULL; alpha = 0.05; categorical_as = "nominal";
# nominal_cutoff = 10; FDR = FALSE; FDR_method = "none"; min_abs_association = 0.4;
# focus = "min_outside"; max_iterations = 10
# <<<<<<< HEAD
# association(iris,categorical_as = 'ordinal')
# =======
# # association(iris,categorical_as = 'ordinal')
# >>>>>>> parent of 9287179... more bug fixes
# # association(iris,categorical_as = 'remove')
# association_table <- association(mtcars,categorical_as = 'nominal')
# # association(mtcars,categorical_as = 'nominal')
# #
# # withinReduce <- association_table_reduction(association_table,focus = 'max_within',min_abs_association = 0.3)
# # outsideReduce <- association_table_reduction(association_table,focus = 'min_outside',min_abs_association = 0.1)
# # standardReduce <- association_table_reduction(association_table,focus = 'neither',min_abs_association = 0.3)
# # visualise(association_table)
# #
# # all <- step(lm(mpg~.,data=mtcars))
# # summary(all)
# # car::vif(all)
# #
# # withinmod <- lm(mpg~.,data=mtcars[,c('mpg',withinReduce$reduced_variables$Variable)])
# # summary(withinmod)
# # car::vif(withinmod)
# #
# # outsidemod <- lm(mpg~.,data=mtcars[,c('mpg',outsideReduce$reduced_variables$Variable)])
# # summary(outsidemod)
# # car::vif(outsidemod)
# #
# # standardmod <- lm(mpg~.,data=mtcars[,c('mpg',standardReduce$reduced_variables$Variable)])
# # summary(standardmod)
# # car::vif(standardmod)
#
# test <- association(mtcars[,c(outsideReduce$reduced_variables$Variable)],categorical_as = 'nominal')
#
# # Association reduction from data.
# # association_reduction <- function(data,column_classification=NULL,alpha=0.05,categorical_as='nominal',nominal_cutoff=10,min_abs_association=0.4,FDR=FALSE,FDR_method="none",maximise='within',max_iterations=10){
# #
# #   association_table <- association(data,column_classification=column_classification,alpha=alpha,categorical_as=categorical_as,nominal_cutoff=nominal_cutoff)
# #
# #
#
#
# withinReduce <- association_table_reduction(association_table,focus = 'max_within')
# outsideReduce <- association_table_reduction(association_table,focus = 'min_outside')
# standardReduce <- association_table_reduction(association_table,focus = 'neither')
#
# # %>%mutate('Adj'=p.adjust(p.value,method='BH'))
statisticiansix/demonstrandum documentation built on Dec. 2, 2019, 1:29 a.m.