README.md

stp25stat

Wolfgang Peter 2021-02-23

Functions for Statistical Computations

which_output()
## [1] "markdown_html"
#'  #  set_my_options(style_mean = list(line_break = '<br>'))
#'  #  set_my_options(prozent = list(null_percent_sign = '.'))
#'  #  set_my_options(mittelwert = list(mean.style = 2, median.style = "IQR"), 
#'  #              prozent=list(style=2))
#'  #  set_my_options(caption= "include.n") 

Deskriptive Tabellen mit sig-Test

#' effect_size
#'
#' Cohen's d  d=(x1-x2)/s psych::cohen.d
#' Hedges' g  g= (x1-x2)/s1s2 ( pooled standard deviation)
#' g enspricht  x<-scale(x)  (mean(x1) - mean(x2)) 
#' 
#' Generalized Log Odds Ratios for Frequency Tables vcd::oddsratio
#' 
#' @param x vector
#' @param by factor
#' @param measure  intern c("mean", "median", "numeric", "factor", "logical")
#' @param measure.test, test  intern c("cattest", "contest", "notest", "ttest", ...)
#' @export
#'
#' @examples
#' 
#'  effect_size(c(2, 3, 4, 2, 3, 4, 3, 6,
#' 7, 6, 8, 9, 4, 5, 6, 7) ,
#' gl(2, 8, labels = c("Control", "Treat")))
#' 
#' effect_size(factor(c(2, 2, 2, 2, 1, 1, 1, 1,
#'                      2, 2, 2, 2, 1, 2, 1, 2)) ,
#'             gl(2, 8, labels = c("Control", "Treat")))
#'             
#'             
effect_size2 <- function(x,
                        by,
                        measure,
                        ...
                        ) {
  dat <-  na.omit(data.frame(x = x, by = by))
  n <- nrow(dat)
  es <- rslt <- "n.a."
  if (n > 10) {
    if (measure %in% c("mean", "median", "numeric")) {
      es <- psych::cohen.d(dat$x, dat$by)
      es <- stp25rndr::Format2(es$cohen.d)
      es <- paste0(es[2], " [", es[1], ", ", es[3], "] ES")
    }
    else  if (measure %in% c("factor", "logical")) {
      # Generalized Log Odds Ratios for Frequency Tables
      if (measure == "factor" & nlevels(dat$x) != 2) {
        es <- "n.a."
      }
      else{
        es <- vcd::oddsratio(table(dat$x, dat$by), log = FALSE)
        if (coef(es) < 0.01)
          es <- "n.a."
        else if (coef(es) > 100)
          es <- "n.a."
        else{
          es <- stp25rndr::rndr_ods(c(coef(es) ,  confint(es)))
          es <- paste0(es[1], " [", es[2], ", ", es[3], "] OR")
        }
      }
    }
  }
  cbind('Odds Ratio/Effect Size' = es)
}
# Zu Auto-Test existiiert eine eigene Funktion unter stp25stat::auto_test

auto_test2 <- function(x,
                      by,
                      measure,
                      measure.test,
                      type = 2) {
  dat <-  na.omit(data.frame(x = x, by = by))
  rslt <- NULL

  contest <- stp25stat:::contest
  cattest <- stp25stat:::cattest

  if (measure.test == "notest") {
    rslt <-  ""
  }
  else if (measure.test == "contest") {
    if (inherits(x, "factor")) {
      dat$x <- as.numeric(dat$x)
    }
    rslt <- stp25stat:::conTest(x ~ by, dat)
  }
  else if (measure.test == "cattest") {
    rslt <- stp25stat:::catTest( ~ x + by, dat)
  }
  else if (measure.test %in% contest) {
    if (inherits(x, "factor")) {
      dat$x <-
        as.numeric(dat$x)
    }
    rslt <- stp25stat:::conTest(x ~ by, dat, measure.test)
  }
  else if (measure.test %in% cattest) {
    rslt <- stp25stat:::catTest( ~ x + by, dat, measure.test)
  }



  if (type == 1) {
    cbind('Statistics' = rslt)
  }
  else if (type == 2) {
    rslt <-   strsplit(rslt, ', p')[[1]]
    cbind("Test Statistics" = rslt[1],
          "p-value" = gsub("=", "", rslt[2]))
  }   else if (type == 3) {
    cbind('Statistics' = gsub(", p", ",<BR>p", rslt))
  }


}
DF %>% Tbll_desc(
  age[median, 1],
  height[1, mean, ttest],
  sex,
  smoker[fisher],
  iq[0],
  likert[2, mean],
  by =  ~ died,
  include.custom = effect_size2,
  include.test=TRUE,
  include.n=TRUE,
  include.total=TRUE
) %>% Output()
## Warning in `[[<-.factor`(`*tmp*`, i, value = 4): invalid factor level, NA
## generated
Tab 1: died Item Total FALSE TRUE Odds Ratio/Effect Size Test Statistik (N)  30 15 15 Age (median) 10.0 (IQR 2.0) 10.0 (IQR 2.0) 10.0 (IQR 2.5) -0.15 \[-0.86, 0.57\] ES F(1, 28)=0.11, p=.739 Height (mean) 52.2 (8.7) 52.8 (9.4) 51.7 (8.3) -0.13 \[-0.85, 0.59\] ES T(28)=0.35, p=.729 Sex  0.76 \[0.18, 3.24\] OR X2(1)=0.14, p=.713  Male 13 (43%) 6 (40%) 7 (47%)  Female 17 (57%) 9 (60%) 8 (53%) Smoker true 20 (67%) 10 (67%) 10 (67%) 1.00 \[0.22, 4.56\] OR OR=1.00, p=1.000 IQ (mean) 99 (11) 99 (13) 100 (9) 0.13 \[-0.59, 0.84\] ES F(1, 28)=0.65, p=.427 Likert (mean) 3.07 (1.41) 3.13 (1.41) 3.00 (1.46) -0.05 \[-0.78, 0.68\] ES F(1, 28)=0.06, p=.804 Wilcoxon-Test, T-Test, Pearson Chi-squared, Fisher Exact Test
    population.model <- '
    Sex =~ sex
    Age =~ age
    Bildungszertifikat =~ edu
    Beruf =~ beruf
    Education  ~ 1.04*Bildungszertifikat + 2.76* Beruf

    single~ 1*age + 2*edu

    Openness =~ o1+o2+o3+o4+o5
    Conscientiousness =~ c1+c2+c3+c4+c5
    Extraversion =~ e1+e2+e3+e4+e5
    Neuroticism =~  n1+n2+n3+n4+n5
    Agreeableness =~ a1+a2+a3+a4+a5

    Einkommen ~ 1.25*Sex +  (-0.31)*Age  + .12*Openness + .23*Conscientiousness + (-0.91)*Extraversion + 2.3*Neuroticism + (-0.541)*Agreeableness

    '

    DF <- lavaan::simulateData(population.model, sample.nobs = 100)

    DF<- transform(DF, 
                   sex=cut(sex, 2, c("m", "f")),
                   age=scale2(age, 18, 55),
                   edu=cut(edu,3, c("low", "med", "hig")),
                   beruf= cut(edu,2, c("blue", "withe")),
                   single =cut(single, 2, c("yes", "no")),
                   Einkommen = (4-log(Einkommen-min(Einkommen)+1))*1000
                   )

Mittelwerte und Prozent

#APA_Correlation(~o1+o2+o3+o4+o5+c1+c2+c3, DF)
Tbll_corr(~o1+o2+o3+o4+o5+c1+c2+c3, DF) %>% Output()
Tab 2: Quelle \(1\) \(2\) \(3\) \(4\) \(5\) \(6\) \(7\) \(8\) (1) o1 1 .62\*\*\* .53\*\*\* .58\*\*\* .55\*\*\* .09 -.01 -.02 (2) o2 1 .48\*\*\* .63\*\*\* .45\*\*\* -.03 -.00 .05 (3) o3 1 .56\*\*\* .46\*\*\* .13 .11 .10 (4) o4 1 .42\*\*\* .12 .06 .08 (5) o5 1 .06 .09 .17 (6) c1 1 .37\*\*\* .35\*\*\* (7) c2 1 .50\*\*\* (8) c3 1 pearson

PCA

    vars <- c(
      "o1",  "o2",  "o3",  "o4",  "o5",  "c1",  "c2",  "c3",
      "c4",  "c5",  "e1",  "e2",  "e3",  "e4",  "e5",  "n1",
      "n2",  "n3",  "n4",  "n5",  "a1",  "a2",  "a3",  "a4",  "a5"
    )
     APA_PCA(DF[vars], 5, cut=.35, include.plot = FALSE)
## R was not square, finding R from data
Tab 3: Standardized loadings (pattern matrix) based upon correlation matrix Item Nr PC1 PC2 PC3 PC4 PC5 h2 o1  1  0.83         0.71 o4  4  0.81         0.67 o2  2  0.81         0.67 o3  3  0.76         0.59 o5  5  0.72         0.55 a2 22    0.84       0.74 a4 24    0.79       0.65 a3 23    0.77       0.64 a5 25    0.75       0.60 a1 21    0.74       0.59 n2 17      0.81     0.70 n5 20      0.78     0.62 n4 19      0.78     0.62 n1 16      0.77     0.62 n3 18      0.76     0.62 e1 11        0.79   0.63 e4 14        0.79   0.62 e3 13        0.76   0.65 e5 15        0.75   0.58 e2 12        0.74   0.56 c4  9          0.79 0.63 c3  8          0.77 0.62 c2  7          0.74 0.56 c5 10          0.68 0.53 c1  6          0.64 0.45 Tab 4: Standardized loadings (pattern matrix) based upon correlation matrix Measures Test Statistik n.obs 100 Mean item complexity 1.1 RMSR 0.06 empirical chi square X2=184.22, p=.502 Tab 5: Standardized loadings (pattern matrix) based upon correlation matrix Measures Statistic Kaiser-Meyer-Olkin Measure 0.71 Bartlett’s test of sphericity X2(300)=1038.49, p<.001

Reliability

    Openness <- Reliability(~o1+o2+o3+o4+o5, DF, check.keys=TRUE)
## Number of categories should be increased  in order to count frequencies.
    Conscientiousness <- Reliability(~ c1+c2+c3+c4+c5, DF, check.keys=TRUE)
## Number of categories should be increased  in order to count frequencies.
    Extraversion <-Reliability(~ e1+e2+e3+e4+e5, DF, check.keys=TRUE)
## Number of categories should be increased  in order to count frequencies.
    Neuroticism <- Reliability(~  n1+n2+n3+n4+n5, DF, check.keys=TRUE)
## Number of categories should be increased  in order to count frequencies.
    Agreeableness <- Reliability(~ a1+a2+a3+a4+a5, DF, check.keys=TRUE)
## Number of categories should be increased  in order to count frequencies.
    DF$O<- Openness$index
    DF$C<- Conscientiousness$index
    DF$E<- Extraversion$index
    DF$N<- Neuroticism$index
    DF$A<- Agreeableness$index

    Alpha2(Openness,Conscientiousness,Extraversion,Neuroticism,Agreeableness)
Tab 6: Quelle Item n M SD Alpha Range Skew Kurtosi Shapiro.Test Openness 5 100 0.11 1.24 0.85 -3.96; 4.02 0.17 -0.17 W=0.99, p=.543 Conscientiousness 5 100 0.02 0.94 0.78 -4.47; 3.50 -0.07 -0.12 W=0.99, p=.938 Extraversion 5 100 -0.11 1.06 0.83 -4.51; 4.12 -0.12 0.15 W=0.99, p=.703 Neuroticism 5 100 -0.09 1.10 0.84 -4.11; 4.85 0.30 -0.43 W=0.99, p=.356 Agreeableness 5 100 0.00 1.15 0.84 -4.54; 4.23 0.13 -0.29 W=0.99, p=.769

Test des Regressionsmodels

Mittelwerte und Effecte

fit<- lm(Einkommen ~ sex + age + O + C + E + N + A, DF)
#Tabelle2  gibt die Tabellen formatiert aus.
mean_sd<- Tabelle(fit )
## Registered S3 methods overwritten by 'lme4':
##   method                          from
##   cooks.distance.influence.merMod car 
##   influence.merMod                car 
##   dfbeta.influence.merMod         car 
##   dfbetas.influence.merMod        car
mean_sd$sex %>% Output( "Mittelwerte")
Tab 7: Mittelwerte   Einkommen sex   n M SD m   31 2303.33 566.81 f   69 2172.44 462.23
#mean_sd$O %>% Output(output="md")
APA2(effects::Effect("sex", fit))
Tab 8: Effekte: Effect sex fit lower upper m 2379.71 2253.87 2505.56 f 2138.12 2055.28 2220.96
APA2(visreg::visreg(fit, "sex", plot=FALSE), digits=1)
Tab 9: Einkommen sex age O C E N A fit ci m 44.6420180327509 -0.0399716297517768 0.0278381187668443 0.0288540142655275 -0.177365131342206 -0.0727364994640318 2419.9 \[2292.4, 2547.5\] f 44.6420180327509 -0.0399716297517768 0.0278381187668443 0.0288540142655275 -0.177365131342206 -0.0727364994640318 2178.3 \[2095.0, 2261.7\]

Regressionsmodel

APA_Table(fit, include.se=FALSE, include.ci=TRUE)
Tab 10: Quelle b conf Parameter (Intercept) 2326\*\*\* \[2659, 1992\] sexf -242\*\* \[-87.9, -395\] age 0.866 \[7.92, -6.19\] O -16.5 \[41.4, -74.3\] C -4.98 \[69.5, -79.5\] E  106\*\* \[ 172, 40.2\] N -321\*\*\* \[-254, -387\] A 69.5\* \[ 131, 8.41\] Goodness of fit R2 0.56 adj. R2 0.53 AIC 1460.0 BIC 1483.4 RMSE 327.30 Obs 100

GOF

 APA_Validation(fit) 
Tab 11: Testing Regression Models Test statistic F-Statistic F(7, 92)=16.96, p<.001 Deviance Residuals 10712573.9 R2 0.56 adj. R2 0.53 Heteroskedasticity (Breusch-Pagan) BP(7)=10.57, p=.158 Autocorrelation (Durbin-Watson) DW=2.02, p=.536 Shapiro-Wilk normality test W=0.93, p<.001 AIC 1460.0 BIC 1483.4 Var: Residual 116441.02 Obs 100
library(units)
## udunits system database from C:/Users/wpete/OneDrive/Dokumente/R/win-library/4.0/units/share/udunits
dat<- data.frame(
  spd1 = set_units(1:5, m/s),
  spd2 = set_units(1:5, km/h))

dat<- Label(dat, spd1="Ultra", spd2="Half")

dat
##      spd1     spd2
## 1 1 [m/s] 1 [km/h]
## 2 2 [m/s] 2 [km/h]
## 3 3 [m/s] 3 [km/h]
## 4 4 [m/s] 4 [km/h]
## 5 5 [m/s] 5 [km/h]
dat %>% Tabelle(spd1, spd2)
##          Item                              value
## 1 Ultra [m/s] 3.00 (SD 1.58, range 1.00 to 5.00)
## 2 Half [km/h] 3.00 (SD 1.58, range 1.00 to 5.00)

Test

Count Data

stats::chisq.test                   Pearson's Chi-squared Test for Count Data
stats::fisher.test                  Fisher's Exact Test for Count Data
stats::mcnemar.test                 McNemar's Chi-squared Test for Count Data
stats::pairwise.prop.test           Pairwise comparisons for proportions
stats::prop.test                    Test of Equal or Given Proportions
stats::prop.trend.test              Test for trend in proportions 
stats::binom.test                   Exact Binomial Test

stats::cor.test                     Test for Association/Correlation Between Paired Samples
stats::ansari.test                  Ansari-Bradley Test

T-Test

stats::t.test                       Student's t-Test    
stats::pairwise.t.test              Pairwise t tests
Hmisc::t.test.cluster               t-test for Clustered Data
stats::oneway.test                  Test for Equal Means in a One-Way Layout

Nicht-parametrische Tests

stats::kruskal.test                 Kruskal-Wallis Rank Sum Test
stats::wilcox.test                  Wilcoxon Rank Sum and Signed Rank Tests    
stats::pairwise.wilcox.test         Pairwise Wilcoxon rank sum tests
Hmisc::biVar                        Bivariate Summaries Computed Separately by a Series of Predictors
stats::mantelhaen.test              Cochran-Mantel-Haenszel Chi-Squared Test for Count Data
stats::friedman.test                Friedman Rank Sum Test
stats::ks.test                      Kolmogorov-Smirnov Tests
stats::shapiro.test                 Shapiro-Wilk Normality Test

survival

survival::cox.zph                   Test the Proportional Hazards Assumption of a Cox Regression
survival::plot.cox.zph              Graphical Test of Proportional Hazards
survival::survdiff                  Test Survival Curve Differences 
survival::survival-internal         Internal survival functions
survival::survobrien                O'Brien's Test for Association of a Single Variable with Survival
survival::survregDtest              Verify a survreg distribution

Power calculations

stats::power.anova.test             Power calculations for balanced one-way analysis of variance tests
stats::power.prop.test              Power calculations two sample test for proportions
stats::power.t.test                 Power calculations for one and two sample t tests    
Hmisc::bpower                       Power and Sample Size for Two-Sample Binomial Test
Hmisc::ciapower                     Power of Interaction Test for Exponential Survival
Hmisc::cpower                       Power of Cox/log-rank Two-Sample Test
Hmisc::spower                       Simulate Power of 2-Sample Test for Survival under Complex Conditions

Sobel

multilevel::sobel                   Estimate Sobel's (1982) Test for Mediation
multilevel::sobel.lme               Estimate Sobel's (1982) Test for Mediation in Two-Level lme Model

GOF

stats::bartlett.test                Bartlett Test of Homogeneity of Variances    
stats::mauchly.test                 Mauchly's Test of Sphericity    
stats::var.test                     F Test to Compare Two Variances    
vcd::coindep_test                   Test for (Conditional) Independence
vcd::goodfit                        Goodness-of-fit Tests for Discrete Data    
stats::poisson.test                 Exact Poisson tests

Andere

stats::Box.test                     Box-Pierce and Ljung-Box Tests
stats::fligner.test                 Fligner-Killeen Test of Homogeneity of Variances
stats::mood.test                    Mood Two-Sample Test of Scale
stats::PP.test                      Phillips-Perron Test for Unit Roots
stats::quade.test                   Quade Test


stp4/stp25stat documentation built on Sept. 17, 2021, 2:03 p.m.