# Set document-wide knitr chunk defaults: `collapse = TRUE` and "#>" as the
# prefix placed in front of printed output lines.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

Breaking Down the Research Process: A Review

Frequency Distributions

Frequency Distributions

# Embed the image file pictures/introDA2/ndis.jpg in the rendered document.
knitr::include_graphics("pictures/introDA2/ndis.jpg")

Frequency Distributions

# Load the quakes data set that ships with R, then print a compact overview
# of its columns.
library("datasets")
data(list = "quakes")
str(quakes)

Frequency Distributions: Tables

# Frequency table: how many times each distinct magnitude value occurs.
table(quakes[["mag"]])

Frequency Distributions: Plots

# Histogram of the magnitudes. A single number for `breaks` asks hist() for
# about that many cells; hist() treats the number as a suggestion.
hist(quakes$mag, breaks = 24)

Understanding Frequency Distributions

Skew Visualized

# Embed the image file pictures/introDA2/Skew_PosvsNeg.png in the rendered
# document.
knitr::include_graphics("pictures/introDA2/Skew_PosvsNeg.png")

Kurtosis Visualized

# Embed the image file pictures/introDA2/Kurtosis.png in the rendered document.
knitr::include_graphics("pictures/introDA2/Kurtosis.png")

Produce a Histogram

# Histogram of the magnitudes using hist()'s default choice of breaks.
hist(quakes$mag)

Calculating Skewness

# Skewness of the magnitudes, computed with the moments package.
library(moments)
skewness(quakes$mag)

# psych's describe() also reports skew as part of its summary table.
# The call is namespace-qualified because Hmisc exports a describe() as well;
# once Hmisc is attached, a bare describe() would dispatch to the Hmisc
# version instead of the psych one.
library(psych)
psych::describe(quakes$mag)

Interpreting Skewness

Calculating Kurtosis

# moments: Pearson's kurtosis (a normal distribution scores 3).
# Namespace-qualified so the chunk does not depend on moments having been
# attached by an earlier chunk.
moments::kurtosis(quakes$mag)

# psych: describe() reports excess kurtosis (a normal distribution scores 0).
# Namespace-qualified because Hmisc also exports describe() and would mask
# the psych version once attached.
psych::describe(quakes$mag)

Interpreting Kurtosis

Interpreting Excess Kurtosis

Central Tendency

# Minimum, quartiles, median, mean and maximum of the magnitudes in one call.
summary(quakes[["mag"]])

Other Summary Stat Functions

# pastecs: table of descriptive statistics for the columns of quakes.
library(pastecs)
stat.desc(quakes)

Other Summary Stat Functions

# Hmisc: per-column description of quakes. The call is namespace-qualified
# because psych exports a function with the same name.
library(Hmisc)
Hmisc::describe(quakes)

Other Summary Stat Functions

# psych: per-column descriptive statistics for quakes. The call is
# namespace-qualified because Hmisc exports a function with the same name.
library(psych)
psych::describe(quakes)

The Mean

$$\bar{x} = \frac {\sum_{i=1}^{n}x_{i}} {n}$$

# Arithmetic mean of the magnitudes.
with(quakes, mean(mag))

The Median

# Middle value of the sorted magnitudes.
with(quakes, median(mag))

The Mode

#' Most frequent value of a vector
#'
#' @param v A vector.
#' @return The value of `v` that occurs most often. When several values tie
#'   for the highest count, the one that appears first in `v` is returned.
getmode <- function(v) {
  distinct_values <- unique(v)
  # Map every element to the position of its distinct value, then count how
  # often each position occurs.
  positions <- match(v, distinct_values)
  counts <- tabulate(positions)
  distinct_values[which.max(counts)]
}

# Most frequently occurring magnitude value.
getmode(quakes[["mag"]])

More Than One Mode

# Embed the image file pictures/introDA2/bimodal_distribution.png in the
# rendered document.
knitr::include_graphics("pictures/introDA2/bimodal_distribution.png")

Which Measure of Central Tendency & When

# Embed the image file pictures/introDA2/Capture_01.png in the rendered
# document.
knitr::include_graphics("pictures/introDA2/Capture_01.png")

Dispersion

Range

# range() returns the smallest and largest magnitude as a length-2 vector.
range(quakes$mag)
# psych's summary table for the same vector, printed alongside for reference.
psych::describe(quakes$mag)

Interquartile Range

# Embed the image file pictures/introDA2/interquartile_range.png in the
# rendered document.
knitr::include_graphics("pictures/introDA2/interquartile_range.png")

Calculating Quartiles

# Default quantiles (0%, 25%, 50%, 75%, 100%) of the magnitudes.
quantile(quakes[["mag"]])
# summary() prints the same quartiles and adds the mean.
summary(quakes[["mag"]])
# Quantiles at explicitly chosen probabilities.
quantile(quakes[["mag"]], probs = c(0.05, 0.50, 0.75, 0.95))

Variance

$$SD^2 = \frac {\sum_{i=1}^{n}(x_{i} - \bar{x})^2} {n - 1}$$

# Sample variance of the magnitudes (var() uses the n - 1 denominator).
with(quakes, var(mag))

Standard Deviation

# Sample standard deviation of the magnitudes (square root of var()).
with(quakes, sd(mag))

Using Standardized Values

$$Z = \frac{(x_{i} - \bar{x})} {SD}$$

# Standardize the magnitudes with scale() (default arguments) and store the
# result as a new column of quakes.
quakes$zscore <- scale(quakes$mag)
head(quakes$zscore)
# str() shows the structure of the stored result, including any attributes
# that scale() attached to it.
str(quakes$zscore)
# Mean and SD of the raw magnitudes, presumably printed for comparison with
# the centering and scaling values reported by str() above.
mean(quakes$mag)
sd(quakes$mag)

Z-score properties

Z-score properties

Measures of Association

Covariance

$$cov_{x,y} = \frac {\sum_{i=1}^{n}(x_{i} - \bar{x})(y_{i} - \bar{y})} {n - 1}$$

# Sample covariance between magnitude and depth.
with(quakes, cov(mag, depth))

Correlation

# Pearson correlation (cor()'s default method) between magnitude and depth.
with(quakes, cor(mag, depth))

Correlation

# Plot the correlation matrix of the quakes variables, with variables ordered
# by hierarchical clustering.
library(corrplot)
# quakes may carry a derived `zscore` column, which is only a rescaled copy of
# `mag`. It is left out so the plot does not gain a redundant variable that
# correlates perfectly with `mag`. setdiff() is a no-op when the column is
# absent.
corrplot(
  cor(quakes[, setdiff(names(quakes), "zscore"), drop = FALSE]),
  order = "hclust"
)

Summary



doomlab/learnSTATS documentation built on June 9, 2022, 12:54 a.m.