Common Designs

Populations & Samples

Statistics vs Parameters

Frequencies

# Raw frequency of each cylinder count in mtcars.
table(mtcars$cyl)
## 
##  4  6  8 
## 11  7 14
# Same frequencies as a tibble: n is the number of cars per cylinder group,
# prop is that count divided by the total number of rows in mtcars.
cyl_prop <- mtcars %>%
  group_by(cyl) %>%
  summarise(
    n = n(),
    prop = n / nrow(mtcars),
    .groups = "drop"  # return an ungrouped result
  )

Visualizing Frequencies

# Bar chart: one bar per cylinder group; geom_bar() counts the rows itself.
ggplot(mtcars, aes(x = as.factor(cyl))) +
  geom_bar(fill = "dodgerblue")

# Pie chart of cylinder frequencies.
# Each slice must be sized by the number of cars in that group, so geom_bar()
# is left on its default counting stat. Mapping y = cyl with
# stat = 'identity' stacked the raw cyl values instead, sizing the slices by
# 4 * 11, 6 * 7 and 8 * 14 rather than by the counts 11, 7 and 14.
mtcars %>% 
  ggplot(aes(x = '',
             fill = as.factor(cyl))) + 
  geom_bar(width = 1) + 
  # Bending the stacked bar around the y (count) axis turns it into a pie.
  coord_polar('y', start = 0) +
  theme_void()

# Histogram of fuel economy (mpg) split into 15 bins.
ggplot(mtcars, aes(x = mpg)) +
  geom_histogram(bins = 15, fill = "dodgerblue", color = "white")

# Line chart of the number of cars (n) in each cylinder group.
mtcars %>%
  group_by(cyl) %>%
  summarise(n = n(), .groups = "drop") %>%
  ggplot(aes(x = cyl, y = n)) +
  geom_line()

Normal Distribution

Skewed Distributions

Frequency Calculations

\[ Relative\;frequency = \frac{f}{N} \]

# f: frequency of the score of interest; N: total number of scores.
f <- 10
N <- 200

# Relative frequency is the share of all scores, f / N.
freq <- f / N
freq
## [1] 0.05
# Expressed as a percentage.
percent <- 100 * freq
percent
## [1] 5

Relative Frequency Using Normal Curve

Central Tendency

# Ten example scores used for the central tendency calculations.
calc <- c(3, 5, 4, 6, 6, 3, 5, 1, 8, 10)
print(calc)
##  [1]  3  5  4  6  6  3  5  1  8 10
# Tally how often each distinct score occurs.
table(calc)
## calc
##  1  3  4  5  6  8 10 
##  1  2  1  2  2  1  1
# The scores in their original order ...
print(calc)
##  [1]  3  5  4  6  6  3  5  1  8 10
# ... and in ascending order.
sort(calc)
##  [1]  1  3  3  4  5  5  6  6  8 10

\[ \overline{X} = \frac{\Sigma\;X}{N} \]

print(calc)
##  [1]  3  5  4  6  6  3  5  1  8 10
# Mean by hand: the sum of the scores divided by how many scores there are.
sum_x <- sum(calc)
mean_n <- length(calc)

average <- sum_x / mean_n
average
## [1] 5.1
# Built-in median, then the built-in mean to confirm the manual result.
median(calc)
## [1] 5
mean(calc)
## [1] 5.1

Deviation

\[ X - \overline{X} \]

In the Near Future

# Fix the random number generator so the simulated scores are reproducible.
# R reads 06062022 as the number 6062022; the leading zero has no effect.
set.seed(06062022)

# Draw 100 scores from a normal distribution with mean 60 and sd 5, number the
# rows, then attach the sample mean and each score's deviation from it.
devs <- tibble(x = rnorm(n = 100, mean = 60, sd = 5)) %>%
  rowid_to_column() %>%
  mutate(
    mean = mean(x),
    deviations = x - mean
  )

head(devs)
## # A tibble: 6 x 4
##   rowid     x  mean deviations
##   <int> <dbl> <dbl>      <dbl>
## 1     1  73.2  60.0     13.2  
## 2     2  56.2  60.0     -3.78 
## 3     3  57.7  60.0     -2.27 
## 4     4  59.8  60.0     -0.172
## 5     5  60.9  60.0      0.909
## 6     6  61.8  60.0      1.76
# Scatter of every score by row, coloured by its deviation, with a red
# horizontal line at the sample mean.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for line
# thickness; `size` is kept because the installed version is not known.
ggplot(devs, aes(x = rowid, y = x)) +
  geom_point(aes(color = deviations)) +
  geom_hline(yintercept = mean(devs$x), size = 1.25, color = "red")

# Deviations from the mean cancel out, so their sum is zero apart from
# floating-point rounding error.
with(devs, sum(deviations))
## [1] -2.629008e-13