mtcars data
library(nycflights13)
library(dplyr)
library(ggplot2)
library(stringr)
library(infer)
# Recode the discrete mtcars columns as factors; am, vs and cyl serve as
# categorical variables in the examples that follow.
mtcars <- as.data.frame(mtcars) %>%
  mutate(
    cyl = factor(cyl),
    vs = factor(vs),
    am = factor(am),
    gear = factor(gear),
    carb = factor(carb)
  )
One numerical variable (mean)
# Point null on the mean of mpg (mu = 25): draw 100 bootstrap replicates and
# compute the mean of each one.
# Formula alternative to `response = mpg`: specify(mpg ~ NULL).
mtcars %>%
  specify(response = mpg) %>%
  hypothesize(null = "point", mu = 25) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "mean")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 25.5
## 2 2 25.1
## 3 3 22.4
## 4 4 26.4
## 5 5 26.1
## 6 6 23.5
## 7 7 25.1
## 8 8 25.1
## 9 9 23.7
## 10 10 24.7
## # ... with 90 more rows
One numerical variable (median)
# Point null on the median of mpg (med = 26): draw 100 bootstrap replicates
# and compute the median of each one.
# Formula alternative to `response = mpg`: specify(mpg ~ NULL).
mtcars %>%
  specify(response = mpg) %>%
  hypothesize(null = "point", med = 26) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "median")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 28.2
## 2 2 28.2
## 3 3 27.8
## 4 4 25.5
## 5 5 24.5
## 6 6 26.0
## 7 7 26.0
## 8 8 27.8
## 9 9 26.2
## 10 10 27.8
## # ... with 90 more rows
One numerical variable (standard deviation)
# Point null on the standard deviation of mpg (sigma = 5): draw 100 bootstrap
# replicates and compute the standard deviation of each one.
# Formula alternative to `response = mpg`: specify(mpg ~ NULL).
mtcars %>%
  specify(response = mpg) %>%
  hypothesize(null = "point", sigma = 5) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "sd")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 6.66
## 2 2 5.55
## 3 3 6.21
## 4 4 4.08
## 5 5 4.58
## 6 6 6.51
## 7 7 7.60
## 8 8 6.85
## 9 9 5.77
## 10 10 6.96
## # ... with 90 more rows
One categorical (2 level) variable
# Point null on the proportion of am == "1" (p = 0.25): simulate 100
# replicates and compute the proportion of successes in each one.
# Formula alternative to `response = am`: specify(am ~ NULL, success = "1").
mtcars %>%
  specify(response = am, success = "1") %>%
  hypothesize(null = "point", p = 0.25) %>%
  generate(reps = 100, type = "simulate") %>%
  calculate(stat = "prop")
## # A tibble: 100 x 2
## replicate stat
## <fctr> <dbl>
## 1 1 0.188
## 2 2 0.0625
## 3 3 0.281
## 4 4 0.344
## 5 5 0.219
## 6 6 0.250
## 7 7 0.312
## 8 8 0.250
## 9 9 0.281
## 10 10 0.156
## # ... with 90 more rows
Two categorical (2 level) variables
# Independence null for am versus vs: 100 permutation replicates, each
# summarised by the difference in proportions of am == "1" between the vs
# groups. `order` fixes the subtraction order of the vs levels ("0" - "1").
# Alternative to the formula: response = am, explanatory = vs.
mtcars %>%
  specify(formula = am ~ vs, success = "1") %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(
    stat = "diff in props",
    order = c("0", "1")
  )
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 0.0873
## 2 2 -0.0397
## 3 3 0.317
## 4 4 -0.0952
## 5 5 0.230
## 6 6 -0.0238
## 7 7 0.183
## 8 8 0.0714
## 9 9 -0.238
## 10 10 0.0952
## # ... with 90 more rows
One categorical (>2 level) - GoF
# Goodness of fit for cyl against the null proportions "4" = 0.5, "6" = 0.25,
# "8" = 0.25: simulate 100 replicates and compute the chi-squared statistic
# of each one.
# Alternative to the formula: response = cyl.
mtcars %>%
  specify(formula = cyl ~ NULL) %>%
  hypothesize(
    null = "point",
    p = c("4" = 0.5, "6" = 0.25, "8" = 0.25)
  ) %>%
  generate(reps = 100, type = "simulate") %>%
  calculate(stat = "Chisq")
## # A tibble: 100 x 2
## replicate stat
## <fctr> <dbl>
## 1 1 2.75
## 2 2 1.69
## 3 3 1.00
## 4 4 4.19
## 5 5 0.688
## 6 6 1.69
## 7 7 1.69
## 8 8 3.69
## 9 9 2.00
## 10 10 0.188
## # ... with 90 more rows
Two categorical (>2 level) variables
# Independence null for cyl versus am: 100 permutation replicates, each
# summarised by the chi-squared statistic.
# Alternative to the formula: response = cyl, explanatory = am.
mtcars %>%
  specify(formula = cyl ~ am) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "Chisq")
## # A tibble: 100 x 2
## replicate stat
## <fctr> <dbl>
## 1 1 5.73
## 2 2 0.513
## 3 3 1.36
## 4 4 4.16
## 5 5 1.26
## 6 6 0.134
## 7 7 0.172
## 8 8 0.164
## 9 9 0.592
## 10 10 0.296
## # ... with 90 more rows
One numerical variable, one categorical (2 levels) (diff in means)
# Independence null for mpg versus am: 100 permutation replicates, each
# summarised by the difference in mean mpg between the am groups.
# `order` fixes the subtraction order of the am levels ("0" - "1").
# Alternative to the formula: response = mpg, explanatory = am.
mtcars %>%
  specify(formula = mpg ~ am) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(
    stat = "diff in means",
    order = c("0", "1")
  )
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 2.17
## 2 2 0.344
## 3 3 1.67
## 4 4 0.376
## 5 5 -1.47
## 6 6 -2.03
## 7 7 0.615
## 8 8 0.153
## 9 9 1.40
## 10 10 -0.872
## # ... with 90 more rows
One numerical variable, one categorical (2 levels) (diff in medians)
# Independence null for mpg versus am: 100 permutation replicates, each
# summarised by the difference in median mpg between the am groups.
# `order` fixes the subtraction order of the am levels ("0" - "1").
# Alternative to the formula: response = mpg, explanatory = am.
mtcars %>%
  specify(formula = mpg ~ am) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(
    stat = "diff in medians",
    order = c("0", "1")
  )
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 0.600
## 2 2 0
## 3 3 -3.20
## 4 4 -1.90
## 5 5 -1.10
## 6 6 0
## 7 7 -1.10
## 8 8 -1.80
## 9 9 -3.90
## 10 10 -2.40
## # ... with 90 more rows
One numerical, one categorical (>2 levels) - ANOVA
# Independence null for mpg versus cyl: 100 permutation replicates, each
# summarised by the F statistic.
# Alternative to the formula: response = mpg, explanatory = cyl.
mtcars %>%
  specify(formula = mpg ~ cyl) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "F")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 0.129
## 2 2 2.33
## 3 3 1.82
## 4 4 0.628
## 5 5 0.235
## 6 6 0.378
## 7 7 0.431
## 8 8 1.24
## 9 9 0.988
## 10 10 0.642
## # ... with 90 more rows
Two numerical vars - SLR
# Independence null for mpg versus hp: 100 permutation replicates, each
# summarised by the slope of the regression of mpg on hp.
# Alternative to the formula: response = mpg, explanatory = hp
# (the original note named cyl, which did not match the formula mpg ~ hp).
mtcars %>%
  specify(mpg ~ hp) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "slope")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.00473
## 2 2 -0.00982
## 3 3 0.00359
## 4 4 0.00231
## 5 5 -0.00980
## 6 6 -0.0200
## 7 7 0.0128
## 8 8 0.00150
## 9 9 -0.0149
## 10 10 -0.0187
## # ... with 90 more rows
One numerical (one mean)
# Bootstrap distribution of the mean of mpg (no null hypothesis specified):
# 100 bootstrap replicates, each summarised by its mean.
mtcars %>%
  specify(response = mpg) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "mean")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 20.9
## 2 2 19.6
## 3 3 21.4
## 4 4 21.2
## 5 5 17.9
## 6 6 20.9
## 7 7 17.9
## 8 8 20.5
## 9 9 21.5
## 10 10 19.2
## # ... with 90 more rows
One numerical (one median)
# Bootstrap distribution of the median of mpg (no null hypothesis specified):
# 100 bootstrap replicates, each summarised by its median.
mtcars %>%
  specify(response = mpg) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "median")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 19.7
## 2 2 18.6
## 3 3 21.4
## 4 4 17.7
## 5 5 17.7
## 6 6 20.4
## 7 7 21.0
## 8 8 19.0
## 9 9 17.3
## 10 10 20.4
## # ... with 90 more rows
One numerical (standard deviation)
# Bootstrap distribution of the standard deviation of mpg (no null hypothesis
# specified): 100 bootstrap replicates, each summarised by its sd.
mtcars %>%
  specify(response = mpg) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "sd")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 5.86
## 2 2 6.20
## 3 3 6.41
## 4 4 5.67
## 5 5 6.12
## 6 6 5.80
## 7 7 6.58
## 8 8 5.02
## 9 9 6.55
## 10 10 5.96
## # ... with 90 more rows
One categorical (one proportion)
# Bootstrap distribution of the proportion of am == "1" (no null hypothesis
# specified): 100 bootstrap replicates, each summarised by its proportion of
# successes.
mtcars %>%
  specify(response = am, success = "1") %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "prop")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 0.531
## 2 2 0.250
## 3 3 0.375
## 4 4 0.344
## 5 5 0.406
## 6 6 0.594
## 7 7 0.531
## 8 8 0.562
## 9 9 0.531
## 10 10 0.469
## # ... with 90 more rows
One numerical variable, one categorical (2 levels) (diff in means)
# Bootstrap distribution of the difference in mean mpg between the am groups
# (no null hypothesis specified): 100 bootstrap replicates.
# `order` fixes the subtraction order of the am levels ("0" - "1").
mtcars %>%
  specify(formula = mpg ~ am) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(
    stat = "diff in means",
    order = c("0", "1")
  )
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 - 7.36
## 2 2 - 9.03
## 3 3 -10.8
## 4 4 - 7.36
## 5 5 - 3.03
## 6 6 - 5.74
## 7 7 - 9.80
## 8 8 - 6.70
## 9 9 - 7.71
## 10 10 - 7.26
## # ... with 90 more rows
Two categorical variables (diff in proportions)
# Bootstrap distribution of the difference in proportions of am == "1"
# between the vs groups (no null hypothesis specified): 100 bootstrap
# replicates. `order` fixes the subtraction order of the vs levels ("0" - "1").
mtcars %>%
  specify(formula = am ~ vs, success = "1") %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(
    stat = "diff in props",
    order = c("0", "1")
  )
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.217
## 2 2 -0.0688
## 3 3 0.203
## 4 4 -0.125
## 5 5 -0.151
## 6 6 0.0405
## 7 7 0.0635
## 8 8 -0.116
## 9 9 -0.188
## 10 10 -0.0931
## # ... with 90 more rows
Two numerical vars - SLR
# Bootstrap distribution of the slope of mpg on hp (no null hypothesis
# specified): 100 bootstrap replicates, each summarised by its slope.
mtcars %>%
  specify(formula = mpg ~ hp) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "slope")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.107
## 2 2 -0.0727
## 3 3 -0.0439
## 4 4 -0.0947
## 5 5 -0.0860
## 6 6 -0.0714
## 7 7 -0.0633
## 8 8 -0.0821
## 9 9 -0.0865
## 10 10 -0.0593
## # ... with 90 more rows