After the data generation you may want to draw a sample from the population. If you are interested in keeping information about your population, see the vignette 'addingComputation'. Use the function sim_sample()
to add a sampling component to your sim_setup.
In general you can provide your own sampling function. Two predefined functions are available:
- sample_number - wrapper around dplyr::sample_n
- sample_fraction - wrapper around dplyr::sample_frac
library(saeSim)
base_id(3, 4) %>% sim_gen_x() %>% sim_sample(sample_number(1L))
## idD idU x
## 2 1 2 2.474
base_id(3, 4) %>% sim_gen_x() %>% sim_sample(sample_number(1L, groupVars = "idD"))
## idD idU x
## 1 1 1 -0.1066
## 2 2 3 0.1086
## 3 3 4 -1.3275
# simple random sampling:
sim_base_lm() %>% sim_sample(sample_number(size = 10L))
## idD idU x e y
## 1287 13 87 3.290 -2.415 100.87
## 9735 98 35 -2.011 3.195 101.18
## 497 5 97 3.046 -6.817 96.23
## 8709 88 9 1.878 -2.841 99.04
## 2254 23 54 1.146 1.702 102.85
## 7821 79 21 -2.908 7.021 104.11
sim_base_lm() %>% sim_sample(sample_fraction(size = 0.05))
## idD idU x e y
## 1315 14 15 -0.85263 -0.6907 98.46
## 807 9 7 -0.03432 -5.5050 94.46
## 2760 28 60 -2.12616 -8.1636 89.71
## 8115 82 15 2.51351 3.6743 106.19
## 5373 54 73 2.44824 -2.4887 99.96
## 218 3 18 1.99300 1.8973 103.89
# simple random sampling within each domain/cluster:
sim_base_lm() %>% sim_sample(sample_number(size = 10L, groupVars = "idD"))
## idD idU x e y
## 1 1 40 8.103 6.163 114.27
## 2 1 41 -2.793 7.404 104.61
## 3 1 89 -4.007 -8.210 87.78
## 4 1 7 -3.466 -3.684 92.85
## 5 1 83 -4.839 -4.247 90.91
## 6 1 10 3.548 -3.493 100.06
sim_base_lm() %>% sim_sample(sample_fraction(size = 0.05, groupVars = "idD"))
## idD idU x e y
## 1 1 5 -2.278 4.3089 102.03
## 2 1 93 -2.588 1.1808 98.59
## 3 1 53 -3.421 5.0154 101.59
## 4 1 77 -8.021 -3.4064 88.57
## 5 1 98 3.496 0.3627 103.86
## 6 2 47 -2.706 -4.5059 92.79