This brief vignette shows an example of a basic workflow selecting recordings for different times of day by site and year.
First we’ll load the packages we want to work with.
Next we’ll prepare our metadata on the recordings, by cleaning, adding site-level information and calculating the time to sunrise/sunset for each file. We’ll also define recordings as either ‘early’ (starting before 6am) or ‘late’ (starting at or after 6am).
# Clean the example site index. The two columns given to `name_date` hold the
# start and end date-times that recordings are later matched against.
s <- example_sites_clean |>
  clean_site_index(name_date = c("date_time_start", "date_time_end"))
# Build the recording metadata used throughout the rest of the workflow.
m <- clean_metadata(project_files = example_files) |>
  # Attach site-level information from the cleaned site index
  add_sites(s) |>
  # Add time to sunrise / sunset (the `t2sr` and `t2ss` columns)
  calc_sun() |>
  mutate(
    # Recordings starting before 06:00 are "early"; 06:00 onwards are "late"
    time_period = if_else(hour(date_time) < 6, "early", "late"),
    # Calendar year of the recording, used later to group the weights
    year = year(date)
  )
#> Extracting ARU info...
#> Extracting Dates and Times...
#> Joining by columns `date_time_start` and `date_time_end`
# Print the prepared metadata, including the new `time_period` and `year`
# columns
m
#> # A tibble: 42 × 15
#> file_name type path aru_type aru_id site_id date_time date
#> <chr> <chr> <chr> <chr> <chr> <chr> <dttm> <date>
#> 1 P01_1_2020… wav a_BA… BarLT BARLT… P01_1 2020-05-02 05:00:00 2020-05-02
#> 2 P01_1_2020… wav a_BA… BarLT BARLT… P01_1 2020-05-03 05:20:00 2020-05-03
#> 3 P02_1_2020… wav a_S4… SongMet… S4A01… P02_1 2020-05-04 05:25:00 2020-05-04
#> 4 P02_1_2020… wav a_S4… SongMet… S4A01… P02_1 2020-05-05 07:30:00 2020-05-05
#> # ℹ 38 more rows
#> # ℹ 7 more variables: longitude <dbl>, latitude <dbl>, tz <chr>, t2sr <dbl>,
#> # t2ss <dbl>, time_period <chr>, year <dbl>
Time to do some sampling!
First we define the selection parameters for each time frame we’re interested in sampling. This might be “dawn” and “dusk”, or in this example, “early” and “late” morning.
The `sim_selection_weights()` function will also simulate the selection weights so we can see what we’ve defined.
# One set of selection parameters per time period. The list names must match
# the values used in the `time_period` column of the metadata.
p <- list(
  # Early recordings: wider minute range, every other setting left at default
  early = sim_selection_weights(min_range = c(-70, 240)),
  # Late recordings: later minute range, with the mean moved to 200 to match
  late = sim_selection_weights(min_range = c(100, 300), min_mean = 200)
)
# Print the parameters that were stored for each period
p
#> $early
#> $early$min_range
#> [1] -70 240
#>
#> $early$min_mean
#> [1] 30
#>
#> $early$min_sd
#> [1] 60
#>
#> $early$day_range
#> [1] 120 201
#>
#> $early$day_mean
#> [1] 161
#>
#> $early$day_sd
#> [1] 20
#>
#> $early$offset
#> [1] 0
#>
#> $early$return_log
#> [1] TRUE
#>
#> $early$selection_fun
#> [1] "norm"
#>
#>
#> $late
#> $late$min_range
#> [1] 100 300
#>
#> $late$min_mean
#> [1] 200
#>
#> $late$min_sd
#> [1] 60
#>
#> $late$day_range
#> [1] 120 201
#>
#> $late$day_mean
#> [1] 161
#>
#> $late$day_sd
#> [1] 20
#>
#> $late$offset
#> [1] 0
#>
#> $late$return_log
#> [1] TRUE
#>
#> $late$selection_fun
#> [1] "norm"
Now we can calculate selection weights
Here we’ll calculate a separate set of selection weights for early and late recordings in each year. Then we’ll group recordings by site, year, and time period.
# Calculate selection weights separately for each time period within each
# year, then label every recording with its sampling group.
w <- m |>
  # One row per time_period x year; that subset's recordings go into `data`
  nest(data = c(-time_period, -year)) |>
  mutate(
    # Look up each row's parameters by its time-period name. Assigning `p`
    # directly would pair parameters with rows by position, which only works
    # when there are exactly as many rows as parameter sets and they happen to
    # be in the same order (e.g. it breaks as soon as a second year is added).
    params = unname(p[time_period]),
    sel = map2(data, params, calc_selection_weights)
  ) |>
  unnest(sel) |>
  # The nested inputs are no longer needed once the weights are unnested
  select(-"data", -"params") |>
  # Sampling group: one per site, year, and time period
  mutate(selection_group = glue("{site_id}_{year}_{time_period}"))
w
#> # A tibble: 21 × 24
#> time_period year file_name type path aru_type aru_id site_id
#> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 early 2020 P01_1_20200503T052000_A… wav a_BA… BarLT BARLT… P01_1
#> 2 early 2020 P02_1_20200504T052500_A… wav a_S4… SongMet… S4A01… P02_1
#> 3 early 2020 P06_1_20200509T052000_A… wav a_BA… BarLT BARLT… P06_1
#> 4 early 2020 P07_1_20200509T052500_A… wav a_S4… SongMet… S4A01… P07_1
#> # ℹ 17 more rows
#> # ℹ 16 more variables: date_time <dttm>, date <date>, longitude <dbl>,
#> # latitude <dbl>, tz <chr>, t2sr <dbl>, t2ss <dbl>, doy <dbl>, psel_by <chr>,
#> # psel_min <dbl>, psel_doy <dbl>, psel <dbl>, psel_scaled <dbl>,
#> # psel_std <dbl>, psel_normalized <dbl>, selection_group <glue>
This `w` data set contains the original recordings, but now also has new
columns containing various measures of the probability of selection.
We’ll define the number of samples we’d like to have.
# Work out how many base samples (`n`) and oversamples (`n_os`) to draw from
# each selection group.
n <- w |>
  # Number of recordings available in each group
  summarize(n_recordings = n(), .by = c("selection_group", "time_period")) |>
  mutate(
    # Target number of base samples: 5 for early groups, 2 for late groups
    n = if_else(time_period == "early", 5, 2),
    # Oversamples: about a third of the target, but no more than the
    # recordings left over after the base sample, and never negative.
    # (An earlier per-period `n_os` assignment was removed: it was overwritten
    # by this line before ever being used.)
    n_os = pmax(0, pmin(n_recordings - n, round(n / 3))),
    # Can't sample more recordings than the group actually has
    n = pmin(n, n_recordings)
  )
n
#> # A tibble: 7 × 5
#> selection_group time_period n_recordings n n_os
#> <glue> <chr> <int> <dbl> <dbl>
#> 1 P01_1_2020_early early 3 3 0
#> 2 P02_1_2020_early early 3 3 0
#> 3 P06_1_2020_early early 3 3 0
#> 4 P07_1_2020_early early 3 3 0
#> # ℹ 3 more rows
And finally sample the recordings!
# Draw the sample: `n` supplies the base and oversample sizes per group,
# groups are identified by `selection_group`, and recordings are weighted by
# `psel_normalized`.
g <- w |>
  sample_recordings(
    n,
    col_site_id = selection_group,
    col_sel_weights = psel_normalized
  )
# Printing the result shows how many base / oversamples each group received
g
#> Summary of Site Counts:
#>
#> siteuse by total:
#> Base Over
#> total 19 2
#>
#> siteuse by stratum:
#> Base Over
#> P01_1_2020_early 3 0
#> P02_1_2020_early 3 0
#> P03_1_2020_late 2 1
#> P06_1_2020_early 3 0
#> P07_1_2020_early 3 0
#> P08_1_2020_late 2 1
#> P09_1_2020_early 3 0
The recordings selected for sampling…
# The base sample (counted under "Base" in the summary above), with one row
# per selected recording
g$sites_base
#> Simple feature collection with 19 features and 32 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: 124 ymin: -53.21667 xmax: 132 ymax: 238.3167
#> Projected CRS: WGS 84 / World Mercator
#> First 10 features:
#> siteID siteuse replsite lon_WGS84 lat_WGS84 stratum wgt ip
#> 1 sample-01 Base None 0.001113911 -4.812753e-04 P01_1_2020_early 1 1
#> 2 sample-02 Base None 0.001113911 -4.812753e-04 P01_1_2020_early 1 1
#> 3 sample-03 Base None 0.001113911 -4.812753e-04 P01_1_2020_early 1 1
#> 4 sample-04 Base None 0.001122894 -4.273146e-04 P02_1_2020_early 1 1
#> 5 sample-05 Base None 0.001122894 -4.273146e-04 P02_1_2020_early 1 1
#> 6 sample-06 Base None 0.001122894 -4.273146e-04 P02_1_2020_early 1 1
#> 7 sample-07 Base None 0.001167810 3.240657e-05 P06_1_2020_early 1 1
#> 8 sample-08 Base None 0.001167810 3.240657e-05 P06_1_2020_early 1 1
#> 9 sample-09 Base None 0.001167810 3.240657e-05 P06_1_2020_early 1 1
#> 10 sample-10 Base None 0.001167810 -3.701886e-04 P07_1_2020_early 1 1
#> caty aux time_period year file_name type
#> 1 None 0.001 early 2020 P01_1_20200503T052000_ARU.wav wav
#> 2 None 0.001 early 2020 P01_1_20200503T052000_ARU.wav wav
#> 3 None 0.001 early 2020 P01_1_20200503T052000_ARU.wav wav
#> 4 None 0.001 early 2020 P02_1_20200504T052500_ARU.wav wav
#> 5 None 0.001 early 2020 P02_1_20200504T052500_ARU.wav wav
#> 6 None 0.001 early 2020 P02_1_20200504T052500_ARU.wav wav
#> 7 None 0.001 early 2020 P06_1_20200509T052000_ARU.wav wav
#> 8 None 0.001 early 2020 P06_1_20200509T052000_ARU.wav wav
#> 9 None 0.001 early 2020 P06_1_20200509T052000_ARU.wav wav
#> 10 None 0.001 early 2020 P07_1_20200509T052500_ARU.wav wav
#> path aru_type aru_id
#> 1 a_BARLT10962_P01_1/P01_1_20200503T052000_ARU.wav BarLT BARLT10962
#> 2 j_BARLT10962_P01_1/P01_1_20200503T052000_ARU.wav BarLT BARLT10962
#> 3 o_BARLT10962_P01_1/P01_1_20200503T052000_ARU.wav BarLT BARLT10962
#> 4 a_S4A01234_P02_1/P02_1_20200504T052500_ARU.wav SongMeter S4A01234
#> 5 j_S4A01234_P02_1/P02_1_20200504T052500_ARU.wav SongMeter S4A01234
#> 6 o_S4A01234_P02_1/P02_1_20200504T052500_ARU.wav SongMeter S4A01234
#> 7 a_BARLT10962_P06_1/P06_1_20200509T052000_ARU.wav BarLT BARLT10962
#> 8 j_BARLT10962_P06_1/P06_1_20200509T052000_ARU.wav BarLT BARLT10962
#> 9 o_BARLT10962_P06_1/P06_1_20200509T052000_ARU.wav BarLT BARLT10962
#> 10 a_S4A01234_P07_1/P07_1_20200509T052500_ARU.wav SongMeter S4A01234
#> site_id date_time date longitude latitude tz
#> 1 P01_1 2020-05-03 05:20:00 2020-05-03 -85.03 50.01 America/Toronto
#> 2 P01_1 2020-05-03 05:20:00 2020-05-03 -85.03 50.01 America/Toronto
#> 3 P01_1 2020-05-03 05:20:00 2020-05-03 -85.03 50.01 America/Toronto
#> 4 P02_1 2020-05-04 05:25:00 2020-05-04 -87.45 52.68 America/Toronto
#> 5 P02_1 2020-05-04 05:25:00 2020-05-04 -87.45 52.68 America/Toronto
#> 6 P02_1 2020-05-04 05:25:00 2020-05-04 -87.45 52.68 America/Toronto
#> 7 P06_1 2020-05-09 05:20:00 2020-05-09 -90.08 52.00 America/Winnipeg
#> 8 P06_1 2020-05-09 05:20:00 2020-05-09 -90.08 52.00 America/Winnipeg
#> 9 P06_1 2020-05-09 05:20:00 2020-05-09 -90.08 52.00 America/Winnipeg
#> 10 P07_1 2020-05-09 05:25:00 2020-05-09 -86.03 50.45 America/Toronto
#> t2ss psel_by psel_min psel_doy psel psel_scaled psel_std
#> 1 498.4167 t2sr -0.5359972 -0.9351720 0.2296568 0.8502302 1
#> 2 498.4167 t2sr -0.5359972 -0.9351720 0.2296568 0.8502302 1
#> 3 498.4167 t2sr -0.5359972 -0.9351720 0.2296568 0.8502302 1
#> 4 483.4167 t2sr -0.5240265 -0.9200039 0.2359748 0.8736204 1
#> 5 483.4167 t2sr -0.5240265 -0.9200039 0.2359748 0.8736204 1
#> 6 483.4167 t2sr -0.5240265 -0.9200039 0.2359748 0.8736204 1
#> 7 521.9333 t2sr -0.4585242 -0.8503970 0.2701113 1.0000000 1
#> 8 521.9333 t2sr -0.4585242 -0.8503970 0.2701113 1.0000000 1
#> 9 521.9333 t2sr -0.4585242 -0.8503970 0.2701113 1.0000000 1
#> 10 488.7500 t2sr -0.5129536 -0.8503970 0.2558023 0.9470254 1
#> psel_normalized selection_group geometry
#> 1 0.001 P01_1_2020_early POINT (124 -53.21667)
#> 2 0.001 P01_1_2020_early POINT (124 -53.21667)
#> 3 0.001 P01_1_2020_early POINT (124 -53.21667)
#> 4 0.001 P02_1_2020_early POINT (125 -47.25)
#> 5 0.001 P02_1_2020_early POINT (125 -47.25)
#> 6 0.001 P02_1_2020_early POINT (125 -47.25)
#> 7 0.001 P06_1_2020_early POINT (130 3.583333)
#> 8 0.001 P06_1_2020_early POINT (130 3.583333)
#> 9 0.001 P06_1_2020_early POINT (130 3.583333)
#> 10 0.001 P07_1_2020_early POINT (130 -40.93333)