Examples on Simulating Migration Flows for the MicSim Package

The sample data on migration between Spain, Sweden and the Netherlands were prepared by Claudio Bosco (European Commission), Daniela Ghio (European Commission), Maurizio Teobaldelli (European Commission) and Sabine Zinn (German Socio-Economic Panel, Humboldt University Berlin). EUROSTAT data were used as the data source. The sample was intentionally kept small. MicSim can also handle larger numbers of cases, but keeping run times acceptable then requires more computing power, i.e. more CPU cores.

# Load library
library(MicSim)
#> Lade nötiges Paket: snowfall
#> Lade nötiges Paket: snow
#> Lade nötiges Paket: rlecuyer

Definition of Basic Simulation Frame

# Simulation horizon: first and last simulated day, both coded as yyyymmdd
startDate <- 20140101
endDate <- 20181231
simHorizon <- c(startDate = startDate, endDate = endDate)

# Fix the seed of the random number generator
set.seed(12)

# Maximal age (sharp, i.e. the maximal age is exactly 100.0)
maxAge <- 100

# State space: every combination of the attribute values listed below
# (one row per combination, attribute order: sex, country_R, fert)
sex <- c("m", "f")
country_R <- c("NL", "ES", "SE")
fert <- c("0", "1", "2", "3", "4+", "999")
stateSpace <- expand.grid(
  sex = sex,
  country_R = country_R,
  fert = fert
)

# Absorbing states
absStates <- c("dead", "rest")

Definition of initial population and newborn characteristics

# Initial population: the example data set initPopMigrExp that is included in
# the MicSim package
initPop <- initPopMigrExp
# Show the first rows (columns as printed below: ID, birthDate, initState)
head(initPop)
#>   ID birthDate initState
#> 1  1  19411206  m/NL/999
#> 2  2  20070608  m/SE/999
#> 3  3  19750709    m/ES/0
#> 4  4  19220312  f/NL/999
#> 5  5  19740425    f/ES/0
#> 6  6  20110828  m/ES/999

# Population of immigrants who enter the virtual population during the
# simulation period (immigrPopMigrExp is included in the MicSim package)
immigrPop <- immigrPopMigrExp
# Show the first rows (columns as printed below: ID, immigrDate, birthDate,
# immigrInitState)
head(immigrPop)
#>      ID immigrDate birthDate immigrInitState
#> 1 72966   20180925  19830404          m/ES/0
#> 2 72967   20171208  20100319          f/ES/0
#> 3 72968   20151217  19910316          f/NL/0
#> 4 72969   20170105  20070602          m/NL/0
#> 5 72970   20140609  19880623          f/ES/0
#> 6 72971   20171003  19881129          m/NL/0

# Definition of initial states for newborns
# Index of the attribute/substate that a newborn takes over from the mother.
# Index 2 is country_R, the second attribute in the state space definition
# (sex, country_R, fert).
fixInitStates <- 2
# Possible initial states of newborns, one row per state (sex, country_R, fert).
# Each country has its own male and female row so that distinct sex ratios can
# be defined for distinct countries; the attribute inherited from the mother
# (country_R) therefore still has to be spelled out in every row.
varInitStates <- rbind(c("m","ES","0"), c("f","ES","0"),
                       c("m","NL","0"), c("f","NL","0"),
                       c("m","SE","0"), c("f","SE","0")) 
# Probabilities of the newborn states, listed in the row order of varInitStates,
# i.e. male / female for each country separately.
# NOTE(review): row-wise correspondence assumed from the matching order and
# length - confirm against the micSim help page.
initStatesProb <- c(0.5151,0.4849, # ES: male, female
                    0.5124,0.4876, # NL: male, female
                    0.5146,0.4854) # SE: male, female

Rates Definition

Beware: Rates have to be given at least for all ages in [0,maxAge) and for all years within the simulation horizon. The exact value of maxAge is excluded from this interval, i.e. here no rate is needed for age=100.00, but one is needed for e.g. age=99.9999.

# Load rates from data included in the MicSim package (one column for one
# transition)
rates <- migrExpRates

# glue() is used by the code-generating loops in the sections further below
require(glue)
#> Lade nötiges Paket: glue

# Create one numeric vector per column of `rates`, stored under the column's
# name (e.g. the column `fert_NL_0_1` becomes the vector `fert_NL_0_1`). The
# rate functions defined below read these vectors by age.
# assign() creates the variable directly from its name, so no R code has to be
# assembled as text and parsed; seq_along() also copes with zero columns.
for (i in seq_along(names(rates))) {
  assign(names(rates)[i], as.numeric(rates[, i]))
}

# --------------------------------------------------------------------------
# Fertility Rates
# --------------------------------------------------------------------------

# Rate for the transition f/NL/0 -> f/NL/1, looked up by age.
# `age` is truncated to whole years; element 1 of `fert_NL_0_1` belongs to
# age 0. `calTime` and `duration` are accepted but not used here.
fertRates_NL_0_1 <- function(age, calTime, duration) {
  age_index <- as.integer(age) + 1
  fert_NL_0_1[age_index]
}

# Rate for the transition f/NL/1 -> f/NL/2, looked up by age.
# `age` is truncated to whole years; element 1 of `fert_NL_1_2` belongs to
# age 0. `calTime` and `duration` are accepted but not used here.
fertRates_NL_1_2 <- function(age, calTime, duration) {
  age_index <- as.integer(age) + 1
  fert_NL_1_2[age_index]
}

# Rate for the transition f/NL/2 -> f/NL/3, looked up by age.
# `age` is truncated to whole years; element 1 of `fert_NL_2_3` belongs to
# age 0. `calTime` and `duration` are accepted but not used here.
fertRates_NL_2_3 <- function(age, calTime, duration) {
  age_index <- as.integer(age) + 1
  fert_NL_2_3[age_index]
}

# Rate for the transition f/NL/3 -> f/NL/4+, looked up by age.
# `age` is truncated to whole years; element 1 of `fert_NL_3_4` belongs to
# age 0. `calTime` and `duration` are accepted but not used here.
fertRates_NL_3_4 <- function(age, calTime, duration) {
  # The index must be age + 1, as in all other fertility rate functions. The
  # former `-+1` evaluated to age - 1, which read the wrong element and gave a
  # negative or zero index for ages 0 and 1.
  rate <- fert_NL_3_4[as.integer(age) + 1]
  return(rate)
}

# Rate for the transition f/ES/0 -> f/ES/1, looked up by age.
# `age` is truncated to whole years; element 1 of `fert_ES_0_1` belongs to
# age 0. `calTime` and `duration` are accepted but not used here.
fertRates_ES_0_1 <- function(age, calTime, duration) {
  age_index <- as.integer(age) + 1
  fert_ES_0_1[age_index]
}

# Rate for the transition f/ES/1 -> f/ES/2, looked up by age.
# `age` is truncated to whole years; element 1 of `fert_ES_1_2` belongs to
# age 0. `calTime` and `duration` are accepted but not used here.
fertRates_ES_1_2 <- function(age, calTime, duration) {
  age_index <- as.integer(age) + 1
  fert_ES_1_2[age_index]
}

# Rate for the transition f/ES/2 -> f/ES/3, looked up by age.
# `age` is truncated to whole years; element 1 of `fert_ES_2_3` belongs to
# age 0. `calTime` and `duration` are accepted but not used here.
fertRates_ES_2_3 <- function(age, calTime, duration) {
  age_index <- as.integer(age) + 1
  fert_ES_2_3[age_index]
}

# Rate for the transition f/ES/3 -> f/ES/4+, looked up by age.
# `age` is truncated to whole years; element 1 of `fert_ES_3_4` belongs to
# age 0. `calTime` and `duration` are accepted but not used here.
fertRates_ES_3_4 <- function(age, calTime, duration) {
  age_index <- as.integer(age) + 1
  fert_ES_3_4[age_index]
}

# Rate for the transition f/SE/0 -> f/SE/1, looked up by age.
# `age` is truncated to whole years; element 1 of `fert_SE_0_1` belongs to
# age 0. `calTime` and `duration` are accepted but not used here.
fertRates_SE_0_1 <- function(age, calTime, duration) {
  age_index <- as.integer(age) + 1
  fert_SE_0_1[age_index]
}

# Rate for the transition f/SE/1 -> f/SE/2, looked up by age.
# `age` is truncated to whole years; element 1 of `fert_SE_1_2` belongs to
# age 0. `calTime` and `duration` are accepted but not used here.
fertRates_SE_1_2 <- function(age, calTime, duration) {
  age_index <- as.integer(age) + 1
  fert_SE_1_2[age_index]
}

# Rate for the transition f/SE/2 -> f/SE/3, looked up by age.
# `age` is truncated to whole years; element 1 of `fert_SE_2_3` belongs to
# age 0. `calTime` and `duration` are accepted but not used here.
fertRates_SE_2_3 <- function(age, calTime, duration) {
  age_index <- as.integer(age) + 1
  fert_SE_2_3[age_index]
}

# Rate for the transition f/SE/3 -> f/SE/4+, looked up by age.
# `age` is truncated to whole years; element 1 of `fert_SE_3_4` belongs to
# age 0. `calTime` and `duration` are accepted but not used here.
fertRates_SE_3_4 <- function(age, calTime, duration) {
  age_index <- as.integer(age) + 1
  fert_SE_3_4[age_index]
}

# --------------------------------------------------------------------------
# Internal Migration Rates
# --------------------------------------------------------------------------

# Helper operator: TRUE for elements of the left-hand side that are NOT
# contained in the right-hand side
`%!in%` <- Negate(`%in%`)

# For every ordered pair of distinct countries (origin, destination) create a
# global function `<origin>_<destination>_rates(age, calTime, duration)` that
# returns the element of the vector `rate_<origin>_<destination>` at position
# as.integer(age) + 1 (element 1 belongs to age 0).
# The definitions are generated as text because both the function name and the
# vector it reads depend on the country codes.
for (origin in country_R) {
  destinations <- country_R[country_R %!in% origin]
  # Looping over the values themselves (instead of 1:length(...)) simply skips
  # the body when there is no other country.
  for (destination in destinations) {
    fn_source <- sprintf(
      paste0(
        "%s_%s_rates <- function(age, calTime, duration) {\n",
        "  rate <- rate_%s_%s[as.integer(age) + 1]\n",
        "  return(rate)\n",
        "}"
      ),
      origin, destination, origin, destination
    )
    eval(parse(text = fn_source))
  }
}

# --------------------------------------------------------------------------
# Mortality Rates
# --------------------------------------------------------------------------

# Female mortality
# For every country code in country_R create a global function
# `mortRates_f_<country>(age, calTime, duration)` that returns the element of
# the vector `mort_f_<country>` at position as.integer(age) + 1 (element 1
# belongs to age 0). The definitions are generated as text because the function
# name and the vector it reads depend on the country code.
# Looping over the values themselves (instead of 1:length(...)) is also safe
# for an empty country vector.
for (country in country_R) {
  fn_source <- sprintf(
    paste0(
      "mortRates_f_%s <- function(age, calTime, duration) {\n",
      "  rate <- mort_f_%s[as.integer(age) + 1]\n",
      "  return(rate)\n",
      "}"
    ),
    country, country
  )
  eval(parse(text = fn_source))
}

# Male mortality
# For every country code in country_R create a global function
# `mortRates_m_<country>(age, calTime, duration)` that returns the element of
# the vector `mort_m_<country>` at position as.integer(age) + 1 (element 1
# belongs to age 0). The definitions are generated as text because the function
# name and the vector it reads depend on the country code.
# Looping over the values themselves (instead of 1:length(...)) is also safe
# for an empty country vector.
for (country in country_R) {
  fn_source <- sprintf(
    paste0(
      "mortRates_m_%s <- function(age, calTime, duration) {\n",
      "  rate <- mort_m_%s[as.integer(age) + 1]\n",
      "  return(rate)\n",
      "}"
    ),
    country, country
  )
  eval(parse(text = fn_source))
}

# ---------------------------------------------------------------------------
# Emigration rates
# ---------------------------------------------------------------------------

# Emigration rates for females
# For every country code in country_R create a global function
# `emigrRates_f_<country>(age, calTime, duration)` that returns the element of
# the vector `emig_f_<country>` at position as.integer(age) + 1 (element 1
# belongs to age 0). The definitions are generated as text because the function
# name and the vector it reads depend on the country code.
# Looping over the values themselves (instead of 1:length(...)) is also safe
# for an empty country vector.
for (country in country_R) {
  fn_source <- sprintf(
    paste0(
      "emigrRates_f_%s <- function(age, calTime, duration) {\n",
      "  rate <- emig_f_%s[as.integer(age) + 1]\n",
      "  return(rate)\n",
      "}"
    ),
    country, country
  )
  eval(parse(text = fn_source))
}

# Emigration rates for males
# For every country code in country_R create a global function
# `emigrRates_m_<country>(age, calTime, duration)` that returns the element of
# the vector `emig_m_<country>` at position as.integer(age) + 1 (element 1
# belongs to age 0). The definitions are generated as text because the function
# name and the vector it reads depend on the country code.
# Looping over the values themselves (instead of 1:length(...)) is also safe
# for an empty country vector.
for (country in country_R) {
  fn_source <- sprintf(
    paste0(
      "emigrRates_m_%s <- function(age, calTime, duration) {\n",
      "  rate <- emig_m_%s[as.integer(age) + 1]\n",
      "  return(rate)\n",
      "}"
    ),
    country, country
  )
  eval(parse(text = fn_source))
}

Transition pattern and assignment of functions specifying transition rates

# ---------------------------------------------------------------------------
# Transition matrix for fertility
# ---------------------------------------------------------------------------
# One row per fertility transition: column 1 is the transition
# ("origin state->destination state"), column 2 the name of the function that
# supplies its rate.
fertTrMatrix <- matrix(
  c(
    "f/ES/0->f/ES/1",  "fertRates_ES_0_1",
    "f/ES/1->f/ES/2",  "fertRates_ES_1_2",
    "f/ES/2->f/ES/3",  "fertRates_ES_2_3",
    "f/ES/3->f/ES/4+", "fertRates_ES_3_4",
    "f/SE/0->f/SE/1",  "fertRates_SE_0_1",
    "f/SE/1->f/SE/2",  "fertRates_SE_1_2",
    "f/SE/2->f/SE/3",  "fertRates_SE_2_3",
    "f/SE/3->f/SE/4+", "fertRates_SE_3_4",
    "f/NL/0->f/NL/1",  "fertRates_NL_0_1",
    "f/NL/1->f/NL/2",  "fertRates_NL_1_2",
    "f/NL/2->f/NL/3",  "fertRates_NL_2_3",
    "f/NL/3->f/NL/4+", "fertRates_NL_3_4"
  ),
  ncol = 2,
  byrow = TRUE
)

# ---------------------------------------------------------------------------
# Transition matrix for migration
# ---------------------------------------------------------------------------
# One row per ordered pair of distinct countries (origin, destination), in the
# order of country_R with the origin varying slowest:
#   column 1: transition name, "<origin>-><destination>"
#   column 2: name of the rate function, "<origin>_<destination>_rates"
# The matrix is built directly from the country codes, so no R code has to be
# assembled as text (with hand-placed commas and brackets) and parsed.
intmigTrMatrix <- local({
  n_countries <- length(country_R)
  origin_idx <- rep(seq_len(n_countries), each = n_countries)
  dest_idx <- rep(seq_len(n_countries), times = n_countries)
  # Keep only genuine moves, i.e. drop the pairs where origin == destination
  is_move <- origin_idx != dest_idx
  origin <- country_R[origin_idx[is_move]]
  destination <- country_R[dest_idx[is_move]]
  cbind(
    sprintf("%s->%s", origin, destination),
    sprintf("%s_%s_rates", origin, destination)
  )
})

# ---------------------------------------------------------------------------
# Transition matrix for mortality and emigration
# ---------------------------------------------------------------------------

# Transitions into the absorbing states. For every country (in the order of
# country_R) and, within each country, every sex (in the order of sex) there
# are two rows:
#   "<sex>/<country>/dead"  with rate function "mortRates_<sex>_<country>"
#   "<sex>/<country>/rest"  with rate function "emigrRates_<sex>_<country>"
# The matrix is built directly from the attribute values, so no R code has to
# be assembled as text and parsed.
absTransitions <- local({
  n_sex <- length(sex)
  n_countries <- length(country_R)
  n_groups <- n_sex * n_countries
  country_col <- rep(country_R, each = 2 * n_sex)
  sex_col <- rep(rep(sex, each = 2), times = n_countries)
  target_col <- rep(c("dead", "rest"), times = n_groups)
  rate_prefix <- rep(c("mortRates", "emigrRates"), times = n_groups)
  cbind(
    sprintf("%s/%s/%s", sex_col, country_col, target_col),
    sprintf("%s_%s_%s", rate_prefix, sex_col, country_col)
  )
})

# ---------------------------------------------------------------------------
# Combine all
# ---------------------------------------------------------------------------

# Stack the fertility and the internal migration transitions into one
# two-column matrix (transition, name of rate function)
allTransitions <- rbind(fertTrMatrix, intmigTrMatrix)
# Build the transition matrix over the state space from the transitions above
# and the transitions into the absorbing states.
# NOTE(review): the layout of the returned object is determined by
# buildTransitionMatrix() - see its help page.
transitionMatrix <- buildTransitionMatrix(allTransitions=allTransitions,
                                          absTransitions=absTransitions, 
                                          stateSpace=stateSpace)

# ---------------------------------------------------------------------------
# Define transitions triggering a birth event
# ---------------------------------------------------------------------------

# First column of fertTrMatrix, i.e. the names of all fertility transitions
fertTr <- fertTrMatrix[,1]

Run Simulation (using one core)

For illustration purposes, the following run is limited to the first 500 people of the initial population and to the first 100 migrants. To run the whole sample, simply remove the restriction, i.e. use initPop instead of initPop[1:500,] and immigrPop instead of immigrPop[1:100,].

# Single-core run, restricted to the first 500 rows of initPop and the first
# 100 rows of immigrPop; all other arguments are the objects defined above
pop <- micSim(initPop=initPop[1:500,], immigrPop=immigrPop[1:100,],
              transitionMatrix=transitionMatrix, absStates=absStates,
              varInitStates=varInitStates, initStatesProb=initStatesProb,
              fixInitStates=fixInitStates,
              maxAge=maxAge, simHorizon=simHorizon,fertTr=fertTr)
#> Initialization ... 
#> [1] "Starting at:  2024-01-23 14:07:50.88455"
#> [1] "Ending at:  2024-01-23 14:07:51.858173"
#> Simulation is running ... 
#> Year:  2014 
#> Year:  2015 
#> Year:  2016 
#> Year:  2017 
#> Year:  2018 
#> Simulation has finished.
#> ------------------
# Show the first rows of the simulation output
head(pop)
#>     ID birthDate initState     From   To transitionTime transitionAge motherID
#> 1    1  19411206  m/NL/999     <NA> <NA>           <NA>            NA     <NA>
#> 112  2  20070608  m/SE/999     <NA> <NA>           <NA>            NA     <NA>
#> 225  3  19750709    m/ES/0     <NA> <NA>           <NA>            NA     <NA>
#> 337  4  19220312  f/NL/999 f/NL/999 dead       20150626         93.29     <NA>
#> 449  5  19740425    f/ES/0     <NA> <NA>           <NA>            NA     <NA>
#> 461  6  20110828  m/ES/999     <NA> <NA>           <NA>            NA     <NA>

Convert to long format

# Convert the simulation output to long format.
# NOTE(review): see the convertToLongFormat() help page for the exact meaning
# of migr=TRUE.
popLong <- convertToLongFormat(pop, migr=TRUE)
# Show the first rows of the long-format data
head(popLong)
#>     ID birthDate   Tstart    Tstop statusEntry statusExit   OD ns Episode sex
#> 1    1  19411206 20140101 20181231           0          0 cens  1       1   m
#> 114  2  20070608 20140101 20181231           0          0 cens  1       1   m
#> 231  3  19750709 20140101 20181231           0          0 cens  1       1   m
#> 348  4  19220312 20140101 20150626           0          1 dead  1       1   f
#> 465  5  19740425 20140101 20181231           0          0 cens  1       1   f
#> 478  6  20110828 20140101 20181231           0          0 cens  1       1   m
#>     country_R fert
#> 1          NL  999
#> 114        SE  999
#> 231        ES    0
#> 348        NL  999
#> 465        ES    0
#> 478        ES  999

Convert to wide format

# Convert the simulation output to wide format; as the output below shows, the
# transitions of a person are then numbered column-wise (From.1, To.1, ...)
popWide <- convertToWideFormat(pop)
# Show the first rows of the wide-format data
head(popWide)
#>     ID birthDate initState ns   From.1 To.1 transitionTime.1 transitionAge.1
#> 1    1  19411206  m/NL/999  0     <NA> <NA>             <NA>              NA
#> 112  2  20070608  m/SE/999  0     <NA> <NA>             <NA>              NA
#> 225  3  19750709    m/ES/0  0     <NA> <NA>             <NA>              NA
#> 337  4  19220312  f/NL/999  1 f/NL/999 dead         20150626           93.29
#> 449  5  19740425    f/ES/0  0     <NA> <NA>             <NA>              NA
#> 461  6  20110828  m/ES/999  0     <NA> <NA>             <NA>              NA
#>     From.2 To.2 transitionTime.2 transitionAge.2
#> 1     <NA> <NA>             <NA>              NA
#> 112   <NA> <NA>             <NA>              NA
#> 225   <NA> <NA>             <NA>              NA
#> 337   <NA> <NA>             <NA>              NA
#> 449   <NA> <NA>             <NA>              NA
#> 461   <NA> <NA>             <NA>              NA

Run Simulation with parallel computing on several cores

Try this to speed up your simulation run. The more cores you can use, the faster the simulation will be executed. This example uses three cores. Give exactly as many seeds as you use cores; that way you can replicate your results.


# Number of cores to use and the random seeds for the run (three seeds for
# three cores)
cores <- 3
seeds <- c(34,145,97)
# Parallel run on the complete initial population and the complete immigrant
# population; apart from cores and seeds the arguments are the same objects as
# in the single-core run
pop <- micSimParallel(initPop=initPop, immigrPop=immigrPop,
                      transitionMatrix=transitionMatrix, absStates=absStates,
                      varInitStates=varInitStates, initStatesProb=initStatesProb,
                      fixInitStates=fixInitStates,
                      maxAge=maxAge, simHorizon=simHorizon,fertTr=fertTr,
                      cores=cores, seeds=seeds)
#> Starting at [1] "2024-01-23 14:07:52 CET"
#> Warning in searchCommandline(parallel, cpus = cpus, type = type, socketHosts =
#> socketHosts, : Unknown option on commandline: tools::buildVignettes(dir
#> R Version:  R version 4.3.2 (2023-10-31 ucrt)
#> snowfall 1.84-6.3 initialized (using snow 0.4-4): parallel execution on 3 CPUs.
#> 
#> Stopping cluster
#> Stopped at [1] "2024-01-23 14:09:42 CET"
# Show the first rows of the simulation output
head(pop)
#>       ID birthDate initState     From   To transitionTime transitionAge
#> 1      1  19411206  m/NL/999     <NA> <NA>           <NA>            NA
#> 11220  2  20070608  m/SE/999     <NA> <NA>           <NA>            NA
#> 16690  3  19750709    m/ES/0     <NA> <NA>           <NA>            NA
#> 17812  4  19220312  f/NL/999 f/NL/999 dead       20160929         94.55
#> 18937  5  19740425    f/ES/0     <NA> <NA>           <NA>            NA
#> 20057  6  20110828  m/ES/999 m/ES/999 rest       20180917          7.05
#>       motherID
#> 1         <NA>
#> 11220     <NA>
#> 16690     <NA>
#> 17812     <NA>
#> 18937     <NA>
#> 20057     <NA>