CodelistGenerator options: examples with a mock vocabulary

Mock vocabulary database

Let's say we have a mock vocabulary database with these hypothetical concepts and relationships.

Search for exact keyword match

To find “Musculoskeletal disorder” we can search for that like so

codes <- getCandidateCodes(
  cdm = cdm,
  keywords = "Musculoskeletal disorder",
  domains = "Condition",
  includeDescendants = FALSE,
)
#> condition domain: Limiting to domains of interest
#> condition: Getting concepts to include
#> ✔ 1 candidate concept identified
#> 
#> Time taken: 0 minutes and 0 seconds

codes %>% 
  glimpse()
#> Rows: 1
#> Columns: 6
#> $ concept_id       <int> 1
#> $ found_from       <chr> "From initial search"
#> $ concept_name     <chr> "Musculoskeletal disorder"
#> $ domain_id        <chr> "Condition"
#> $ vocabulary_id    <chr> "SNOMED"
#> $ standard_concept <chr> "standard"

Note, we would also identify it based on a partial match

codes <- getCandidateCodes(
  cdm = cdm,
  keywords = "Musculoskeletal",
  domains = "Condition",
  includeDescendants = FALSE
)
#> condition domain: Limiting to domains of interest
#> condition: Getting concepts to include
#> ✔ 1 candidate concept identified
#> 
#> Time taken: 0 minutes and 0 seconds

codes %>% 
  glimpse()
#> Rows: 1
#> Columns: 6
#> $ concept_id       <int> 1
#> $ found_from       <chr> "From initial search"
#> $ concept_name     <chr> "Musculoskeletal disorder"
#> $ domain_id        <chr> "Condition"
#> $ vocabulary_id    <chr> "SNOMED"
#> $ standard_concept <chr> "standard"

Add descendants

To include descendants of an identified code, we can set includeDescendants to TRUE

getCandidateCodes(
  cdm = cdm,
  keywords = "Musculoskeletal disorder",
  domains = "Condition",
  includeDescendants = TRUE
) %>% 
  glimpse()
#> condition domain: Limiting to domains of interest
#> condition: Getting concepts to include
#> condition domain: Adding descendants
#> ✔ 5 candidate concepts identified
#> 
#> Time taken: 0 minutes and 0 seconds
#> Rows: 5
#> Columns: 6
#> $ concept_id       <int> 1, 2, 3, 4, 5
#> $ found_from       <chr> "From initial search", "From descendants", "From desc…
#> $ concept_name     <chr> "Musculoskeletal disorder", "Osteoarthrosis", "Arthri…
#> $ domain_id        <chr> "Condition", "Condition", "Condition", "Condition", "…
#> $ vocabulary_id    <chr> "SNOMED", "SNOMED", "SNOMED", "SNOMED", "SNOMED"
#> $ standard_concept <chr> "standard", "standard", "standard", "standard", "stan…

Multiple search terms

We can also search for multiple keywords at the same time. The following search would have picked up all of the concepts found above without needing to include descendants.

codes <- getCandidateCodes(
  cdm = cdm,
  keywords = c(
    "Musculoskeletal disorder",
    "arthritis",
    "arthrosis"
  ),
  domains = "Condition",
  includeDescendants = FALSE
)
#> condition domain: Limiting to domains of interest
#> condition: Getting concepts to include
#> ✔ 5 candidate concepts identified
#> 
#> Time taken: 0 minutes and 0 seconds

codes %>% 
  glimpse()
#> Rows: 5
#> Columns: 6
#> $ concept_id       <int> 1, 3, 4, 5, 2
#> $ found_from       <chr> "From initial search", "From initial search", "From i…
#> $ concept_name     <chr> "Musculoskeletal disorder", "Arthritis", "Osteoarthri…
#> $ domain_id        <chr> "Condition", "Condition", "Condition", "Condition", "…
#> $ vocabulary_id    <chr> "SNOMED", "SNOMED", "SNOMED", "SNOMED", "SNOMED"
#> $ standard_concept <chr> "standard", "standard", "standard", "standard", "stan…

Add ancestor

To include the ancestors one level above the identified concepts we can set includeAncestor to TRUE

codes <- getCandidateCodes(
  cdm = cdm,
  keywords = "Osteoarthritis of knee",
  includeAncestor = TRUE,
  domains = "Condition"
)
#> condition domain: Limiting to domains of interest
#> condition: Getting concepts to include
#> condition domain: Adding descendants
#> condition domain: Adding ancestor
#> ✔ 2 candidate concepts identified
#> 
#> Time taken: 0 minutes and 0 seconds

codes %>% 
  glimpse()
#> Rows: 2
#> Columns: 6
#> $ concept_id       <int> 4, 3
#> $ found_from       <chr> "From initial search", "From ancestor"
#> $ concept_name     <chr> "Osteoarthritis of knee", "Arthritis"
#> $ domain_id        <chr> "Condition", "Condition"
#> $ vocabulary_id    <chr> "SNOMED", "SNOMED"
#> $ standard_concept <chr> "standard", "standard"

Searches with multiple words

We can also find concepts with multiple words even if they are in a different order. For example, a search for “Knee osteoarthritis” will pick up “Osteoarthritis of knee”.

codes <- getCandidateCodes(
  cdm = cdm,
  keywords = "Knee osteoarthritis",
  domains = "Condition",
  includeDescendants = TRUE
)
#> condition domain: Limiting to domains of interest
#> condition: Getting concepts to include
#> condition domain: Adding descendants
#> ✔ 1 candidate concept identified
#> 
#> Time taken: 0 minutes and 0 seconds

codes %>% 
  glimpse()
#> Rows: 1
#> Columns: 6
#> $ concept_id       <int> 4
#> $ found_from       <chr> "From initial search"
#> $ concept_name     <chr> "Osteoarthritis of knee"
#> $ domain_id        <chr> "Condition"
#> $ vocabulary_id    <chr> "SNOMED"
#> $ standard_concept <chr> "standard"

With exclusions

We can also exclude specific terms

codes <- getCandidateCodes(
  cdm = cdm,
  keywords = "arthritis",
  exclude = "Hip osteoarthritis",
  domains = "Condition"
)
#> condition domain: Limiting to domains of interest
#> condition: Getting concepts to include
#> condition domain: Adding descendants
#> ✔ 2 candidate concepts identified
#> 
#> Time taken: 0 minutes and 0 seconds

codes %>% 
  glimpse()
#> Rows: 2
#> Columns: 6
#> $ concept_id       <int> 3, 4
#> $ found_from       <chr> "From initial search", "From initial search"
#> $ concept_name     <chr> "Arthritis", "Osteoarthritis of knee"
#> $ domain_id        <chr> "Condition", "Condition"
#> $ vocabulary_id    <chr> "SNOMED", "SNOMED"
#> $ standard_concept <chr> "standard", "standard"

Search using synonyms

We can also pick up codes based on their synonyms. In this case “Arthritis” has a synonym of “Osteoarthrosis”, so a search for “osteoarthrosis” that looks at both the primary name of each concept and any of its associated synonyms will match this synonym, and “Arthritis” will be included.

codes <- getCandidateCodes(
  cdm = cdm,
  keywords = "osteoarthrosis",
  domains = "Condition",
  searchInSynonyms = TRUE
)
#> condition domain: Limiting to domains of interest
#> condition: Getting concepts to include
#> condition domain: Adding concepts using synonymns
#> condition domain: Adding descendants
#> ✔ 4 candidate concepts identified
#> 
#> Time taken: 0 minutes and 0 seconds

codes %>% 
  glimpse()
#> Rows: 4
#> Columns: 6
#> $ concept_id       <int> 2, 3, 4, 5
#> $ found_from       <chr> "From initial search", "In synonyms", "From descendan…
#> $ concept_name     <chr> "Osteoarthrosis", "Arthritis", "Osteoarthritis of kne…
#> $ domain_id        <chr> "Condition", "Condition", "Condition", "Condition"
#> $ vocabulary_id    <chr> "SNOMED", "SNOMED", "SNOMED", "SNOMED"
#> $ standard_concept <chr> "standard", "standard", "standard", "standard"

Search via non-standard

Alternatively, we could have picked up “Osteoarthrosis” by searching via non-standard concepts.

codes <- getCandidateCodes(
  cdm = cdm,
  keywords = c("arthritis", "arthropathy"),
  domains = "Condition",
  searchNonStandard = TRUE
)
#> condition domain: Limiting to domains of interest
#> condition: Getting concepts to include
#> condition domain: Adding descendants
#> condition domain: Adding codes from non-standard
#> ✔ 4 candidate concepts identified
#> 
#> Time taken: 0 minutes and 0 seconds

codes %>% 
  glimpse()
#> Rows: 4
#> Columns: 6
#> $ concept_id       <int> 3, 4, 5, 2
#> $ found_from       <chr> "From initial search", "From initial search", "From i…
#> $ concept_name     <chr> "Arthritis", "Osteoarthritis of knee", "Osteoarthriti…
#> $ domain_id        <chr> "Condition", "Condition", "Condition", "Condition"
#> $ vocabulary_id    <chr> "SNOMED", "SNOMED", "SNOMED", "SNOMED"
#> $ standard_concept <chr> "standard", "standard", "standard", "standard"

Search for both standard and non-standard concepts

We can also include non-standard codes in our results like so

codes <- getCandidateCodes(
  cdm = cdm,
  keywords = c(
    "Musculoskeletal disorder",
    "arthritis",
    "arthropathy",
    "arthrosis"
  ),
  domains = "Condition",
  standardConcept = c("Standard", "Non-standard")
)
#> condition domain: Limiting to domains of interest
#> condition: Getting concepts to include
#> condition domain: Adding descendants
#> ✔ 8 candidate concepts identified
#> 
#> Time taken: 0 minutes and 0 seconds

codes %>% 
  glimpse()
#> Rows: 8
#> Columns: 6
#> $ concept_id       <int> 1, 3, 4, 5, 8, 17, 7, 2
#> $ found_from       <chr> "From initial search", "From initial search", "From i…
#> $ concept_name     <chr> "Musculoskeletal disorder", "Arthritis", "Osteoarthri…
#> $ domain_id        <chr> "Condition", "Condition", "Condition", "Condition", "…
#> $ vocabulary_id    <chr> "SNOMED", "SNOMED", "SNOMED", "SNOMED", "Read", "ICD1…
#> $ standard_concept <chr> "standard", "standard", "standard", "standard", "non-…