Summarise cohort timing

We saw in the previous vignette how we can summarise the overlap between cohorts. In addition to this, we might also be interested in timings between cohorts, that is, the time between an individual entering one cohort and entering another. For this we can use summariseCohortTiming(). In this example we’ll look at the time between entering cohorts for acetaminophen, morphine, and warfarin using the Eunomia data.

library(CDMConnector)
library(CodelistGenerator)
library(CohortCharacteristics)
library(dplyr)
library(ggplot2)

# Open a DuckDB connection to the Eunomia data and build the cdm reference,
# reading the CDM tables from, and writing new tables to, the "main" schema.
con <- DBI::dbConnect(duckdb::duckdb(),
  dbdir = CDMConnector::eunomia_dir()
)
# `cdm_schema` is spelled out in full: the previous `cdm_schem` only worked
# through R's partial argument matching.
cdm <- CDMConnector::cdm_from_con(con,
  cdm_schema = "main",
  write_schema = "main",
  cdm_name = "Eunomia"
)

# Concept sets for the three drug ingredients compared in this vignette
meds_cs <- getDrugIngredientCodes(
  cdm = cdm,
  name = c("acetaminophen", "morphine", "warfarin")
)

# Create the "meds" cohort table from the concept sets, keeping all records
# per person (limit = "all") and ending each entry at the event end date
cdm <- cdm |>
  generateConceptCohortSet(
    name = "meds",
    conceptSet = meds_cs,
    end = "event_end_date",
    limit = "all",
    overwrite = TRUE
  )

# Cohort definitions and the settings used to create them
cdm$meds |>
  settings()
#> # A tibble: 3 × 6
#>   cohort_definition_id cohort_name   limit prior_observation future_observation
#>                  <int> <chr>         <chr>             <dbl>              <dbl>
#> 1                    1 morphine      all                   0                  0
#> 2                    2 acetaminophen all                   0                  0
#> 3                    3 warfarin      all                   0                  0
#> # ℹ 1 more variable: end <chr>
# Number of records and subjects in each cohort
cdm$meds |>
  cohortCount()
#> # A tibble: 3 × 3
#>   cohort_definition_id number_records number_subjects
#>                  <int>          <int>           <int>
#> 1                    1             35              35
#> 2                    2          13908            2679
#> 3                    3            137             137

Now that we have our cohorts, we can summarise the timing between cohort entries. Note that setting restrictToFirstEntry to TRUE means that we only consider the timing between an individual’s first record in each cohort (i.e. their first exposure to each of the medications).

# Summarise timing between cohort entries, using only each individual's
# first record in each cohort
meds_timing <- summariseCohortTiming(cdm$meds, restrictToFirstEntry = TRUE)
glimpse(meds_timing)
#> Rows: 42
#> Columns: 13
#> $ result_id        <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
#> $ cdm_name         <chr> "Eunomia", "Eunomia", "Eunomia", "Eunomia", "Eunomia"…
#> $ group_name       <chr> "cohort_name_reference &&& cohort_name_comparator", "…
#> $ group_level      <chr> "acetaminophen &&& morphine", "warfarin &&& acetamino…
#> $ strata_name      <chr> "overall", "overall", "overall", "overall", "overall"…
#> $ strata_level     <chr> "overall", "overall", "overall", "overall", "overall"…
#> $ variable_name    <chr> "number records", "number records", "number records",…
#> $ variable_level   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
#> $ estimate_name    <chr> "count", "count", "count", "count", "count", "count",…
#> $ estimate_type    <chr> "integer", "integer", "integer", "integer", "integer"…
#> $ estimate_value   <chr> "35", "136", "35", "136", "6", "6", "35", "136", "35"…
#> $ additional_name  <chr> "overall", "overall", "overall", "overall", "overall"…
#> $ additional_level <chr> "overall", "overall", "overall", "overall", "overall"…

As with cohort overlap, we have table and plotting functions to help view our results.

# Formatted table of the timing results, in years with no decimal places
meds_timing |>
  tableCohortTiming(
    timeScale = "years",
    .options = list(decimals = c(numeric = 0))
  )

CDM name	Cohort name reference	Cohort name comparator	Variable name	Estimate name	Estimate value
Eunomia	Acetaminophen	Morphine	Number records	N	35
			Number subjects	N	35
		Warfarin	Number records	N	136
			Number subjects	N	136
	Morphine	Warfarin	Number records	N	6
			Number subjects	N	6
	Acetaminophen	Morphine	Years between cohort entries	Median [Q25 - Q75]	16 [5 - 34]
				Range	-34 - 77
		Warfarin	Years between cohort entries	Median [Q25 - Q75]	54 [46 - 67]
				Range	-3 - 92
	Morphine	Warfarin	Years between cohort entries	Median [Q25 - Q75]	5 [-5 - 10]
				Range	-9 - 19

# Box plot of the time (in years) between cohort entries
meds_timing |>
  plotCohortTiming(plotType = "boxplot", timeScale = "years")

If we want to see an even more granular summary of cohort timings, we can make a density plot instead of a box plot. Note that for this we’ll need to set density to TRUE when computing our initial results.

# Recompute the summary with density = TRUE, then draw the density plot
meds_timing <- summariseCohortTiming(
  cdm$meds,
  restrictToFirstEntry = TRUE,
  density = TRUE
)
meds_timing |>
  plotCohortTiming(plotType = "density", timeScale = "years")

As well as generating these estimates for cohorts overall, we can also obtain stratified estimates.

# Add age groups (0-49 and 50-150) to the cohort and store the result as a
# permanent table under the same "meds" name
cdm$meds <- cdm$meds |>
  PatientProfiles::addAge(
    ageGroup = list(
      c(0, 49),
      c(50, 150)
    )
  ) |>
  compute(
    temporary = FALSE,
    name = "meds"
  ) |>
  newCohortTable()

# Timing summary (with density) stratified by the age_group column
meds_timing <- summariseCohortTiming(
  cdm$meds,
  restrictToFirstEntry = TRUE,
  strata = list("age_group"),
  density = TRUE
)
# Table of the stratified timing results, in years with no decimal places
meds_timing |>
  tableCohortTiming(
    timeScale = "years",
    .options = list(decimals = c(numeric = 0))
  )

CDM name	Cohort name reference	Cohort name comparator	Variable name	Estimate name	Age group
CDM name	Cohort name reference	Cohort name comparator	Variable name	Estimate name	0 to 49	50 to 150	Overall
Eunomia	Acetaminophen	Morphine	Number records	N	25	1	26
			Number subjects	N	25	1	26
		Warfarin	Number records	N	8	1	9
			Number subjects	N	8	1	9
	Morphine	Warfarin	Number records	N	-	6	6
			Number subjects	N	-	6	6
	Acetaminophen	Morphine	Years between cohort entries	Median [Q25 - Q75]	9 [0 - 30]	24 [24 - 24]	10 [1 - 29]
				Range	-34 - 37	24 - 24	-34 - 37
		Warfarin	Years between cohort entries	Median [Q25 - Q75]	45 [33 - 46]	-2 [-2 - -2]	44 [32 - 46]
				Range	29 - 49	-2 - -2	-2 - 49
	Morphine	Warfarin	Years between cohort entries	Median [Q25 - Q75]	-	5 [-5 - 10]	5 [-5 - 10]
				Range	-	-9 - 19	-9 - 19

# Box plot of the stratified results, faceted and coloured by strata level
meds_timing |>
  plotCohortTiming(
    plotType = "boxplot",
    timeScale = "years",
    facet = "strata_level",
    colour = "strata_level",
    colourName = "Age group"
  )