Fitting epicurves

Example

To illustrate the trend fitting functionality of i2extras we will use the simulated Ebola Virus Disease (EVD) outbreak data from the outbreaks package.

Loading relevant packages and data

library(outbreaks)
library(i2extras)
#> Loading required package: incidence2
#> Loading required package: grates
#> 
#> Attaching package: 'i2extras'
#> The following object is masked from 'package:incidence2':
#> 
#>     estimate_peak
library(ggplot2)

raw_dat <- ebola_sim_clean$linelist

For this example we will restrict ourselves to the first 20 weeks of data:

dat <- incidence(
    raw_dat, 
    date_index = "date_of_onset",
    interval = "week",
    groups = "gender"
)[1:20, ]
dat
#> # incidence:  20 x 4
#> # count vars: date_of_onset
#> # groups:     gender
#>    date_index gender count_variable count
#>    <isowk>    <fct>  <chr>          <int>
#>  1 2014-W15   f      date_of_onset      1
#>  2 2014-W16   m      date_of_onset      1
#>  3 2014-W17   f      date_of_onset      4
#>  4 2014-W17   m      date_of_onset      1
#>  5 2014-W18   f      date_of_onset      4
#>  6 2014-W19   f      date_of_onset      9
#>  7 2014-W19   m      date_of_onset      3
#>  8 2014-W20   f      date_of_onset      7
#>  9 2014-W20   m      date_of_onset     10
#> 10 2014-W21   f      date_of_onset      8
#> 11 2014-W21   m      date_of_onset      7
#> 12 2014-W22   f      date_of_onset      9
#> 13 2014-W22   m      date_of_onset     10
#> 14 2014-W23   f      date_of_onset     13
#> 15 2014-W23   m      date_of_onset     10
#> 16 2014-W24   f      date_of_onset      7
#> 17 2014-W24   m      date_of_onset     14
#> 18 2014-W25   f      date_of_onset     15
#> 19 2014-W25   m      date_of_onset     15
#> 20 2014-W26   f      date_of_onset     12
plot(dat, angle = 45, border_colour = "white")

Modeling incidence

We can use fit_curve() to fit the data with either a poisson or negative binomial regression model. The output from this will be a nested object with class incidence2_fit which has methods available for both automatic plotting and the calculation of growth (decay) rates and doubling (halving) times.

out <- fit_curve(dat, model = "poisson", alpha = 0.05)
out
#> # A tibble: 2 × 9
#>   count_variable gender      data model estimates  fitting_warning fitting_error
#>   <chr>          <fct>  <list<ti> <lis> <list>     <list>          <list>       
#> 1 date_of_onset  f       [11 × 2] <glm> <trndng_p> <NULL>          <NULL>       
#> 2 date_of_onset  m        [9 × 2] <glm> <trndng_p> <NULL>          <NULL>       
#> # ℹ 2 more variables: prediction_warning <list>, prediction_error <list>
plot(out, angle = 45, border_colour = "white")

growth_rate(out)
#> # A tibble: 2 × 10
#>   count_variable gender model      r r_lower r_upper growth_or_decay  time
#>   <chr>          <fct>  <list> <dbl>   <dbl>   <dbl> <chr>           <dbl>
#> 1 date_of_onset  f      <glm>  0.137  0.0698   0.206 doubling         5.07
#> 2 date_of_onset  m      <glm>  0.240  0.146    0.341 doubling         2.89
#> # ℹ 2 more variables: time_lower <dbl>, time_upper <dbl>

To unnest the data we can use unnest() (a function that has been reexported from the tidyr package.

unnest(out, estimates)
#> # A tibble: 20 × 15
#>    count_variable gender           data model count date_index estimate lower_ci
#>    <chr>          <fct>  <list<tibble[> <lis> <int> <isowk>       <dbl>    <dbl>
#>  1 date_of_onset  f            [11 × 2] <glm>     1 2014-W15       3.27     1.90
#>  2 date_of_onset  f            [11 × 2] <glm>     4 2014-W17       4.30     2.83
#>  3 date_of_onset  f            [11 × 2] <glm>     4 2014-W18       4.93     3.44
#>  4 date_of_onset  f            [11 × 2] <glm>     9 2014-W19       5.65     4.15
#>  5 date_of_onset  f            [11 × 2] <glm>     7 2014-W20       6.47     4.99
#>  6 date_of_onset  f            [11 × 2] <glm>     8 2014-W21       7.42     5.92
#>  7 date_of_onset  f            [11 × 2] <glm>     9 2014-W22       8.51     6.90
#>  8 date_of_onset  f            [11 × 2] <glm>    13 2014-W23       9.75     7.88
#>  9 date_of_onset  f            [11 × 2] <glm>     7 2014-W24      11.2      8.82
#> 10 date_of_onset  f            [11 × 2] <glm>    15 2014-W25      12.8      9.72
#> 11 date_of_onset  f            [11 × 2] <glm>    12 2014-W26      14.7     10.6 
#> 12 date_of_onset  m             [9 × 2] <glm>     1 2014-W16       2.01     1.02
#> 13 date_of_onset  m             [9 × 2] <glm>     1 2014-W17       2.56     1.43
#> 14 date_of_onset  m             [9 × 2] <glm>     3 2014-W19       4.13     2.73
#> 15 date_of_onset  m             [9 × 2] <glm>    10 2014-W20       5.25     3.75
#> 16 date_of_onset  m             [9 × 2] <glm>     7 2014-W21       6.67     5.07
#> 17 date_of_onset  m             [9 × 2] <glm>    10 2014-W22       8.48     6.69
#> 18 date_of_onset  m             [9 × 2] <glm>    10 2014-W23      10.8      8.50
#> 19 date_of_onset  m             [9 × 2] <glm>    14 2014-W24      13.7     10.4 
#> 20 date_of_onset  m             [9 × 2] <glm>    15 2014-W25      17.4     12.4 
#> # ℹ 7 more variables: upper_ci <dbl>, lower_pi <dbl>, upper_pi <dbl>,
#> #   fitting_warning <list>, fitting_error <list>, prediction_warning <list>,
#> #   prediction_error <list>

fit_curve() also works nicely with grouped incidence2 objects. In this situation, we return a nested tibble with some additional columns that indicate whether there has been a warning or error during the fitting or prediction stages.

grouped_dat <- incidence(
    raw_dat, 
    date_index = "date_of_onset",
    interval = "week",
    groups = "hospital"
)[1:120, ]
grouped_dat
#> # incidence:  120 x 4
#> # count vars: date_of_onset
#> # groups:     hospital
#>    date_index hospital                                     count_variable count
#>    <isowk>    <fct>                                        <chr>          <int>
#>  1 2014-W15   Military Hospital                            date_of_onset      1
#>  2 2014-W16   Connaught Hospital                           date_of_onset      1
#>  3 2014-W17   <NA>                                         date_of_onset      2
#>  4 2014-W17   other                                        date_of_onset      3
#>  5 2014-W18   <NA>                                         date_of_onset      1
#>  6 2014-W18   Connaught Hospital                           date_of_onset      1
#>  7 2014-W18   Princess Christian Maternity Hospital (PCMH) date_of_onset      1
#>  8 2014-W18   Rokupa Hospital                              date_of_onset      1
#>  9 2014-W19   <NA>                                         date_of_onset      1
#> 10 2014-W19   Connaught Hospital                           date_of_onset      3
#> # ℹ 110 more rows

out <- fit_curve(grouped_dat, model = "poisson", alpha = 0.05)
out
#> # A tibble: 6 × 9
#>   count_variable hospital                  data model estimates  fitting_warning
#>   <chr>          <fct>                 <list<t> <lis> <list>     <list>         
#> 1 date_of_onset  Connaught Hospital    [22 × 2] <glm> <trndng_p> <NULL>         
#> 2 date_of_onset  Military Hospital     [21 × 2] <glm> <trndng_p> <NULL>         
#> 3 date_of_onset  other                 [20 × 2] <glm> <trndng_p> <NULL>         
#> 4 date_of_onset  Princess Christian M… [17 × 2] <glm> <trndng_p> <NULL>         
#> 5 date_of_onset  Rokupa Hospital       [18 × 2] <glm> <trndng_p> <NULL>         
#> 6 date_of_onset  <NA>                  [22 × 2] <glm> <trndng_p> <NULL>         
#> # ℹ 3 more variables: fitting_error <list>, prediction_warning <list>,
#> #   prediction_error <list>

# plot with a prediction interval but not a confidence interval
plot(out, ci = FALSE, pi=TRUE, angle = 45, border_colour = "white")

growth_rate(out)
#> # A tibble: 6 × 10
#>   count_variable hospital      model     r r_lower r_upper growth_or_decay  time
#>   <chr>          <fct>         <lis> <dbl>   <dbl>   <dbl> <chr>           <dbl>
#> 1 date_of_onset  Connaught Ho… <glm> 0.197   0.177   0.217 doubling         3.53
#> 2 date_of_onset  Military Hos… <glm> 0.173   0.147   0.200 doubling         4.00
#> 3 date_of_onset  other         <glm> 0.170   0.141   0.200 doubling         4.09
#> 4 date_of_onset  Princess Chr… <glm> 0.142   0.101   0.188 doubling         4.87
#> 5 date_of_onset  Rokupa Hospi… <glm> 0.178   0.133   0.228 doubling         3.89
#> 6 date_of_onset  <NA>          <glm> 0.184   0.164   0.205 doubling         3.77
#> # ℹ 2 more variables: time_lower <dbl>, time_upper <dbl>

We provide helper functions, is_ok(), is_warning() and is_error() to help filter the output as necessary.

out <- fit_curve(grouped_dat, model = "negbin", alpha = 0.05)
is_warning(out)
#> # A tibble: 5 × 7
#>   count_variable hospital               data model    estimates  fitting_warning
#>   <chr>          <fct>              <list<t> <list>   <list>     <list>         
#> 1 date_of_onset  Connaught Hospital [22 × 2] <negbin> <trndng_p> <chr [2]>      
#> 2 date_of_onset  other              [20 × 2] <negbin> <trndng_p> <chr [2]>      
#> 3 date_of_onset  Princess Christia… [17 × 2] <negbin> <trndng_p> <chr [2]>      
#> 4 date_of_onset  Rokupa Hospital    [18 × 2] <negbin> <trndng_p> <chr [2]>      
#> 5 date_of_onset  <NA>               [22 × 2] <negbin> <trndng_p> <chr [2]>      
#> # ℹ 1 more variable: prediction_warning <list>
unnest(is_warning(out), fitting_warning)
#> # A tibble: 10 × 7
#>    count_variable hospital              data model    estimates  fitting_warning
#>    <chr>          <fct>             <list<t> <list>   <list>     <chr>          
#>  1 date_of_onset  Connaught Hospit… [22 × 2] <negbin> <trndng_p> iteration limi…
#>  2 date_of_onset  Connaught Hospit… [22 × 2] <negbin> <trndng_p> iteration limi…
#>  3 date_of_onset  other             [20 × 2] <negbin> <trndng_p> iteration limi…
#>  4 date_of_onset  other             [20 × 2] <negbin> <trndng_p> iteration limi…
#>  5 date_of_onset  Princess Christi… [17 × 2] <negbin> <trndng_p> iteration limi…
#>  6 date_of_onset  Princess Christi… [17 × 2] <negbin> <trndng_p> iteration limi…
#>  7 date_of_onset  Rokupa Hospital   [18 × 2] <negbin> <trndng_p> iteration limi…
#>  8 date_of_onset  Rokupa Hospital   [18 × 2] <negbin> <trndng_p> iteration limi…
#>  9 date_of_onset  <NA>              [22 × 2] <negbin> <trndng_p> iteration limi…
#> 10 date_of_onset  <NA>              [22 × 2] <negbin> <trndng_p> iteration limi…
#> # ℹ 1 more variable: prediction_warning <list>

Rolling average

We can add a rolling average, across current and previous intervals, to an incidence2 object with the add_rolling_average() function:

ra <- add_rolling_average(grouped_dat, n = 2L) # group observations with the 2 prior
plot(ra, border_colour = "white", angle = 45) +
    geom_line(aes(x = date_index, y = rolling_average))
#> Warning: Removed 1 row containing missing values or values outside the scale range
#> (`geom_line()`).