Introduction

trending aims to provides a coherent interface to several modelling tools. Whilst it is useful in an interactive context, it’s main focus is to provide an intuitive interface on which other packages can be developed (e.g. trendbreaker).

Main features

  • Model specification: Interfaces to common models through intuitive functions; lm_model(), glm_model(), glm_nb_model and brms_model*.

  • Model fitting and prediction: Once specified, models can be fit to data and generate confidence and prediction intervals for future data using fit() and predict().

  • Error and warning catching: The provided methods for fit and predict catch all warnings and errors, returning the output and these captured values in a list.

*   Requires brms

Example usage

Setup

library(outbreaks)  # for data
library(trending)   # for trend fitting
library(dplyr)      # for data manipulation

# load data
data(covid19_england_nhscalls_2020)

# select 6 weeks of data (from a period when the prevalence was decreasing)
last_date <- as.Date("2020-05-28")
first_date <- last_date - 8*7
pathways_recent <-
  covid19_england_nhscalls_2020 %>%
  filter(date >= first_date, date <= last_date) %>%
  group_by(date, day, weekday) %>%
  summarise(count = sum(count), .groups = "drop")

# split data for fitting and prediction
dat <-
  pathways_recent %>%
  group_by(date <= first_date + 6*7) %>%
  group_split()

fitting_data <- dat[[2]]
pred_data <- select(dat[[1]], date, day, weekday)

A succesful model fit

(model  <- glm_nb_model(count ~ day + weekday))
#> Untrained trending model:
#>     glm.nb(formula = count ~ day + weekday)
(fitted_model <- fit(model, fitting_data))
#> <trending_fit_tbl> 1 x 3
#>   result   warnings errors
#>   <list>   <list>   <list>
#> 1 <negbin> <NULL>   <NULL>
fitted_model %>% get_result()
#> [[1]]
#> 
#> Call:  glm.nb(formula = count ~ day + weekday, data = fitting_data, 
#>     init.theta = 39.34595795, link = log)
#> 
#> Coefficients:
#>    (Intercept)             day   weekdaymonday  weekdayweekend  
#>       11.46417        -0.03476         0.19162        -0.14224  
#> 
#> Degrees of Freedom: 42 Total (i.e. Null);  39 Residual
#> Null Deviance:       395.9 
#> Residual Deviance: 43.17     AIC: 850.6

# default
fitted_model %>% 
  predict(pred_data) %>%
  get_result()
#> [[1]]
#> <trending_prediction> 14 x 8
#>    date         day weekday      estimate lower_ci upper_ci lower_pi upper_pi
#>    <date>     <int> <fct>           <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
#>  1 2020-05-15    58 rest_of_week   12682.   11390.   14122.     8107    18870
#>  2 2020-05-16    59 weekend        10625.    9299.   12140.     6618    16223
#>  3 2020-05-17    60 weekend        10262.    8956.   11759.     6373    15714
#>  4 2020-05-18    61 monday         13840.   11749.   16303.     8363    21784
#>  5 2020-05-19    62 rest_of_week   11036.    9782.   12450.     6962    16638
#>  6 2020-05-20    63 rest_of_week   10659.    9416.   12066.     6701    16124
#>  7 2020-05-21    64 rest_of_week   10295.    9064.   11693.     6450    15626
#>  8 2020-05-22    65 rest_of_week    9943.    8724.   11333.     6208    15145
#>  9 2020-05-23    66 weekend         8330.    7138.    9721.     5079    12992
#> 10 2020-05-24    67 weekend         8045.    6872.    9419.     4889    12588
#> 11 2020-05-25    68 monday         10851.    9048.   13012.     6439    17388
#> 12 2020-05-26    69 rest_of_week    8652.    7486.   10001.     5326    13366
#> 13 2020-05-27    70 rest_of_week    8357.    7204.    9694.     5126    12955
#> 14 2020-05-28    71 rest_of_week    8071.    6933.    9396.     4933    12558

# without uncertainty
fitted_model %>% 
  predict(pred_data, uncertain = FALSE) %>% 
  get_result()
#> [[1]]
#> <trending_prediction> 14 x 8
#>    date         day weekday      estimate lower_ci upper_ci lower_pi upper_pi
#>    <date>     <int> <fct>           <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
#>  1 2020-05-15    58 rest_of_week   12682.   11390.   14122.     9027    16948
#>  2 2020-05-16    59 weekend        10625.    9299.   12140.     7562    14199
#>  3 2020-05-17    60 weekend        10262.    8956.   11759.     7304    13715
#>  4 2020-05-18    61 monday         13840.   11749.   16303.     9852    18494
#>  5 2020-05-19    62 rest_of_week   11036.    9782.   12450.     7855    14748
#>  6 2020-05-20    63 rest_of_week   10659.    9416.   12066.     7586    14245
#>  7 2020-05-21    64 rest_of_week   10295.    9064.   11693.     7327    13758
#>  8 2020-05-22    65 rest_of_week    9943.    8724.   11333.     7076    13289
#>  9 2020-05-23    66 weekend         8330.    7138.    9721.     5928    11134
#> 10 2020-05-24    67 weekend         8045.    6872.    9419.     5725    10754
#> 11 2020-05-25    68 monday         10851.    9048.   13012.     7723    14501
#> 12 2020-05-26    69 rest_of_week    8652.    7486.   10001.     6157    11564
#> 13 2020-05-27    70 rest_of_week    8357.    7204.    9694.     5947    11170
#> 14 2020-05-28    71 rest_of_week    8071.    6933.    9396.     5743    10788

# without prediction intervals
fitted_model %>% 
  predict(pred_data, add_pi = FALSE) %>% 
  get_result()
#> [[1]]
#> <trending_prediction> 14 x 6
#>    date         day weekday      estimate lower_ci upper_ci
#>    <date>     <int> <fct>           <dbl>    <dbl>    <dbl>
#>  1 2020-05-15    58 rest_of_week   12682.   11390.   14122.
#>  2 2020-05-16    59 weekend        10625.    9299.   12140.
#>  3 2020-05-17    60 weekend        10262.    8956.   11759.
#>  4 2020-05-18    61 monday         13840.   11749.   16303.
#>  5 2020-05-19    62 rest_of_week   11036.    9782.   12450.
#>  6 2020-05-20    63 rest_of_week   10659.    9416.   12066.
#>  7 2020-05-21    64 rest_of_week   10295.    9064.   11693.
#>  8 2020-05-22    65 rest_of_week    9943.    8724.   11333.
#>  9 2020-05-23    66 weekend         8330.    7138.    9721.
#> 10 2020-05-24    67 weekend         8045.    6872.    9419.
#> 11 2020-05-25    68 monday         10851.    9048.   13012.
#> 12 2020-05-26    69 rest_of_week    8652.    7486.   10001.
#> 13 2020-05-27    70 rest_of_week    8357.    7204.    9694.
#> 14 2020-05-28    71 rest_of_week    8071.    6933.    9396.

# bootstraped prediction intervals
fitted_model %>% 
  predict(pred_data, simulate_pi = TRUE) %>% 
  get_result()
#> [[1]]
#> <trending_prediction> 14 x 8
#>    date         day weekday      estimate lower_ci upper_ci lower_pi upper_pi
#>    <date>     <int> <fct>           <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
#>  1 2020-05-15    58 rest_of_week   12682.   11390.   14122.     9166    17173
#>  2 2020-05-16    59 weekend        10625.    9299.   12140.     7411    14703
#>  3 2020-05-17    60 weekend        10262.    8956.   11759.     7134    14361
#>  4 2020-05-18    61 monday         13840.   11749.   16303.     9437    19283
#>  5 2020-05-19    62 rest_of_week   11036.    9782.   12450.     7684    15308
#>  6 2020-05-20    63 rest_of_week   10659.    9416.   12066.     7354    14970
#>  7 2020-05-21    64 rest_of_week   10295.    9064.   11693.     7219    13969
#>  8 2020-05-22    65 rest_of_week    9943.    8724.   11333.     6889    13709
#>  9 2020-05-23    66 weekend         8330.    7138.    9721.     5772    11518
#> 10 2020-05-24    67 weekend         8045.    6872.    9419.     5684    11220
#> 11 2020-05-25    68 monday         10851.    9048.   13012.     7346    15094
#> 12 2020-05-26    69 rest_of_week    8652.    7486.   10001.     6085    12112
#> 13 2020-05-27    70 rest_of_week    8357.    7204.    9694.     5827    11638
#> 14 2020-05-28    71 rest_of_week    8071.    6933.    9396.     5595    10931

Example of failed model fitting

(model2  <- glm_nb_model(count ~ day + nonexistent))
#> Untrained trending model:
#>     glm.nb(formula = count ~ day + nonexistent)
(fitted_model2 <- fit(model2, fitting_data))
#> <trending_fit_tbl> 1 x 3
#>   result warnings errors   
#>   <list> <list>   <list>   
#> 1 <NULL> <NULL>   <chr [1]>
get_result(fitted_model2)
#> [[1]]
#> NULL
get_errors(fitted_model2)
#> [[1]]
#> [1] "object 'nonexistent' not found"

Multiple models

The fit function also works with a list input of multiple models.

models  <- list(
  simple = lm_model(count ~ day),
  glm_poisson = glm_model(count ~ day, family = "poisson"),
  glm_negbin = glm_nb_model(count ~ day + weekday),
  will_error = glm_nb_model(count ~ day + nonexistant)
)
(fitted_tbl <- fit(models, fitting_data))
#> <trending_fit_tbl> 4 x 4
#>   model_name  result       warnings     errors      
#>   <chr>       <named list> <named list> <named list>
#> 1 simple      <lm>         <NULL>       <NULL>      
#> 2 glm_poisson <glm>        <NULL>       <NULL>      
#> 3 glm_negbin  <negbin>     <NULL>       <NULL>      
#> 4 will_error  <NULL>       <NULL>       <chr [1]>
get_result(fitted_tbl)
#> $simple
#> 
#> Call:
#> lm(formula = count ~ day, data = fitting_data)
#> 
#> Coefficients:
#> (Intercept)          day  
#>       69202        -1093  
#> 
#> 
#> $glm_poisson
#> 
#> Call:  glm(formula = count ~ day, family = "poisson", data = fitting_data)
#> 
#> Coefficients:
#> (Intercept)          day  
#>    11.57014     -0.03822  
#> 
#> Degrees of Freedom: 42 Total (i.e. Null);  41 Residual
#> Null Deviance:       321600 
#> Residual Deviance: 50970     AIC: 51490
#> 
#> $glm_negbin
#> 
#> Call:  glm.nb(formula = count ~ day + weekday, data = fitting_data, 
#>     init.theta = 39.34595795, link = log)
#> 
#> Coefficients:
#>    (Intercept)             day   weekdaymonday  weekdayweekend  
#>       11.46417        -0.03476         0.19162        -0.14224  
#> 
#> Degrees of Freedom: 42 Total (i.e. Null);  39 Residual
#> Null Deviance:       395.9 
#> Residual Deviance: 43.17     AIC: 850.6
#> 
#> $will_error
#> NULL

This can also then be used with predict()

(pred <- predict(fitted_tbl, pred_data))
#> <trending_predict_tbl> 4 x 4
#>   model_name  result              warnings     errors      
#>   <chr>       <named list>        <named list> <named list>
#> 1 simple      <trndng_p [14 × 8]> <NULL>       <NULL>      
#> 2 glm_poisson <trndng_p [14 × 8]> <NULL>       <NULL>      
#> 3 glm_negbin  <trndng_p [14 × 8]> <NULL>       <NULL>      
#> 4 will_error  <NULL>              <NULL>       <chr [1]>
get_result(pred)
#> $simple
#> <trending_prediction> 14 x 8
#>    date         day weekday      estimate lower_ci upper_ci lower_pi upper_pi
#>    <date>     <int> <fct>           <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
#>  1 2020-05-15    58 rest_of_week    5813.     234.   11392.  -13007.   24632.
#>  2 2020-05-16    59 weekend         4720.   -1053.   10492.  -14158.   23597.
#>  3 2020-05-17    60 weekend         3627.   -2341.    9594.  -15311.   22565.
#>  4 2020-05-18    61 monday          2534.   -3631.    8699.  -16467.   21535.
#>  5 2020-05-19    62 rest_of_week    1441.   -4922.    7804.  -17626.   20508.
#>  6 2020-05-20    63 rest_of_week     348.   -6215.    6911.  -18786.   19482.
#>  7 2020-05-21    64 rest_of_week    -745.   -7509.    6020.  -19949.   18459.
#>  8 2020-05-22    65 rest_of_week   -1838.   -8805.    5129.  -21114.   17439.
#>  9 2020-05-23    66 weekend        -2931.  -10101.    4240.  -22282.   16420.
#> 10 2020-05-24    67 weekend        -4024.  -11399.    3352.  -23451.   15404.
#> 11 2020-05-25    68 monday         -5116.  -12697.    2464.  -24623.   14390.
#> 12 2020-05-26    69 rest_of_week   -6209.  -13996.    1578.  -25797.   13378.
#> 13 2020-05-27    70 rest_of_week   -7302.  -15296.     692.  -26973.   12369.
#> 14 2020-05-28    71 rest_of_week   -8395.  -16597.    -193.  -28152.   11361.
#> 
#> $glm_poisson
#> <trending_prediction> 14 x 8
#>    date         day weekday      estimate lower_ci upper_ci lower_pi upper_pi
#>    <date>     <int> <fct>           <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
#>  1 2020-05-15    58 rest_of_week   11535.   11484.   11587.    11275    11798
#>  2 2020-05-16    59 weekend        11103.   11052.   11154.    10846    11361
#>  3 2020-05-17    60 weekend        10686.   10636.   10737.    10434    10941
#>  4 2020-05-18    61 monday         10286.   10236.   10336.    10038    10536
#>  5 2020-05-19    62 rest_of_week    9900.    9850.    9950.     9656    10146
#>  6 2020-05-20    63 rest_of_week    9529.    9480.    9578.     9289     9770
#>  7 2020-05-21    64 rest_of_week    9171.    9123.    9220.     8936     9409
#>  8 2020-05-22    65 rest_of_week    8827.    8780.    8876.     8596     9061
#>  9 2020-05-23    66 weekend         8496.    8449.    8544.     8269     8726
#> 10 2020-05-24    67 weekend         8178.    8131.    8225.     7955     8403
#> 11 2020-05-25    68 monday          7871.    7825.    7917.     7652     8092
#> 12 2020-05-26    69 rest_of_week    7576.    7531.    7621.     7361     7793
#> 13 2020-05-27    70 rest_of_week    7292.    7247.    7337.     7081     7505
#> 14 2020-05-28    71 rest_of_week    7018.    6974.    7063.     6811     7228
#> 
#> $glm_negbin
#> <trending_prediction> 14 x 8
#>    date         day weekday      estimate lower_ci upper_ci lower_pi upper_pi
#>    <date>     <int> <fct>           <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
#>  1 2020-05-15    58 rest_of_week   12682.   11390.   14122.     8107    18870
#>  2 2020-05-16    59 weekend        10625.    9299.   12140.     6618    16223
#>  3 2020-05-17    60 weekend        10262.    8956.   11759.     6373    15714
#>  4 2020-05-18    61 monday         13840.   11749.   16303.     8363    21784
#>  5 2020-05-19    62 rest_of_week   11036.    9782.   12450.     6962    16638
#>  6 2020-05-20    63 rest_of_week   10659.    9416.   12066.     6701    16124
#>  7 2020-05-21    64 rest_of_week   10295.    9064.   11693.     6450    15626
#>  8 2020-05-22    65 rest_of_week    9943.    8724.   11333.     6208    15145
#>  9 2020-05-23    66 weekend         8330.    7138.    9721.     5079    12992
#> 10 2020-05-24    67 weekend         8045.    6872.    9419.     4889    12588
#> 11 2020-05-25    68 monday         10851.    9048.   13012.     6439    17388
#> 12 2020-05-26    69 rest_of_week    8652.    7486.   10001.     5326    13366
#> 13 2020-05-27    70 rest_of_week    8357.    7204.    9694.     5126    12955
#> 14 2020-05-28    71 rest_of_week    8071.    6933.    9396.     4933    12558
#> 
#> $will_error
#> NULL