Introduction

trending aims to provides a coherent interface to several modelling tools. Whilst it is useful in an interactive context, it’s main focus is to provide an intuitive interface on which other packages can be developed (e.g. trendbreaker).

Main features

  • Model specification: Interfaces to common models through intuitive functions; lm_model(), glm_model(), glm_nb_model and brms_model*.

  • Model fitting and prediction: Once specified, models can be fit to data and generate confidence and prediction intervals for future data using fit() and predict().

  • Error and warning catching: The provided methods for fit and predict catch all warnings and errors, returning the output and these captured values in a list.

*   Requires brms

Example usage

Setup

library(outbreaks)  # for data
library(trending)   # for trend fitting
library(dplyr)      # for data manipulation

# load data
data(covid19_england_nhscalls_2020)

# select 6 weeks of data (from a period when the prevalence was decreasing)
last_date <- as.Date("2020-05-28")
first_date <- last_date - 8*7
pathways_recent <-
  covid19_england_nhscalls_2020 %>%
  filter(date >= first_date, date <= last_date) %>%
  group_by(date, day, weekday) %>%
  summarise(count = sum(count), .groups = "drop")

# split data for fitting and prediction
dat <-
  pathways_recent %>%
  group_by(date <= first_date + 6*7) %>%
  group_split()

fitting_data <- dat[[2]]
pred_data <- select(dat[[1]], date, day, weekday)

A succesful model fit

(model  <- glm_nb_model(count ~ day + weekday))
#> Untrained trending model:
#>     glm.nb(formula = count ~ day + weekday)
(fitted_model <- fit(model, fitting_data))
#> <trending_fit_tbl> 1 x 3
#>   result   warnings errors
#>   <list>   <list>   <list>
#> 1 <negbin> <NULL>   <NULL>
fitted_model %>% get_result()
#> [[1]]
#> 
#> Call:  glm.nb(formula = count ~ day + weekday, data = fitting_data, 
#>     init.theta = 39.34595795, link = log)
#> 
#> Coefficients:
#>    (Intercept)             day   weekdaymonday  weekdayweekend  
#>       11.46417        -0.03476         0.19162        -0.14224  
#> 
#> Degrees of Freedom: 42 Total (i.e. Null);  39 Residual
#> Null Deviance:       395.9 
#> Residual Deviance: 43.17     AIC: 850.6

# default
fitted_model %>% 
  predict(pred_data) %>%
  get_result()
#> [[1]]
#> <trending_prediction> 14 x 8
#>    date         day weekday      estimate lower_ci upper_ci lower_pi upper_pi
#>    <date>     <int> <fct>           <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
#>  1 2020-05-15    58 rest_of_week   12682.   11390.   14122.     8107    18870
#>  2 2020-05-16    59 weekend        10625.    9299.   12140.     6618    16223
#>  3 2020-05-17    60 weekend        10262.    8956.   11759.     6373    15714
#>  4 2020-05-18    61 monday         13840.   11749.   16303.     8363    21784
#>  5 2020-05-19    62 rest_of_week   11036.    9782.   12450.     6962    16638
#>  6 2020-05-20    63 rest_of_week   10659.    9416.   12066.     6701    16124
#>  7 2020-05-21    64 rest_of_week   10295.    9064.   11693.     6450    15626
#>  8 2020-05-22    65 rest_of_week    9943.    8724.   11333.     6208    15145
#>  9 2020-05-23    66 weekend         8330.    7138.    9721.     5079    12992
#> 10 2020-05-24    67 weekend         8045.    6872.    9419.     4889    12588
#> 11 2020-05-25    68 monday         10851.    9048.   13012.     6439    17388
#> 12 2020-05-26    69 rest_of_week    8652.    7486.   10001.     5326    13366
#> 13 2020-05-27    70 rest_of_week    8357.    7204.    9694.     5126    12955
#> 14 2020-05-28    71 rest_of_week    8071.    6933.    9396.     4933    12558

# without uncertainty
fitted_model %>% 
  predict(pred_data, uncertain = FALSE) %>% 
  get_result()
#> [[1]]
#> <trending_prediction> 14 x 8
#>    date         day weekday      estimate lower_ci upper_ci lower_pi upper_pi
#>    <date>     <int> <fct>           <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
#>  1 2020-05-15    58 rest_of_week   12682.   11390.   14122.     9027    16948
#>  2 2020-05-16    59 weekend        10625.    9299.   12140.     7562    14199
#>  3 2020-05-17    60 weekend        10262.    8956.   11759.     7304    13715
#>  4 2020-05-18    61 monday         13840.   11749.   16303.     9852    18494
#>  5 2020-05-19    62 rest_of_week   11036.    9782.   12450.     7855    14748
#>  6 2020-05-20    63 rest_of_week   10659.    9416.   12066.     7586    14245
#>  7 2020-05-21    64 rest_of_week   10295.    9064.   11693.     7327    13758
#>  8 2020-05-22    65 rest_of_week    9943.    8724.   11333.     7076    13289
#>  9 2020-05-23    66 weekend         8330.    7138.    9721.     5928    11134
#> 10 2020-05-24    67 weekend         8045.    6872.    9419.     5725    10754
#> 11 2020-05-25    68 monday         10851.    9048.   13012.     7723    14501
#> 12 2020-05-26    69 rest_of_week    8652.    7486.   10001.     6157    11564
#> 13 2020-05-27    70 rest_of_week    8357.    7204.    9694.     5947    11170
#> 14 2020-05-28    71 rest_of_week    8071.    6933.    9396.     5743    10788

# without prediction intervals
fitted_model %>% 
  predict(pred_data, add_pi = FALSE) %>% 
  get_result()
#> [[1]]
#> <trending_prediction> 14 x 6
#>    date         day weekday      estimate lower_ci upper_ci
#>    <date>     <int> <fct>           <dbl>    <dbl>    <dbl>
#>  1 2020-05-15    58 rest_of_week   12682.   11390.   14122.
#>  2 2020-05-16    59 weekend        10625.    9299.   12140.
#>  3 2020-05-17    60 weekend        10262.    8956.   11759.
#>  4 2020-05-18    61 monday         13840.   11749.   16303.
#>  5 2020-05-19    62 rest_of_week   11036.    9782.   12450.
#>  6 2020-05-20    63 rest_of_week   10659.    9416.   12066.
#>  7 2020-05-21    64 rest_of_week   10295.    9064.   11693.
#>  8 2020-05-22    65 rest_of_week    9943.    8724.   11333.
#>  9 2020-05-23    66 weekend         8330.    7138.    9721.
#> 10 2020-05-24    67 weekend         8045.    6872.    9419.
#> 11 2020-05-25    68 monday         10851.    9048.   13012.
#> 12 2020-05-26    69 rest_of_week    8652.    7486.   10001.
#> 13 2020-05-27    70 rest_of_week    8357.    7204.    9694.
#> 14 2020-05-28    71 rest_of_week    8071.    6933.    9396.

# bootstraped prediction intervals
fitted_model %>% 
  predict(pred_data, simulate_pi = TRUE) %>% 
  get_result()
#> [[1]]
#> <trending_prediction> 14 x 8
#>    date         day weekday      estimate lower_ci upper_ci lower_pi upper_pi
#>    <date>     <int> <fct>           <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
#>  1 2020-05-15    58 rest_of_week   12682.   11390.   14122.     9058    17377
#>  2 2020-05-16    59 weekend        10625.    9299.   12140.     7435    14749
#>  3 2020-05-17    60 weekend        10262.    8956.   11759.     7017    14226
#>  4 2020-05-18    61 monday         13840.   11749.   16303.     9392    19625
#>  5 2020-05-19    62 rest_of_week   11036.    9782.   12450.     7741    14791
#>  6 2020-05-20    63 rest_of_week   10659.    9416.   12066.     7415    14548
#>  7 2020-05-21    64 rest_of_week   10295.    9064.   11693.     7101    14024
#>  8 2020-05-22    65 rest_of_week    9943.    8724.   11333.     6871    13744
#>  9 2020-05-23    66 weekend         8330.    7138.    9721.     5722    11499
#> 10 2020-05-24    67 weekend         8045.    6872.    9419.     5394    11170
#> 11 2020-05-25    68 monday         10851.    9048.   13012.     7433    15286
#> 12 2020-05-26    69 rest_of_week    8652.    7486.   10001.     5852    12010
#> 13 2020-05-27    70 rest_of_week    8357.    7204.    9694.     5797    11587
#> 14 2020-05-28    71 rest_of_week    8071.    6933.    9396.     5664    11198

Example of failed model fitting

(model2  <- glm_nb_model(count ~ day + nonexistent))
#> Untrained trending model:
#>     glm.nb(formula = count ~ day + nonexistent)
(fitted_model2 <- fit(model2, fitting_data))
#> <trending_fit_tbl> 1 x 3
#>   result warnings errors   
#>   <list> <list>   <list>   
#> 1 <NULL> <NULL>   <chr [1]>
get_result(fitted_model2)
#> [[1]]
#> NULL
get_errors(fitted_model2)
#> [[1]]
#> [1] "object 'nonexistent' not found"

Multiple models

The fit function also works with a list input of multiple models.

models  <- list(
  simple = lm_model(count ~ day),
  glm_poisson = glm_model(count ~ day, family = "poisson"),
  glm_negbin = glm_nb_model(count ~ day + weekday),
  will_error = glm_nb_model(count ~ day + nonexistant)
)
(fitted_tbl <- fit(models, fitting_data))
#> <trending_fit_tbl> 4 x 4
#>   model_name  result       warnings     errors      
#>   <chr>       <named list> <named list> <named list>
#> 1 simple      <lm>         <NULL>       <NULL>      
#> 2 glm_poisson <glm>        <NULL>       <NULL>      
#> 3 glm_negbin  <negbin>     <NULL>       <NULL>      
#> 4 will_error  <NULL>       <NULL>       <chr [1]>
get_result(fitted_tbl)
#> $simple
#> 
#> Call:
#> lm(formula = count ~ day, data = fitting_data)
#> 
#> Coefficients:
#> (Intercept)          day  
#>       69202        -1093  
#> 
#> 
#> $glm_poisson
#> 
#> Call:  glm(formula = count ~ day, family = "poisson", data = fitting_data)
#> 
#> Coefficients:
#> (Intercept)          day  
#>    11.57014     -0.03822  
#> 
#> Degrees of Freedom: 42 Total (i.e. Null);  41 Residual
#> Null Deviance:       321600 
#> Residual Deviance: 50970     AIC: 51490
#> 
#> $glm_negbin
#> 
#> Call:  glm.nb(formula = count ~ day + weekday, data = fitting_data, 
#>     init.theta = 39.34595795, link = log)
#> 
#> Coefficients:
#>    (Intercept)             day   weekdaymonday  weekdayweekend  
#>       11.46417        -0.03476         0.19162        -0.14224  
#> 
#> Degrees of Freedom: 42 Total (i.e. Null);  39 Residual
#> Null Deviance:       395.9 
#> Residual Deviance: 43.17     AIC: 850.6
#> 
#> $will_error
#> NULL

This can also then be used with predict()

(pred <- predict(fitted_tbl, pred_data))
#> <trending_predict_tbl> 4 x 4
#>   model_name  result              warnings     errors      
#>   <chr>       <named list>        <named list> <named list>
#> 1 simple      <trndng_p [14 × 8]> <NULL>       <NULL>      
#> 2 glm_poisson <trndng_p [14 × 8]> <NULL>       <NULL>      
#> 3 glm_negbin  <trndng_p [14 × 8]> <NULL>       <NULL>      
#> 4 will_error  <NULL>              <NULL>       <chr [1]>
get_result(pred)
#> $simple
#> <trending_prediction> 14 x 8
#>    date         day weekday      estimate lower_ci upper_ci lower_pi upper_pi
#>    <date>     <int> <fct>           <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
#>  1 2020-05-15    58 rest_of_week    5813.     234.   11392.  -13007.   24632.
#>  2 2020-05-16    59 weekend         4720.   -1053.   10492.  -14158.   23597.
#>  3 2020-05-17    60 weekend         3627.   -2341.    9594.  -15311.   22565.
#>  4 2020-05-18    61 monday          2534.   -3631.    8699.  -16467.   21535.
#>  5 2020-05-19    62 rest_of_week    1441.   -4922.    7804.  -17626.   20508.
#>  6 2020-05-20    63 rest_of_week     348.   -6215.    6911.  -18786.   19482.
#>  7 2020-05-21    64 rest_of_week    -745.   -7509.    6020.  -19949.   18459.
#>  8 2020-05-22    65 rest_of_week   -1838.   -8805.    5129.  -21114.   17439.
#>  9 2020-05-23    66 weekend        -2931.  -10101.    4240.  -22282.   16420.
#> 10 2020-05-24    67 weekend        -4024.  -11399.    3352.  -23451.   15404.
#> 11 2020-05-25    68 monday         -5116.  -12697.    2464.  -24623.   14390.
#> 12 2020-05-26    69 rest_of_week   -6209.  -13996.    1578.  -25797.   13378.
#> 13 2020-05-27    70 rest_of_week   -7302.  -15296.     692.  -26973.   12369.
#> 14 2020-05-28    71 rest_of_week   -8395.  -16597.    -193.  -28152.   11361.
#> 
#> $glm_poisson
#> <trending_prediction> 14 x 8
#>    date         day weekday      estimate lower_ci upper_ci lower_pi upper_pi
#>    <date>     <int> <fct>           <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
#>  1 2020-05-15    58 rest_of_week   11535.   11484.   11587.    11275    11798
#>  2 2020-05-16    59 weekend        11103.   11052.   11154.    10846    11361
#>  3 2020-05-17    60 weekend        10686.   10636.   10737.    10434    10941
#>  4 2020-05-18    61 monday         10286.   10236.   10336.    10038    10536
#>  5 2020-05-19    62 rest_of_week    9900.    9850.    9950.     9656    10146
#>  6 2020-05-20    63 rest_of_week    9529.    9480.    9578.     9289     9770
#>  7 2020-05-21    64 rest_of_week    9171.    9123.    9220.     8936     9409
#>  8 2020-05-22    65 rest_of_week    8827.    8780.    8876.     8596     9061
#>  9 2020-05-23    66 weekend         8496.    8449.    8544.     8269     8726
#> 10 2020-05-24    67 weekend         8178.    8131.    8225.     7955     8403
#> 11 2020-05-25    68 monday          7871.    7825.    7917.     7652     8092
#> 12 2020-05-26    69 rest_of_week    7576.    7531.    7621.     7361     7793
#> 13 2020-05-27    70 rest_of_week    7292.    7247.    7337.     7081     7505
#> 14 2020-05-28    71 rest_of_week    7018.    6974.    7063.     6811     7228
#> 
#> $glm_negbin
#> <trending_prediction> 14 x 8
#>    date         day weekday      estimate lower_ci upper_ci lower_pi upper_pi
#>    <date>     <int> <fct>           <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
#>  1 2020-05-15    58 rest_of_week   12682.   11390.   14122.     8107    18870
#>  2 2020-05-16    59 weekend        10625.    9299.   12140.     6618    16223
#>  3 2020-05-17    60 weekend        10262.    8956.   11759.     6373    15714
#>  4 2020-05-18    61 monday         13840.   11749.   16303.     8363    21784
#>  5 2020-05-19    62 rest_of_week   11036.    9782.   12450.     6962    16638
#>  6 2020-05-20    63 rest_of_week   10659.    9416.   12066.     6701    16124
#>  7 2020-05-21    64 rest_of_week   10295.    9064.   11693.     6450    15626
#>  8 2020-05-22    65 rest_of_week    9943.    8724.   11333.     6208    15145
#>  9 2020-05-23    66 weekend         8330.    7138.    9721.     5079    12992
#> 10 2020-05-24    67 weekend         8045.    6872.    9419.     4889    12588
#> 11 2020-05-25    68 monday         10851.    9048.   13012.     6439    17388
#> 12 2020-05-26    69 rest_of_week    8652.    7486.   10001.     5326    13366
#> 13 2020-05-27    70 rest_of_week    8357.    7204.    9694.     5126    12955
#> 14 2020-05-28    71 rest_of_week    8071.    6933.    9396.     4933    12558
#> 
#> $will_error
#> NULL