Skip to contents

This package provides functions for forecasting attendant care hours for the Transport Accident Commission (TAC).

Installation

# Step 1: install the dependencies, together with remotes, which is used
# for the GitHub install in step 2
install.packages(c(
  "tsibble", "fabletools", "abind", "distributional", "dplyr", "forcats",
  "forecast", "furrr", "ggplot2", "hts", "janitor", "lubridate", "MASS",
  "readr", "stringr", "tibble", "tidyr", "tsbox", "remotes"
))
# Step 2: install this package from GitHub
remotes::install_github("robjhyndman/TACforecasting")

Example

The data included in the package and in this example has been randomly generated. No real data is included in this repository.

# Attach the packages this example relies on. dplyr must be attached because
# a bare filter() call otherwise resolves to stats::filter(), and ggtitle()
# is provided by ggplot2.
library(TACforecasting)
library(dplyr)
library(ggplot2)

# Read the claims file and the attendant care hours file
group_costs <- read_tac_data(
  claims_file = "data-raw/T086_claim_header.csv",
  costs_file = "data-raw/T086_attendant_care_hours.csv"
)
# Plot total hours by billing period
plot_total_hours(group_costs)


# Plot total hours per age group by billing period
plot_total_hours(group_costs, age_group, include_average = FALSE) +
  ggtitle("Hours by age group in each billing period")


# Plot total hours per injury group by billing period
plot_total_hours(group_costs, injury_group, include_average = FALSE) +
  ggtitle("Hours by injury group in each billing period")

# Compute forecasts of total hours by age group and injury group.
# Only billing periods up to the end of 2021 are used for fitting, so that
# 2022 is kept back as a test set.
fable_forecasts_hours <- group_costs |>
  filter(billing_period <= as.Date("2021-12-31")) |>
  get_forecasts(h = 13, nsim = 200)
# Plot forecasts of total hours
plot_forecasts(fable_forecasts_hours, group_costs)


# Plot forecasts of total hours for a single age group (here "20-34")
plot_forecasts(fable_forecasts_hours, group_costs, show_age_group = "20-34") +
  ggtitle("Forecasts of total hours for 20-34 year olds")


# Plot forecasts of total hours for a single injury group (here "NONCAT")
plot_forecasts(
  fable_forecasts_hours,
  group_costs,
  show_injury_group = "NONCAT"
) +
  ggtitle("Forecasts of total hours from NONCAT injuries claims")

# Compute accuracy statistics on the 2022 test set
tac_accuracy(fable_forecasts_hours, group_costs)
#> # A tibble: 13 × 6
#>        h   RMSE    MAE  MAPE coverage   CRPS
#>    <int>  <dbl>  <dbl> <dbl>    <dbl>  <dbl>
#>  1     1  8734.  8734. 13.6         1  4549.
#>  2     2 11377. 11377. 18.0         1  5234.
#>  3     3 14535. 14535. 24.4         1  7095.
#>  4     4 55633. 55633. 42.7         0 46918.
#>  5     5  6335.  6335.  7.67        1  7137.
#>  6     6 10539. 10539. 12.4         1  8060.
#>  7     7  6697.  6697.  9.55        1  4261.
#>  8     8 27604. 27604. 51.8         1 12819.
#>  9     9  9059.  9059. 10.2         1 10042.
#> 10    10 27780. 27780. 47.9         1 10161.
#> 11    11  2025.  2025.  2.64        1  4404.
#> 12    12 36674. 36674. 65.5         1 10075.
#> 13    13 48086. 48086. 78.2         1  7002.
# Accuracy statistics evaluated on a rolling origin.
# Note: this call takes a long time to run.
cv_accuracy <- tscv_accuracy(
  group_costs,
  h = 13, nsim = 200, init = 195, step = 6
)
# Discard rows without an RMSE, then combine the measures within each
# horizon h: RMSE as the root of the mean squared value, coverage as a
# percentage, and the remaining measures as plain means.
cv_accuracy |>
  filter(!is.na(RMSE), h <= 24) |>
  summarise(
    RMSE = sqrt(mean(RMSE^2)),
    MAE = mean(MAE),
    MAPE = mean(MAPE),
    Coverage = 100 * mean(coverage),
    CRPS = mean(CRPS),
    .by = h
  )
#> # A tibble: 13 × 6
#>        h   RMSE    MAE  MAPE Coverage   CRPS
#>    <int>  <dbl>  <dbl> <dbl>    <dbl>  <dbl>
#>  1     1 14632. 12606.  16.9    100    8585.
#>  2     2 15476. 13430.  20.3     93.1  8650.
#>  3     3 14358. 11845.  18.6     96.4  7707.
#>  4     4 20249. 14351.  16.1     92.9 10747.
#>  5     5 17135. 13264.  18.1     92.9  9194.
#>  6     6 19022. 12874.  16.8     92.9  9702.
#>  7     7 14318. 12238.  16.9    100    8404.
#>  8     8 16845. 14858.  22.5     92.9  9093.
#>  9     9 15683. 12777.  20.2     96.3  8134.
#> 10    10 19521. 13922.  15.8     96.3 10651.
#> 11    11 19320. 15070.  21.4     92.6  9697.
#> 12    12 19836. 14594.  19.7     88.9  9942.
#> 13    13 15915. 13276.  19.1    100    8145.