Simulation and projections

Load packages and inputs

Load the MLE and MCMC done previously.

library(kableExtra)
library(tidyverse)
library(sbt)
library(reshape2)
library(SparseNUTS)
library(abind)
library(microbenchmark)

theme_set(theme_bw())

load(system.file("extdata", "opt.rda", package = "sbt"))
load(system.file("extdata", "mcmc.rda", package = "sbt"))
obj <- MakeADFun(func = cmb(sbt_model, data), parameters = parameters, map = map)
invisible(obj$fn(opt$par))

Simulation

Simulation can be done for any data set in the model that is passed through the RTMB function OBS [@Kristensen2016, @Kristensen2024]. For example, in the file R/likelihoods.R, the function get_cpue_like includes the code:

cpue_log_obs <- log(cpue_obs)
cpue_log_obs <- OBS(cpue_log_obs)
lp <- -dnorm(x = cpue_log_obs, mean = cpue_log_pred, sd = cpue_sig, log = TRUE)

This allows a call to obj$simulate()$cpue_log_obs. For example (see Figure 1):

# obj$simulate()$cpue_log_obs
# obj$simulate()$troll_log_obs
# obj$simulate()$aerial_log_obs
# obj$simulate()$gt_nrec
# obj$simulate()$hsp_nK
# obj$simulate()$pop_nP
plot(data$cpue_year, data$cpue_obs, col = 2)
for (i in 1:10) lines(data$cpue_year, exp(obj$simulate()$cpue_log_obs))

Data sets for which simulation is available include:

cpue_log_obs
troll_log_obs
aerial_log_obs
gt_nrec
hsp_nK
pop_nP

But not:

af_obs
lf_obs
cpue_lfs

Simulation is also required for calculating OSA residuals and to use the RTMB function checkConsistency. Unfortunately, the checkConsistency does not work yet because not all data types are defined using densities that have simulation support within the model (i.e., the AFs and LFs).

# chk <- checkConsistency(obj = obj, hessian = TRUE, estimate = TRUE, n = 100,
#                         observation.name  = "cpue_log_obs")
# chk
# s <- summary(chk)
# s
# s$marginal$p.value

Projection

Projections can be based on MLE or MCMC.

# iters <- sample(x = 1:2000, size = 1000)
iters <- 1:2000 # define the exact iterations you want to use
n_iter <- length(iters)
n_proj <- 13
proj_years <- (data$last_yr - 1):(data$last_yr + n_proj)
n_proj <- length(proj_years)
range(proj_years)

[1] 2021 2035

Recruitment

First we explore the recruitment deviates estimated by the model. The Augmented Dickey-Fuller (ADF) test was applied to the recruitment deviates series, yielding a test statistic of -3.4661 with a lag order of 4. The p-value of approximately 0.049 indicates that, at the 5% significance level, we can reject the null hypothesis of a unit root, supporting the alternative hypothesis that the series is stationary. This suggests that the rdevs time series does not exhibit non-stationary behavior, such as trends or random walks, making it suitable for modeling with processes like ARIMA without requiring differencing to achieve stationarity.

rdevs <- obj$report()$rdev_y
# tseries::adf.test(rdevs)

Note that there is evidence for up to AR2

acf(rdevs)
# pacf(rdevs)

Must set method = "mle".

# auto.arima(rdevs)
ar(rdevs, order.max = 5)


Call:
ar(x = rdevs, order.max = 5)

Coefficients:
      1        2
 0.8382  -0.2702

Order selected 2  sigma^2 estimated as  0.0715

ar(rdevs, order.max = 5, method = "mle")


Call:
ar(x = rdevs, order.max = 5, method = "mle")

Coefficients:
      1        2
 0.8324  -0.2618

Order selected 2  sigma^2 estimated as  0.06879

arima(rdevs, order = c(2, 0, 0))


Call:
arima(x = rdevs, order = c(2, 0, 0))

Coefficients:
         ar1      ar2  intercept
      0.8324  -0.2618    -0.0073
s.e.  0.0999   0.1010     0.0632

sigma^2 estimated as 0.06879:  log likelihood = -7.77,  aic = 23.54

Recruitment projections can be based on an MLE fit or MCMC using the project_rec_devs function. These projections can be done using Autoregressive Integrated Moving Average (ARIMA) models by setting option = "ARIMA". Note that I am using bootstrap = TRUE. The samp_years input defines the range of years that we will use to model future recruitment deviates. We can then begin the projection before or after the last OM year, for example, if the last model year is 2022 and we begin the projection in 2021, then the final two recruitment deviates will be overwritten in our projection. Results are shown for MLE without ARIMA (Figure 3), MCMC without ARIMA (Figure 4), and MCMC with ARIMA (Figure 5). A comparison of all projections is shown in Figure 6. ARIMA order summaries are shown in Figure 7, with AR parameters in Figure 8 and MA parameters in Figure 9.

proj_rdev_MLE <- project_rec_devs(
  data = data, obj = obj, mcmc = NULL, first_yr = 2021, last_yr = 2035,
  samp_years = 1931:2020, iters = iters, option = "AR1"
)
proj_rdev_AR1 <- project_rec_devs(
  data = data, obj = obj, mcmc = mcmc, first_yr = 2021, last_yr = 2035,
  samp_years = 1931:2020, iters = iters, option = "AR1"
)
proj_rdev_ARIMA <- project_rec_devs(
  data = data, obj = obj, mcmc = mcmc, first_yr = 2021, last_yr = 2035,
  samp_years = 1931:2020, iters = iters, option = "auto"
)

plot_rec_devs(data = data, object = obj, posterior = mcmc, proj = proj_rdev_MLE)

Figure 3: Recruitment deviates based on MLE without ARIMA.

plot_rec_devs(data = data, object = obj, posterior = mcmc, proj = proj_rdev_AR1)

Figure 4: Recruitment deviates based on MCMC without ARIMA.

plot_rec_devs(data = data, object = obj, posterior = mcmc, proj = proj_rdev_ARIMA)

Figure 5: Recruitment deviates based on MCMC with ARIMA.

df <- bind_rows(
  melt(proj_rdev_MLE$proj_rdev_y) %>% mutate(Projection = "MLE AR1"), 
  melt(proj_rdev_AR1$proj_rdev_y) %>% mutate(Projection = "MCMC AR1"), 
  melt(proj_rdev_ARIMA$proj_rdev_y) %>% mutate(Projection = "MCMC ARIMA"))

ggplot(data = df, aes(x = year, y = value, fill = Projection, color = Projection)) + 
  geom_hline(yintercept = 0, linetype = "dashed") +
  stat_summary(geom = "point", fun = median) +
  stat_summary(geom = "line", fun = median) +
  stat_summary(geom = "ribbon", fun.data = ~ data.frame(ymin = quantile(.x, 0.025), ymax = quantile(.x, 0.975)), alpha = 0.3, color = NA) +
  labs(x = "Year", y = "Recruitment deviate")

Figure 6: Comparing projected recruitment deviates with and without ARIMA.

Summaries ARIMA pars, never any I which makes sense given ADF test..

melt(proj_rdev_ARIMA$arima_pars) %>%
  ggplot(aes(x = factor(statistic), y = value)) +
  geom_boxplot() +
  labs(x = "Statistic", y = "Order")

melt(proj_rdev_ARIMA$ar_pars) %>%
  ggplot(aes(x = factor(statistic), y = value)) +
  geom_boxplot() +
  labs(x = NULL, y = "Value")

Warning: Removed 8478 rows containing non-finite outside the scale range
(`stat_boxplot()`).

melt(proj_rdev_ARIMA$ma_pars) %>%
  ggplot(aes(x = factor(statistic), y = value)) +
  geom_boxplot() +
  labs(x = NULL, y = "Value")

Warning: Removed 9129 rows containing non-finite outside the scale range
(`stat_boxplot()`).

Selectivity

Projecting selectivity can be done using the project_selectivity function. The first_yr and the last_yr arguments define the range of years over which the mean and standard deviation of selectivity is calculated. Future selectivity is then drawn from a normal distribution (in log-space). Ridge plots of projected selectivity for a single iteration and the median are shown in Figure 10 and Figure 11. A line plot of median projected selectivity is shown in Figure 12.

# proj_sel_fya <- project_selectivity(
#   data = data, obj = obj, mcmc = mcmc, first_yr = 2021, last_yr = 2035,
#   samp_years = 2015:2020, iters = iters
# )
# save(proj_sel_fya, file = "inst/extdata/proj_sel.rda")
load(system.file("extdata", "proj_sel.rda", package = "sbt"))

# plot_selectivity(data = data, object = obj, years = 2015:2022, fisheries = "LL3")

library(ggridges)
library(scales)

fsh <- c("LL1", "LL2", "LL3", "LL4", "Indonesian", "Australian", "CPUE")

df <- melt(proj_sel_fya) %>%
  mutate(fishery = fsh[fishery]) %>%
  mutate(fishery = factor(fishery, levels = fsh)) %>%
  filter(iteration == 10)

ggplot(data = df, aes(x = age, y = year, height = value, group = year)) +
  geom_density_ridges(stat = "identity", alpha = 0.5, rel_min_height = 0) +
  facet_wrap(fishery ~ .) +
  labs(x = "Age", y = "Year") +
  theme(legend.position = "none") +
  scale_x_continuous(limits = c(0, NA), expand = expansion(mult = c(0, 0.05))) +
  scale_y_reverse(breaks = pretty_breaks())

Warning in FUN(X[[i]], ...): no non-missing arguments to max; returning -Inf

Figure 10: Projected selectivity at age by year for a single MCMC iteration (ridge plot).

df <- melt(proj_sel_fya) %>%
  mutate(fishery = fsh[fishery]) %>%
  mutate(fishery = factor(fishery, levels = fsh)) %>%
  group_by(fishery, year, age) %>%
  summarise(value = median(value))

ggplot(data = df, aes(x = age, y = year, height = value, group = year)) +
  geom_density_ridges(stat = "identity", alpha = 0.5, rel_min_height = 0) +
  facet_wrap(fishery ~ .) +
  labs(x = "Age", y = "Year") +
  theme(legend.position = "none") +
  scale_x_continuous(limits = c(0, NA), expand = expansion(mult = c(0, 0.05))) +
  scale_y_reverse(breaks = pretty_breaks())

Warning in FUN(X[[i]], ...): no non-missing arguments to max; returning -Inf

Figure 11: Median projected selectivity at age by year across all MCMC iterations (ridge plot).

ggplot(data = df) +
  geom_line(aes(x = age, y = value, color = factor(year), group = year)) +
  facet_wrap(fishery ~ .) +
  scale_y_continuous(limits = c(0, NA), expand = expansion(mult = c(0, 0.05))) +
  labs(x = "Age", y = "Selectivity", color = "Year")

Warning: Removed 1335 rows containing missing values or values outside the scale range
(`geom_line()`).

Figure 12: Median projected selectivity at age by year (line plot).

Catch

Projected catch by fishery is shown in Figure 13.

dn <- dimnames(data$catch_obs_ysf)
proj_catch_ysf <- array(
  dim = list(n_proj, data$n_season, data$n_fishery), 
  dimnames = list(Year = proj_years, Season = dn$Season, Fishery = dn$Fishery)
)
for (y in 1:n_proj) proj_catch_ysf[y,,] <- data$catch_obs_ysf[71,,]

plot_catch(data = data, object = obj, proj = proj_catch_ysf)

[1] "The maximum catch difference was: NA"

Warning: Removed 65 rows containing missing values or values outside the scale range
(`geom_line()`).

Projection using dynamics function

Project forward with MCMC different because not the MLE like everything else above (single MCMC iteration).

# proj_dyn <- project_dynamics(data = data, object = obj, mcmc = mcmc, 
#                          first_yr = 2021, last_yr = 2035, n_iter = n_iter, 
#                          rdev_y = proj_rdev_ARIMA$proj_rdev_y, sel_fya = proj_sel_fya, 
#                          catch_ysf = proj_catch_ysf)
# save(proj_dyn, file = "inst/extdata/proj_dyn.rda")
load(system.file("extdata", "proj_dyn.rda", package = "sbt"))

df2 <- data.frame(year = (data$last_yr + 1):(data$last_yr + n_proj + 1), 
                  value = proj_dyn[[1]]$spawning_biomass_y)

dim(proj_dyn[[1]]$number_ysa)

[1] 16  2 31

dimnames(proj_dyn[[1]]$number_ysa) <- list(year = 1:16, season = 1:2, age = data$age_a)

# post <- extract_samples(fit = mcmc)
# plot(data$first_yr:(data$last_yr + 1), obj$report()$spawning_biomass_y, xlim = c(1931, max(proj_years)), ylim = c(0, 11e6))
# # for (i in seq_len(n_iter)) {
# for (i in seq_len(10)) {
#   rep <- obj$report(as.numeric(post[i,]))
#   lines(data$first_yr:(data$last_yr + 1), rep$spawning_biomass_y, col = 3)
#   lines(min(proj_years):(max(proj_years) + 1), dyn4[[i]]$spawning_biomass_y, col = 4)
# }
# 
# plot(data$cpue_years + data$first_yr - 1, data$cpue_obs, xlim = c(1969, 2042), ylim = c(0.3, 4))
# # for (i in seq_len(n_iter)) {
# for (i in seq_len(10)) {
#   rep <- obj$report(as.numeric(post[i,]))
#   lines(data$cpue_years + data$first_yr - 1, rep$cpue_pred, col = 2)
#   lines(proj_years, dyn4[[i]]$cpue_pred, col = 3)
#   points(proj_years, dyn4[[i]]$cpue_obs, col = 4)
# }