Exploration of advanced FIMS features

The setup

Code
# Names of required packages
packages <- c(
  "dplyr",
  "ggplot2",
  "gt",
  "here",
  "lubridate",
  "remotes",
  "reshape2",
  "shinystan",
  "tidyr",
  "TMB"
)

# Install packages not yet installed
installed_packages <- packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {
  install.packages(packages[!installed_packages], repos = "http://cran.us.r-project.org")
}

# the Bayesian case study requires these extra packages
if (!"StanEstimators" %in% rownames(installed.packages())) {
  install.packages("StanEstimators", repos = c("https://andrjohns.r-universe.dev", "https://cloud.r-project.org"))
}

if (!"adnuts" %in% rownames(installed.packages())) {
  remotes::install_github("Cole-Monnahan-NOAA/adnuts", ref="sparse_M")
}

# SS3 case studies require r4ss
if (!"r4ss" %in% rownames(installed.packages())) {
  remotes::install_github("r4ss/r4ss")
}

# Install FIMS: main branch version if on main, dev version on any other branch

branch_name <- system("git branch --show-current", intern = TRUE)
use_fims_main <- grepl("main", branch_name)

if (use_fims_main) {
  remotes::install_github("NOAA-FIMS/FIMS")
} else {
  remotes::install_github("NOAA-FIMS/FIMS", ref = "dev-transform")
}

# Load packages
invisible(lapply(packages, library, character.only = TRUE))

library(FIMS)
library(adnuts)
R_version <- version$version.string
TMB_version <- packageDescription("TMB")$Version
FIMS_commit <- substr(packageDescription("FIMS")$GithubSHA1, 1, 7)

source(file.path("R", "utils.R"))

  • R version: R version 4.5.0 (2025-04-11)
  • TMB version: 1.9.17
  • FIMS commit: 4fa1cb4
  • Stock name: Gulf of Alaska (GOA) Walleye Pollock
  • Region: AFSC
  • Analyst: Cole Monnahan

Simplifications to the original assessment

See the AFSC-GOA-pollock case study for details on the model. Here the focus is on advanced features.

Script to prepare data for building FIMS object

Code
ggplot2::theme_set(ggplot2::theme_bw())
## define the dimensions and global variables
years <- 1970:2023
nyears <- length(years)
nseasons <- 1
nages <- 10
ages <- 1:nages
## This will fit the models bridging to FIMS (simplifying)
## source("fit_bridge_models.R")
## compare changes to model
pkfitfinal <- readRDS("data_files/pkfitfinal.RDS")
pkfit0 <- readRDS("data_files/pkfit0.RDS")
parfinal <- pkfitfinal$obj$env$parList()
pkinput0 <- readRDS('data_files/pkinput0.RDS')
fimsdat <- pkdat0 <- pkinput0$dat
pkinput <- readRDS('data_files/pkinput.RDS')

Run FIMS model without using wrapper functions

Code
## set up FIMS data objects
FIMS::clear()
estimate_fish_selex <- "fixed"
estimate_survey_selex <- "fixed"
estimate_q2 <- "fixed"
estimate_q3 <- "fixed"
estimate_q6 <- "fixed"
estimate_F <- "fixed"
estimate_recdevs <- TRUE

# Set up a FIMS model without wrapper functions
source("R/pk_prepare_FIMS_inputs_old.R")

## make FIMS model
success <- FIMS::CreateTMBModel()
parameters <- list(p = get_fixed(), re = get_random())
obj <- TMB::MakeADFun(
  data = list(),
  parameters,
  random = "re",
  DLL = "FIMS",
  silent = TRUE
)
opt <- with(obj, nlminb(
  start = par,
  objective = fn,
  gradient = gr,
  control = list(
    eval.max = 10000,
    iter.max = 10000,
    trace = 0
  )
))
max(abs(obj$gr())) # from Cole, can use TMBhelper::fit_tmb to get val to <1e-10
rep2 <- obj$report(obj$env$last.par.best) ## FIMS after estimation
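Before moving on, it is worth confirming that the fit above actually converged and extracting parameter uncertainty. The following is a minimal sketch, not part of the original script, that assumes the obj and opt objects created above and uses only standard TMB tools.

Code
## Quick convergence check and delta-method standard errors (illustrative sketch)
stopifnot(opt$convergence == 0)       # nlminb convergence code: 0 means success
max(abs(obj$gr(opt$par)))             # maximum absolute gradient at the optimum
sdr <- TMB::sdreport(obj)             # delta-method SEs for fixed effects and derived quantities
head(summary(sdr, select = "fixed"))  # estimates and standard errors of fixed effects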

Extra analyses

Two extra analyses are demonstrated. The first is a likelihood profile over lnR0, showing the contribution of each likelihood component and testing for data conflict (a Piner plot). The second is to run the model through Stan's no-U-turn sampler (NUTS) using the adnuts R package. This draws samples from the posterior, which are then pushed back through the model to obtain the posterior distribution of spawning stock biomass. Given the long run time, the results are saved to file and read back in for post-hoc plotting.

Code
## Try a likelihood profile on ln(R_0)
i <- which(parameters[["p"]] == recruitment$log_rzero[1]$value)
map <- parameters
map$p[i] <- NA # map off R0 specified below
map$p <- as.factor(map$p)
map$re <- as.factor(map$re)
xseq <- as.numeric(c(opt$par[i],  seq(22,24, len=30)))
res <- list()
for(j in seq_along(xseq)) {
  print(j)
  parameters$p[i] <- xseq[j]
  obj2 <- TMB::MakeADFun(
    data = list(),
    parameters,
    DLL = "FIMS",
    silent = TRUE,
    map = map,
    random = "re"
  )
  opt2 <- with(obj2, nlminb(par,fn,gr))
  out <- obj2$report(obj2$env$last.par.best)
  nll_components <- out$nll_components
  index_nll <- sum(nll_components[seq(2, length(nll_components), by = 2)])
  age_comp_nll <- sum(nll_components[seq(3, length(nll_components), by = 2)])
  res[[j]] <- data.frame(
    j = j, lnR0 = xseq[j], total = out$jnll, index = index_nll,
    age = age_comp_nll, recruit = out$nll_components[1], maxgrad = max(abs(obj2$gr()))
  )
}
res <- dplyr::bind_rows(res) |>
  tidyr::pivot_longer( cols=c(total, index, age, recruit)) |>
  dplyr::group_by(name) |>
  dplyr::mutate(deltaNLL=value-min(value))
g <- ggplot2::ggplot(res, ggplot2::aes(lnR0, deltaNLL, color=name)) +
  ggplot2::geom_line()
g <- g +
  ggplot2::geom_point(data = dplyr::filter(res, deltaNLL==0), size=2) +
  ggplot2::labs(y='Delta NLL', color='NLL component')
ggplot2::ggsave('figures/AFSC_PK_like_profile_R0.png', g, width=7, height=3.5)

Likelihood Profile

Bayesian integration in FIMS

This section demonstrates Bayesian integration of the FIMS model via the no-U-turn sampler (NUTS) algorithm.
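For reference, a TMB objective like the one built above can also be handed directly to Stan's NUTS sampler with the tmbstan package. The sketch below is illustrative only and is not the adnuts workflow used in this case study; it assumes the obj object from the earlier section and a deliberately short chain, and it is left commented out because, like the adnuts run below, it is too slow to run at render time.

Code
## Illustrative only: NUTS sampling of the TMB objective via tmbstan (not run)
# fit <- tmbstan::tmbstan(obj, chains = 1, iter = 1000, init = "last.par.best")
# post <- as.matrix(fit) # posterior draws, one column per parameter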

Code
# this takes too long to run for rendering so saving everything to file

## Try Bayesian
#library(tmbstan)
## Some parameters wandering off to Inf so fix those (need
## priors). Needs a ton of work but is proof of concept. Major
## problem is parallel fails.
map <- parameters
# TODO: Figure out parameter numbers of ill-behaved parameters because with
#       random effects there are fewer parameters.
## parameters$p[65:66]
map$p[c(65,66,114)] <- NA
map$p <- as.factor(map$p)
obj3 <- TMB::MakeADFun(
  data = list(),
  parameters,
  random = "re",
  DLL = "FIMS",
  silent = TRUE,
  map = map
)
parameter_names <- names(FIMS:::get_parameter_names(obj3[["par"]]))
# ## Fails when trying to do this in parallel unfortunately
# mcmc <- sample_sparse_tmb(obj3, iter=1200, warmup=200, chains=4, cores=1,
#                           seed=1, init='random',
#                           control=list(adapt_delta=.95))
# saveRDS(mcmc, file='data_files/pk_mcmcfit.RDS')

mcmc <- readRDS('data_files/pk_mcmcfit.RDS')
png('figures/MCMC_pairs.png', width=7, height=5, units='in', res=200)
adnuts::pairs_admb(mcmc, pars=1:6, order='slow')
dev.off()

png('figures/MCMC_marginals.png', width=7, height=5, units='in', res=200)
adnuts::plot_marginals(mcmc, pars=1:9)
dev.off()

png('figures/MCMC_uncertainties.png', width=7, height=5, units='in', res=200)
adnuts::plot_uncertainties(mcmc)
dev.off()

png('figures/MCMC_sampler_params.png', width=7, height=7, units='in', res=200)
adnuts::plot_sampler_params(mcmc)
dev.off()



#launch_shinyadmb(mcmc)
df <- as.data.frame(mcmc)
## for each posterior draw, report to get SSB
postreps <- list()
for(ii in 1:nrow(df)){
  if(ii %% 500==0) print(ii)
  postreps[[ii]] <- obj3$report(as.numeric(df[ii, ])) # report model quantities at this posterior draw
}
ssbpost <- lapply(postreps, function(x) data.frame(year=years, ssb=x$ssb[[1]][-55])) |>
  dplyr::bind_rows() |>
  dplyr::mutate(rep=rep(1:nrow(df), each=54))
saveRDS(ssbpost, file='data_files/pk_pollock_SSB_posteriors.RDS')
ssbpost <- readRDS('data_files/pk_pollock_SSB_posteriors.RDS')
g <- ggplot2::ggplot(ssbpost, ggplot2::aes(year, ssb/1e9, group=rep)) +
  ggplot2::geom_line(alpha=.1) +
  ggplot2::ylim(0,NA) +
  ggplot2::labs(x=NULL, y='SSB (M t)', title='Posterior demo (unconverged!)')
ggplot2::ggsave('figures/MCMC_ssb_posterior.png', g, width=7, height=4, units='in')

This results in the following plots showing convergence diagnostics and, finally, the posterior distribution of SSB over time.

MCMC marginals
MCMC pairs
MCMC uncertainties
MCMC sampler parameters
SSB Posterior

Calculate conditional AIC and effective degrees of freedom

Zheng et al. (in review; pdflink) present a new formula for calculating conditional AIC (cAIC). The calculation relies on the "effective degrees of freedom" (edf) of each parameter, which is then used in the cAIC formula. A parameter has an edf of 0 when all of the information used to estimate it comes from a prior or hyperprior, and an edf of 1 when it is informed only by the data. The edf gives a sense of model complexity, and cAIC performs better than marginal AIC (mAIC) for mixed effects models. Here I demonstrate equation 6 on a FIMS model. The approach works for penalized ML and random effects models alike. If Bayesian priors (or penalties) are used, it will also quantify their influence relative to the data; no priors are used here yet, so that part is omitted for now.
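As implemented in the code below (using the variable names defined there), the calculation is

  negEDF[i] = diag(solve(Hess_data, Hess_nodata))[i]
  edf[i]    = 1 - negEDF[i]
  cAIC      = 2 * cnll + 2 * (p + q) - 2 * sum(negEDF)

where Hess_data is the Hessian of the joint negative log-likelihood of the full model and Hess_nodata is the Hessian of the model containing only the hyperdistribution (penalty) on the recruitment deviations, both evaluated at the MLE and subset to the penalized parameters; cnll is the data-only negative log-likelihood (the joint value minus the penalty-only value at the MLE); p is the number of non-penalized parameters; and q is the number of penalized parameters.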

Code
# Set up a FIMS model without wrapper functions

# the original model (with data)
mle <- opt$par
hess <- obj$he(mle)

## make FIMS model without any data, only hyperdistribution on recdevs
FIMS::clear()
# this is a hack to trick the script to thinking there's no data (it should all be -999)
years <- 500:1000
source("R/pk_prepare_FIMS_inputs_old.R")
years <- 1970:2023
# build the object but DO NOT optimize it; the original MLE is passed to it below
success <- FIMS::CreateTMBModel()
parameters <- list(p = get_fixed(), re = get_random())
obj_nodata <- TMB::MakeADFun(data = list(), parameters, DLL = "FIMS", silent = TRUE, random = "re")
parameter_names <- names(FIMS:::get_parameter_names(obj[["par"]]))
lrandom <- is.na(parameter_names) # vector of which parameters are affected by priors/penalties

# the cAIC calculations
Hess_data <- obj$he(mle)[lrandom, lrandom]
Hess_nodata <- obj_nodata$he(mle)[lrandom, lrandom]
negEDF <- diag(solve(Hess_data, Hess_nodata))
# some data processing
edf <- data.frame(par='recdev', n=1, edf=1-negEDF) |>
  dplyr::mutate(year=1969+1:n()) |>
  dplyr::ungroup()
g <- ggplot2::ggplot(edf, ggplot2::aes(year, y=edf)) +
  ggplot2::geom_point() +
  ggplot2::facet_wrap('par') +
  ggplot2::labs(x=NULL, y='Effective degree of freedom') +
  ggplot2::ylim(NA,1)
ggplot2::ggsave('figures/cAIC_edf.png', g, width=7, height=4)
tab <- edf |>
  dplyr::group_by(par) |>
  dplyr::summarize(n=sum(n), edf=sum(edf)) |>
  dplyr::arrange(desc(edf))
tab <- dplyr::bind_rows(tab, edf |>
  dplyr::summarize(par='Total', n=sum(n), edf=sum(edf))) |>
  dplyr::mutate(pct=100*edf/n)
gt::gt(tab) |> gt::fmt_number(columns = 3:4, decimals = 1)

q <- sum(lrandom) # no. of random effects
p <- sum(1-lrandom) # no. of fixed effects
jnll <- obj$env$f(mle)
cnll <- jnll - obj_nodata$env$f(mle)
## conditional AIC (new calculation)
cAIC <- 2*cnll + 2*(p+q) - 2*sum(negEDF)
round(c(edf=sum(edf$edf), pct.edf=100*(sum(edf$edf)/sum(edf$n)),
     cAIC=cAIC, mAIC=TMBhelper::TMBAIC(opt)),1)

This appears to work except for the last recruitment deviation, whose effective degrees of freedom is negative, which should be impossible. The model is likely misconfigured.

edf

Code
# Clear C++ objects from memory
FIMS::clear()