PIFSC Opakapaka Case Study

The setup

Code
# Names of required packages
packages <- c(
  "dplyr",
  "ggridges",
  "ggplot2",
  "gt",
  "here",
  "lubridate",
  "remotes",
  "reshape2",
  "shinystan",
  "tidybayes",
  "tidyr",
  "TMB"
)

# Install packages not yet installed
installed_packages <- packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {
  install.packages(packages[!installed_packages], repos = "http://cran.us.r-project.org")
}

# the Bayesian case study requires these extra packages
if (!"StanEstimators" %in% rownames(installed.packages())) {
  install.packages("StanEstimators", repos = c("https://andrjohns.r-universe.dev", "https://cloud.r-project.org"))
}

if (!"adnuts" %in% rownames(installed.packages())) {
  remotes::install_github("Cole-Monnahan-NOAA/adnuts")
}

# SS3 case studies require r4ss
if (!"r4ss" %in% rownames(installed.packages())) {
  remotes::install_github("r4ss/r4ss")
}

# SS3 case studies require r4ss
if (!"stockplotr" %in% rownames(installed.packages())) {
  remotes::install_github("nmfs-ost/stockplotr")
}

# Install FIMS: main branch version if on main, dev version on any other branch
remotes::install_github("NOAA-FIMS/FIMS", ref = "dev-hake")

# Load packages
invisible(lapply(packages, library, character.only = TRUE))

library(FIMS)
library(adnuts)
R_version <- version$version.string
TMB_version <- packageDescription("TMB")$Version
FIMS_commit <- substr(packageDescription("FIMS")$GithubSHA1, 1, 7)
ggplot2::theme_set(ggplot2::theme_bw())

data_directory <- "data_files"
function_directory <- "R"
purrr::walk(
    dir(function_directory, full.names = TRUE),
  source
)
Code
common_name <- "main Hawaiian Islands Opakapaka"
  • R version: R version 4.5.2 (2025-10-31)
  • TMB version: 1.9.19
  • FIMS commit: 14916c8
  • Stock name: main Hawaiian Islands Opakapaka
  • Region: Pacific Islands
  • Analyst: Meg Oshima

Simplifications to the original assessment

The model presented in this case study was changed substantially from the operational version and should not be considered reflective of the main Hawaiian Islands Opakapaka stock. These results are intended to be a demonstration and nothing more.

To get the operational model to more closely match a FIMS model the following changes were made:

  • In our model maturity and selectivity are length-based processes so I converted (aka eye-balled logistic curves that look like they match for ages) to values that I think would be roughly equivalent in ages. Eventually, having length-based processes would be necessary for this model.
  • The scale of the fitted CPUE is lower than the observed values.
  • If I don’t fix the initial numbers-at-age, the model puts a really large number of individuals in age-0 bin and then 0 fish in the rest of the ages. Not sure what is happening here.

Setting up the data

Code
load(file.path(data_directory, "opaka_model.RDS"))
include_age_comps <- FALSE
years <- seq(opaka_dat$styr, opaka_dat$endyr)
n_years <- length(years) # the number of years which we have data for.
ages <- seq(1, 21) # age vector.
n_ages <- length(ages) # the number of age groups.
comp_lengths <- opaka_dat$lbin_vector # length vector.
nlengths <- length(comp_lengths) # the number of length bins.

opaka_dat_fims <- get_ss3_data(
  list(
    dat = opaka_dat,
    ctl = opaka_ctl,
    start = list(),
    fore = list(),
    wtatage = rep[["wtatage"]]
  ),
  fleets = c(1,2,3),
  ages = ages,
  lengths = comp_lengths
) |>
  dplyr::filter(type != "age_comp")
## age to length conversion matrix 
# Growth function values to create age to length conversion matrix from model
#comparison project
mg_pars <- rep$parameters  |> 
dplyr::filter(stringr::str_detect(Label, "_GP_"))
Linf <- mg_pars$Value[3]
K <- mg_pars$Value[4]
a0 <- -0.29
amax <- 21
cv <- mg_pars$Value[5]

L2Wa <- mg_pars$Value[7]
L2Wb <- mg_pars$Value[8]

AtoL <- function(a,Linf,K,a_0){
  L <- Linf*(1-exp(-K*(a-a_0)))
  }

ages <- 1:amax
len_bins <- comp_lengths

#Create length at age conversion matrix and fill proportions using above 
#growth parameters
length_age_conversion <- matrix(NA,nrow=length(ages),ncol=length(len_bins))
for(i in seq_along(ages)){
  #Calculate mean length at age to spread lengths around
  mean_length <- AtoL(ages[i],Linf,K,a0)
  #mean_length <- AtoLSchnute(ages[i],L1,L2,a1,a2,Ks)
  #Calculate the cumulative proportion shorter than each composition length
  temp_len_probs<-pnorm(q=len_bins,mean=mean_length,sd=mean_length*cv)
  #Reset the first length proportion to zero so the first bin includes all
  #density smaller than that bin
  temp_len_probs[1]<-0
  #subtract the offset length probabilities to calculate the proportion in each
  #bin. For each length bin the proportion is how many fish are larger than this
  #length but shorter than the next bin length.
  temp_len_probs <- c(temp_len_probs[-1],1)-temp_len_probs
  length_age_conversion[i,] <- temp_len_probs
}
colnames(length_age_conversion) <- len_bins
rownames(length_age_conversion) <- ages

#Extract years and fleets from milestone 1 data
start_date <- unique(opaka_dat_fims$timing[opaka_dat_fims$type=="landings"])
observers <- unique(opaka_dat_fims$name[opaka_dat_fims$type=="length_comp"])

#Create data frame for new fleet and year specific length at age conversion proportions
length_age_data <- data.frame(
  type = rep("age-to-length-conversion",length(len_bins)*length(ages)*length(observers)*length(start_date)),
  name = rep(sort(rep(observers,length(len_bins)*length(ages))),length(start_date)),
  age = rep(sort(rep(ages,length(len_bins))),length(observers)*length(start_date)),
  length = rep(len_bins,length(ages)*length(observers)*length(start_date)),
  timing = rep(start_date,each=length(len_bins)*length(ages)*length(observers)),
  value = rep(c(t(length_age_conversion)),length(observers)*length(start_date)),
  unit = rep("proportion",length(len_bins)*length(ages)*length(observers)*length(start_date)),
  uncertainty = rep(30,length(len_bins)*length(ages)*length(observers)*length(start_date)))

# Changing the CPUE indices for fleet1 to be a new fleet, fleet4. This helps with model convergence and fitting as it is the longest time series of data available along with the landings. 
opaka_dat_fims <- opaka_dat_fims |>
  dplyr::mutate(name = ifelse(name == "fleet1" & type == "index", "fleet4", name))

opaka_dat_fims <- type.convert(
  rbind(opaka_dat_fims, length_age_data),
  as.is = TRUE
)

opaka_dat_fims <- opaka_dat_fims |> 
  dplyr::mutate(uncertainty = ifelse(type == "length_comp" & name == "fleet2" & value != -999, 1, uncertainty)) |>
  dplyr::filter(!(type == "index" & name == "fleet3"))

data_4_model <- FIMS::FIMSFrame(opaka_dat_fims)

The data_4_model object contains a @data slot that holds a long data frame with: * 2 fleets: commercial fishery (fleet1) and survey (fleet2) * landings for fleet 1 * cpue for fleet 2 * length composition data for fleet 2

Run FIMS model

Code
recdevs <- rep$parameters |>
           dplyr::filter(stringr::str_detect(Label, "RecrDev")) |> 
           dplyr::select(Label, Value)

init_naa <- (exp(opaka_ctl$SR_parms["SR_LN(R0)", "INIT"]) * 1000) * exp(-(ages - 1) * 0.135)
init_naa[n_ages] <- init_naa[n_ages] / 0.135
# Create default parameters
default_parameters <- FIMS::create_default_configurations(
    data = data_4_model
  ) |>
  FIMS::create_default_parameters(
    data = data_4_model
  ) |>
  tidyr::unnest(cols = data) |>
  dplyr::rows_update(
    tibble::tibble(
      module_name = "Maturity",
      label = c("inflection_point", "slope"),
      value = c(7, 0.5)
    ),
    by = c("module_name", "label")
  ) |>
  dplyr::rows_update(
    tibble::tibble(
      module_name = "Selectivity",
      fleet_name = "fleet1",
      label = c("slope", "inflection_point"),
      # Used age selectivity values
      value = c(4.5, 1.81),
      estimation_type = "constant"
    ),
    by = c("module_name", "fleet_name", "label")
  ) |>
  dplyr::rows_update(
    tibble::tibble(
      module_name = "Selectivity",
      fleet_name = "fleet2",
      label = c("slope", "inflection_point"),
      value = c(3, 1),
      estimation_type = "constant"
    ),
    by = c("module_name", "fleet_name", "label")
  ) |>
  dplyr::rows_update(
    tibble::tibble(
      module_name = "Selectivity",
      fleet_name = "fleet3",
      label = c("slope", "inflection_point"),
      value = c(4.5, 1.97),
      estimation_type = "constant"
    ),
    by = c("module_name", "fleet_name", "label")
  ) |>
  dplyr::rows_update(
    tibble::tibble(
      module_name = "Selectivity",
      fleet_name = "fleet4",
      label = c("slope", "inflection_point"),
      value = c(4.5, 1.81),
      estimation_type = "constant"
    ),
    by = c("module_name", "fleet_name", "label")
  ) |>
  dplyr::rows_update(
    tibble::tibble(
      module_name = "Population",
      label = c("log_M"),
      value = log(0.135)
    ),
    by = c("module_name", "label")
  ) |>
  dplyr::rows_update(
    tibble::tibble(
      module_name = "Population",
      label = "log_init_naa",
      age = ages,
      value = log(init_naa),
      estimation_type = "constant"
    ),
    by = c("module_name", "label", "age")
  ) |>
  # dplyr::rows_update(
  #   tibble::tibble(
  #     module_name = "Recruitment",
  #     # Transformed 0.999 to logit where a previous version just used 0.999
  #     # Wondering if we should use logit(0.75) as noted previously for scamp
  #     # as the null recruitment model?
  #     label = c("log_rzero", "logit_steep", "log_sd"),
  #     value = c(opaka_ctl$SR_parms["SR_LN(R0)", "INIT"], -log(1.0 - 0.76) + log(0.76 - 0.2), sca$parm.cons$rec_sigma[8])
  #   ),
  #   by = c("module_name", "label")
  # ) |>
  dplyr::rows_update(
    tibble::tibble(
      module_name = "Recruitment",
      label = "log_devs",
      time = years[-1],
      # The last value of the initial numbers at age is the first
      # recruitment deviation
      value = recdevs$Value,
    ),
    by = c("module_name", "label", "time")
  ) |>
  dplyr::rows_update(
    tibble::tibble(
      module_name = "Fleet",
      fleet_name = "fleet1",
      time = years,
      label = "log_Fmort",
      value = log(rep$exploitation$FRS)
    ),
    by = c("module_name", "fleet_name", "label", "time")
  ) |>
  dplyr::rows_update(
    tibble::tibble(
      module_name = "Fleet",
      fleet_name = "fleet2",
      label = "log_q",
      value = -4.12772
    ),
    by = c("module_name", "fleet_name", "label")
  ) |>
  dplyr::rows_update(
    tibble::tibble(
      module_name = "Fleet",
      fleet_name = "fleet3",
      label = "log_Fmort",
      time = years,
      value = log(rep$exploitation$Non_comm)
    ),
    by = c("module_name", "fleet_name", "label", "time")
  ) |>
  dplyr::rows_update(
    tibble::tibble(
      module_name = "Fleet",
      fleet_name = "fleet4",
      label = "log_q",
      value = -3.90281 #value from SS
    ),
    by = c("module_name", "fleet_name", "label")
  )

# Run the  model with optimization
fit <- default_parameters |>
  FIMS::initialize_fims(data = data_4_model) |>
  # Model is too big to run on GitHub action if you estimate uncertainty
  FIMS::fit_fims(optimize = TRUE, get_sd = FALSE)
✔ Starting optimization ...
ℹ Restarting optimizer 3 times to improve gradient.
ℹ Maximum gradient went from 0.0137 to 0.00286 after 3 steps.
✔ Finished optimization
ℹ FIMS model version: 0.8.1
ℹ Total run time was 1.26995 seconds
ℹ Number of parameters: fixed_effects=227, random_effects=0, and total=227
ℹ Maximum gradient= 0.00286
ℹ Negative log likelihood (NLL):
• Marginal NLL= 566.6127
• Total NLL= 566.6127
ℹ Terminal SB= 0

Plotting Results

Code
index_results <- data.frame(
  observed = FIMS::m_index(data_4_model, "fleet2"),
  expected = FIMS::get_report(fit)[["index_expected"]][[2]]
) |>
dplyr::mutate(year = years) |>
dplyr::filter(year > 2016)
#print(index_results)

ggplot2::ggplot(index_results, ggplot2::aes(x = year, y = observed)) +
  ggplot2::geom_point() +
  ggplot2::xlab("Year") +
  ggplot2::ylab("Index (mt)") +
  ggplot2::geom_line(ggplot2::aes(x = year, y = expected), color = "blue") +
  ggplot2::theme_bw()

Code
cpue_results <- data.frame(
  observed = FIMS::m_index(data_4_model, "fleet4"),
  expected = FIMS::get_report(fit)[["index_expected"]][[2]]
) |>
dplyr::mutate(year = years) |>
dplyr::filter(observed >0)
#print(cpue_results)
ggplot2::ggplot(cpue_results, ggplot2::aes(x = year, y = observed)) +
  ggplot2::geom_point() +
  ggplot2::xlab("Year") +
  ggplot2::ylab("CPUE") +
  ggplot2::geom_line(ggplot2::aes(x = year, y = expected), color = "blue") +
  ggplot2::theme_bw()

Code
catch_results <- data.frame(
  observed = c(FIMS::m_landings(data_4_model, fleet = "fleet1"), FIMS::m_landings(data_4_model, fleet = "fleet3")),
  expected = c(FIMS::get_report(fit)[["landings_expected"]][[1]], FIMS::get_report(fit)[["landings_expected"]][[3]]),
  fleet = rep(c("fleet1", "fleet3"), each = 75)
) |>
dplyr::mutate(year = rep(years, 2))
#print(catch_results)

ggplot2::ggplot(catch_results, ggplot2::aes(x = year, y = observed)) +
  ggplot2::geom_point(ggplot2::aes(color = fleet)) +
  ggplot2::xlab("Year") +
  ggplot2::ylab("Catch (mt)") +
  ggplot2::geom_line(ggplot2::aes(x = year, y = expected, color = fleet)) +
  ggplot2::theme_bw()

Code
biomass <- rep$timeseries |> 
dplyr::select(Yr, SpawnBio, Bio_all) |> 
dplyr::filter(Yr > 1947) |> ##CHECK: including "initial year" to match length with FIMS but need to check on FIMS
dplyr::rename("SS_SpawnBio" = "SpawnBio",
              "SS_Bio" = "Bio_all") |>
dplyr::mutate(FIMS_SpawnBio = FIMS::get_report(fit)[["spawning_biomass"]][[1]] , 
              FIMS_Bio = FIMS::get_report(fit)[["biomass"]][[1]]) |> ##CHECK: Is FIMS ssb reporting n_years+1 or initial year-1?
tidyr::pivot_longer(cols = -Yr) |>
tidyr::separate_wider_delim(cols = "name", delim = "_", names = c("Model", "Type"))             

ggplot2::ggplot(biomass, ggplot2::aes(x = Yr, y = value)) +
  ggplot2::geom_line(ggplot2::aes(color = Model)) +
  ggplot2::xlab("Year") +
  ggplot2::ylab("") +
  ggplot2::facet_wrap(~Type, scales = "free_y") +
  ggplot2::theme_bw()

Code
recruits <- rep$recruit |>
  dplyr::select(Yr, exp_recr, raw_dev) |>
  dplyr::rename("SS_recruit" = "exp_recr",
                "SS_recdev" = "raw_dev",
                "Year" = "Yr") |> 
  dplyr::mutate(FIMS_recruit = FIMS::get_report(fit)[["expected_recruitment"]][[1]][1:75],
                FIMS_recdev = c(
                  NA,
                  FIMS::get_estimates(fit) |>
                    dplyr::filter(label == "log_devs", module_name == "Recruitment") |>
                    dplyr::pull(estimated)
                ),
                SS_recruit = SS_recruit * 1000) |>
  tidyr::pivot_longer(cols = -Year) |>
  tidyr::separate_wider_delim(cols = "name", delim = "_", names = c("Model", "Type"))             

ggplot2::ggplot(recruits, ggplot2::aes(x = Year, y = value, color = Model)) +
  ggplot2::geom_line() + 
  ggplot2::facet_wrap(~Type, scales = "free_y") +
  ggplot2::theme_bw()
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_line()`).

Code
## Checking fit to proportion catch number at length
pcnal <- matrix(data = fit@report$pcnal[[2]], nrow = nlengths)

prop.dat <- opaka_dat_fims |> 
filter(type == "length" & name == "fleet2") |> 
group_by(datestart) |>
reframe(prop = value/sum(value)) 
pcnal.obs <- matrix(data = prop.dat$prop, nrow = nlengths)
head(pcnal.obs)

plot(x = 1:nlengths, y = pcnal.obs[,73], pch = 16,  ylim = c(0,1))
lines(x = 1:nlengths, y = pcnal[,73])
pcnal[,69]

## checking estimated numbers at age
head(fit@report$naa[[1]])
naa_mat <- matrix(data = fit@report$naa[[1]], nrow = n_ages)
head(naa_mat)
Code
FIMS::clear()