NWFSC Case Study Petrale Sole

The setup

Code
# Names of required packages
packages <- c(
  "dplyr",
  "ggplot2",
  "gt",
  "here",
  "lubridate",
  "remotes",
  "reshape2",
  "shinystan",
  "tidyr",
  "TMB"
)

# Install packages not yet installed
installed_packages <- packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {
  install.packages(packages[!installed_packages], repos = "http://cran.us.r-project.org")
}

# the Bayesian case study requires these extra packages
if (!"StanEstimators" %in% rownames(installed.packages())) {
  install.packages("StanEstimators", repos = c("https://andrjohns.r-universe.dev", "https://cloud.r-project.org"))
}

if (!"adnuts" %in% rownames(installed.packages())) {
  remotes::install_github("Cole-Monnahan-NOAA/adnuts", ref="sparse_M")
}

# SS3 case studies require r4ss
if (!"r4ss" %in% rownames(installed.packages())) {
  remotes::install_github("r4ss/r4ss")
}

# Install FIMS: main branch version if on main, dev version on any other branch

branch_name <- system("git branch --show-current")
use_fims_main <- grepl("main", branch_name)

if (use_fims_main) {
  remotes::install_github("NOAA-FIMS/FIMS")
} else {
  remotes::install_github("NOAA-FIMS/FIMS", ref = "dev-transform")
}

# Load packages
invisible(lapply(packages, library, character.only = TRUE))

library(FIMS)
library(adnuts)
R_version <- version$version.string
TMB_version <- packageDescription("TMB")$Version
FIMS_commit <- substr(packageDescription("FIMS")$GithubSHA1, 1, 7)

source(file.path("R", "utils.R"))
Code
ggplot2::theme_set(ggplot2::theme_bw())
  • R version: R version 4.5.0 (2025-04-11)
  • TMB version: 1.9.17
  • FIMS commit: 4fa1cb4
  • Stock name: West Coast Petrale Sole
  • Region: NWFSC
  • Analyst: Ian G. Taylor

Simplifications to the original assessment

The operational petrale sole stock assessment used Stock Synthesis (SS3) and included numerous data types and population dynamic assumptions that are not yet available in FIMS. A simplified SS3 model was also developed to provide a closer comparison but is still a work in progress. This is intended as a demonstration and nothing more.

For both the FIMS model and the simplified SS3 model, I made the following changes:

  • Remove data
    • Remove lengths
    • Remove male ages
    • Remove discard fractions and discard comps
  • Simplify selectivity
    • Remove length-based retention functions
    • Convert to age-based logistic (from length-based double-normal, fixed asymptotic)
  • Remove parameter priors (on M and h)
  • Use female mean weight at age as calculated from the parametric growth curves
  • Varying Index CV to constant over time
  • Varying Catch ESS to constant over time

Script to prepare data for building FIMS object

Code
# read SS3 input files from petrale sole assessment on github
petrale_input <- r4ss::SS_read(
  "https://raw.githubusercontent.com/pfmc-assessments/petrale/main/models/2023.a034.001/"
)
petrale_output <- r4ss::SS_output(
  "https://raw.githubusercontent.com/pfmc-assessments/petrale/main/models/2023.a034.001",
  printstats = FALSE,
  verbose = FALSE
)
  
# generic names for SS3 data and control files could be useful in future generalized version of this code
ss3dat <- petrale_input$dat
ss3ctl <- petrale_input$ctl

# define the dimensions based on the range in the data
# for petrale, the north fleet catch begins in 1896, 
# not the start year of the model 1876 (which is when the southern fleet catch begins,
# but for simplicity this petrale example only uses the northern catch)
# years <- seq(ss3dat$styr, ss3dat$endyr)
years <- seq(1896, ss3dat$endyr)
nyears <- length(years)
nseasons <- 1
# ages <- 0:ss3dat$Nages # population ages in SS3, starts at age 0
ages <- 1:17 # same as data bins
nages <- length(ages)
lengths <- petrale_output[["lbins"]]
nlengths <- length(lengths)

# source function to simplify data and convert from SS3 format
source("R/get_ss3_data.R")

# fleet 4 used conditional age-at-length data with marginal observations
# entered as fleet == -4 (to exclude from likelihood due to redundancy)
# using only marginals for FIMS and exclude CAAL data by filtering out
# the fleet 4 age data
# similarly, some early ages were excluded from fleet 1 in the original model
# by assigning to fleet -1, these get filtered by get_ss3_data()

# only include age comps with fleet = -4 or 1:
ss3dat$agecomp <- ss3dat$agecomp |>
  dplyr::filter(fleet %in% c(-4, 1))
ss3dat$agecomp$fleet <- abs(ss3dat$agecomp$fleet)

# convert SS3 data into FIMS format using function defined in the R directory
mydat <- get_ss3_data(
  ss3dat,
  fleets = c(1, 4),
  ages = ages,
  lengths = petrale_input[["dat"]][["lbin_vector"]]
) |>
  # rename fleet4 as fleet2 (fleets 2 and 3 already removed above)
  dplyr::mutate(name = dplyr::case_when(
    name == "fleet1" ~ name,
    name == "fleet4" ~ "survey1" # change fleet4 to survey1 to match yellowtail case-study
  )) |>
  dplyr::filter(value != -999)

# Weight-at-age code copied directly from PIFSC-opakapaka.qmd
# Get weight-at-age from SS3 report output and convert from kg to mt
weight_at_age_data <- petrale_output[["wtatage"]] |> 
  dplyr::filter(sex == 1 & fleet == 1 & year == 1949) |> 
  dplyr::select(dplyr::matches("[0-9]+")) |>
  # round(4) |>
  tidyr::pivot_longer(names_to = "age", cols = dplyr::everything()) |>
  dplyr::filter(age %in% ages) |>
  tidyr::expand_grid(
    mydat |> dplyr::select(datestart) |> dplyr::distinct()
  ) |>
  dplyr::left_join(
    mydat |> dplyr::select(datestart, dateend) |> dplyr::distinct(),
    by = "datestart"
  ) |>
  dplyr::mutate(
    type = "weight-at-age",
    name = "fleet1",
    unit = "mt",
    age = as.integer(age),
    value = value/1000
  )
mydat <- mydat |> 
  dplyr::bind_rows(weight_at_age_data)

# set up FIMS data objects
fims_frame <- FIMS::FIMSFrame(mydat)

Prepare Parameters using create_default_parameters()

Code
## lots of code below copied PIFSC-opakapaka.qmd
# rename SS3 output to avoid modifying lines copied from opakapaka model
rep <- petrale_output

# Define fleet specifications for fleet1 and survey1
fleet1 <-  list(
  selectivity = list(form = "LogisticSelectivity"),
  data_distribution = c(
    Landings = "DlnormDistribution",
    AgeComp = "DmultinomDistribution"
  )
)
survey1 <- list(
  selectivity = list(form = "LogisticSelectivity"),
  data_distribution = c(
    Index = "DlnormDistribution",
    AgeComp = "DmultinomDistribution"
  )
)

# Create default parameters
default_parameters <- fims_frame |>
  create_default_parameters(
    fleets = list(fleet1 = fleet1, survey1 = survey1)
  )

Modifying parameters

Code
recdevs <- rep$recruitpars |> # $recruitpars is a subset of $parameters with Yr as an additional column
  dplyr::filter(Yr %in% years[-1]) |> # filter out initial numbers parameters
  dplyr::select(Yr, Value)

init_naa <- rep$natage |>
  dplyr::filter(Time == min(years), Sex == 1) |>  # numbers at age in FIMS start year, not SS3 model
  dplyr::select(paste(ages)) |> 
  as.numeric() # TODO: figure out if *1000 is needed
init_naa <- exp(rep$parameters |> dplyr::filter(grepl("R0", Label)) |> dplyr::pull(Init)) * 1000 * exp(-(ages - 1) * rep$parameters |> dplyr::filter(grepl("NatM.*Fem", Label)) |> dplyr::pull(Value))
init_naa[nages] <- init_naa[nages] / rep$parameters |> dplyr::filter(grepl("NatM.*Fem", Label)) |> dplyr::pull(Value) # sum of infinite series

# TODO: Move this to the R folder or FIMS
steepness_transform <- function(h) {
  # function to convert steepness into required parameter space
  -log(1.0 - h) + log(h - 0.2)
}

parameters <- default_parameters |>
  # SS3 model had length-based selectivity which leads to sex-specific
  # age-based selectivity due to sexually-dimorphic growth.
  # I didn't bother to calculate an age-based inflection point averaged over sexes
  update_parameters(
    modified_parameters = list(
      fleet1 = list(
        Fleet.log_Fmort.estimated = FALSE,
        Fleet.log_Fmort.value = rep$exploitation |> 
          dplyr::filter(Yr %in% years) |> 
          dplyr::pull(North) |>
          log(), # "North" is the SS3 fleet name
        LogisticSelectivity.inflection_point.value = 10, 
        LogisticSelectivity.inflection_point.estimated = TRUE,
        LogisticSelectivity.slope.value = 2, #used age selex values
        LogisticSelectivity.slope.estimated = TRUE
      ),
      survey1 = list(
        LogisticSelectivity.inflection_point.value = 6, 
        LogisticSelectivity.inflection_point.estimated = TRUE,
        LogisticSelectivity.slope.value = 2, 
        LogisticSelectivity.slope.estimated = TRUE
      )
    )
  ) |>
  update_parameters(
    modified_parameters = list(
      recruitment = list(
        BevertonHoltRecruitment.log_rzero.value = ss3ctl$SR_parms["SR_LN(R0)", "INIT"],
        BevertonHoltRecruitment.log_devs.value = recdevs$Value,
        BevertonHoltRecruitment.logit_steep.value = ss3ctl$SR_parms["SR_BH_steep", "INIT"] |>
          steepness_transform(),
        DnormDistribution.log_sd.value = 0.5
      )
    )
  ) |>
  update_parameters(
    modified_parameters = list(
      # approximate age-based equivalent to length-based maturity in petrale model
      # based on looking at model$endgrowth |> dplyr::filter(Sex == 1) |> dplyr::select(Age_Beg, Len_Mat)
      maturity = list(
        LogisticMaturity.inflection_point.value = 20.5, 
        LogisticMaturity.inflection_point.estimated = FALSE,
        LogisticMaturity.slope.value = 1.8, # arbitrary guess
        LogisticMaturity.slope.estimated = FALSE
      )
    )
  ) |>
  update_parameters(
    modified_parameters = list(
      population = list(
        Population.log_init_naa.value = log(init_naa),
        Population.log_init_naa.estimated = TRUE,
        Population.log_M.value = rep(
          log(rep$parameters["NatM_uniform_Fem_GP_1", "Value"]),
          get_n_years(fims_frame) * get_n_ages(fims_frame)
        ),
        Population.log_M.estimated = FALSE
      )
    )
  )

Initialize modules and fit the model

With data and parameters in place, we can now initialize modules using initialize_fims() and fit the model using fit_fims().

Code
# Run the model without optimization to help ensure a viable model
test_fit <- parameters |>
  initialize_fims(data = fims_frame) |>
  fit_fims(optimize = FALSE)

# Run the  model with optimization
fit <- parameters |>
  initialize_fims(data = fims_frame) |>
  fit_fims(optimize = TRUE)
✔ Starting optimization ...
ℹ Restarting optimizer 3 times to improve gradient.
ℹ Maximum gradient went from 0.06468 to 0.00396 after 3 steps.
✔ Finished optimization
Warning in sqrt(diag(cov)): NaNs produced
✔ Finished sdreport
Warning in sqrt(diag(object$cov.fixed)): NaNs produced
ℹ FIMS model version: 0.4.0
ℹ Total run time was 44.44916 seconds
ℹ Number of parameters: total=149, fixed_effects=149, and random_effects=0
ℹ Maximum gradient= 0.00396
ℹ Negative log likelihood (NLL):
• Marginal NLL= 6582.60262
• Total NLL= 6582.60262
ℹ Terminal SB=
Code
  # TODO: why does the final message show no SB value: "i Terminal SB="

Plotting Results

Code
# gather index fit info
index_results <- data.frame(
  observed = m_index(fims_frame, "survey1"),
  expected = get_report(fit)[["index_exp"]][[2]]
) |>
  dplyr::mutate(year = years) |>
  dplyr::filter(observed > 0) # filter out -999 rows

# plot index fit
ggplot2::ggplot(index_results, ggplot2::aes(x = year, y = observed)) +
  ggplot2::geom_point() +
  ggplot2::xlab("Year") +
  ggplot2::ylab("Index (mt)") +
  ggplot2::geom_line(ggplot2::aes(x = year, y = expected), color = "blue") +
  ggplot2::theme_bw()

Code
# gather catch info 
catch_results <- data.frame(
  year = years,
  observed = m_landings(fims_frame, fleet = "fleet1"),
  expected = get_report(fit)[["landings_exp"]][[1]]
)

# plot catch fit
ggplot2::ggplot(catch_results, ggplot2::aes(x = year, y = observed)) +
  ggplot2::geom_point() +
  #ggplot2::aes(color = fleet)) +
  ggplot2::xlab("Year") +
  ggplot2::ylab("Catch (mt)") +
  ggplot2::geom_line(ggplot2::aes(x = year, y = expected)) + #, color = fleet)) +
  ggplot2::theme_bw()

Code
# gather biomass info from FIMS and SS3
biomass <- rep$timeseries |> 
  dplyr::select(Yr, SpawnBio, Bio_all) |> 
  dplyr::filter(Yr %in% years) |> 
  dplyr::rename(
    "SS3_SpawnBio" = "SpawnBio",
    "SS3_Bio" = "Bio_all"
  ) |>
  dplyr::mutate(
    FIMS_SpawnBio = get_report(fit)[["ssb"]][[1]][-1] , 
    FIMS_Bio = get_report(fit)[["biomass"]][[1]][-1]
  ) |> ##CHECK: Is FIMS ssb reporting nyears+1 or initial year-1?
  tidyr::pivot_longer(cols = -Yr) |>
  tidyr::separate_wider_delim(
    cols = "name",
    delim = "_", names = c("Model", "Type")
  )

# plot comparison of spawning biomass and total biomass time series
ggplot2::ggplot(biomass, ggplot2::aes(x = Yr, y = value)) +
  ggplot2::geom_line(ggplot2::aes(color = Model)) +
  ggplot2::xlab("Year") +
  ggplot2::ylab("") +
  ggplot2::facet_wrap(~Type, scales = "free_y") +
  ggplot2::theme_bw()

Code
# gather info on recruitment
recruits <- rep$recruit |> 
  dplyr::filter(Yr %in% years) |>
  dplyr::select(Yr, exp_recr, raw_dev) |>
  dplyr::rename("SS3_recruit" = "exp_recr",
                "SS3_recdev" = "raw_dev",
                "Year" = "Yr") |> 
  dplyr::mutate(FIMS_recruit = c(get_report(fit)[["recruitment"]][[1]][-1]),
                FIMS_recdev = c(NA, get_report(fit)[["log_recruit_dev"]][[1]]),
                SS3_recruit = SS3_recruit * 1000) |>
  tidyr::pivot_longer(cols = -Year) |>
  tidyr::separate_wider_delim(cols = "name", delim = "_", names = c("Model", "Type"))             

# plot recruit time series
ggplot2::ggplot(recruits, ggplot2::aes(x = Year, y = value, color = Model)) +
  ggplot2::geom_line() + 
  ggplot2::facet_wrap(~Type, scales = "free_y") +
  ggplot2::theme_bw()
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_line()`).

Code
clear()