NWFSC Case Study Petrale Sole

The setup

Code
# Names of required packages
packages <- c(
  "dplyr",
  "ggridges",
  "ggplot2",
  "gt",
  "here",
  "lubridate",
  "remotes",
  "reshape2",
  "shinystan",
  "tidybayes",
  "tidyr",
  "TMB"
)

# Install packages not yet installed
installed_packages <- packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {
  install.packages(packages[!installed_packages], repos = "http://cran.us.r-project.org")
}

# the Bayesian case study requires these extra packages
if (!"StanEstimators" %in% rownames(installed.packages())) {
  install.packages("StanEstimators", repos = c("https://andrjohns.r-universe.dev", "https://cloud.r-project.org"))
}

if (!"adnuts" %in% rownames(installed.packages())) {
  remotes::install_github("Cole-Monnahan-NOAA/adnuts")
}

# SS3 case studies require r4ss
if (!"r4ss" %in% rownames(installed.packages())) {
  remotes::install_github("r4ss/r4ss")
}

# SS3 case studies require r4ss
if (!"stockplotr" %in% rownames(installed.packages())) {
  remotes::install_github("nmfs-ost/stockplotr")
}

# Install FIMS: main branch version if on main, dev version on any other branch
remotes::install_github("NOAA-FIMS/FIMS", ref = "dev-hake")

# Load packages
invisible(lapply(packages, library, character.only = TRUE))

library(FIMS)
library(adnuts)
R_version <- version$version.string
TMB_version <- packageDescription("TMB")$Version
FIMS_commit <- substr(packageDescription("FIMS")$GithubSHA1, 1, 7)
ggplot2::theme_set(ggplot2::theme_bw())

data_directory <- "data_files"
function_directory <- "R"
purrr::walk(
    dir(function_directory, full.names = TRUE),
  source
)
Code
common_name <- "Petrale sole"


# read SS3 input files from petrale sole assessment on GitHub
petrale_input <- r4ss::SS_read(
  "https://raw.githubusercontent.com/pfmc-assessments/petrale/main/models/2023.a034.001/"
)
# add wtatage file to list of input files
petrale_input[["wtatage"]] <- r4ss::SS_readwtatage(
  "https://raw.githubusercontent.com/pfmc-assessments/petrale/refs/heads/main/models/2023.a034.001/wtatage.ss_new"
)
# read SS3 output files from petrale sole assessment on GitHub
petrale_output <- r4ss::SS_output(
  "https://raw.githubusercontent.com/pfmc-assessments/petrale/main/models/2023.a034.001",
  printstats = FALSE,
  verbose = FALSE,
  covar = FALSE
)
Some stats skipped because the .par file not found.
Code
output_petrale <- stockplotr::convert_output(
  "https://raw.githubusercontent.com/pfmc-assessments/petrale/main/models/2023.a034.001/Report.sso",
  model = "ss3"
)
ℹ Identified fleet names:
ℹ North, South, Triennial, and WCGBTS
→ Skipped DEFINITIONS
→ Processing DERIVED_QUANTITIES
→ Processing Input_Variance_Adjustment
→ Skipped LIKELIHOOD
→ Processing MGparm_By_Year_after_adjustments
ℹ Error values are present, but are unique to the data frame and not to a selected parameter.
→ Skipped MORPH_INDEXING
→ Processing OVERALL_COMPS
ℹ FACTORS REMOVED: OVERALL_COMPS - n_obs
ℹ FACTORS REMOVED: OVERALL_COMPS - n_obs
→ Processing PARAMETERS
→ Processing BIOMASS_AT_AGE
ℹ FACTORS REMOVED: BIOMASS_AT_AGE - beg/mid
→ Processing BIOMASS_AT_LENGTH
ℹ FACTORS REMOVED: BIOMASS_AT_LENGTH - beg/mid
→ Processing CATCH
ℹ FACTORS REMOVED: CATCH - fleet_name
→ Processing DISCARD_AT_AGE
→ Skipped EXPLOITATION
→ Processing CATCH_AT_AGE
→ Processing F_AT_AGE
→ Processing MEAN_SIZE_TIMESERIES
→ Processing NUMBERS_AT_AGE
ℹ FACTORS REMOVED: NUMBERS_AT_AGE - beg/mid
→ Processing NUMBERS_AT_LENGTH
ℹ FACTORS REMOVED: NUMBERS_AT_LENGTH - beg/mid
→ Processing SPAWN_RECRUIT
→ Processing SPAWN_RECR_CURVE
→ Processing SPR_SERIES
→ Processing TIME_SERIES
→ Skipped COMPOSITION_DATABASE
→ Skipped DISCARD_SPECIFICATION
→ Processing DISCARD_OUTPUT
ℹ Multiple error metrics reported in DISCARD_OUTPUT.
ℹ Error label(s) removed: 
std_use
ℹ FACTORS REMOVED: DISCARD_OUTPUT - fleet_name
→ Skipped INDEX_1
→ Processing INDEX_2
ℹ FACTORS REMOVED: INDEX_2 - fleet_name
→ Skipped INDEX_3
→ Processing FIT_LEN_COMPS
ℹ FACTORS REMOVED: FIT_LEN_COMPS - fleet_name
→ Processing FIT_AGE_COMPS
ℹ FACTORS REMOVED: FIT_AGE_COMPS - fleet_name
→ Skipped MEAN_BODY_WT_OUTPUT
→ Processing AGE_SELEX
→ Processing LEN_SELEX
→ Processing selparm(Size)_By_Year_after_adjustments
→ Processing selparm(Age)_By_Year_after_adjustments
→ Processing SELEX_database
→ Skipped AGE_AGE_KEY
→ Skipped AGE_LENGTH_KEY
→ Processing BIOLOGY
→ Processing Biology_at_age_in_endyr
→ Processing Growth_Parameters
→ Processing MEAN_BODY_WT(Begin)
→ Processing Natural_Mortality
ℹ FACTORS REMOVED: Natural_Mortality - beg/mid
→ Skipped RECRUITMENT_DIST
→ Processing Dynamic_Bzero
→ Skipped GLOBAL_MSY
→ Processing Kobe_Plot
→ Processing SPR/YPR_Profile
ℹ Some parameters were not found or included in the output file. The following parameters were not added into the new output file: 
Input_Variance_Adjustment
SPAWN_RECR_CURVE
selparm(Size)_By_Year_after_adjustments
selparm(Age)_By_Year_after_adjustments
BIOLOGY
Biology_at_age_in_endyr
SPR/YPR_Profile
✔ Conversion finished!
Code
# years <- seq(petrale_input$dat$styr, petrale_input$dat$endyr)
years <- seq(1896, petrale_input$dat$endyr)
n_years <- length(years)
# ages <- 0:petrale_input$dat$Nages # population ages in SS3, starts at age 0
ages <- 1:17 # same as data bins
n_ages <- length(ages)
lengths <- petrale_output[["lbins"]]
nlengths <- length(lengths)
  • R version: R version 4.5.2 (2025-10-31)
  • TMB version: 1.9.19
  • FIMS commit: 14916c8
  • Stock name: Petrale sole
  • Region: NWFSC
  • Analyst: Ian G. Taylor

Simplifications to the original assessment

The model presented in this case study was changed substantially from the operational version and should not be considered reflective of the Petrale sole stock. These results are intended to be a demonstration and nothing more.

To get the operational model to more closely match a FIMS model the following changes were made:

  • Remove data
    • Remove male lengths and ages
    • Remove discard fractions and discard comps
  • Simplify selectivity
    • Remove length-based retention functions
    • Convert to age-based logistic (from length-based double-normal, fixed asymptotic)
  • Remove priors
    • Natural mortality
    • Steepness
  • Use female mean weight at age from the SS3 weight-at-age file

Setting up the data

Code
# define the dimensions based on the range in the data
# for petrale, the north fleet catch begins in 1896, 
# not the start year of the model 1876 (which is when the southern fleet catch begins,
# but for simplicity this petrale example only uses the northern catch)

# fleet 4 used conditional age-at-length data with marginal observations
# entered as fleet == -4 (to exclude from likelihood due to redundancy)
# using only marginals for FIMS and exclude CAAL data by filtering out
# the fleet 4 age data
# similarly, some early ages were excluded from fleet 1 in the original model
# by assigning to fleet -1, these get filtered by get_ss3_data()

# only include age comps with fleet = -4 or 1:
petrale_input$dat$agecomp <- petrale_input$dat$agecomp |>
  dplyr::filter(fleet %in% c(-4, 1)) |>
  dplyr::mutate(fleet = abs(fleet)) |>
  dplyr::arrange(fleet, year, dplyr::desc(Nsamp)) |>
  dplyr::distinct(fleet, year, .keep_all = TRUE)

petrale_input$dat$lencomp <- petrale_input$dat$lencomp |>
    dplyr::filter(
    # filter out discard length-composition data
    !(part == 1 & fleet == 1)
  )

# convert SS3 data into FIMS format using function defined in the R directory
mydat <- get_ss3_data(
  ss3_inputs = petrale_input,
  fleets = c(1, 4),
  ages = ages,
  lengths = petrale_input[["dat"]][["lbin_vector"]]
) |>
  # rename fleet4 as fleet2 (fleets 2 and 3 already removed above)
  dplyr::mutate(name = dplyr::case_when(
    name == "fleet1" ~ name,
    name == "fleet4" ~ "survey1" # change fleet4 to survey1 to match yellowtail case-study
  )) |>
  dplyr::filter(
    # filter out forecast years
    timing %in% years
  )


# set up FIMS data objects
data_4_model <- FIMS::FIMSFrame(mydat |>
  # Remove the length-composition data
  # TODO: remove this as we create an age-to-length-conversion matrixt
  dplyr::filter(type != "length_comp") |>
  dplyr::select(-length)
)

Run FIMS model

Code
# lots of code below copied PIFSC-opakapaka.qmd
# rename SS3 output to avoid modifying lines copied from opakapaka model
rep <- petrale_output

# Create default parameters
default_parameters <- FIMS::create_default_configurations(
  data = data_4_model
) |>
  FIMS::create_default_parameters(
    data = data_4_model
  ) |>
  tidyr::unnest(cols = data)

recdevs <- rep$recruitpars |> # $recruitpars is a subset of $parameters with Yr as an additional column
  dplyr::filter(Yr %in% years[-1]) |> # filter out initial numbers parameters
  dplyr::select(Yr, Value)

init_naa <- rep$natage |>
  dplyr::filter(Time == min(years), Sex == 1) |>  # numbers at age in FIMS start year, not SS3 model
  dplyr::select(paste(ages)) |> 
  as.numeric() # TODO: figure out if *1000 is needed
init_naa <- exp(rep$parameters |> dplyr::filter(grepl("R0", Label)) |> dplyr::pull(Init)) * 1000 * exp(-(ages - 1) * rep$parameters |> dplyr::filter(grepl("NatM.*Fem", Label)) |> dplyr::pull(Value))
init_naa[n_ages] <- init_naa[n_ages] / rep$parameters |> dplyr::filter(grepl("NatM.*Fem", Label)) |> dplyr::pull(Value) # sum of infinite series

parameters <- default_parameters |>
  # SS3 model had length-based selectivity which leads to sex-specific
  # age-based selectivity due to sexually-dimorphic growth.
  # I didn't bother to calculate an age-based inflection point averaged over sexes
  dplyr::rows_update(
    tibble::tibble(
      module_name = "Maturity",
      label = c("inflection_point", "slope"),
      value = c(20.5, 1.8), # arbitrary guess of slope
    ),
    by = c("module_name", "label")
  ) |>
  dplyr::rows_update(
    tibble::tibble(
      module_name = "Selectivity",
      fleet_name = "fleet1",
      label = c("inflection_point", "slope"),
      value = c(10, 2),
      estimation_type = "constant"
    ),
    by = c("module_name", "fleet_name", "label")
  ) |>
  dplyr::rows_update(
    tibble::tibble(
      module_name = "Selectivity",
      fleet_name = "survey1",
      label = c("inflection_point", "slope"),
      value = c(6, 2),
      estimation_type = "constant"
    ),
    by = c("module_name", "fleet_name", "label")
  )  |>
  dplyr::rows_update(
    tibble::tibble(
      module_name = "Recruitment",
      label = c("log_rzero", "logit_steep", "log_sd"),
      value = c(
        petrale_input$ctl$SR_parms["SR_LN(R0)", "INIT"],
        FIMS::logit(0.2, 1.0, petrale_input$ctl$SR_parms["SR_BH_steep", "INIT"]),
        0.5
      )
    ),
    by = c("module_name", "label")
  ) |>
  dplyr::rows_update(
    tibble::tibble(
      module_name = "Recruitment",
      label = "log_devs",
      time = (FIMS::get_start_year(data_4_model) + 1):FIMS::get_end_year(data_4_model),
      # The last value of the initial numbers at age is the first
      # recruitment deviation
      value = recdevs$Value,
    ),
    by = c("module_name", "label", "time")
  ) |>
  dplyr::rows_update(
    tibble::tibble(
      module_name = "Population",
      label = "log_init_naa",
      age = FIMS::get_ages(data_4_model),
      value = log(init_naa),
      estimation_type = "fixed_effects"
    ),
    by = c("module_name", "label", "age")
  ) |>
  dplyr::rows_update(
    tibble::tibble(
      module_name = "Population",
      label = "log_M",
      value = log(rep$parameters["NatM_uniform_Fem_GP_1", "Value"]),
      estimation_type = "constant"
    ),
    by = c("module_name", "label")
  ) |>
  dplyr::rows_update(
    tibble::tibble(
      module_name = "Fleet",
      fleet_name = "fleet1",
      time = years,
      label = "log_Fmort",
      estimation_type = "constant",
      value = rep[["exploitation"]] |> 
        dplyr::filter(Yr %in% years) |>
        dplyr::mutate(North = ifelse(North == 0, -999, log(North))) |>
        dplyr::pull(North)
    ),
    by = c("module_name", "fleet_name", "label", "time")
  )

# Run the model without optimization to help ensure a viable model
test_fit <- parameters |>
  FIMS::initialize_fims(data = data_4_model) |>
  FIMS::fit_fims(optimize = FALSE)

# Run the  model with optimization
fit <- parameters |>
  FIMS::initialize_fims(data = data_4_model) |>
  FIMS::fit_fims(optimize = TRUE, get_sd = FALSE)
✔ Starting optimization ...
ℹ Restarting optimizer 3 times to improve gradient.
ℹ Maximum gradient went from 0.07278 to 0.00383 after 3 steps.
✔ Finished optimization
ℹ FIMS model version: 0.8.1
ℹ Total run time was 1.74314 seconds
ℹ Number of parameters: fixed_effects=145, random_effects=0, and total=145
ℹ Maximum gradient= 0.00383
ℹ Negative log likelihood (NLL):
• Marginal NLL= 16942.51308
• Total NLL= 16942.51308
ℹ Terminal SB= 0

Plotting results

Code
# gather index fit info
index_results <- data.frame(
  observed = FIMS::m_index(data_4_model, "survey1"),
  expected = FIMS::get_report(fit)[["index_expected"]][[2]]
) |>
  dplyr::mutate(year = years) |>
  dplyr::filter(observed > 0) # filter out -999 rows

# plot index fit
ggplot2::ggplot(index_results, ggplot2::aes(x = year, y = observed)) +
  ggplot2::geom_point() +
  ggplot2::xlab("Year") +
  ggplot2::ylab("Index (mt)") +
  ggplot2::geom_line(ggplot2::aes(x = year, y = expected), color = "blue") +
  ggplot2::theme_bw()

Code
# gather catch info 
catch_results <- data.frame(
  year = years,
  observed = FIMS::m_landings(data_4_model, fleet = "fleet1"),
  expected = FIMS::get_report(fit)[["landings_expected"]][[1]]
)

# plot catch fit
ggplot2::ggplot(catch_results, ggplot2::aes(x = year, y = observed)) +
  ggplot2::geom_point() +
  #ggplot2::aes(color = fleet)) +
  ggplot2::xlab("Year") +
  ggplot2::ylab("Catch (mt)") +
  ggplot2::geom_line(ggplot2::aes(x = year, y = expected)) + #, color = fleet)) +
  ggplot2::theme_bw()

Code
# gather biomass info from FIMS and SS3
biomass <- rep$timeseries |>
  dplyr::select(Yr, SpawnBio, Bio_all) |> 
  dplyr::filter(Yr %in% years) |> 
  dplyr::rename(
    "SS3_SpawnBio" = "SpawnBio",
    "SS3_Bio" = "Bio_all"
  ) |>
  dplyr::mutate(
    FIMS_SpawnBio = FIMS::get_report(fit)[["spawning_biomass"]][[1]][-1] , 
    FIMS_Bio = FIMS::get_report(fit)[["biomass"]][[1]][-1]
  ) |> ##CHECK: Is FIMS ssb reporting n_years+1 or initial year-1?
  tidyr::pivot_longer(cols = -Yr) |>
  tidyr::separate_wider_delim(
    cols = "name",
    delim = "_", names = c("Model", "Type")
  )

# plot comparison of spawning biomass and total biomass time series
ggplot2::ggplot(biomass, ggplot2::aes(x = Yr, y = value)) +
  ggplot2::geom_line(ggplot2::aes(color = Model)) +
  ggplot2::xlab("Year") +
  ggplot2::ylab("") +
  ggplot2::facet_wrap(~Type, scales = "free_y") +
  ggplot2::theme_bw()

Code
# gather info on recruitment
recruits <- rep$recruit |>
  dplyr::filter(Yr %in% years) |>
  dplyr::select(Yr, exp_recr, raw_dev) |>
  dplyr::rename("SS3_recruit" = "exp_recr",
                "SS3_recdev" = "raw_dev",
                "Year" = "Yr") |> 
  dplyr::mutate(FIMS_recruit = c(FIMS::get_report(fit)[["expected_recruitment"]][[1]][-1]),
                FIMS_recdev = c(
                  NA,
                  FIMS::get_estimates(fit) |>
                    dplyr::filter(
                      module_name == "Recruitment",
                      label == "log_devs"
                    ) |>
                    dplyr::pull(estimated)
                ),
                SS3_recruit = SS3_recruit * 1000) |>
  tidyr::pivot_longer(cols = -Year) |>
  tidyr::separate_wider_delim(cols = "name", delim = "_", names = c("Model", "Type"))             

# plot recruit time series
ggplot2::ggplot(recruits, ggplot2::aes(x = Year, y = value, color = Model)) +
  ggplot2::geom_line() + 
  ggplot2::facet_wrap(~Type, scales = "free_y") +
  ggplot2::theme_bw()
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_line()`).

Code
FIMS::clear()