NWFSC Case Study Petrale Sole

The setup

Code
# Names of required packages
packages <- c("dplyr", "tidyr", "ggplot2", "TMB", "reshape2", "here", "remotes", "lubridate")

# Install packages not yet installed
installed_packages <- packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {
  install.packages(packages[!installed_packages], repos = "http://cran.us.r-project.org")
}

# Install FIMS: main branch version if on main, dev version on any other branch

branch_name <- system("git branch --show-current")
use_fims_main <- grepl("main", branch_name)

if (use_fims_main) {
  remotes::install_github("NOAA-FIMS/FIMS")
} else {
  remotes::install_github("NOAA-FIMS/FIMS", ref = "dev")
}

remotes::install_github("r4ss/r4ss")


# Load packages
invisible(lapply(packages, library, character.only = TRUE))

library(FIMS)

R_version <- version$version.string
TMB_version <- packageDescription("TMB")$Version
FIMS_commit <- substr(packageDescription("FIMS")$GithubSHA1, 1, 7)

source(file.path("R", "utils.R"))
Code
theme_set(theme_bw())
  • R version: R version 4.4.2 (2024-10-31)
  • TMB version: 1.9.16
  • FIMS commit: 92746fe
  • Stock name: West Coast Petrale Sole
  • Region: NWFSC
  • Analyst: Ian G. Taylor

Simplifications to the original assessment

The operational petrale sole stock assessment used Stock Synthesis (SS3) and included numerous data types and population dynamic assumptions that are not yet available in FIMS. A simplified SS3 model was also developed to provide a closer comparison but is still a work in progress. This is intended as a demonstration and nothing more.

For both the FIMS model and the simplified SS3 model, I made the following changes:

  • Remove data
    • Remove lengths
    • Remove male ages
    • Remove discard fractions and discard comps
  • Simplify selectivity
    • Remove length-based retention functions
    • Convert to age-based logistic (from length-based double-normal, fixed asymptotic)
  • Remove parameter priors (on M and h)
  • Use female mean weight at age as calculated from the parametric growth curves
  • Varying Index CV to constant over time
  • Varying Catch ESS to constant over time

Script to prepare data for building FIMS object

Code
# read SS3 input files from petrale sole assessment on github
petrale_input <- r4ss::SS_read(
  "https://raw.githubusercontent.com/pfmc-assessments/petrale/main/models/2023.a034.001/"
)
petrale_output <- r4ss::SS_output(
  "https://raw.githubusercontent.com/pfmc-assessments/petrale/main/models/2023.a034.001",
  printstats = FALSE,
  verbose = FALSE
)
  
# generic names for SS3 data and control files could be useful in future generalized version of this code
ss3dat <- petrale_input$dat
ss3ctl <- petrale_input$ctl

# define the dimensions based on the range in the data
# for petrale, the north fleet catch begins in 1896, 
# not the start year of the model 1876 (which is when the southern fleet catch begins,
# but for simplicity this petrale example only uses the northern catch)
# years <- seq(ss3dat$styr, ss3dat$endyr)
years <- seq(1896, ss3dat$endyr)
nyears <- length(years)
nseasons <- 1
# ages <- 0:ss3dat$Nages # population ages in SS3, starts at age 0
ages <- 1:17 # same as data bins
nages <- length(ages)
lengths <- petrale_output[["lbins"]]
nlengths <- length(lengths)

# source function to simplify data and convert from SS3 format
source("R/get_ss3_data.R")

# fleet 4 used conditional age-at-length data with marginal observations
# entered as fleet == -4 (to exclude from likelihood due to redundancy)
# using only marginals for FIMS and exclude CAAL data by filtering out
# the fleet 4 age data
# similarly, some early ages were excluded from fleet 1 in the original model
# by assigning to fleet -1, these get filtered by get_ss3_data()

# only include age comps with fleet = -4 or 1:
ss3dat$agecomp <- ss3dat$agecomp |>
  dplyr::filter(fleet %in% c(-4, 1))
ss3dat$agecomp$fleet <- abs(ss3dat$agecomp$fleet)

# convert SS3 data into FIMS format using function defined in the R directory
mydat <- get_ss3_data(
  ss3dat,
  fleets = c(1, 4),
  ages = ages,
  lengths = petrale_input[["dat"]][["lbin_vector"]]
) |>
  # rename fleet4 as fleet2 (fleets 2 and 3 already removed above)
  dplyr::mutate(name = dplyr::case_when(
    name == "fleet1" ~ name,
    name == "fleet4" ~ "survey1" # change fleet4 to survey1 to match yellowtail case-study
  )) |>
  dplyr::filter(value != -999)

# Weight-at-age code copied directly from PIFSC-opakapaka.qmd
# Get weight-at-age from SS3 report output and convert from kg to mt
weight_at_age_data <- petrale_output[["wtatage"]] |> 
  dplyr::filter(sex == 1 & fleet == 1 & year == 1949) |> 
  dplyr::select(dplyr::matches("[0-9]+")) |>
  # round(4) |>
  tidyr::pivot_longer(names_to = "age", cols = dplyr::everything()) |>
  dplyr::filter(age %in% ages) |>
  tidyr::expand_grid(
    mydat |> dplyr::select(datestart) |> dplyr::distinct()
  ) |>
  dplyr::left_join(
    mydat |> dplyr::select(datestart, dateend) |> dplyr::distinct(),
    by = "datestart"
  ) |>
  dplyr::mutate(
    type = "weight-at-age",
    name = "fleet1",
    unit = "mt",
    age = as.integer(age),
    value = value/1000
  )
mydat <- mydat |> 
  dplyr::bind_rows(weight_at_age_data)

# set up FIMS data objects
fims_frame <- FIMS::FIMSFrame(mydat)

Prepare Parameters using create_default_parameters()

Code
## lots of code below copied PIFSC-opakapaka.qmd
# rename SS3 output to avoid modifying lines copied from opakapaka model
rep <- petrale_output

# Define fleet specifications for fleet1 and survey1
fleet1 <- survey1 <- list(
  selectivity = list(form = "LogisticSelectivity"),
  data_distribution = c(
    Index = "DlnormDistribution",
    AgeComp = "DmultinomDistribution"
  )
)

# Create default parameters
default_parameters <- fims_frame |>
  create_default_parameters(
    fleets = list(fleet1 = fleet1, survey1 = survey1)
  )

Modifying parameters

Code
recdevs <- rep$recruitpars |> # $recruitpars is a subset of $parameters with Yr as an additional column
  dplyr::filter(Yr %in% years[-1]) |> # filter out initial numbers parameters
  dplyr::select(Yr, Value)

init_naa <- rep$natage |>
  dplyr::filter(Time == min(years), Sex == 1) |>  # numbers at age in FIMS start year, not SS3 model
  dplyr::select(paste(ages)) |> 
  as.numeric() # TODO: figure out if *1000 is needed
init_naa <- exp(rep$parameters |> dplyr::filter(grepl("R0", Label)) |> dplyr::pull(Init)) * 1000 * exp(-(ages - 1) * rep$parameters |> dplyr::filter(grepl("NatM.*Fem", Label)) |> dplyr::pull(Value))
init_naa[nages] <- init_naa[nages] / rep$parameters |> dplyr::filter(grepl("NatM.*Fem", Label)) |> dplyr::pull(Value) # sum of infinite series

# TODO: Move this to the R folder or FIMS
steepness_transform <- function(h) {
  # function to convert steepness into required parameter space
  -log(1.0 - h) + log(h - 0.2)
}

parameters <- default_parameters |>
  # SS3 model had length-based selectivity which leads to sex-specific
  # age-based selectivity due to sexually-dimorphic growth.
  # I didn't bother to calculate an age-based inflection point averaged over sexes
  update_parameters(
    modified_parameters = list(
      fleet1 = list(
        Fleet.log_Fmort.estimated = FALSE,
        Fleet.log_Fmort.value = rep$exploitation |> 
          dplyr::filter(Yr %in% years) |> 
          dplyr::pull(North) |>
          log(), # "North" is the SS3 fleet name
        LogisticSelectivity.inflection_point.value = 10, 
        LogisticSelectivity.inflection_point.estimated = TRUE,
        LogisticSelectivity.slope.value = 2, #used age selex values
        LogisticSelectivity.slope.estimated = TRUE
      ),
      survey1 = list(
        LogisticSelectivity.inflection_point.value = 6, 
        LogisticSelectivity.inflection_point.estimated = TRUE,
        LogisticSelectivity.slope.value = 2, 
        LogisticSelectivity.slope.estimated = TRUE
      )
    )
  ) |>
  update_parameters(
    modified_parameters = list(
      recruitment = list(
        BevertonHoltRecruitment.log_rzero.value = ss3ctl$SR_parms["SR_LN(R0)", "INIT"],
        BevertonHoltRecruitment.log_devs.value = recdevs$Value,
        BevertonHoltRecruitment.logit_steep.value = ss3ctl$SR_parms["SR_BH_steep", "INIT"] |>
          steepness_transform(),
        DnormDistribution.log_sd.value = 0.5
      )
    )
  ) |>
  update_parameters(
    modified_parameters = list(
      # approximate age-based equivalent to length-based maturity in petrale model
      # based on looking at model$endgrowth |> dplyr::filter(Sex == 1) |> dplyr::select(Age_Beg, Len_Mat)
      maturity = list(
        LogisticMaturity.inflection_point.value = 20.5, 
        LogisticMaturity.inflection_point.estimated = FALSE,
        LogisticMaturity.slope.value = 1.8, # arbitrary guess
        LogisticMaturity.slope.estimated = FALSE
      )
    )
  ) |>
  update_parameters(
    modified_parameters = list(
      population = list(
        Population.log_init_naa.value = log(init_naa),
        Population.log_init_naa.estimated = TRUE,
        Population.log_M.value = rep(
          log(rep$parameters["NatM_uniform_Fem_GP_1", "Value"]),
          get_n_years(fims_frame) * get_n_ages(fims_frame)
        ),
        Population.log_M.estimated = FALSE
      )
    )
  )

Initialize modules and fit the model

With data and parameters in place, we can now initialize modules using initialize_fims() and fit the model using fit_fims().

Code
# Run the model without optimization to help ensure a viable model
test_fit <- parameters |>
  initialize_fims(data = fims_frame) |>
  fit_fims(optimize = FALSE)

# Run the  model with optimization
fit <- parameters |>
  initialize_fims(data = fims_frame) |>
  fit_fims(optimize = TRUE)
✔ Starting optimization ...
ℹ Restarting optimizer 3 times to improve gradient.
ℹ Maximum gradient went from 0.09997 to 0.00394 after 3 steps.
✔ Finished optimization
Warning in sqrt(diag(cov)): NaNs produced
✔ Finished sdreport
Warning in sqrt(diag(object$cov.fixed)): NaNs produced
ℹ FIMS model version: 0.3.0.1
ℹ Total run time was 17.22856 seconds
ℹ Number of parameters: total=149, fixed_effects=149, and random_effects=0
ℹ Maximum gradient= 0.00394
ℹ Negative log likelihood (NLL):
• Marginal NLL= 6582.60366
• Total NLL= 6582.60366
ℹ Terminal SB=
Code
  # TODO: why does the final message show no SB value: "i Terminal SB="
str(fit)
Formal class 'FIMSFit' [package "FIMS"] with 10 slots
  ..@ input               :List of 1
  .. ..$ parameters:List of 1
  .. .. ..$ p: num [1:149] 10 2 6 2 0 ...
  ..@ obj                 :List of 10
  .. ..$ par     : Named num [1:149] 10 2 6 2 0 ...
  .. .. ..- attr(*, "names")= chr [1:149] "selectivity.inflection_point.1.0" "selectivity.slope.1.0" "selectivity.inflection_point.2.0" "selectivity.slope.2.0" ...
  .. ..$ fn      :function (x = last.par[lfixed()], ...)  
  .. ..$ gr      :function (x = last.par[lfixed()], ...)  
  .. ..$ he      :function (x = last.par[lfixed()], atomic = usingAtomics())  
  .. ..$ hessian : logi FALSE
  .. ..$ method  : chr "BFGS"
  .. ..$ retape  :function (set.defaults = TRUE)  
  .. ..$ env     :<environment: 0x11fdc7cb8> 
  .. ..$ report  :function (par = last.par)  
  .. ..$ simulate:function (par = last.par, complete = FALSE)  
  ..@ opt                 :List of 6
  .. ..$ par        : Named num [1:149] 12.083 0.725 3.203 2.045 0.367 ...
  .. .. ..- attr(*, "names")= chr [1:149] "p" "p" "p" "p" ...
  .. ..$ objective  : num 6583
  .. ..$ convergence: int 1
  .. ..$ iterations : int 1
  .. ..$ evaluations: Named int [1:2] 15 1
  .. .. ..- attr(*, "names")= chr [1:2] "function" "gradient"
  .. ..$ message    : chr "false convergence (8)"
  ..@ max_gradient        : num 0.00394
  ..@ report              :List of 17
  .. ..$ ssb            :List of 1
  .. .. ..$ : num [1:128] 9.13 8.57 8.01 7.9 8.16 ...
  .. ..$ cnal           :List of 2
  .. .. ..$ : num(0) 
  .. .. ..$ : num(0) 
  .. ..$ log_recruit_dev:List of 1
  .. .. ..$ : num [1:126] 0.00726 0.02663 0.05511 0.08395 0.10094 ...
  .. ..$ cwaa           :List of 2
  .. .. ..$ : num [1:2159] 9.71e-07 9.63e-12 3.39e-05 4.71e-05 1.38e-04 ...
  .. .. ..$ : num [1:2159] 2.16 7.48e-05 6.45e+02 9.15e+02 1.52e+03 ...
  .. ..$ exp_index      :List of 2
  .. .. ..$ : num [1:127] 0.242 0.198 0.154 0.15 0.146 ...
  .. .. ..$ : num [1:127] 48372 48389 47874 47872 47840 ...
  .. ..$ pcnaa          :List of 2
  .. .. ..$ : num [1:2159] 3.28e-04 7.33e-10 1.11e-03 8.87e-04 1.76e-03 ...
  .. .. ..$ : num [1:2159] 3.35e-03 2.61e-08 9.72e-02 7.90e-02 8.88e-02 ...
  .. ..$ nll_components : num [1:5] 664 930 1134 419 3435
  .. ..$ biomass        :List of 1
  .. .. ..$ : num [1:128] 34928 34794 34689 34595 34489 ...
  .. ..$ F_mort         :List of 2
  .. .. ..$ : num [1:127] 1.62e-05 1.33e-05 1.04e-05 1.02e-05 9.93e-06 ...
  .. .. ..$ : num [1:127] 1 1 1 1 1 1 1 1 1 1 ...
  .. ..$ M              :List of 1
  .. .. ..$ : num [1:2159] 0.142 0.142 0.142 0.142 0.142 ...
  .. ..$ pcnal          :List of 2
  .. .. ..$ : num(0) 
  .. .. ..$ : num(0) 
  .. ..$ recruitment    :List of 1
  .. .. ..$ : num [1:128] 9791303 5439117 5528224 5667505 5828827 ...
  .. ..$ cnaa           :List of 2
  .. .. ..$ : num [1:2159] 4.81e-02 1.08e-07 1.63e-01 1.30e-01 2.58e-01 ...
  .. .. ..$ : num [1:2159] 1.07e+05 8.36e-01 3.11e+06 2.53e+06 2.84e+06 ...
  .. ..$ exp_catch      :List of 2
  .. .. ..$ : num [1:127] 0.242 0.198 0.154 0.15 0.146 ...
  .. .. ..$ : num [1:127] 0 0 0 0 0 0 0 0 0 0 ...
  .. ..$ q              :List of 2
  .. .. ..$ : num 1
  .. .. ..$ : num 1.44
  .. ..$ naa            :List of 1
  .. .. ..$ : num [1:2176] 9.79e+06 1.06e+01 7.81e+06 3.02e+06 2.91e+06 ...
  .. ..$ jnll           : num 6583
  ..@ sdreport            :List of 8
  .. ..$ value         : Named num [1:11704] 9.79e+06 1.06e+01 7.81e+06 3.02e+06 2.91e+06 ...
  .. .. ..- attr(*, "names")= chr [1:11704] "NAA" "NAA" "NAA" "NAA" ...
  .. ..$ sd            : num [1:11704] 8611459 NaN 24442808 31519765 30438897 ...
  .. ..$ cov           : num [1:11704, 1:11704] 7.42e+13 3.34e+09 -1.64e+14 1.39e+14 -1.14e+13 ...
  .. ..$ par.fixed     : Named num [1:149] 12.083 0.725 3.203 2.045 0.367 ...
  .. .. ..- attr(*, "names")= chr [1:149] "selectivity.inflection_point.1.0" "selectivity.slope.1.0" "selectivity.inflection_point.2.0" "selectivity.slope.2.0" ...
  .. ..$ cov.fixed     : num [1:149, 1:149] 0.004841 -0.000302 -0.000385 0.000379 -0.000781 ...
  .. .. ..- attr(*, "dimnames")=List of 2
  .. .. .. ..$ : chr [1:149] "selectivity.inflection_point.1.0" "selectivity.slope.1.0" "selectivity.inflection_point.2.0" "selectivity.slope.2.0" ...
  .. .. .. ..$ : chr [1:149] "selectivity.inflection_point.1.0" "selectivity.slope.1.0" "selectivity.inflection_point.2.0" "selectivity.slope.2.0" ...
  .. ..$ pdHess        : logi FALSE
  .. ..$ gradient.fixed: num [1, 1:149] -5.40e-04 -3.94e-03 -7.85e-05 -3.67e-05 -6.34e-05 ...
  .. ..$ env           :<environment: 0x128c5bdd0> 
  .. ..- attr(*, "class")= chr "sdreport"
  ..@ estimates           : tibble [11,853 × 3] (S3: tbl_df/tbl/data.frame)
  .. ..$ name : chr [1:11853] "selectivity.inflection_point.1.0" "selectivity.slope.1.0" "selectivity.inflection_point.2.0" "selectivity.slope.2.0" ...
  .. ..$ value: num [1:11853] 12.083 0.725 3.203 2.045 0.367 ...
  .. ..$ se   : num [1:11853] 0.06958 0.00893 0.03576 0.07187 0.02158 ...
  ..@ number_of_parameters: Named int [1:3] 149 149 0
  .. ..- attr(*, "names")= chr [1:3] "total" "fixed_effects" "random_effects"
  ..@ timing              : 'difftime' Named num [1:3] 1.54112887382507 15.6837999820709 17.2285559177399
  .. ..- attr(*, "names")= chr [1:3] "time_optimization" "time_sdreport" "time_total"
  .. ..- attr(*, "units")= Named chr "secs"
  .. .. ..- attr(*, "names")= chr "time_optimization"
  ..@ version             :Classes 'package_version', 'numeric_version'  hidden list of 1
  .. ..$ : int [1:4] 0 3 0 1
Code
clear()
Code
fit@report$exp_index
fit@report$exp_catch # TODO: why is this all zeros
fit@report$F_mort[[1]]
fit@report$log_recruit_dev
fit@report$ssb
fit@report$biomass
unique(fit@estimates$name)
fit@input$parameters
fit@estimates |> filter(name == "NAA")
# Clear memory post-run

Plotting Results

Code
library(ggplot2)

# gather index fit info
index_results <- data.frame(
  observed = m_index(fims_frame, "survey1"),
  expected = get_report(fit)[["exp_index"]][[2]]
) |>
dplyr::mutate(year = years) |>
dplyr::filter(observed > 0) # filter out -999 rows

# plot index fit
ggplot2::ggplot(index_results, ggplot2::aes(x = year, y = observed)) +
  ggplot2::geom_point() +
  ggplot2::xlab("Year") +
  ggplot2::ylab("Index (mt)") +
  ggplot2::geom_line(ggplot2::aes(x = year, y = expected), color = "blue") +
  ggplot2::theme_bw()

Code
# gather catch info 
catch_results <- data.frame(
  year = years,
  observed = m_landings(fims_frame, fleet = "fleet1"),
  expected = get_report(fit)[["exp_index"]][[1]]
)

# plot catch fit
ggplot2::ggplot(catch_results, ggplot2::aes(x = year, y = observed)) +
  ggplot2::geom_point() + #ggplot2::aes(color = fleet)) +
  ggplot2::xlab("Year") +
  ggplot2::ylab("Catch (mt)") +
  ggplot2::geom_line(ggplot2::aes(x = year, y = expected)) + #, color = fleet)) +
  ggplot2::theme_bw()

Code
# gather biomass info from FIMS and SS3
biomass <- rep$timeseries |> 
  dplyr::select(Yr, SpawnBio, Bio_all) |> 
  dplyr::filter(Yr %in% years) |> 
  dplyr::rename(
    "SS3_SpawnBio" = "SpawnBio",
    "SS3_Bio" = "Bio_all"
  ) |>
  dplyr::mutate(
    FIMS_SpawnBio = get_report(fit)[["ssb"]][[1]][-1] , 
    FIMS_Bio = get_report(fit)[["biomass"]][[1]][-1]
  ) |> ##CHECK: Is FIMS ssb reporting nyears+1 or initial year-1?
  tidyr::pivot_longer(cols = -Yr) |>
  tidyr::separate_wider_delim(
    cols = "name",
    delim = "_", names = c("Model", "Type")
  )

# plot comparison of spawning biomass and total biomass time series
ggplot2::ggplot(biomass, ggplot2::aes(x = Yr, y = value)) +
  ggplot2::geom_line(ggplot2::aes(color = Model)) +
  ggplot2::xlab("Year") +
  ggplot2::ylab("") +
  ggplot2::facet_wrap(~Type, scales = "free_y") +
  ggplot2::theme_bw()

Code
# gather info on recruitment
recruits <- rep$recruit |> 
  dplyr::filter(Yr %in% years) |>
  dplyr::select(Yr, exp_recr, raw_dev) |>
  dplyr::rename("SS3_recruit" = "exp_recr",
                "SS3_recdev" = "raw_dev",
                "Year" = "Yr") |> 
  dplyr::mutate(FIMS_recruit = c(get_report(fit)[["recruitment"]][[1]][-1]),
                FIMS_recdev = c(NA, get_report(fit)[["log_recruit_dev"]][[1]]),
                SS3_recruit = SS3_recruit * 1000) |>
  tidyr::pivot_longer(cols = -Year) |>
  tidyr::separate_wider_delim(cols = "name", delim = "_", names = c("Model", "Type"))             

# plot recruit time series
ggplot2::ggplot(recruits, ggplot2::aes(x = Year, y = value, color = Model)) +
  ggplot2::geom_line() + 
  ggplot2::facet_wrap(~Type, scales = "free_y") +
  ggplot2::theme_bw()
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_line()`).