% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get_mvgam_priors.R
\name{get_mvgam_priors}
\alias{get_mvgam_priors}
\title{Extract information on default prior distributions for an \pkg{mvgam} model}
\usage{
get_mvgam_priors(
  formula,
  trend_formula,
  factor_formula,
  knots,
  trend_knots,
  trend_model = "None",
  family = poisson(),
  data,
  unit = time,
  species = series,
  use_lv = FALSE,
  n_lv,
  trend_map,
  ...
)
}
\arguments{
\item{formula}{A \code{formula} object specifying the GAM observation model
formula. These are exactly like the formula
for a GLM except that smooth terms, \code{s()}, \code{te()}, \code{ti()}, \code{t2()}, as well
as time-varying \code{dynamic()} terms, nonparametric \code{gp()} terms and offsets using \code{offset()},
can be added to the right hand side to specify that the linear predictor
depends on smooth functions of predictors (or linear functionals of these).
In \code{nmix()} family models, the \code{formula} is used to set up a linear predictor
for the detection probability. Details of the formula
syntax used by \pkg{mvgam} can be found in \code{\link{mvgam_formulae}}}

\item{trend_formula}{An optional \code{formula} object specifying the GAM
process model formula. If
supplied, a linear predictor will be modelled for the latent trends to capture
process model evolution
separately from the observation model. Should not have a response variable
specified on the left-hand side
of the formula (i.e. a valid option would be \code{~ season + s(year)}). Also note
that you should not use
the identifier \code{series} in this formula to specify effects that vary across
time series. Instead you should use
\code{trend}. This will ensure that models in which a \code{trend_map} is supplied will
still work consistently
(i.e. by allowing effects to vary across process models, even when some time
series share the same underlying
process model). This feature is only currently available for \code{RW()}, \code{AR()}
and \code{VAR()} trend models.
In \code{nmix()} family models, the \code{trend_formula} is used to set up a linear
predictor for the underlying
latent abundance. Be aware that it can be very challenging to simultaneously
estimate intercept parameters
for both the observation model (captured by \code{formula}) and the process model
(captured by \code{trend_formula}).
Users are recommended to drop one of these using the \code{- 1} convention in the
formula right hand side.}

\item{factor_formula}{Can be supplied instead of \code{trend_formula} to match syntax from
\link{jsdgam}}

\item{knots}{An optional \code{list} containing user specified knot values to
be used for basis construction.
For most bases the user simply supplies the knots to be used, which must match
up with the \code{k} value supplied
(note that the number of knots is not always just \code{k}). Different terms can
use different numbers of knots,
unless they share a covariate}

\item{trend_knots}{As for \code{knots} above, this is an optional \code{list} of
knot values for smooth
functions within the \code{trend_formula}}

\item{trend_model}{\code{character} or  \code{function} specifying the time
series dynamics for the latent trend. Options are:
\itemize{
\item \code{'None'} (no latent trend component; i.e. the GAM component is all that
contributes to the linear predictor, and the observation process is the only
source of error; similarly to what is estimated by \code{\link[mgcv]{gam}})
\item \code{ZMVN} or \code{ZMVN()} (Zero-Mean Multivariate Normal; only available in
\code{Stan})
\item \code{'RW'} or \code{RW()}
\item \code{'AR1'} or \code{AR(p = 1)}
\item \code{'AR2'} or \code{AR(p = 2)}
\item \code{'AR3'} or \code{AR(p = 3)}
\item \code{'CAR1'} or \code{CAR(p = 1)}
\item \code{'VAR1'} or \code{VAR()} (only available in \code{Stan})
\item \code{'PWlogistic'}, \code{'PWlinear'} or \code{PW()} (only available in \code{Stan})
\item \code{'GP'} or \code{GP()} (Gaussian Process with squared exponential kernel;
only available in \code{Stan})}

For all trend types apart from \code{ZMVN()}, \code{GP()}, \code{CAR()} and \code{PW()}, moving
average and/or correlated process error terms can also be estimated (for
example, \code{RW(cor = TRUE)} will set up a multivariate Random Walk if \code{n_series > 1}).
It is also possible for many multivariate trends to estimate hierarchical
correlations if the data are structured among levels of a relevant grouping
factor. See \link{mvgam_trends} for more details and see \link{ZMVN} for an example.}

\item{family}{\code{family} specifying the exponential observation family for
the series. Currently supported
families are:
\itemize{
\item\code{gaussian()} for real-valued data
\item\code{betar()} for proportional data on \verb{(0,1)}
\item\code{lognormal()} for non-negative real-valued data
\item\code{student_t()} for real-valued data
\item\code{Gamma()} for non-negative real-valued data
\item\code{bernoulli()} for binary data
\item\code{poisson()} for count data
\item\code{nb()} for overdispersed count data
\item\code{binomial()} for count data with imperfect detection when the number
of trials is known;
note that the \code{cbind()} function must be used to bind the discrete
observations and the discrete number
of trials
\item\code{beta_binomial()} as for \code{binomial()} but allows for overdispersion
\item\code{nmix()} for count data with imperfect detection when the number of
trials is unknown and should be modeled via a State-Space N-Mixture model.
The latent states are Poisson, capturing the 'true' latent
abundance, while the observation process is Binomial to account for
imperfect detection.
See \code{\link{mvgam_families}} for an example of how to use this family}
Default is \code{poisson()}.
See \code{\link{mvgam_families}} for more details}

\item{data}{A \code{data.frame} or \code{list} containing the model response
variable and covariates
required by the GAM \code{formula} and optional \code{trend_formula}. Most
models should include columns:
\itemize{
\item\code{series} (a \code{factor} index of the series IDs; the number of
levels should be identical
to the number of unique series labels (i.e. \code{n_series = length(levels(data$series))}))
\item\code{time} (\code{numeric} or \code{integer} index of the time point for
each observation).
For most dynamic trend types available in \code{mvgam} (see argument \code{trend_model}),
time should be
measured in discrete, regularly spaced intervals (i.e. \code{c(1, 2, 3, ...)}).
However you can
use irregularly spaced intervals if using \code{trend_model = CAR(1)}, though
note that any
temporal intervals that are exactly \code{0} will be adjusted to a very small number
(\code{1e-12}) to prevent sampling errors. See an example of \code{CAR()} trends in
\code{\link{CAR}}
}
Note however that there are special cases where these identifiers are not
needed. For
example, models with hierarchical temporal correlation processes (e.g.
\code{AR(gr = region, subgr = species)})
should NOT include a \code{series} identifier, as this will be constructed
internally (see
\code{\link{mvgam_trends}} and \code{\link{AR}} for details). \code{mvgam} can also
fit models that do not
include a \code{time} variable if there are no temporal dynamic structures included
(i.e. \code{trend_model = 'None'} or
\code{trend_model = ZMVN()}). \code{data} should also include any other variables to be
included in
the linear predictor of \code{formula}}

\item{unit}{The unquoted name of the variable that represents the unit of analysis in \code{data} over
which latent residuals should be correlated. This variable should be either a
\code{numeric} or \code{integer} variable in the supplied \code{data}.
Defaults to \code{time} to be consistent with other functionalities
in \pkg{mvgam}, though note that the data need not be time series in this case. See examples below
for further details and explanations}

\item{species}{The unquoted name of the \code{factor} variable that indexes
the different response units in \code{data} (usually \code{'species'} in a JSDM).
Defaults to \code{series} to be consistent with other \code{mvgam} models}

\item{use_lv}{\code{logical}. If \code{TRUE}, use dynamic factors to estimate series'
latent trends in a reduced dimension format. Only available for
\code{RW()}, \code{AR()} and \code{GP()} trend models. Defaults to \code{FALSE}}

\item{n_lv}{\code{integer} the number of latent dynamic factors to use if
\code{use_lv == TRUE}. Cannot be \code{> n_series}. Defaults arbitrarily to
\code{min(2, floor(n_series / 2))}}

\item{trend_map}{Optional \code{data.frame} specifying which series should depend
on which latent trends. Useful for allowing multiple series to depend on the
same latent trend process, but with different observation processes. If
supplied, a latent factor model is set up by setting \code{use_lv = TRUE} and
using the mapping to set up the shared trends. Needs to have column names
\code{series} and \code{trend}, with integer values in the \code{trend} column to state which
trend each series should depend on. The \code{series} column should have a single
unique entry for each series in the data (names should perfectly match factor
levels of the \code{series} variable in \code{data}). Note that if this is supplied,
the intercept parameter in the process model will NOT be automatically suppressed.
Not yet supported for models in which the latent factors evolve in continuous time (\code{CAR()}).
See examples for details}

\item{...}{Not currently used}
}
\value{
either a \code{data.frame} containing the prior definitions (if any suitable
priors can be altered by the user) or \code{NULL}, indicating that no priors in the model
can be modified
}
\description{
This function lists the parameters that can have their prior distributions
changed for a given model, as well as listing their default distributions
}
\details{
Users can supply a model formula, prior to fitting the model, so that default priors can be inspected and
altered. To make alterations, change the contents of the \code{prior} column and supply this
\code{data.frame} to the \code{\link{mvgam}} or \code{\link{jsdgam}} functions using the argument \code{priors}. If using \code{Stan} as the backend,
users can also modify the parameter bounds by modifying the \code{new_lowerbound} and/or \code{new_upperbound} columns.
This will be necessary if using restrictive distributions on some parameters, such as a Beta distribution
for the trend sd parameters for example (Beta only has support on  \code{(0,1)}), so the upperbound cannot
be above \code{1}. Another option is to make use of the prior modification functions in \pkg{brms}
(i.e. \code{\link[brms]{prior}}) to change prior distributions and bounds (just use the name of the parameter that
you'd like to change as the \code{class} argument; see examples below)
}
\note{
Only the \code{prior}, \code{new_lowerbound} and/or \code{new_upperbound} columns of the output
should be altered when defining the user-defined priors for the model. Use only if you are
familiar with the underlying probabilistic programming language. There are no sanity checks done to
ensure that the code is legal (i.e. to check that lower bounds are smaller than upper bounds, for
example)
}
\examples{
\donttest{
# Simulate three integer-valued time series
library(mvgam)
dat <- sim_mvgam(trend_rel = 0.5)

# Get a model file that uses default mvgam priors for inspection (not always necessary,
# but this can be useful for testing whether your updated priors are written correctly)
mod_default <- mvgam(y ~ s(series, bs = 're') +
              s(season, bs = 'cc') - 1,
              family = nb(),
              data = dat$data_train,
              trend_model = AR(p = 2),
              run_model = FALSE)

# Inspect the model file with default mvgam priors
stancode(mod_default)

# Look at which priors can be updated in mvgam
test_priors <- get_mvgam_priors(y ~ s(series, bs = 're') +
                              s(season, bs = 'cc') - 1,
                              family = nb(),
                              data = dat$data_train,
                              trend_model = AR(p = 2))
test_priors

# Make a few changes; first, change the population mean for the series-level
# random intercepts
test_priors$prior[2] <- 'mu_raw ~ normal(0.2, 0.5);'

# Now use stronger regularisation for the series-level AR2 coefficients
test_priors$prior[5] <- 'ar2 ~ normal(0, 0.25);'

# Check that the changes are made to the model file without any warnings by
# setting 'run_model = FALSE'
mod <- mvgam(y ~ s(series, bs = 're') +
            s(season, bs = 'cc') - 1,
            family = nb(),
            data = dat$data_train,
            trend_model = AR(p = 2),
            priors = test_priors,
            run_model = FALSE)
stancode(mod)

# No warnings, the model is ready for fitting now in the usual way with the addition
# of the 'priors' argument

# The same can be done using 'brms' functions; here we will also change the ar1 prior
# and put some bounds on the ar coefficients to enforce stationarity; we set the
# prior using the 'class' argument in all brms prior functions
brmsprior <- c(prior(normal(0.2, 0.5), class = mu_raw),
              prior(normal(0, 0.25), class = ar1, lb = -1, ub = 1),
              prior(normal(0, 0.25), class = ar2, lb = -1, ub = 1))
brmsprior

mod <- mvgam(y ~ s(series, bs = 're') +
             s(season, bs = 'cc') - 1,
           family = nb(),
           data = dat$data_train,
           trend_model = AR(p = 2),
           priors = brmsprior,
           run_model = FALSE)
stancode(mod)

# Look at what is returned when an incorrect spelling is used
test_priors$prior[5] <- 'ar2_bananas ~ normal(0, 0.25);'
mod <- mvgam(y ~ s(series, bs = 're') +
             s(season, bs = 'cc') - 1,
            family = nb(),
            data = dat$data_train,
            trend_model = AR(p = 2),
            priors = test_priors,
            run_model = FALSE)
stancode(mod)

# Example of changing parametric (fixed effect) priors
simdat <- sim_mvgam()

# Add a fake covariate
simdat$data_train$cov <- rnorm(NROW(simdat$data_train))

priors <- get_mvgam_priors(y ~ cov + s(season),
                          data = simdat$data_train,
                          family = poisson(),
                          trend_model = AR())

# Change priors for the intercept and fake covariate effects
priors$prior[1] <- '(Intercept) ~ normal(0, 1);'
priors$prior[2] <- 'cov ~ normal(0, 0.1);'

mod2 <- mvgam(y ~ cov + s(season),
             data = simdat$data_train,
             trend_model = AR(),
             family = poisson(),
             priors = priors,
             run_model = FALSE)
stancode(mod2)

# Likewise using 'brms' utilities (note that you can use
# Intercept rather than `(Intercept)`) to change priors on the intercept
brmsprior <- c(prior(normal(0.2, 0.5), class = cov),
              prior(normal(0, 0.25), class = Intercept))
brmsprior

mod2 <- mvgam(y ~ cov + s(season),
             data = simdat$data_train,
             trend_model = AR(),
             family = poisson(),
             priors = brmsprior,
             run_model = FALSE)
stancode(mod2)

# The "class = 'b'" shortcut can be used to put the same prior on all
# 'fixed' effect coefficients (apart from any intercepts)
set.seed(0)
dat <- mgcv::gamSim(1, n = 200, scale = 2)
dat$time <- 1:NROW(dat)
mod <- mvgam(y ~ x0 + x1 + s(x2) + s(x3),
            priors = prior(normal(0, 0.75), class = 'b'),
            data = dat,
            family = gaussian(),
            run_model = FALSE)
stancode(mod)
}
}
\seealso{
\code{\link{mvgam}}, \code{\link{mvgam_formulae}}, \code{\link[brms]{prior}}
}
\author{
Nicholas J Clark
}
