## For performance reasons, this vignette builds from pre-calculated data.
##
## To run all calculations, set 'do.calc <- TRUE' in the vignette's first code chunk.
## Building the vignette will take a while.
The GUTS-package (Albert et al., 2016) implements the individual tolerance model GUTS-RED-IT, the stochastic death model GUTS-RED-SD and the GUTS-RED-proper model, which combines the GUTS-RED-IT and the GUTS-RED-SD model (Jager et al., 2011). We demonstrate a Bayesian model calibration of the proper model on the case study of the freshwater crustacean Gammarus pulex being exposed to Diazinon in three pulsed toxicity tests. This vignette follows the description in Albert et al. (2016), but uses a log-logistic threshold distribution, a slightly different calibration procedure and results analysis. The experiment data is provided with the GUTS-package.
Load the GUTS-package
library(GUTS)
#> Loading required package: Rcpp
packageVersion("GUTS")
#> [1] '1.2.5'
Load the Diazinon data set
data(diazinon)
str(diazinon)
#> List of 12
#> $ C1 : num [1:10] 102.7 97.6 0 0 103.9 ...
#> $ C2 : num [1:9] 101 106 0 0 104 ...
#> $ C3 : num [1:8] 100.6 94.6 0 0 100.6 ...
#> $ Ct1: num [1:10] 0 1.02 1.03 2.99 3.01 ...
#> $ Ct2: num [1:9] 0 1.02 1.03 8 8.01 ...
#> $ Ct3: num [1:8] 0 1.02 1.03 16 16.01 ...
#> $ y1 : num [1:23] 70 66 61 55 31 31 29 26 24 22 ...
#> $ y2 : num [1:23] 70 65 59 56 54 50 47 46 46 40 ...
#> $ y3 : num [1:23] 70 65 59 55 53 51 48 46 46 46 ...
#> $ yt1: int [1:23] 0 1 2 3 4 5 6 7 8 9 ...
#> $ yt2: int [1:23] 0 1 2 3 4 5 6 7 8 9 ...
#> $ yt3: int [1:23] 0 1 2 3 4 5 6 7 8 9 ...
Setting up a GUTS-Proper-object requires for each toxicity test
C
at each concentration measurement time
Ct
y
at each abundance measurement time yt
model = "Proper"
dist = "loglogistic"
We generate three GUTS-proper-objects; one for each of the toxicity test. The GUTS-objects are collected in a list.
<- list(
guts_object C1 = guts_setup(
C = diazinon[["C1"]], Ct = diazinon[["Ct1"]],
y = diazinon[["y1"]], yt = diazinon[["yt1"]],
model = "Proper", dist = "loglogistic"
),C2 = guts_setup(
C = diazinon[["C2"]], Ct = diazinon[["Ct2"]],
y = diazinon[["y2"]], yt = diazinon[["yt2"]],
model = "Proper", dist = "loglogistic"
),C3 = guts_setup(
C = diazinon[["C3"]], Ct = diazinon[["Ct3"]],
y = diazinon[["y3"]], yt = diazinon[["yt3"]],
model = "Proper", dist = "loglogistic"
) )
library('adaptMCMC') # Function `MCMC()`, Monte Carlo Markov Chain.
#> Loading required package: parallel
#> Loading required package: coda
#> Loading required package: Matrix
The list of GUTS-objects is used to calculate the joint likelihood of all studies.
<- function( pars, guts_objects, isOutOfBoundsFun) {
logposterior if ( isOutOfBoundsFun(pars) ) return(-Inf)
return(
sum(sapply( guts_objects, function(obj) guts_calc_loglikelihood(obj, pars) )))
}
The logposterior function is formulated with a function that defines parameter bounds.
Constraints on parameters should consider
is.infinite(p)
constraints parameters to finite
valuesp < 0
confines rates and thresholds to positive
valuesp[3] > 30
confines the killing rate to realistic
values. The constraint avoids an unrealistic local optimum of the
optimization problem, i.e. a high killing rate simulates the effect of a
low tolerance threshold, and makes the threshold estimation
obsolete.p[5] <= 1
is assumed, to exclude that the mode of
the log-logistic threshold distribution is 0, due to the shape
parameter. Note that the mode can approach 0 if the estimated median
p[4]
approaches 0. Therefore, this constraint stabilises
estimation of low threshold values.exp(8/p[5]) * p[4] > 1e200
avoids numerical problems
when approximating the log-logistic function for unrealistically high
median values p[4]
and extremely low shape values
p[5]
that result in a threshold distribution confined at a
zero threshold<- function(p) any(
is_out_of_bounds_fun_Proper is.na(p),
is.infinite(p),
< 0,
p 3] > 30 ,
p[5] <= 1,
p[exp(8/p[5]) * p[4] > 1e200
)
Suitable initial parameter values for the MCMC algorithm are searched by optimization.
<- c(0.05, 0.5, 1, 10, 5)
pars_start_Proper names(pars_start_Proper) <- c("hb", "ke", "kk", "mn", "beta")
# to avoid conflicts with boundaries of L-BFGS-B, the minimum logposteriror value is limited to -1e16
<- function(pars, guts_objects, isOutOfBoundsFun) {
optim_fun return(
max(-1e16,logposterior(pars, guts_objects, isOutOfBoundsFun) )
)
}
<- optim(pars_start_Proper, optim_fun, lower = c(1e-6, 1e-6, 1e-6, 1e-6, 1e-6), upper = c(1, 1, 30, 40, 20), method = "L-BFGS-B", control = list(trace = 0, fnscale = -1), guts_objects = guts_object, isOutOfBoundsFun = is_out_of_bounds_fun_Proper) optim_result_Proper
if (optim_result_Proper$convergence != 0) {
warning("Optimizing initial values has not converged. Using non-optimized initial values.")
$par <- pars_start_Proper
optim_result_Proper
}
print(optim_result_Proper)
#> $par
#> hb ke kk mn beta
#> 0.05279389 0.08650483 0.92822495 13.04188727 4.53323237
#>
#> $value
#> [1] -571.9289
#>
#> $counts
#> function gradient
#> 41 41
#>
#> $convergence
#> [1] 0
#>
#> $message
#> [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
<- optim_result_Proper$par
mcmc_pars_Proper <- diag( (mcmc_pars_Proper/10)^2 + .Machine$double.eps )
mcmc_sigma_Proper <- MCMC(p = logposterior,
mcmc_result_Proper init = mcmc_pars_Proper, scale = mcmc_sigma_Proper, adapt = 20000, acc.rate = 0.4, n = 150000,
guts_objects = guts_object, isOutOfBoundsFun = is_out_of_bounds_fun_Proper
)
#exclude burnin and thin by 20
$samples <- mcmc_result_Proper$samples[seq(50001, 150000, by = 20),]
mcmc_result_Proper$log.p <- mcmc_result_Proper$log.p[seq(50001, 150000, by = 20)] mcmc_result_Proper
The top four graphs show mixing and distribution of the parameters. The last row “LL” show mixing and distribution of the logposterior. A final plot indicates correlations among calibrated parameters.
if (all(is.finite(mcmc_result_Proper$log.p))) {
par( mfrow = c(dim(mcmc_result_Proper$samples)[2] + 1, 2) , mar = c(5,4,1,0.5))
plot(as.mcmc(cbind(mcmc_result_Proper$samples, LL = mcmc_result_Proper$log.p)), auto.layout = FALSE)
par(op)
else {
} par( mfrow = c(dim(mcmc_result_Proper$samples)[2], 2) , mar = c(5,4,1,0.5))
plot(as.mcmc(mcmc_result_Proper$samples), auto.layout = FALSE)
par(op)
}
<- function(x, y, ...)
panel.cor
{par(usr = c(0, 1, 0, 1))
<- as.character(format(cor(x, y), digits=2))
txt text(0.5, 0.5, txt, cex = max(0.1, 4 * abs(cor(x, y)) + 1))
}pairs(mcmc_result_Proper$samples, upper.panel=panel.cor)
This function summarizes the calibrated parameter values
<- function(sampMCMC, plot = TRUE) {
eval_MCMC <- sampMCMC$samples[which.max(sampMCMC$log.p),]
bestFit <- apply(sampMCMC$samples, 2, quantile, probs = c(0.025, 0.5, 0.975))
qu if (plot) {
plot(seq(dim(sampMCMC$samples)[2]), bestFit, pch = 20, cex = 2, ylim = range(qu),
xaxt = "n", xlab = "Model parameter", ylab = "Parameter value")
arrows(x0 = seq(dim(sampMCMC$samples)[2]), y0 = qu[1,], y1 = qu[3,], angle = 90, length = 0.1, code = 3)
axis(side = 1, at = seq(dim(sampMCMC$samples)[2]), dimnames(sampMCMC$samples)[[2]])
}<- rbind(bestFit, qu)
res rownames(res)[1] <- "best"
return(res)
}
Summarize the parameter values. Black dots indicate best estimates and 95% credible interval.
eval_MCMC(mcmc_result_Proper)
#> hb ke kk mn beta
#> best 0.05483402 0.07962859 2.019133 13.018437 4.581081
#> 2.5% 0.04559541 0.02360376 1.051607 4.637583 3.355161
#> 50% 0.05733930 0.08666466 4.605420 14.354177 5.317460
#> 97.5% 0.07038815 0.17226421 28.022584 25.336885 10.851111
The GUTS-package can be applied to forecast survival. Forecasting is demonstrated for one made-up experimental setting.
Values to be specified are:
C
Ct
yt
: a vector starting with 0y[1]
in a vector
y
of the same length as yt
. Elements other
than y[1]
can be set arbitrarily. The values are ignored in
forecasts.We assume three pulsed applications exactly at time steps 0, 10 and 20. We assume that the substance would dissipate over time. Therefore, to monitor the dissipation, the concentration is measured at several time steps. The initial population size is set to 100 individuals. Projections are made for 26 time steps.
<-
guts_obj_forecast guts_setup(
C = c(60, 40, 6, 0, 0, 60, 40, 6, 0, 0, 60, 40, 6, 0),
Ct = c(0, 2.2, 4, 6, 9.9, 10, 12.2, 14, 16, 19.9, 20, 22.2, 24, 26),
y = c(100, rep(0,26)),
yt = seq(0,26),
model = "Proper", dist = "loglogistic", N = 1000, M = 10000
)
We forecast survival probabilities and damage over time for each dose concentration and each parameter set in the thinned posterior. This procedure takes into account the uncertainties in parameter estimates.
<- mcmc_result_Proper$samples
mcmc_forecasts_paras
<- apply(mcmc_forecasts_paras, 1,
forec function(par) list(
guts_calc_survivalprobs(gobj = guts_obj_forecast, par = par),
guts_report_damage(gobj = guts_obj_forecast)$damage
) )
# extract the damage matrix
<- sapply(forec, function(x) x[[2]])
damage <- sapply(forec, function(x) x[[1]])
survProb rm(forec)
# analyse damage
<- apply(damage, 1, function(x) quantile(x, probs = c(0.025, 0.5, 0.975)))
damage.qu
<- apply(survProb, 1, function(x) quantile(x, probs = c(0.025, 0.5, 0.975)))
survProb.qu
<- (- apply(survProb, 2, diff, 1) ) / survProb[seq(2,dim(survProb)[1]),]
hazard
<- apply(hazard, 1, function(x) quantile(x, probs = c(0.025, 0.5, 0.975))) hazard.qu
The plots show median and 95%-CI over time of the (i) measured external concentration (red) and damage (black), (ii) survival probability and (iii) daily hazard.
par(mfrow = c(3,1), mar = c(0.5,4,0.5,0.5), oma = c(2.5,0,0,0), las = 1, cex = 1)
plot(guts_obj_forecast$Ct, guts_obj_forecast$C,
xlim = range(guts_obj_forecast$yt),
ylim = range(c(guts_obj_forecast$C, damage.qu)),
xlab = "", ylab = "exposure dose",
pch = 20, cex = 2, lwd = 2, col = "red",
type = "b", xaxt = "n"
)<- seq(0, max(guts_obj_forecast$yt),
damage.ti length.out = dim(damage.qu)[2])
lines(damage.ti, damage.qu[2,], type = "l", ylim = range(damage.qu), xlab = "time", ylab = "damage D", lwd = 2)
lines(damage.ti, damage.qu[1,], lty = 2, lwd = 2)
lines(damage.ti, damage.qu[3,], lty = 2, lwd = 2)
legend("topright", legend = c("external", "proj. internal", " (damage)"), fill = c("red", "black", NA), border = c("red", "black", NA), cex = 0.8, bty = "n")
<- seq(0, max(guts_obj_forecast$yt))
prob.ti plot(prob.ti, survProb.qu[2,],
ylim = range(survProb.qu),
xlab = "", ylab = "proj. survival", xaxt = "n",
pch = 20, cex = 2)
arrows(prob.ti[-1], survProb.qu[1,-1], prob.ti[-1], survProb.qu[3,-1], angle = 90, code = 3, length = 0.1, lwd = 2)
plot(prob.ti[-1] - 0.5, hazard.qu[2,],
xlim = range(prob.ti), ylim = range(hazard.qu),
xlab = "", ylab = "proj. daily hazard", pch = 20, cex = 2,
col = "blue")
arrows(prob.ti[-1] - 0.5, hazard.qu[1,], prob.ti[-1] - 0.5, hazard.qu[3,], angle = 90, code = 3, length = 0.1, col = "blue", lwd = 2)
mtext(text = "time", side = 1, line = 1.4, outer = TRUE)
The time series demonstrates an accumulative effect: The second and third exposure pulse exert a higher hazard than the first exposure pulse due to accumulating damage.
Albert, C., Vogel, S., and Ashauer, R. (2016). Computationally efficient implementation of a novel algorithm for the General Unified Threshold Model of Survival (GUTS). PLOS Computational Biology, 12(6), e1004978. doi: 10.1371/journal.pcbi.1004978.
Jager, T., Albert, C., Preuss, T., and Ashauer, R. (2011). General Unified Threshold Model of Survival - a toxicokinetic toxicodynamic framework for ecotoxicology. Environmental Science & Technology, 45(7), 2529–2540, doi: 10.1021/es103092a.