Bayesian methods allow us to quantify the uncertainty in the calibrated parameters even in the presence of non-identifiability (F. Alarid-Escudero et al. 2018).
To conduct a Bayesian calibration of the three-state model, we use the incremental mixture importance sampling (IMIS) algorithm (Teele, Raftery, and Mond 2006).
The IMIS algorithm has been used to calibrate health policy models (A. E. Raftery and Bao 2010, Menzies et al. (2017), Easterly et al. (2018))
# calibration functionality
library(lhs)
library(IMIS)
library(matrixStats) # package used for sumamry statistics
# visualization
library(plotrix)
library(psych)
load("CRSTargets.RData")
# Plot the targets
# TARGET 1: Survival ("Surv")
plotrix::plotCI(x = CRS.targets$Surv$Time, y = CRS.targets$Surv$value,
ui = CRS.targets$Surv$ub,
li = CRS.targets$Surv$lb,
ylim = c(0, 1),
xlab = "Time", ylab = "Pr Survive")
source("markov_crs.R") # creates the function markov_crs()
We’re going to use the IMIS()
function within the IMIS
package. This function needs three functions to be defined in the environment: prior()
, likelihood()
, and sample.prior()
. In the description of the IMIS
package authors: “prior(x) calculates prior density of x, likelihood(x) calculates the likelihood of x, and sample.prior(n) draws n samples from the prior distribution”. So, we’ll define each of those.
The following defines a uniform prior on each of the parameters.
# names and number of input parameters to be calibrated
param.names <- c("p.Mets","p.DieMets")
n.param <- length(param.names)
# range on input search space
lb <- c(p.Mets = 0.04, p.DieMets = 0.04) # lower bound
ub <- c(p.Mets = 0.16, p.DieMets = 0.16) # upper bound
sample.prior <- function(n.samp){
m.lhs.unit <- lhs::randomLHS(n = n.samp, k = n.param)
m.param.samp <- matrix(nrow = n.samp, ncol = n.param)
colnames(m.param.samp) <- param.names
for (i in 1:n.param){
m.param.samp[, i] <- qunif(m.lhs.unit[,i],
min = lb[i],
max = ub[i])
}
return(m.param.samp)
}
# view resulting parameter set samples
psych::pairs.panels(sample.prior(1000))
Note that we could define a different prior on the parameters instead of uniform, like
m.param.samp[, i] <- qbeta(m.lhs.unit[,i],
shape1 = 1,
shape2 = 1)
The following function evaluates the prior at v.params
.
f_log_prior <- function(v.params){
if(is.null(dim(v.params))) { # If vector, change to matrix
v.params <- t(v.params)
}
n.samp <- nrow(v.params)
colnames(v.params) <- param.names
lprior <- rep(0, n.samp)
for (i in 1:n.param){
lprior <- lprior + dunif(v.params[, i],
min = lb[i],
max = ub[i],
log = T)
}
return(lprior)
}
The result of this function is a single number - the log of the prior density at v.params
v.params.test <- c("p.Mets" = 0.1, "p.DieMets" = 0.1)
f_log_prior(v.params = v.params.test)
## p.Mets
## 4.240527
We exponentiate to get the prior density at v.params
.
prior <- function(v.params) {
exp(f_log_prior(v.params))
}
The final piece for IMIS
is the likelihood function. As before, we calculate the log-likelihood function and exponentiate to get likelihood()
.
# number of calibration targets
target.names <- c("Surv")
n.target <- length(target.names)
f_llik <- function(v.params){
# par_vector: a vector (or matrix) of model parameters
if(is.null(dim(v.params))) { # If vector, change to matrix
v.params <- t(v.params)
}
n.samp <- nrow(v.params)
v.llik <- matrix(0, nrow = n.samp, ncol = n.target)
llik.overall <- numeric(n.samp)
for(j in 1:n.samp) { # j=1
jj <- tryCatch( {
### Run model for parameter set "v.params" ###
model.res <- markov_crs(v.params[j, ])
### Calculate log-likelihood of model outputs to targets ###
# Survival ("Surv")
v.llik[j, 1] <- sum(dnorm(x = CRS.targets$Surv$value,
mean = model.res$Surv,
sd = CRS.targets$Surv$se,
log = T))
# OVERALL
llik.overall[j] <- sum(v.llik[j, ])
}, error = function(e) NA)
if (is.na(jj)) { llik.overall <- -Inf }
} # End loop over sampled parameter sets
# return LLIK
return(llik.overall)
}
The results are a single number:
f_llik(v.params = v.params.test)
## [1] -406.8578
Now, define the likelihood:
likelihood <- function(v.params){
exp(f_llik(v.params))
}
likelihood(v.params = v.params.test)
## [1] 2.013203e-177
f_log_post <- function(v.params) {
lpost <- f_log_prior(v.params) + f_llik(v.params)
return(lpost)
}
f_log_post(v.params = v.params.test)
## p.Mets
## -402.6173
# number of resamples
n.resamp <- 1000
# run IMIS
fit.imis <- IMIS(B = 1000, # the incremental sample size at each iteration of IMIS.
B.re = n.resamp, # the desired posterior sample size
number_k = 10, # the maximum number of iterations in IMIS.
D = 0)
## [1] "10000 likelihoods are evaluated in 0.05 minutes"
## [1] "Stage MargLike UniquePoint MaxWeight ESS"
## [1] 1.000 151.197 238.293 0.012 153.201
## [1] 2.000 151.188 356.631 0.012 215.799
## [1] 3.000 151.132 520.675 0.003 610.020
## [1] 4.000 151.154 617.248 0.002 832.580
## [1] 5.000 151.128 702.815 0.002 1209.714
The posterior distribution is stored in fit.imis$resample
:
# obtain posterior
m.calib.post <- fit.imis$resample
head(m.calib.post)
## p.Mets p.DieMets
## [1,] 0.04741892 0.12722364
## [2,] 0.12158817 0.04773289
## [3,] 0.04780613 0.12938193
## [4,] 0.11303551 0.05242054
## [5,] 0.04710601 0.13326130
## [6,] 0.12181387 0.04783988
Let’s look at the results!
# Plot the 1000 draws from the posterior
plot(m.calib.post,
xlim = c(lb[1], ub[1]), ylim = c(lb[2], ub[2]),
xlab = param.names[1], ylab = param.names[2])
# Plot the 1000 draws from the posterior with marginal histograms
psych::pairs.panels(m.calib.post)
Other posterior statistics - the mean, median, and mode. Note that the mode gives us the maximum a posteriori estimate:
# Compute posterior mean
v.calib.post.mean <- colMeans(m.calib.post)
v.calib.post.mean
## p.Mets p.DieMets
## 0.08647894 0.08656316
# Compute posterior median and 95% credible interval
m.calib.post.95cr <- matrixStats::colQuantiles(m.calib.post, probs = c(0.025, 0.5, 0.975))
m.calib.post.95cr
## 2.5% 50% 97.5%
## p.Mets 0.04521558 0.05803659 0.1436656
## p.DieMets 0.04527114 0.08814061 0.1444201
# compute maximum a posteriori
v.calib.like <- likelihood(m.calib.post)
v.calib.post.map <- m.calib.post[which.max(v.calib.like), ]
v.out.post.map <- markov_crs(v.calib.post.map)
# TARGET 1: Survival ("Surv")
plotrix::plotCI(x = CRS.targets$Surv$Time, y = CRS.targets$Surv$value,
ui = CRS.targets$Surv$ub,
li = CRS.targets$Surv$lb,
ylim = c(0, 1),
xlab = "Time", ylab = "Pr Survive")
grid()
for (i in 1:nrow(m.calib.post)){
mod_output <- markov_crs(m.calib.post[i, ])
lines(x = CRS.targets$Surv$Time,
y = mod_output$Surv,
col = "darkorange",
lwd = 0.1)
}
lines(x = CRS.targets$Surv$Time,
y = v.out.post.map$Surv,
col = "dodgerblue",
lwd = 2)
As before, we can use more than one target in the calibration. Insteady of definining a likelihood vector, we would want to define a likelihood matrix. Then, we can sum the \(i\)th row of the matrix to get a total likelihood for the the \(i\)th parameter set.
n.target <- 2 # or any number of targets that you have
# define log-likelihood function
f_llik_2tar <- function(v.params){
# par_vector: a vector (or matrix) of model parameters
if(is.null(dim(v.params))) { # If vector, change to matrix
v.params <- t(v.params)
}
n.samp <- nrow(v.params)
v.llik <- matrix(0, nrow = n.samp, ncol = n.target)
llik.overall <- numeric(n.samp)
for(j in 1:n.samp) { # j=1
jj <- tryCatch( {
### Run model for parametr set "v.params" ###
model.res <- markov_crs(v.params[j, ])
### Calculate log-likelihood of model outputs to targets ###
# TARGET 1: Survival ("Surv")
# log likelihood
v.llik[j, 1] <- sum(dnorm(x = CRS.targets$Surv$value,
mean = model.res$Surv,
sd = CRS.targets$Surv$se,
log = T))
# TARGET 2:
# log likelihood
v.llik[j, 2] <- sum(dnorm(x = CRS.targets$Target2$value,
mean = model.res$Target2,
sd = CRS.targets$Target2$se,
log = T))
# OVERALL
llik.overall[j] <- sum(v.llik[j, ])
}, error = function(e) NA)
if(is.na(jj)) { llik.overall <- -Inf }
} # End loop over sampled parameter sets
# return LLIK
return(llik.overall)
}
# define likelihood function
likelihood <- function(v.params){
exp(f_llik_2tar(v.params))
}
Then, the calibration would proceed as before.
Alarid-Escudero, Fernando, Richard F. MacLehose, Yadira Peralta, Karen M Kuntz, and Eva A Enns. 2018. “Nonidentifiability in Model Calibration and Implications for Medical Decision Making.” Medical Decision Making 38 (7). University of Minnesota Shcool of Public Health: 810–21. doi:10.1177/0272989X18792283.
Easterly, C. W., F. Alarid-Escudero, E. A. Enns, and S. Kulasingam. 2018. “Revisiting Assumptions About Age-Based Mixing Representations in Mathematical Models of Sexually Transmitted Infections.” Vaccine, August. doi:10.1016/j.vaccine.2018.07.058.
Menzies, Nicolas A., Djøra I. Soeteman, Ankur Pandya, and Jane J. Kim. 2017. “Bayesian Methods for Calibrating Health Policy Models: A Tutorial.” PharmacoEconomics. Springer International Publishing, 1–12. doi:10.1007/s40273-017-0494-4.
Raftery, Adrian E., and Le Bao. 2010. “Estimating and Projecting Trends in HIV/AIDS Generalized Epidemics Using Incremental Mixture Importance Sampling.” Biometrics 66 (4): 1162–73. doi:10.1111/j.1541-0420.2010.01399.x.
Teele, Russell J S, Adrian E Raftery, and Mary J E Mond. 2006. “Computing Normalizing Constants for Finite Mixture Models via Incremental Mixture Importance Sampling (IMIS).” Journal of Computational and Graphical Statistics 15 (3): 712–34. doi:10.1198/106186006X132358.