z_exp_pred
og z_ctl_pred
fra Stan-fits er til posterior predictive checking. Da modellerne i denne notebook er ret simple, bør disse værdiers medianer være lig de observerede i de kliniske studier.tidyr
og dplyr
er til data-wrangling, ggplot2
og gridExtra
til visualisering, rstan
til at køre Stan-modeller.
for (p in c("ggplot2", "tidyr", "dplyr", "rstan", "gridExtra"))
library(p, character.only = TRUE)
knitr::opts_chunk$set(fig.align = "center", fig.width = 8)
options(width = 800, warn = -1) # avoid wrapped output
Definere et (ret) tæt grid, over hvilket vi kan evaluere probability density function (f.eks. dbeta
):
theta_grid <- seq(0, 1, length.out = 1000)
Sæt standardindstillinger for ggplots:
base_theme <- theme_minimal(base_size = 10) +
theme(axis.text.y = element_blank(), axis.title.y = element_blank(),
legend.title = element_blank(), panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank())
theme_set(base_theme)
Brug samme inddeling af logaritmiske x-akser i plots:
log_breaks <- c(0.1, 0.2, 0.33, 0.5, 0.67, 1, 1.5, 2, 3, 5, 10)
Data frame med data fra de tre foregående studier og EOLIA-studiet:
ecmo_df <- data.frame(z_exp = c(25, 18, 35, 44),
n_exp = c(68, 75, 98, 124),
z_ctl = c(44, 38, 54, 57),
n_ctl = c(90, 75, 98, 125))
Samlet plot af fordelingerne af prior, likelihood og posterior for beta prior og binomial likelihood (konjugat analyse).
plp_plot <- function(a, b, z, n) { # prior-likelihood-posterior plot
grid_res <- length(theta_grid)
pretty <- c(prior = "1. Prior (beta)",
likelihood = "2. Likelihood (binomial)",
posterior = "3. Posterior (beta)")
data.frame(theta_grid,
prior = dbeta(theta_grid, a, b),
likelihood = dbinom(z, n, theta_grid),
posterior = dbeta(theta_grid, a + z, b + n - z)) %>%
mutate(likelihood = likelihood / sum(likelihood / grid_res)) %>% # normalise to AUC = 1
gather(k, dens, -pi) %>%
mutate(k = pretty[k]) %>%
ggplot(aes(x = pi, y = dens)) +
geom_area(fill = "blue", alpha = 0.5, size = 0) +
facet_wrap(~ k, nrow = 1) +
coord_cartesian(xlim = c(0.1, 0.6)) +
guides(fill = FALSE) +
base_theme
}
Bruger grid search over \(\theta\). Forudsætter, at theta_grid
-objekt findes (er defineret øverst).
beta_df <- function(a, g, k = 0, n = 100) { # helper function for simpler code
data.frame(theta = theta_grid, dens = dbeta(theta_grid, a, n - a),
group = g, k = k, stringsAsFactors = FALSE)
}
Bruger grid search over \(\theta\). Forudsætter, at theta_grid
-objekt findes (er defineret øverst).
binom_df <- function(z, n, g) { # helper function for simpler code
data.frame(theta = theta_grid, dens = dbinom(z, n, theta_grid),
group = g, stringsAsFactors = FALSE)
}
show_prior_RR <- function(a_exp, b_exp, a_ctl, b_ctl) {
RR_prior <- rbeta(10000, a_exp, b_exp) / rbeta(10000, a_ctl, b_ctl)
q <- round(quantile(RR_prior, c(0.025, 0.975, 0.5)), 2)
d <- with(density(RR_prior), data.frame(x, y))
d_q <- filter(d, between(x, q[1], q[2]))
d_m <- d[which.min(abs(median(RR_prior) - d$x)), ]
message("Prior RR median (95% CrI): ", q[3], " (", q[1], "-", q[2], ")")
for (i in c(1, 0.9, 0.8, 0.67))
message("Sandsynlighed for prior RR < ", i, ": ", round(mean(RR_prior < i), 2) * 100, "%")
ggplot() +
geom_line(aes(x, y), d, colour = "black") +
geom_area(aes(x, y), d_q, fill = "blue", alpha = 0.5) +
geom_line(aes(x = rep(d_m$x, 2), y = c(0, d_m$y)), linetype = 2) +
scale_x_continuous(trans = "log10", breaks = log_breaks) +
labs(title = "Prior fordeling af RR", x = "",
subtitle = "—den stiplede linie angiver medianen, det blå område 95% CrI")
}
Giver også posteriore sandsynligheder for, at parameteren er større eller mindre end forskellige værdier (sv.t. tabel 2 og 3 i Coligher 2018).
posterior_metric <- function(a_exp, b_exp, z_exp = 44, n_exp = 124,
a_ctl, b_ctl, z_ctl = 57, n_ctl = 125,
metric = "RR", zoom = FALSE) {
# Draw 10,000 samples from posteriors
samples_exp <- rbeta(10000, a_exp + z_exp, b_exp + n_exp - z_exp)
samples_ctl <- rbeta(10000, a_ctl + z_ctl, b_ctl + n_ctl - z_ctl)
m <- switch(metric,
ARR = samples_ctl - samples_exp,
NNT = 1 / (samples_ctl - samples_exp),
RR = samples_exp / samples_ctl)
thresholds <- switch(metric,
ARR = c(2, 4, 6, 8, 10, 20) / 100,
NNT = c(0, 2, 4, 6, 8, 10, 20),
RR = c(1, 0.9, 0.8, 0.67))
# Appropriate operator differs according to effect metric used
o <- switch(metric, ARR = ">=", NNT = "<=", RR = "<")
q <- round(quantile(m, c(0.025, 0.975, 0.5)), 2)
message("Median posterior ", metric, " (95% CrI): ", q[3], " (", q[1], "-", q[2], ")")
for (i in thresholds)
message(sprintf("Sandsynlighed for prior %s %s %s: %s%%", metric, o, i,
round(mean(do.call(o, list(m, i))), 2) * 100))
lim <- quantile(m, c(0.01, 0.99))
if (zoom) m <- m[between(m, lim[1], lim[2])] # ignore outliers in the plot
d <- with(density(m), data.frame(x, y))
d_q <- filter(d, between(x, q[1], q[2]))
d_m <- d[which.min(abs(median(m) - d$x)), ]
p <- ggplot() +
geom_line(aes(x, y), d, colour = "black") +
geom_area(aes(x, y), d_q, fill = "blue", alpha = 0.5) +
geom_line(aes(x = rep(d_m$x, 2), y = c(0, d_m$y)), linetype = 2) +
labs(title = paste("Posterior fordeling af", metric),
subtitle = "—den stiplede linie angiver medianen, det blå område 95% CrI")
if (metric == "RR") {
p + scale_x_continuous(trans = "log", breaks = log_breaks)
} else {
p
}
}
prior_post_plot <- function(prior_vals, fit) {
RR <- as.data.frame(fit)$RR
q <- round(quantile(RR, c(0.025, 0.975, 0.5)), 2)
message("Median for posterior meta-analytic RR (95% CrI): ", q[3], " (", q[1], "-", q[2], ")")
for (i in c(1, 0.9, 0.8, 0.67))
message(sprintf("Sandsynlighed for prior RR < %s: %s%%", i, round(mean(RR < i) * 100)))
m <- prior_vals$median
sigma <- prior_vals$sigma
ggplot() +
stat_density(aes(rlnorm(10000, log(m), sigma)), geom = "line", colour = "orange") +
stat_density(aes(RR), geom = "line", colour = "darkblue") +
geom_errorbarh(aes(xmin = q[1], xmax = q[2], y = 0), height = 0, colour = "darkblue") +
geom_point(aes(x = q[3], y = 0), size = 3, colour = "darkblue") +
scale_x_continuous(trans = "log", breaks = log_breaks) +
coord_cartesian(xlim = c(0.2, 2)) +
theme(axis.text.y = element_text())
}
writeLines(readLines("lognormal_model_Coligher2018.stan"))
## data {
## int n_exp;
## int z_exp;
## int n_ctl;
## int z_ctl;
##
## // Prior values for log-RR distribution
## real<lower=0> median; // Coligher 2018 uses median as central tendency metric
## real<lower=0> sigma;
## }
##
## transformed data {
## real mu = log(median); // median is converted to mean, to be used in lognormal
## }
##
## parameters {
## real<lower=0> RR; // risk ratio
## real<lower=0, upper=1> theta_ctl; // risk in non-exposed in each study
## }
##
## transformed parameters {
## real<lower=0, upper=1> theta_exp; // risk in exposed in each study
## theta_exp = RR * theta_ctl;
## }
##
## model {
## RR ~ lognormal(mu, sigma);
##
## theta_ctl ~ uniform(0, 1); // normal(0.5, 0.3);
## z_ctl ~ binomial(n_ctl, theta_ctl);
##
## z_exp ~ binomial(n_exp, theta_exp);
## }
##
## generated quantities {
## int z_ctl_pred;
## int z_exp_pred;
##
## z_ctl_pred = binomial_rng(n_ctl, theta_ctl);
## z_exp_pred = binomial_rng(n_exp, theta_exp);
## }
writeLines(readLines("hier_lognormal_model_Coligher2018_for_prior.stan"))
## data {
## int n_study; // number of trials
## int n_exp[n_study]; // number of patients in exposed group
## int z_exp[n_study]; // number of events in exposed group
## int n_ctl[n_study]; // number of patients in control group
## int z_ctl[n_study]; // number of events in control group
##
## // Prior values for log-RR distribution
## real<lower=0> median; // Coligher 2018 uses median as central tendency metric
## real<lower=0> phi; // hyperprior
## real<lower=0> tau; // hyperprior
## real<lower=0> sigma; // hyperprior
## }
##
## transformed data {
## real mu0 = log(median); // median is converted to mean, to be used in lognormal
## }
##
## parameters {
## real<lower=0> RR; // risk ratio
## real<lower=0> mu; // hyperparameter
## real<lower=0> RR_i[n_study]; // study-specific RR
## real<lower=0, upper=1> theta_ctl[n_study]; // risk in non-exposed in each study
## }
##
## transformed parameters {
## real<lower=0, upper=1> theta_exp[n_study]; // risk in exposed in each study
## for (i in 1:n_study)
## theta_exp[i] = RR_i[i] * theta_ctl[i];
## }
##
## model {
## mu ~ lognormal(mu0, phi);
## RR ~ lognormal(log(mu), tau);
## RR_i ~ lognormal(log(RR), sigma);
##
## theta_ctl ~ uniform(0, 1);
## z_ctl ~ binomial(n_ctl, theta_ctl);
##
## z_exp ~ binomial(n_exp, theta_exp);
## }
##
## generated quantities {
## int z_ctl_pred[n_study];
## int z_exp_pred[n_study];
##
## // Posterior predictive checking
## for (i in 1:n_study) {
## z_ctl_pred[i] = binomial_rng(n_ctl[i], theta_ctl[i]);
## z_exp_pred[i] = binomial_rng(n_exp[i], theta_exp[i]);
## }
## }
Lav prior for nyt RCT, der undersøger effekten af ECMO ift. konventionel ventilation hos ARDS-patienter. I mangel på bedre hedder den her crowd prior. Disse fire linier indeholder forventede antal dødsfald og antal patienter i de to grupper, indsamlet fra deltagere på dagen.
z_exp <- c(20, 10, 50, 20, 5) # antal dødsfald i ECMO-gruppe
n_exp <- c(100, 100, 100, 50, 10) # antal patienter i ECMO-gruppe
z_ctl <- c(40, 20, 50, 30, 4) # antal dødsfald i kontrolgruppe
n_ctl <- c(100, 100, 100, 50, 9) # antal patienter i kontrolgruppe
Kombiner de fem crowd priors til én beta-prior, og standardiser, så det svarer til 100 patienter i hver arm.
a_exp <- round(mean(z_exp / n_exp * 100)); b_exp <- 100 - a_exp
a_ctl <- round(mean(z_ctl / n_ctl * 100)); b_ctl <- 100 - a_ctl
message("ECMO: z = ", a_exp, "\nKonventionel: z = ", a_ctl)
## ECMO: z = 34
## Konventionel: z = 43
df_exp <- df_ctl <- list()
for (i in seq(z_exp)) {
df_exp[[i]] <- beta_df(z_exp[i], "ECMO", i, n = n_exp[i])
df_ctl[[i]] <- beta_df(z_ctl[i], "Konventionel", i, n = n_ctl[i])
}
combined_df <- bind_rows(beta_df(a_exp, "ECMO"),
beta_df(a_ctl, "Konventionel"))
Plot individuelle priors (linier) og den samlede (grå). Høje, smalle fordelinger svarer til mere sikre/overbeviste priors; lave, brede til mere usikre/forsigtige priors.
mutate(bind_rows(df_exp, df_ctl), k = as.character(k)) %>%
ggplot(aes(x = theta, y = dens)) +
geom_area(data = combined_df, alpha = 0.2, position = "identity") +
geom_line(aes(colour = k), show.legend = FALSE) +
facet_wrap(~ group, ncol = 1) +
scale_color_brewer(palette = "Set2") +
labs(x = "Risiko for død", title = "Priors for nyt ECMO-trial")
Vis kombinerede priors for risiko for død i hver gruppe i samme plot. Relativt stort overlap = a priori-forventning at risikoen for død er forskellig i de to grupper, men at de ikke er helt forskellige.
crowd_priors <- mutate(bind_rows(df_exp, df_ctl), k = as.character(k)) %>%
ggplot(aes(x = theta, y = dens)) +
geom_area(aes(fill = group), combined_df, alpha = 0.7, position = "identity") +
scale_fill_brewer(palette = "Set1") +
labs(x = "Risiko for død", title = "Samlede crowd priors i samme plot") +
coord_cartesian(xlim = c(0.2, 0.63))
crowd_priors
show_prior_RR(a_exp, b_exp, a_ctl, b_ctl)
## Prior RR median (95% CrI): 0.79 (0.55-1.12)
## Sandsynlighed for prior RR < 1: 90%
## Sandsynlighed for prior RR < 0.9: 77%
## Sandsynlighed for prior RR < 0.8: 53%
## Sandsynlighed for prior RR < 0.67: 18%
Disse to beta-priors svarer nogenlunde til den hhv. moderat entusiatistiske og skeptiske prior i Coligher 2018 (sammenlign med række 3 og 4 i tabel 1).
show_prior_RR(42, 58, 54, 46)
## Prior RR median (95% CrI): 0.78 (0.58-1.04)
## Sandsynlighed for prior RR < 1: 95%
## Sandsynlighed for prior RR < 0.9: 84%
## Sandsynlighed for prior RR < 0.8: 58%
## Sandsynlighed for prior RR < 0.67: 17%
show_prior_RR(25, 75, 25, 75)
## Prior RR median (95% CrI): 1 (0.61-1.63)
## Sandsynlighed for prior RR < 1: 49%
## Sandsynlighed for prior RR < 0.9: 33%
## Sandsynlighed for prior RR < 0.8: 18%
## Sandsynlighed for prior RR < 0.67: 5%
Disse findes med grid search over \(\theta \in [0, 1]\). Læs som: Hvis \(\theta\) var en given værdi, hvor “likely” ville det være at observere netop denne kombination af \(z\) og \(n\)? Se relativt stort overlap (dog mindre end de tilsvarende priors).
likelihoods <- bind_rows(binom_df(44, 124, "ECMO"), binom_df(57, 125, "Konventionel")) %>%
ggplot(aes(x = theta, y = dens)) +
geom_area(aes(fill = group), position = "identity", alpha = 0.7) +
scale_fill_brewer(palette = "Set1") +
labs(x = "Risiko for død", title = "Likelihood for observerede data") +
coord_cartesian(xlim = c(0.2, 0.63))
likelihoods
Ved eksakt, konjugat analyse som i dette afsnit antager man, at priors og data stammer fra den samme population. Den antagelse holder sædvanligvis ikke, da man jo typisk vil bruge data fra et eller flere tidligere studier som prior for et nyt studie. Populationen i det nye studie er nødvendigvis en anden.
Fordelinger af posteriore risici for død i de to grupper ved eksakt, konjugat analyse (beta prior + binomial likelihood \(\implies\) beta posterior).
crowd_posteriors <- bind_rows(beta_df(a_exp + 44, "ECMO", n = a_exp + b_exp + 124),
beta_df(a_ctl + 57, "Konventionel", n = a_ctl + b_ctl + 125)) %>%
ggplot(aes(x = theta, y = dens)) +
geom_area(aes(fill = group), position = "identity", alpha = 0.7) +
scale_fill_brewer(palette = "Set1") +
scale_x_continuous(minor_breaks = 0:100 / 100, breaks = 0:10 / 10, labels = paste0(seq(0, 100, 10), "%")) +
coord_cartesian(xlim = c(0.2, 0.63)) +
labs(title = "Posterior risiko for død i hver af de to studiearme", x = "")
crowd_posteriors
Se blandt andet, at overlap faktisk er allermindst i posteriore fordeling, idet data har “trukket” posteriore fordelinger længere væk fra hinanden. På den måde har data forstærket vores a priori tro på, at der faktisk er forskel på mortaliteten i de to grupper.
grid.arrange(crowd_priors, likelihoods, crowd_posteriors, ncol = 1)
posterior_metric(a_exp, b_exp, 44, 124, a_ctl, b_ctl, 57, 125, "RR")
## Median posterior RR (95% CrI): 0.78 (0.62-0.98)
## Sandsynlighed for prior RR < 1: 98%
## Sandsynlighed for prior RR < 0.9: 89%
## Sandsynlighed for prior RR < 0.8: 57%
## Sandsynlighed for prior RR < 0.67: 9%
Sammenlign med tabel 2 i Coligher 2018.
posterior_metric(42, 58, 44, 124, 54, 46, 57, 125, "RR")
## Median posterior RR (95% CrI): 0.78 (0.63-0.96)
## Sandsynlighed for prior RR < 1: 99%
## Sandsynlighed for prior RR < 0.9: 92%
## Sandsynlighed for prior RR < 0.8: 61%
## Sandsynlighed for prior RR < 0.67: 9%
posterior_metric(25, 75, 44, 124, 25, 75, 57, 125, "RR")
## Median posterior RR (95% CrI): 0.84 (0.65-1.09)
## Sandsynlighed for prior RR < 1: 90%
## Sandsynlighed for prior RR < 0.9: 69%
## Sandsynlighed for prior RR < 0.8: 34%
## Sandsynlighed for prior RR < 0.67: 4%
posterior_metric(a_exp, b_exp, 44, 124, a_ctl, b_ctl, 57, 125, "ARR")
## Median posterior ARR (95% CrI): 0.1 (0.01-0.18)
## Sandsynlighed for prior ARR >= 0.02: 95%
## Sandsynlighed for prior ARR >= 0.04: 89%
## Sandsynlighed for prior ARR >= 0.06: 78%
## Sandsynlighed for prior ARR >= 0.08: 63%
## Sandsynlighed for prior ARR >= 0.1: 47%
## Sandsynlighed for prior ARR >= 0.2: 1%
Sammenlign med tabel 3 i Coligher 2018. Læg mærke til, at i artiklen har de udledt AAR fra RR, hvilket muligvis også forklarer noget af den (omend lille) forskel, vi ser mellem de følgende resultater og tabel 3.
posterior_metric(42, 58, 44, 124, 54, 46, 57, 125, "ARR")
## Median posterior ARR (95% CrI): 0.11 (0.02-0.2)
## Sandsynlighed for prior ARR >= 0.02: 97%
## Sandsynlighed for prior ARR >= 0.04: 93%
## Sandsynlighed for prior ARR >= 0.06: 86%
## Sandsynlighed for prior ARR >= 0.08: 74%
## Sandsynlighed for prior ARR >= 0.1: 58%
## Sandsynlighed for prior ARR >= 0.2: 2%
posterior_metric(25, 75, 44, 124, 25, 75, 57, 125, "ARR")
## Median posterior ARR (95% CrI): 0.06 (-0.03-0.14)
## Sandsynlighed for prior ARR >= 0.02: 80%
## Sandsynlighed for prior ARR >= 0.04: 64%
## Sandsynlighed for prior ARR >= 0.06: 47%
## Sandsynlighed for prior ARR >= 0.08: 30%
## Sandsynlighed for prior ARR >= 0.1: 16%
## Sandsynlighed for prior ARR >= 0.2: 0%
NNT er ikke en del af analysen i Coligher 2018, men er alligevel et interessant og ofte benyttet effektmål, så jeg har taget det med her.
posterior_metric(a_exp, b_exp, 44, 124, a_ctl, b_ctl, 57, 125, "NNT", TRUE) +
coord_cartesian(xlim = c(0, 70))
## Median posterior NNT (95% CrI): 10.07 (4.68-58.16)
## Sandsynlighed for prior NNT <= 0: 2%
## Sandsynlighed for prior NNT <= 2: 2%
## Sandsynlighed for prior NNT <= 4: 2%
## Sandsynlighed for prior NNT <= 6: 8%
## Sandsynlighed for prior NNT <= 8: 28%
## Sandsynlighed for prior NNT <= 10: 49%
## Sandsynlighed for prior NNT <= 20: 86%
posterior_metric(42, 58, 44, 124, 54, 46, 57, 125, "NNT", TRUE) +
coord_cartesian(xlim = c(0, 55))
## Median posterior NNT (95% CrI): 9.07 (4.83-39.46)
## Sandsynlighed for prior NNT <= 0: 1%
## Sandsynlighed for prior NNT <= 2: 1%
## Sandsynlighed for prior NNT <= 4: 1%
## Sandsynlighed for prior NNT <= 6: 12%
## Sandsynlighed for prior NNT <= 8: 38%
## Sandsynlighed for prior NNT <= 10: 59%
## Sandsynlighed for prior NNT <= 20: 91%
posterior_metric(25, 75, 44, 124, 25, 75, 57, 125, "NNT", TRUE) +
coord_cartesian(xlim = c(-200, 250))
## Median posterior NNT (95% CrI): 14.53 (-168.22-177.68)
## Sandsynlighed for prior NNT <= 0: 11%
## Sandsynlighed for prior NNT <= 2: 11%
## Sandsynlighed for prior NNT <= 4: 11%
## Sandsynlighed for prior NNT <= 6: 11%
## Sandsynlighed for prior NNT <= 8: 17%
## Sandsynlighed for prior NNT <= 10: 27%
## Sandsynlighed for prior NNT <= 20: 66%
Her vil vi gøre det samme som i artiklen, nemlig modellere RR direkte som parameter i stedet for at betragte den som afledt parameter. Disse figurer svarer til fig. 2A i Coligher 2018 (1. og 2. række). Resultaterne passer ikke helt men kommer tæt på. Det er ikke sikkert, Coligher et al. bruger præcis denne model, men ud fra beskrivelsen (sidste afsnit af Methods) synes den ikke helt ved siden af.
Værdierne for median
og sigma
kommer fra tabel 1, række 3.
mod_enthus_dat <- list(z_exp = 44, n_exp = 124, z_ctl = 57, n_ctl = 125,
median = 0.78, sigma = 0.15)
mod_enthus_fit <- stan("lognormal_model_Coligher2018.stan", data = mod_enthus_dat)
##
## SAMPLING FOR MODEL 'lognormal_model_Coligher2018' NOW (CHAIN 1).
## Rejecting initial value:
## Error evaluating the log probability at the initial value.
## Exception: validate transformed params: theta_exp is 4.12812, but must be less than or equal to 1 (in 'model119d222f1fa39_lognormal_model_Coligher2018' at line 22)
##
##
## Gradient evaluation took 9e-06 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.09 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 0.016791 seconds (Warm-up)
## 0.014906 seconds (Sampling)
## 0.031697 seconds (Total)
##
##
## SAMPLING FOR MODEL 'lognormal_model_Coligher2018' NOW (CHAIN 2).
##
## Gradient evaluation took 8e-06 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.08 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 0.019574 seconds (Warm-up)
## 0.015099 seconds (Sampling)
## 0.034673 seconds (Total)
##
##
## SAMPLING FOR MODEL 'lognormal_model_Coligher2018' NOW (CHAIN 3).
## Rejecting initial value:
## Error evaluating the log probability at the initial value.
## Exception: validate transformed params: theta_exp is 1.87562, but must be less than or equal to 1 (in 'model119d222f1fa39_lognormal_model_Coligher2018' at line 22)
##
##
## Gradient evaluation took 8e-06 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.08 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 0.017202 seconds (Warm-up)
## 0.016644 seconds (Sampling)
## 0.033846 seconds (Total)
##
##
## SAMPLING FOR MODEL 'lognormal_model_Coligher2018' NOW (CHAIN 4).
##
## Gradient evaluation took 5e-06 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.05 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 0.015856 seconds (Warm-up)
## 0.016445 seconds (Sampling)
## 0.032301 seconds (Total)
Median og 95% CrI for RR passer med tabel 2 (række 3). Det samme (nogenlunde) gør sandsynlighederne for, at RR ligger under specifikke grænseværdier.
mod_enthus_fit
## Inference for Stan model: lognormal_model_Coligher2018.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## RR 0.78 0.00 0.09 0.63 0.72 0.78 0.83 0.96 1883 1
## theta_ctl 0.46 0.00 0.04 0.38 0.43 0.46 0.48 0.53 1917 1
## theta_exp 0.35 0.00 0.04 0.29 0.33 0.35 0.38 0.42 3882 1
## z_ctl_pred 56.89 0.14 7.29 42.00 52.00 57.00 62.00 71.00 2854 1
## z_exp_pred 44.11 0.11 6.90 31.00 39.00 44.00 49.00 58.00 3696 1
## lp__ -169.21 0.03 1.03 -171.94 -169.59 -168.90 -168.50 -168.23 1565 1
##
## Samples were drawn using NUTS(diag_e) at Wed Nov 21 17:47:49 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
prior_post_plot(mod_enthus_dat, mod_enthus_fit) +
labs(title = "Moderataly enthusiastic prior og posterior", x = "",
subtitle = "—svarer cirka til Coligher 2018, fig. 2A (række 2, kolonne 2)")
## Median for posterior meta-analytic RR (95% CrI): 0.78 (0.63-0.96)
## Sandsynlighed for prior RR < 1: 99%
## Sandsynlighed for prior RR < 0.9: 92%
## Sandsynlighed for prior RR < 0.8: 61%
## Sandsynlighed for prior RR < 0.67: 9%
Værdierne for median
og sigma
kommer fra tabel 1, række 4.
skeptical_dat <- list(z_exp = 44, n_exp = 124, z_ctl = 57, n_ctl = 125,
median = 1, sigma = 0.24)
skeptical_fit <- stan("lognormal_model_Coligher2018.stan", data = skeptical_dat)
##
## SAMPLING FOR MODEL 'lognormal_model_Coligher2018' NOW (CHAIN 1).
## Rejecting initial value:
## Error evaluating the log probability at the initial value.
## Exception: validate transformed params: theta_exp is 2.73295, but must be less than or equal to 1 (in 'model119d222f1fa39_lognormal_model_Coligher2018' at line 22)
##
##
## Gradient evaluation took 6e-06 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.06 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 0.018108 seconds (Warm-up)
## 0.013449 seconds (Sampling)
## 0.031557 seconds (Total)
##
##
## SAMPLING FOR MODEL 'lognormal_model_Coligher2018' NOW (CHAIN 2).
##
## Gradient evaluation took 6e-06 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.06 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 0.018491 seconds (Warm-up)
## 0.01651 seconds (Sampling)
## 0.035001 seconds (Total)
##
##
## SAMPLING FOR MODEL 'lognormal_model_Coligher2018' NOW (CHAIN 3).
## Rejecting initial value:
## Error evaluating the log probability at the initial value.
## Exception: validate transformed params: theta_exp is 1.86643, but must be less than or equal to 1 (in 'model119d222f1fa39_lognormal_model_Coligher2018' at line 22)
##
## Rejecting initial value:
## Error evaluating the log probability at the initial value.
## Exception: validate transformed params: theta_exp is 1.7358, but must be less than or equal to 1 (in 'model119d222f1fa39_lognormal_model_Coligher2018' at line 22)
##
## Rejecting initial value:
## Error evaluating the log probability at the initial value.
## Exception: validate transformed params: theta_exp is 3.96571, but must be less than or equal to 1 (in 'model119d222f1fa39_lognormal_model_Coligher2018' at line 22)
##
##
## Gradient evaluation took 7e-06 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.07 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 0.018528 seconds (Warm-up)
## 0.015454 seconds (Sampling)
## 0.033982 seconds (Total)
##
##
## SAMPLING FOR MODEL 'lognormal_model_Coligher2018' NOW (CHAIN 4).
##
## Gradient evaluation took 4e-06 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 0.021195 seconds (Warm-up)
## 0.016204 seconds (Sampling)
## 0.037399 seconds (Total)
Median og 95% CrI for RR passer med tabel 2 (række 4). Det samme (nogenlunde) gør sandsynlighederne for, at RR ligger under specifikke grænseværdier.
skeptical_fit
## Inference for Stan model: lognormal_model_Coligher2018.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## RR 0.83 0.00 0.11 0.64 0.76 0.83 0.90 1.07 1570 1
## theta_ctl 0.44 0.00 0.04 0.36 0.42 0.44 0.47 0.53 1559 1
## theta_exp 0.37 0.00 0.04 0.29 0.34 0.37 0.39 0.45 3703 1
## z_ctl_pred 55.62 0.16 7.57 41.00 51.00 55.50 61.00 70.00 2287 1
## z_exp_pred 45.72 0.12 7.30 32.00 41.00 46.00 51.00 61.00 4000 1
## lp__ -169.61 0.02 0.99 -172.28 -169.99 -169.32 -168.89 -168.62 1714 1
##
## Samples were drawn using NUTS(diag_e) at Wed Nov 21 17:47:50 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
prior_post_plot(skeptical_dat, skeptical_fit) +
labs(title = "Skeptical prior og posterior", x = "",
subtitle = "—svarer cirka til Coligher 2018, fig. 2A (række 2, kolonne 3)")
## Median for posterior meta-analytic RR (95% CrI): 0.83 (0.64-1.07)
## Sandsynlighed for prior RR < 1: 93%
## Sandsynlighed for prior RR < 0.9: 74%
## Sandsynlighed for prior RR < 0.8: 40%
## Sandsynlighed for prior RR < 0.67: 6%
Dette er et forsøg på at opnå (næsten) den samme data-drevne prior med 0% down-weighting som i Coligher 2018. Det lykkes ikke helt, sandsynligvis da de bruger en anden model, hvis præcise design de dog ikke rapporterer. Modellen er virkeligt svær at få til at opføre sig ordentligt, men med denne specifikation går det, og den giver rimeligt fornuftige resultater.
prior_dat <- slice(ecmo_df, 1:3) %>%
c(n_study = nrow(.), median = 1, phi = 2, tau = 1.5, sigma = 0.5)
metaanalytic_prior_fit <- stan("hier_lognormal_model_Coligher2018_for_prior.stan", data = prior_dat)
##
## SAMPLING FOR MODEL 'hier_lognormal_model_Coligher2018_for_prior' NOW (CHAIN 1).
##
## Gradient evaluation took 3.4e-05 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.34 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 0.048218 seconds (Warm-up)
## 0.041219 seconds (Sampling)
## 0.089437 seconds (Total)
##
##
## SAMPLING FOR MODEL 'hier_lognormal_model_Coligher2018_for_prior' NOW (CHAIN 2).
##
## Gradient evaluation took 1.5e-05 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.15 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 0.05474 seconds (Warm-up)
## 0.039943 seconds (Sampling)
## 0.094683 seconds (Total)
##
##
## SAMPLING FOR MODEL 'hier_lognormal_model_Coligher2018_for_prior' NOW (CHAIN 3).
##
## Gradient evaluation took 1e-05 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.1 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 0.052774 seconds (Warm-up)
## 0.043147 seconds (Sampling)
## 0.095921 seconds (Total)
##
##
## SAMPLING FOR MODEL 'hier_lognormal_model_Coligher2018_for_prior' NOW (CHAIN 4).
## Rejecting initial value:
## Error evaluating the log probability at the initial value.
## Exception: validate transformed params: theta_exp[k0__] is 1.22217, but must be less than or equal to 1 (in 'model119d24c904274_hier_lognormal_model_Coligher2018_for_prior' at line 27)
##
##
## Gradient evaluation took 1.1e-05 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.11 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 0.049574 seconds (Warm-up)
## 0.038898 seconds (Sampling)
## 0.088472 seconds (Total)
metaanalytic_prior_fit
## Inference for Stan model: hier_lognormal_model_Coligher2018_for_prior.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## RR 0.64 0.00 0.20 0.33 0.50 0.61 0.75 1.11 4000 1
## mu 1.54 0.05 2.67 0.07 0.33 0.76 1.69 8.07 2956 1
## RR_i[1] 0.73 0.00 0.14 0.49 0.64 0.72 0.82 1.03 4000 1
## RR_i[2] 0.50 0.00 0.11 0.31 0.42 0.49 0.57 0.73 4000 1
## RR_i[3] 0.65 0.00 0.10 0.47 0.58 0.64 0.71 0.87 4000 1
## theta_ctl[1] 0.49 0.00 0.05 0.39 0.46 0.49 0.53 0.60 4000 1
## theta_ctl[2] 0.50 0.00 0.06 0.39 0.46 0.50 0.54 0.61 4000 1
## theta_ctl[3] 0.55 0.00 0.05 0.45 0.52 0.55 0.58 0.64 4000 1
## theta_exp[1] 0.36 0.00 0.06 0.25 0.32 0.35 0.39 0.47 4000 1
## theta_exp[2] 0.25 0.00 0.05 0.16 0.21 0.24 0.28 0.34 4000 1
## theta_exp[3] 0.35 0.00 0.05 0.27 0.32 0.35 0.38 0.45 4000 1
## z_ctl_pred[1] 44.48 0.10 6.55 32.00 40.00 44.00 49.00 57.00 4000 1
## z_ctl_pred[2] 37.48 0.09 6.00 26.00 33.00 37.50 42.00 49.00 4000 1
## z_ctl_pred[3] 53.87 0.11 6.89 40.00 49.00 54.00 59.00 67.00 4000 1
## z_exp_pred[1] 24.29 0.09 5.43 14.00 21.00 24.00 28.00 35.00 4000 1
## z_exp_pred[2] 18.46 0.08 5.21 9.00 15.00 18.00 22.00 29.00 4000 1
## z_exp_pred[3] 34.56 0.10 6.44 23.00 30.00 34.00 39.00 47.00 4000 1
## lp__ -340.07 0.05 2.00 -344.81 -341.16 -339.77 -338.63 -337.10 1946 1
##
## Samples were drawn using NUTS(diag_e) at Wed Nov 21 17:48:32 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
RR_prior <- as.data.frame(metaanalytic_prior_fit)$RR
Af fittet udleder vi medianen af risiko-ratioen og stanardafvigelsen af logaritmen af risiko-ratioen. Dette bruger vi efterfølgende til at definere en “smooth” log-normalfordelt prior som for de såkaldte “reference priors” i artiklen (sv.t. til kolonne 2 og 3 i tabel 1).
(median_prior <- median(RR_prior))
## [1] 0.608916
(sd_log_prior <- sd(log(RR_prior)))
## [1] 0.3044503
Brug medianen og standardafvigelsen for logaritmen som prior i Stan-model:
trial_dat <- c(ecmo_df[4, ], median = median_prior, sigma = sd_log_prior)
metaanalytic_post_fit <- stan("lognormal_model_Coligher2018.stan", data = trial_dat)
##
## SAMPLING FOR MODEL 'lognormal_model_Coligher2018' NOW (CHAIN 1).
##
## Gradient evaluation took 2.1e-05 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.21 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 0.020064 seconds (Warm-up)
## 0.015711 seconds (Sampling)
## 0.035775 seconds (Total)
##
##
## SAMPLING FOR MODEL 'lognormal_model_Coligher2018' NOW (CHAIN 2).
##
## Gradient evaluation took 9e-06 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.09 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 0.020386 seconds (Warm-up)
## 0.017206 seconds (Sampling)
## 0.037592 seconds (Total)
##
##
## SAMPLING FOR MODEL 'lognormal_model_Coligher2018' NOW (CHAIN 3).
## Rejecting initial value:
## Error evaluating the log probability at the initial value.
## Exception: validate transformed params: theta_exp is 1.12487, but must be less than or equal to 1 (in 'model119d222f1fa39_lognormal_model_Coligher2018' at line 22)
##
## Rejecting initial value:
## Error evaluating the log probability at the initial value.
## Exception: validate transformed params: theta_exp is 2.01758, but must be less than or equal to 1 (in 'model119d222f1fa39_lognormal_model_Coligher2018' at line 22)
##
##
## Gradient evaluation took 6e-06 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.06 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 0.019474 seconds (Warm-up)
## 0.015337 seconds (Sampling)
## 0.034811 seconds (Total)
##
##
## SAMPLING FOR MODEL 'lognormal_model_Coligher2018' NOW (CHAIN 4).
##
## Gradient evaluation took 7e-06 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.07 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 0.018405 seconds (Warm-up)
## 0.01406 seconds (Sampling)
## 0.032465 seconds (Total)
Sammenlign RR-værdierne i fittet med tabel 2 (række 6)
metaanalytic_post_fit
## Inference for Stan model: lognormal_model_Coligher2018.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## RR 0.75 0.00 0.11 0.56 0.67 0.74 0.82 0.96 1607 1
## theta_ctl 0.46 0.00 0.04 0.38 0.43 0.46 0.49 0.55 1514 1
## theta_exp 0.34 0.00 0.04 0.27 0.32 0.34 0.37 0.42 3571 1
## z_ctl_pred 57.73 0.16 7.65 42.00 53.00 58.00 63.00 73.00 2161 1
## z_exp_pred 42.51 0.12 7.26 29.00 37.00 42.00 47.00 57.00 3706 1
## lp__ -169.47 0.03 1.01 -172.24 -169.86 -169.17 -168.74 -168.48 1483 1
##
## Samples were drawn using NUTS(diag_e) at Wed Nov 21 17:48:32 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
RR_post <- as.data.frame(metaanalytic_post_fit)$RR
q <- round(quantile(RR_post, c(0.025, 0.975, 0.5)), 2)
ggplot() +
stat_density(aes(RR_prior), geom = "line", colour = "orange") +
stat_density(aes(RR_post), geom = "line", colour = "darkblue") +
geom_errorbarh(aes(xmin = q[1], xmax = q[2], y = 0), height = 0, colour = "darkblue") +
geom_point(aes(x = q[3], y = 0), size = 3, colour = "darkblue") +
scale_x_continuous(trans = "log", breaks = log_breaks) +
coord_cartesian(xlim = c(0.2, 2)) +
theme(axis.text.y = element_text()) +
labs(title = "Meta-analytic prior with 0% downweighting",
subtitle = "—svarer cirka til Coligher 2018, fig. 2A (række 1, kolonne 4)")
Det er interessant, at selvom median og 95% CrI passer meget godt med tabel 2, passer sandsynlighederne ikke nær så godt:
{
message("Median for posterior meta-analytic RR (95% CrI): ", q[3], " (", q[1], "-", q[2], ")")
for (i in c(1, 0.9, 0.8, 0.67))
message(sprintf("Sandsynlighed for prior RR < %s: %s%%", i, round(mean(RR_post < i) * 100)))
}
## Median for posterior meta-analytic RR (95% CrI): 0.74 (0.56-0.96)
## Sandsynlighed for prior RR < 1: 99%
## Sandsynlighed for prior RR < 0.9: 92%
## Sandsynlighed for prior RR < 0.8: 70%
## Sandsynlighed for prior RR < 0.67: 24%
Der er vel i princippet tre måder at estimere sin posteriore fordeling: eksakt analyse, grid search og repræsentative samples fra den posteriore fordeling. De to sidste giver ikke “eksakte” resultater, men er alligevel væsensforskellige. Ved grid search afprøver man alle kombinationer af (næsten) alle mulige værdier af parametrene (= alle de værdier, parametrene kan antage med en given præcision). Dette kan fint lade sig gøre med få parametre og simple modeller, men det bliver hurtigt meget problematisk, når antallet af parametre øges, modellen bliver mere kompliceret, og/eller man gerne vil have mere præcise estimater.
Markov Chain Monte Carlo-metoder giver repræsentative samples. Det vil sige, man tager repræsentative samples fra de posteriore fordelinger af sine parametre ved nogle temmeligt elegante algoritmer. Vil du gerne have større præcision, tager du blot flere samples, så computeren har længere tid til at “udforske de posteriore fordelinger”; Stan bruger en avanceret type Markov Chain Monte Carlo. Doing Bayesian Data Analysis (2. udgave, kap. 7) giver en helt igennem glimrende og letforståelig forklaring på idéen bag, så vi almindelige mennesker får en fornemmelse for, hvad i alverden der foregår.
Da man sampler fra den posteriore fordeling, kunne det have betydning, hvor i den posteriore man starter sin udforskning. Derfor er det vigtigt at køre flere såkaldte kæder, som alle starter forskellige steder; i alle Stan-modeller her har jeg kørt 4 kæder (default). Hvis kæderne ender med at estimere den samme posteriore fordeling, kan man overbevise sig selv—og fagfæller—om, at man faktisk har fundet “de rigtige” posteriore fordelinger af sine parametre. Dette kaldes convergence.
Stan’s metode er temmeligt effektiv, så hvis man gerne vil have flere samples, anbefales det generelt at øge antallet af kæder; dette giver både flere samples fra den posteriore fordeling og bedre mulighed for at undersøge convergence. Man kan aldrig være sikker på at have opnået convergence, men ved hjælp af følgende tre diagnostika kommer man tæt på.
Hedder egentligt Potential Scale Reduction Statistic, men kaldes bare “R hat”. Det er egentligt et mål for stationarity, altså om kæderne er endt med at jolle rundt i den samme fordeling, hvilket er en god ting. Rhat er et mål for forholdet mellem variansen inden for den enkelte kæde og variansen mellem kæderne. Det lyder kringlet, men betyder egentligt blot, at kæderne bevæger sig lige meget rundt. Hvis \(\hat R < 1.1\) er vi okay glade, men vi vil helst have \(\hat R\) ned på 1.01 eller lavere; \(\hat R\) kan dog ikke blive lavere end 1 Hvis du kigger i output fra Stan-fits ovenfor, vil du se Rhat = 1
for alle parametre i alle fits.
Når man plotter kæderne over “tid”, skal de gerne lave en “fuzzy caterpillar”, hvor kæderne ligger og roder rundt oven i hinanden. Disse plots hedder trace plots, da man tracer (= følger) kæden fra start til slut i samplingprocessen. Et eksempel er fra vores hiararkiske fit ovenfor:
suppressMessages( # to avoid the message of overriding colour scale
stan_trace(metaanalytic_prior_fit, pars = c("RR", "RR_i"), nrow = 2, size = 0.4, alpha = 0.8) +
scale_color_brewer(palette = "Set1"))
Hver kæde skal gerne finde (næsten, det er trods alt ikke eksakt) den samme posteriore fordeling. For at undersøge dette, bør man for hver parameter lave densitetsplots (\(\approx\) udglattede histogrammer) af hver kæde og sikre sig, de er så godt som ens. Igen et eksempel fra det metaanalytiske fit fra ovenfor.
suppressMessages( # to avoid the message of overriding fill scale
stan_dens(metaanalytic_prior_fit, pars = c("RR", "RR_i"), separate_chains = TRUE,
size = 0, alpha = 0.4) +
scale_x_continuous(trans = "log", breaks = log_breaks) +
scale_fill_brewer(palette = "Set1"))