Noter

Initialiser, sæt defaults

tidyr og dplyr er til data-wrangling, ggplot2 og gridExtra til visualisering, rstan til at køre Stan-modeller.

for (p in c("ggplot2", "tidyr", "dplyr", "rstan", "gridExtra")) 
    library(p, character.only = TRUE)
knitr::opts_chunk$set(fig.align = "center", fig.width = 8)
options(width = 800, warn = -1) # avoid wrapped output

Definere et (ret) tæt grid, over hvilket vi kan evaluere probability density function (f.eks. dbeta):

theta_grid <- seq(0, 1, length.out = 1000)

Sæt standardindstillinger for ggplots:

base_theme <- theme_minimal(base_size = 10) + 
    theme(axis.text.y = element_blank(), axis.title.y = element_blank(), 
          legend.title = element_blank(), panel.grid.major.y = element_blank(),
          panel.grid.minor.y = element_blank())
theme_set(base_theme)

Brug samme inddeling af logaritmiske x-akser i plots:

log_breaks <- c(0.1, 0.2, 0.33, 0.5, 0.67, 1, 1.5, 2, 3, 5, 10) 

Data frame med data fra de tre foregående studier og EOLIA-studiet:

ecmo_df <- data.frame(z_exp = c(25, 18, 35, 44),
                      n_exp = c(68, 75, 98, 124),
                      z_ctl = c(44, 38, 54, 57),
                      n_ctl = c(90, 75, 98, 125))

Hjælperfunktioner

Prior-likelihood-posterior plot

Samlet plot af fordelingerne af prior, likelihood og posterior for beta prior og binomial likelihood (konjugat analyse).

plp_plot <- function(a, b, z, n) { # prior-likelihood-posterior plot
    grid_res <- length(theta_grid)
    pretty <- c(prior = "1. Prior (beta)", 
                likelihood = "2. Likelihood (binomial)", 
                posterior = "3. Posterior (beta)")
    
    data.frame(theta_grid,
               prior = dbeta(theta_grid, a, b),
               likelihood = dbinom(z, n, theta_grid),
               posterior = dbeta(theta_grid, a + z, b + n - z)) %>%
        mutate(likelihood = likelihood / sum(likelihood / grid_res)) %>% # normalise to AUC = 1
        gather(k, dens, -pi) %>%
        mutate(k = pretty[k]) %>%
        ggplot(aes(x = pi, y = dens)) +
            geom_area(fill = "blue", alpha = 0.5, size = 0) +
            facet_wrap(~ k, nrow = 1) +
            coord_cartesian(xlim = c(0.1, 0.6)) +
            guides(fill = FALSE) +
            base_theme
}
Lav data frame for beta-fordelt variat

Bruger grid search over \(\theta\). Forudsætter, at theta_grid-objekt findes (er defineret øverst).

beta_df <- function(a, g, k = 0, n = 100) { # helper function for simpler code
    data.frame(theta = theta_grid, dens = dbeta(theta_grid, a, n - a),
               group = g, k = k, stringsAsFactors = FALSE) 
}
Lav data frame med binomial-fordelt variat

Bruger grid search over \(\theta\). Forudsætter, at theta_grid-objekt findes (er defineret øverst).

binom_df <- function(z, n, g) { # helper function for simpler code
    data.frame(theta = theta_grid, dens = dbinom(z, n, theta_grid),
               group = g, stringsAsFactors = FALSE)
}
Vis prior risiko-ratio baseret på crowd priors (= de priors, vi lavede i plenum)
show_prior_RR <- function(a_exp, b_exp, a_ctl, b_ctl) {
    RR_prior <- rbeta(10000, a_exp, b_exp) / rbeta(10000, a_ctl, b_ctl)
    q <- round(quantile(RR_prior, c(0.025, 0.975, 0.5)), 2)
    d <- with(density(RR_prior), data.frame(x, y))
    d_q <- filter(d, between(x, q[1], q[2]))
    d_m <- d[which.min(abs(median(RR_prior) - d$x)), ]
    
    message("Prior RR median (95% CrI): ", q[3], " (", q[1], "-", q[2], ")")
    for (i in c(1, 0.9, 0.8, 0.67))
        message("Sandsynlighed for prior RR < ", i, ": ", round(mean(RR_prior < i), 2) * 100, "%")
    
    ggplot() + 
        geom_line(aes(x, y), d, colour = "black") +
        geom_area(aes(x, y), d_q, fill = "blue", alpha = 0.5) +
        geom_line(aes(x = rep(d_m$x, 2), y = c(0, d_m$y)), linetype = 2) +
        scale_x_continuous(trans = "log10", breaks = log_breaks) +
        labs(title = "Prior fordeling af RR", x = "",
             subtitle = "—den stiplede linie angiver medianen, det blå område 95% CrI")
}
Posteriore outputs ved beta prior og binomial likelihood

Giver også posteriore sandsynligheder for, at parameteren er større eller mindre end forskellige værdier (sv.t. tabel 2 og 3 i Coligher 2018).

posterior_metric <- function(a_exp, b_exp, z_exp = 44, n_exp = 124, 
                             a_ctl, b_ctl, z_ctl = 57, n_ctl = 125, 
                             metric = "RR", zoom = FALSE) {
    # Draw 10,000 samples from posteriors
    samples_exp <- rbeta(10000, a_exp + z_exp, b_exp + n_exp - z_exp)
    samples_ctl <- rbeta(10000, a_ctl + z_ctl, b_ctl + n_ctl - z_ctl)
    
    m <- switch(metric,
                ARR = samples_ctl - samples_exp,
                NNT = 1 / (samples_ctl - samples_exp),
                RR = samples_exp / samples_ctl)
    
    thresholds <- switch(metric,
                         ARR = c(2, 4, 6, 8, 10, 20) / 100,
                         NNT = c(0, 2, 4, 6, 8, 10, 20),
                         RR = c(1, 0.9, 0.8, 0.67))
    
    # Appropriate operator differs according to effect metric used
    o <- switch(metric, ARR = ">=", NNT = "<=", RR = "<")
    q <- round(quantile(m, c(0.025, 0.975, 0.5)), 2)
    message("Median posterior ", metric, " (95% CrI): ", q[3], " (", q[1], "-", q[2], ")")
    for (i in thresholds)
        message(sprintf("Sandsynlighed for prior %s %s %s: %s%%", metric, o, i, 
                        round(mean(do.call(o, list(m, i))), 2) * 100))
    
    lim <- quantile(m, c(0.01, 0.99))
    if (zoom) m <- m[between(m, lim[1], lim[2])] # ignore outliers in the plot
    
    d <- with(density(m), data.frame(x, y))
    d_q <- filter(d, between(x, q[1], q[2]))
    d_m <- d[which.min(abs(median(m) - d$x)), ]

    p <- ggplot() + 
        geom_line(aes(x, y), d, colour = "black") +
        geom_area(aes(x, y), d_q, fill = "blue", alpha = 0.5) +
        geom_line(aes(x = rep(d_m$x, 2), y = c(0, d_m$y)), linetype = 2) +
        labs(title = paste("Posterior fordeling af", metric),
             subtitle = "—den stiplede linie angiver medianen, det blå område 95% CrI")
    if (metric == "RR") {
        p + scale_x_continuous(trans = "log", breaks = log_breaks)
    } else {
        p
    }
}
Imiterer subplots i figur 2 in Coligher 2018
prior_post_plot <- function(prior_vals, fit) {
    RR <- as.data.frame(fit)$RR
    q <- round(quantile(RR, c(0.025, 0.975, 0.5)), 2)
    
    message("Median for posterior meta-analytic RR (95% CrI): ", q[3], " (", q[1], "-", q[2], ")")
    for (i in c(1, 0.9, 0.8, 0.67))
        message(sprintf("Sandsynlighed for prior RR < %s: %s%%", i, round(mean(RR < i) * 100)))
    
    m <- prior_vals$median
    sigma <- prior_vals$sigma
    ggplot() +
        stat_density(aes(rlnorm(10000, log(m), sigma)), geom = "line", colour = "orange") +
        stat_density(aes(RR), geom = "line", colour = "darkblue") +
        geom_errorbarh(aes(xmin = q[1], xmax = q[2], y = 0), height = 0, colour = "darkblue") +
        geom_point(aes(x = q[3], y = 0), size = 3, colour = "darkblue") +
        scale_x_continuous(trans = "log", breaks = log_breaks) +
        coord_cartesian(xlim = c(0.2, 2)) +
        theme(axis.text.y = element_text())
}

Stan-modeller

Almindelig model

writeLines(readLines("lognormal_model_Coligher2018.stan"))
## data {
##     int n_exp;
##     int z_exp;
##     int n_ctl;
##     int z_ctl;
##     
##     // Prior values for log-RR distribution
##     real<lower=0> median; // Coligher 2018 uses median as central tendency metric
##     real<lower=0> sigma;
## }
## 
## transformed data {
##     real mu = log(median); // median is converted to mean, to be used in lognormal
## }
## 
## parameters {
##     real<lower=0> RR; // risk ratio
##     real<lower=0, upper=1> theta_ctl; // risk in non-exposed in each study
## }
## 
## transformed parameters {
##     real<lower=0, upper=1> theta_exp; // risk in exposed in each study
##     theta_exp = RR * theta_ctl;
## }
## 
## model {
##     RR ~ lognormal(mu, sigma);
##     
##     theta_ctl ~ uniform(0, 1); // normal(0.5, 0.3);
##     z_ctl ~ binomial(n_ctl, theta_ctl);
##     
##     z_exp ~ binomial(n_exp, theta_exp);
## }
## 
## generated quantities {
##     int z_ctl_pred;
##     int z_exp_pred;
##     
##     z_ctl_pred = binomial_rng(n_ctl, theta_ctl);
##     z_exp_pred = binomial_rng(n_exp, theta_exp);
## }

Metaanalytisk (hierarkisk) model

writeLines(readLines("hier_lognormal_model_Coligher2018_for_prior.stan"))
## data {
##     int n_study; // number of trials
##     int n_exp[n_study]; // number of patients in exposed group
##     int z_exp[n_study]; // number of events in exposed group
##     int n_ctl[n_study]; // number of patients in control group
##     int z_ctl[n_study]; // number of events in control group
##     
##     // Prior values for log-RR distribution
##     real<lower=0> median; // Coligher 2018 uses median as central tendency metric
##     real<lower=0> phi; // hyperprior
##     real<lower=0> tau; // hyperprior
##     real<lower=0> sigma; // hyperprior
## }
## 
## transformed data {
##     real mu0 = log(median); // median is converted to mean, to be used in lognormal
## }
## 
## parameters {
##     real<lower=0> RR; // risk ratio
##     real<lower=0> mu; // hyperparameter
##     real<lower=0> RR_i[n_study]; // study-specific RR
##     real<lower=0, upper=1> theta_ctl[n_study]; // risk in non-exposed in each study
## }
## 
## transformed parameters {
##     real<lower=0, upper=1> theta_exp[n_study]; // risk in exposed in each study
##     for (i in 1:n_study)
##         theta_exp[i] = RR_i[i] * theta_ctl[i];
## }
## 
## model {
##     mu ~ lognormal(mu0, phi);
##     RR ~ lognormal(log(mu), tau);
##     RR_i ~ lognormal(log(RR), sigma);
##     
##     theta_ctl ~ uniform(0, 1);
##     z_ctl ~ binomial(n_ctl, theta_ctl);
##     
##     z_exp ~ binomial(n_exp, theta_exp);
## }
## 
## generated quantities {
##     int z_ctl_pred[n_study];
##     int z_exp_pred[n_study];
## 
##     // Posterior predictive checking
##     for (i in 1:n_study) {
##         z_ctl_pred[i] = binomial_rng(n_ctl[i], theta_ctl[i]);
##         z_exp_pred[i] = binomial_rng(n_exp[i], theta_exp[i]);
##     }
## }

Crowd priors: Priors for risici i de to arme

Lav prior for nyt RCT, der undersøger effekten af ECMO ift. konventionel ventilation hos ARDS-patienter. I mangel på bedre hedder den her crowd prior. Disse fire linier indeholder forventede antal dødsfald og antal patienter i de to grupper, indsamlet fra deltagere på dagen.

z_exp <- c(20, 10, 50, 20, 5) # antal dødsfald i ECMO-gruppe
n_exp <- c(100, 100, 100, 50, 10) # antal patienter i ECMO-gruppe
z_ctl <- c(40, 20, 50, 30, 4) # antal dødsfald i kontrolgruppe
n_ctl <- c(100, 100, 100, 50, 9) # antal patienter i kontrolgruppe

Priors for risici for død i de to arme

Kombiner de fem crowd priors til én beta-prior, og standardiser, så det svarer til 100 patienter i hver arm.

a_exp <- round(mean(z_exp / n_exp * 100)); b_exp <- 100 - a_exp
a_ctl <- round(mean(z_ctl / n_ctl * 100)); b_ctl <- 100 - a_ctl
message("ECMO: z = ", a_exp, "\nKonventionel: z = ", a_ctl)
## ECMO: z = 34
## Konventionel: z = 43
df_exp <- df_ctl <- list()
for (i in seq(z_exp)) {
    df_exp[[i]] <- beta_df(z_exp[i], "ECMO", i, n = n_exp[i])
    df_ctl[[i]] <- beta_df(z_ctl[i], "Konventionel", i, n = n_ctl[i]) 
}
combined_df <- bind_rows(beta_df(a_exp, "ECMO"), 
                         beta_df(a_ctl, "Konventionel"))

Plot individuelle priors (linier) og den samlede (grå). Høje, smalle fordelinger svarer til mere sikre/overbeviste priors; lave, brede til mere usikre/forsigtige priors.

mutate(bind_rows(df_exp, df_ctl), k = as.character(k)) %>%
    ggplot(aes(x = theta, y = dens)) +
    geom_area(data = combined_df, alpha = 0.2, position = "identity") +
    geom_line(aes(colour = k), show.legend = FALSE) +
    facet_wrap(~ group, ncol = 1) +
    scale_color_brewer(palette = "Set2") +
    labs(x = "Risiko for død", title = "Priors for nyt ECMO-trial")