Logo of Stan, a software for Bayesian modeling, featuring a blue background with a stylized ribbon design.

Here is R code you can use to learn about the basic pairwise models for binary outcomes, with simple simulated data. There are combinations of {common effect, random effects} x {contrast-based, arm-based} with examples of heuristic t likelihood and alternative specification of arm likelihoods. On the end, we show normal equivalents with reasonably large n per arm. This all uses Stan via cmdstanr (which is Robert’s favourite software for Bayesian models).

In keeping with the book, we want this code to be as human-readable as possible, so only base R is used, it may not be the most efficient R, and it certainly is not the most fashionable R. Likewise, you will see more arrays and looping within the Stan code than some people might like. This is because we find that it helps most learners to write it that way. Once you are familiar with these models and Stan, you should write your model code in whatever style you find easiest to read, understand and debug.

# full examples of pairwise BMA models in cmdstanr with simulated binary data

library(cmdstanr)
library(bayesplot)
library(meta)



# ----------------- binary outcome data, CE ---------------
set.seed(9876)

risk_ctl <- 0.3
odds_ctl <- risk_ctl/(1-risk_ctl)
lor_ctl <- log(odds_ctl)
true_or <- 0.9
true_lor <- log(true_or)
odds_int <- (risk_ctl/(1-risk_ctl)) * true_or
risk_int <- odds_int / (1+ odds_int)
print(paste0("Control risk: ", risk_ctl))
print(paste0("Control odds: ", odds_ctl))
print(paste0("Control log odds: ", lor_ctl))
print(paste0("Intervention risk: ", risk_int))
print(paste0("True log odds ratio: ", true_lor))
print(paste0("True odds ratio: ", exp(true_lor)))
arm_n <- 80
m <- 25

n1 <- n2 <- rep(arm_n, m)
sim_binary_ce <- data.frame(n1=n1, n2=n2)

sim_binary_ce$d1 <- rbinom(m, size=arm_n, prob=risk_ctl)
sim_binary_ce$d2 <- rbinom(m, size=arm_n, prob=risk_int)
sim_binary_ce$h1 <- sim_binary_ce$n1 - sim_binary_ce$d1
sim_binary_ce$h2 <- sim_binary_ce$n2 - sim_binary_ce$d2
sim_binary_ce$study <- 1:m
sim_binary_ce$study_or <- (sim_binary_ce$d2/sim_binary_ce$h2) / 
                            (sim_binary_ce$d1/sim_binary_ce$h1)
sim_binary_ce$logor = log(sim_binary_ce$study_or)
sim_binary_ce$se2 = (1/sim_binary_ce$d2) + 
                    (1/sim_binary_ce$h2) + 
                    (1/sim_binary_ce$d1) + 
                    (1/sim_binary_ce$h1)



# ------------ frequentist CE MA ---------------

ce_freq <- metabin(d2, n2, d1, n1, 
	               data=sim_binary_ce, 
	               sm="OR",
	               common=TRUE, random=FALSE,
	               method="Inverse")
print(summary(ce_freq))




# --------------- CE, binary, contrast-based ----------------

standata <- list(m = m,
	             logor = sim_binary_ce$logor,
	             se = sqrt(sim_binary_ce$se2))

stancode <- "
data {
  int m;
  array[m] real logor;
  array[m] real<lower=0> se;
}
parameters {
  real theta;
}
transformed parameters {
	real or;
	or = exp(theta);
}
model {
  theta ~ normal(0, 3);
  for(j in 1:m) {
    logor[j] ~ normal(theta, se[j]);
  }
}
"

stanmod <- cmdstan_model(write_stan_file(stancode), 
                         compile = TRUE)

stanfit <- stanmod$sample(data=standata,
                          iter_warmup=1000,
                          iter_sampling=10000,
                          chains = 2,
                          parallel_chains = 2,
                          refresh=1000,
                          seed=4790)

print(stanfit$summary())
standraws <- stanfit$draws(format="draws_matrix")

print(posterior::summarise_draws(standraws,
	                       mean,
	                       ~quantile(.x, probs=c(0.025,
	                       	                     0.5,
	                       	                     0.975))))

#mcmc_trace(standraws, pars=c('theta', 
#                             'or'))




# ------------------ CE/FE, binary, arm-based --------------------

sim_binary_ce_arm <- data.frame(d=c(sim_binary_ce$d1, sim_binary_ce$d2),
	                            n=c(sim_binary_ce$n1, sim_binary_ce$n2),
	                            active=rep(c(0,1), each=m),
	                            study=rep((1:m),2))
standata <- list(m = m,
	             K = 2*m,
	             d = sim_binary_ce_arm$d,
	             n = sim_binary_ce_arm$n,
	             active = sim_binary_ce_arm$active,
	             study = sim_binary_ce_arm$study)

stancode <- "
data {
  int m;
  int K;
  array[K] int d;
  array[K] int n;
  array[K] int active;
  array[K] int study;
}
parameters {
  real theta;
  array[m] real mu;
}
transformed parameters {
	real or;
	or = exp(theta);
}
model {
  array[K] real pred_logodds;
  mu ~ normal(-0.5, 1);
  theta ~ normal(0, 3);
  for(k in 1:K) {
  	pred_logodds[k] = mu[study[k]] + active[k]*theta;
    d[k] ~ binomial_logit(n[k], pred_logodds[k]);
  }
}
"

stanmod <- cmdstan_model(write_stan_file(stancode), 
                         compile = TRUE)

stanfit <- stanmod$sample(data=standata,
                          iter_warmup=1000,
                          iter_sampling=10000,
                          chains = 2,
                          parallel_chains = 2,
                          refresh=1000,
                          seed=4390)

print(stanfit$summary(), n=100)
standraws <- stanfit$draws(format="draws_matrix")

print(posterior::summarise_draws(standraws,
	                       mean,
	                       ~quantile(.x, probs=c(0.025,
	                       	                     0.5,
	                       	                     0.975))),
      n=100)




# ------------------ CE/FE, arm-based, equivalent alternative code --------------------

standata <- list(m = m,
	             d1 = sim_binary_ce$d1,
	             n1 = sim_binary_ce$n1,
	             d2 = sim_binary_ce$d2,
	             n2 = sim_binary_ce$n2)

stancode <- "
data {
  int m;
  array[m] int d1;
  array[m] int n1;
  array[m] int d2;
  array[m] int n2;
}
parameters {
  array[m] real mu;
  real theta;
}
transformed parameters {
	real or;
	or = exp(theta);
}
model {
  mu ~ normal(0, 100);
  theta ~ normal(0, 3);
  for(j in 1:m) {
  	d1[j] ~ binomial_logit(n1[j], mu[j]);
  	d2[j] ~ binomial_logit(n2[j], mu[j]+theta);
  }
}
"

stanmod <- cmdstan_model(write_stan_file(stancode), 
                         compile = TRUE)

stanfit <- stanmod$sample(data=standata,
                          iter_warmup=1000,
                          iter_sampling=10000,
                          chains = 2,
                          parallel_chains = 2,
                          refresh=1000,
                          seed=2460)

print(stanfit$summary(), n=100)
standraws <- stanfit$draws(format="draws_matrix")

print(posterior::summarise_draws(standraws,
	                       mean,
	                       ~quantile(.x, probs=c(0.025,
	                       	                     0.5,
	                       	                     0.975))),
      n=100)





# ----------------- binary outcome data, RE ---------------
set.seed(4888)

risk_ctl <- 0.3
odds_ctl <- risk_ctl/(1-risk_ctl)
logodds_ctl <- log(odds_ctl)
true_or <- 0.9
true_lor <- log(true_or)
odds_int <- (risk_ctl/(1-risk_ctl)) * true_or
risk_int <- odds_int / (1+ odds_int)
print(paste0("Control risk: ", risk_ctl))
print(paste0("Control odds: ", odds_ctl))
print(paste0("Control log odds: ", logodds_ctl))
print(paste0("True log odds ratio: ", true_lor))
print(paste0("True odds ratio: ", exp(true_lor)))
true_tau <- 0.15
print(paste0("True tau: ", true_tau))
arm_n <- 80
m <- 25
theta_j <- rnorm(m, true_lor, true_tau)
logodds_int <- logodds_ctl + theta_j
risk_int <- 1 / (1 + exp(-logodds_int))

n1 <- n2 <- rep(arm_n, m)
sim_binary_re <- data.frame(n1=n1, n2=n2, theta_j=theta_j)

sim_binary_re$d1 <- rbinom(m, size=arm_n, prob=risk_ctl)
sim_binary_re$d2 <- rbinom(m, size=arm_n, prob=risk_int)
sim_binary_re$h1 <- sim_binary_re$n1 - sim_binary_re$d1
sim_binary_re$h2 <- sim_binary_re$n2 - sim_binary_re$d2
sim_binary_re$study <- 1:m
sim_binary_re$study_or <- (sim_binary_re$d2/sim_binary_re$h2) / 
                            (sim_binary_re$d1/sim_binary_re$h1)
sim_binary_re$logor = log(sim_binary_re$study_or)
sim_binary_re$se2 = (1/sim_binary_re$d2) + 
                    (1/sim_binary_re$h2) + 
                    (1/sim_binary_re$d1) + 
                    (1/sim_binary_re$h1)



# ------------ frequentist RE MA ---------------

re_freq <- metabin(d2, n2, d1, n1, 
	               data=sim_binary_re, 
	               sm="OR",
	               common=FALSE, random=TRUE,
	               method="Inverse",
	               method.tau="DL")
print(summary(re_freq))




# --------------- RE, binary, contrast-based ----------------

standata <- list(m = m,
	             logor = sim_binary_re$logor,
	             se = sqrt(sim_binary_re$se2))

stancode <- "
data {
  int m;
  array[m] real logor;
  array[m] real<lower=0> se;
}
parameters {
  real theta;
  array[m] real u;
  real<lower=0> tau;
}
transformed parameters {
	real or;
	or = exp(theta);
}
model {
  theta ~ normal(0, 3);
  tau ~ gamma(1, 1);
  for(j in 1:m) {
  	u[j] ~ normal(0, tau);
    logor[j] ~ normal(theta + u[j], se[j]);
  }
}
"

stanmod <- cmdstan_model(write_stan_file(stancode), 
                         compile = TRUE)

stanfit <- stanmod$sample(data=standata,
                          iter_warmup=1000,
                          iter_sampling=10000,
                          chains = 2,
                          parallel_chains = 2,
                          refresh=1000,
                          seed=9937)

print(stanfit$summary(), n=100)
standraws <- stanfit$draws(format="draws_matrix")

print(posterior::summarise_draws(standraws,
	                       mean,
	                       ~quantile(.x, probs=c(0.025,
	                       	                     0.5,
	                       	                     0.975))),
      n=100)




# ------------------ RE/FE, binary, arm-based, equivalent alternative code --------------------

standata <- list(m = m,
	             d1 = sim_binary_re$d1,
	             n1 = sim_binary_re$n1,
	             d2 = sim_binary_re$d2,
	             n2 = sim_binary_re$n2)

stancode <- "
data {
  int m;
  array[m] int d1;
  array[m] int n1;
  array[m] int d2;
  array[m] int n2;
}
parameters {
  array[m] real mu;
  real theta;
  array[m] real u;
  real<lower=0> tau;
}
transformed parameters {
	real or;
	or = exp(theta);
}
model {
  mu ~ normal(0, 100);
  theta ~ normal(0, 3);
  tau ~ gamma(1, 1);
  for(j in 1:m) {
  	u[j] ~ normal(0, tau);
  	d1[j] ~ binomial_logit(n1[j], mu[j]);
  	d2[j] ~ binomial_logit(n2[j], mu[j]+theta+u[j]);
  }
}
"

stanmod <- cmdstan_model(write_stan_file(stancode), 
                         compile = TRUE)

stanfit <- stanmod$sample(data=standata,
                          iter_warmup=1000,
                          iter_sampling=10000,
                          chains = 2,
                          parallel_chains = 2,
                          refresh=1000,
                          seed=2460)

print(stanfit$summary(), n=100)
standraws <- stanfit$draws(format="draws_matrix")

print(posterior::summarise_draws(standraws,
	                       mean,
	                       ~quantile(.x, probs=c(0.025,
	                       	                     0.5,
	                       	                     0.975))),
      n=100)




# ---------------- simulated normal data ----------------

set.seed(5478)

mean_ctl <- 10
true_md <- (-1)
sd_ctl <- 1
mean_int <- mean_ctl + true_md
sd_int <- 0.8
arm_n <- 80
m <- 25

n1 <- n2 <- rep(arm_n, m)
sim_normal_ce <- data.frame(n1=n1, n2=n2)
for(j in 1:m) {
	deleteme1 <- rnorm(m, mean_ctl, sd_ctl)
	deleteme2 <- rnorm(m, mean_int, sd_int)
	sim_normal_ce$mean1[j] <- mean(deleteme1)
	sim_normal_ce$sd1[j] <- sd(deleteme1)
	sim_normal_ce$mean2[j] <- mean(deleteme2)
	sim_normal_ce$sd2[j] <- sd(deleteme2)
}
sim_normal_ce$study <- 1:m
sim_normal_ce$study_md <- sim_normal_ce$mean2 - sim_normal_ce$mean1
sim_normal_ce$pooled_var <- ((sim_normal_ce$n1-1)*(sim_normal_ce$sd1^2) + 
                              (sim_normal_ce$n2-1)*(sim_normal_ce$sd2^2)) / 
                            (sim_normal_ce$n1+sim_normal_ce$n2-2)
sim_normal_ce$var_md <- (sim_normal_ce$n1+sim_normal_ce$n2) * 
                         sim_normal_ce$pooled_var / 
                         (sim_normal_ce$n1*sim_normal_ce$n2)
sim_normal_ce$se_md <- sqrt(sim_normal_ce$var_md)




# --------------- frequentist contrast-based CE MA --------------

cema <- metacont(n2, mean2, sd2, 
                 n1, mean1, sd1,
                 data=sim_normal_ce,
                 studlab=study,
                 common=TRUE, random=FALSE,
                 sm="MD",
                 pooledvar=TRUE)

summary(cema)                 
plot(cema)

sim_normal_ce_arm <- data.frame(mean=c(sim_normal_ce$mean1, sim_normal_ce$mean2),
	                            sd=c(sim_normal_ce$sd1, sim_normal_ce$sd2),
	                            n_arm=c(sim_normal_ce$n1, sim_normal_ce$n2),
	                            active=rep(c(0,1), each=m),
	                            study=rep((1:m),2))
standata <- list(m = m,
	             K = 2*m,
	             n_arm = sim_normal_ce_arm$n_arm,
	             mean = sim_normal_ce_arm$mean,
	             sd = sim_normal_ce_arm$sd,
	             active = sim_normal_ce_arm$active,
	             study = sim_normal_ce_arm$study)

stancode <- "
data {
  int m;
  int K;
  array[K] real mean;
  array[K] real sd;
  array[K] int n_arm;
  array[K] real active;
  array[K] int study;
}
parameters {
  real theta;
  array[m] real mu;
}
model {
  array[K] real pred_mean;
  array[K] real se_mean;
  mu ~ normal(8, 100);
  theta ~ normal(0, 3);
  for(k in 1:K) {
  	pred_mean[k] = mu[study[k]] + active[k]*theta;
  	se_mean[k] = sd[k] / sqrt(n_arm[k]);
    mean[k] ~ normal(pred_mean[k], se_mean[k]);
  }
}
"

stanmod <- cmdstan_model(write_stan_file(stancode), 
                         compile = TRUE)

stanfit <- stanmod$sample(data=standata,
                          iter_warmup=1000,
                          iter_sampling=10000,
                          chains = 2,
                          parallel_chains = 2,
                          refresh=1000,
                          seed=4390)

print(stanfit$summary(), n=100)
standraws <- stanfit$draws(format="draws_matrix")

print(posterior::summarise_draws(standraws,
	                       mean,
	                       ~quantile(.x, probs=c(0.025,
	                       	                     0.5,
	                       	                     0.975))),
      n=100)




# ------------------ CE/FE, arm-based, equivalent alternative code --------------------

standata <- list(m = m,
	             mean1 = sim_normal_ce$mean1,
	             sd1 = sim_normal_ce$sd1,
	             n1 = sim_normal_ce$n1,
	             mean2 = sim_normal_ce$mean2,
	             sd2 = sim_normal_ce$sd2,
	             n2 = sim_normal_ce$n2)

stancode <- "
data {
  int m;
  array[m] real mean1;
  array[m] real sd1;
  array[m] real n1;
  array[m] real mean2;
  array[m] real sd2;
  array[m] real n2;
}
parameters {
  array[m] real mu;
  real theta;
}
model {
  array[m] real se_mean1;
  array[m] real se_mean2;
  mu ~ normal(8, 100);
  theta ~ normal(0, 3);
  for(j in 1:m) {
  	se_mean1[j] = sd1[j] / sqrt(n1[j]);
  	se_mean2[j] = sd2[j] / sqrt(n2[j]);
  	mean1[j] ~ normal(mu[j], se_mean1[j]);
  	mean2[j] ~ normal(mu[j] + theta, se_mean2[j]);
  }
}
"

stanmod <- cmdstan_model(write_stan_file(stancode), 
                         compile = TRUE)

stanfit <- stanmod$sample(data=standata,
                          iter_warmup=1000,
                          iter_sampling=10000,
                          chains = 2,
                          parallel_chains = 2,
                          refresh=1000,
                          seed=2460)

print(stanfit$summary(), n=100)
standraws <- stanfit$draws(format="draws_matrix")

print(posterior::summarise_draws(standraws,
	                       mean,
	                       ~quantile(.x, probs=c(0.025,
	                       	                     0.5,
	                       	                     0.975))),
      n=100)

#stan, #repo, #stan-repo, #cmdstanr


Discover more from The Bayesian Meta-Analysis Network

Subscribe to get the latest posts sent to your email.

Leave a Reply

Discover more from The Bayesian Meta-Analysis Network

Subscribe now to keep reading and get access to the full archive.

Continue reading