Learn Pairwise Models for Binary Outcomes in R with Stan

Here is R code you can use to learn about the basic pairwise models for binary outcomes, with simple simulated data. There are combinations of {common effect, random effects} x {contrast-based, arm-based} with examples of heuristic t likelihood and alternative specification of arm likelihoods. On the end, we show normal equivalents with reasonably large n per arm. This all uses Stan via cmdstanr (which is Robert’s favourite software for Bayesian models).
In keeping with the book, we want this code to be as human-readable as possible, so only base R is used, it may not be the most efficient R, and it certainly is not the most fashionable R. Likewise, you will see more arrays and looping within the Stan code than some people might like. This is because we find that it helps most learners to write it that way. Once you are familiar with these models and Stan, you should write your model code in whatever style you find easiest to read, understand and debug.
# full examples of pairwise BMA models in cmdstanr with simulated binary data

library(cmdstanr)
library(bayesplot)
library(meta)



# ----------------- binary outcome data, CE ---------------
set.seed(9876)

risk_ctl <- 0.3
odds_ctl <- risk_ctl/(1-risk_ctl)
lor_ctl <- log(odds_ctl)
true_or <- 0.9
true_lor <- log(true_or)
odds_int <- (risk_ctl/(1-risk_ctl)) * true_or
risk_int <- odds_int / (1+ odds_int)
print(paste0("Control risk: ", risk_ctl))
print(paste0("Control odds: ", odds_ctl))
print(paste0("Control log odds: ", lor_ctl))
print(paste0("Intervention risk: ", risk_int))
print(paste0("True log odds ratio: ", true_lor))
print(paste0("True odds ratio: ", exp(true_lor)))
arm_n <- 80
m <- 25

n1 <- n2 <- rep(arm_n, m)
sim_binary_ce <- data.frame(n1=n1, n2=n2)

sim_binary_ce$d1 <- rbinom(m, size=arm_n, prob=risk_ctl)
sim_binary_ce$d2 <- rbinom(m, size=arm_n, prob=risk_int)
sim_binary_ce$h1 <- sim_binary_ce$n1 - sim_binary_ce$d1
sim_binary_ce$h2 <- sim_binary_ce$n2 - sim_binary_ce$d2
sim_binary_ce$study <- 1:m
sim_binary_ce$study_or <- (sim_binary_ce$d2/sim_binary_ce$h2) / 
                            (sim_binary_ce$d1/sim_binary_ce$h1)
sim_binary_ce$logor = log(sim_binary_ce$study_or)
sim_binary_ce$se2 = (1/sim_binary_ce$d2) + 
                    (1/sim_binary_ce$h2) + 
                    (1/sim_binary_ce$d1) + 
                    (1/sim_binary_ce$h1)



# ------------ frequentist CE MA ---------------

ce_freq <- metabin(d2, n2, d1, n1, 
	               data=sim_binary_ce, 
	               sm="OR",
	               common=TRUE, random=FALSE,
	               method="Inverse")
print(summary(ce_freq))




# --------------- CE, binary, contrast-based ----------------

standata <- list(m = m,
	             logor = sim_binary_ce$logor,
	             se = sqrt(sim_binary_ce$se2))

stancode <- "
data {
  int m;
  array[m] real logor;
  array[m] real<lower=0> se;
}
parameters {
  real theta;
}
transformed parameters {
	real or;
	or = exp(theta);
}
model {
  theta ~ normal(0, 3);
  for(j in 1:m) {
    logor[j] ~ normal(theta, se[j]);
  }
}
"

stanmod <- cmdstan_model(write_stan_file(stancode), 
                         compile = TRUE)

stanfit <- stanmod$sample(data=standata,
                          iter_warmup=1000,
                          iter_sampling=10000,
                          chains = 2,
                          parallel_chains = 2,
                          refresh=1000,
                          seed=4790)

print(stanfit$summary())
standraws <- stanfit$draws(format="draws_matrix")

print(posterior::summarise_draws(standraws,
	                       mean,
	                       ~quantile(.x, probs=c(0.025,
	                       	                     0.5,
	                       	                     0.975))))

#mcmc_trace(standraws, pars=c('theta', 
#                             'or'))




# ------------------ CE/FE, binary, arm-based --------------------

sim_binary_ce_arm <- data.frame(d=c(sim_binary_ce$d1, sim_binary_ce$d2),
	                            n=c(sim_binary_ce$n1, sim_binary_ce$n2),
	                            active=rep(c(0,1), each=m),
	                            study=rep((1:m),2))
standata <- list(m = m,
	             K = 2*m,
	             d = sim_binary_ce_arm$d,
	             n = sim_binary_ce_arm$n,
	             active = sim_binary_ce_arm$active,
	             study = sim_binary_ce_arm$study)

stancode <- "
data {
  int m;
  int K;
  array[K] int d;
  array[K] int n;
  array[K] int active;
  array[K] int study;
}
parameters {
  real theta;
  array[m] real mu;
}
transformed parameters {
	real or;
	or = exp(theta);
}
model {
  array[K] real pred_logodds;
  mu ~ normal(-0.5, 1);
  theta ~ normal(0, 3);
  for(k in 1:K) {
  	pred_logodds[k] = mu[study[k]] + active[k]*theta;
    d[k] ~ binomial_logit(n[k], pred_logodds[k]);
  }
}
"

stanmod <- cmdstan_model(write_stan_file(stancode), 
                         compile = TRUE)

stanfit <- stanmod$sample(data=standata,
                          iter_warmup=1000,
                          iter_sampling=10000,
                          chains = 2,
                          parallel_chains = 2,
                          refresh=1000,
                          seed=4390)

print(stanfit$summary(), n=100)
standraws <- stanfit$draws(format="draws_matrix")

print(posterior::summarise_draws(standraws,
	                       mean,
	                       ~quantile(.x, probs=c(0.025,
	                       	                     0.5,
	                       	                     0.975))),
      n=100)




# ------------------ CE/FE, arm-based, equivalent alternative code --------------------

standata <- list(m = m,
	             d1 = sim_binary_ce$d1,
	             n1 = sim_binary_ce$n1,
	             d2 = sim_binary_ce$d2,
	             n2 = sim_binary_ce$n2)

stancode <- "
data {
  int m;
  array[m] int d1;
  array[m] int n1;
  array[m] int d2;
  array[m] int n2;
}
parameters {
  array[m] real mu;
  real theta;
}
transformed parameters {
	real or;
	or = exp(theta);
}
model {
  mu ~ normal(0, 100);
  theta ~ normal(0, 3);
  for(j in 1:m) {
  	d1[j] ~ binomial_logit(n1[j], mu[j]);
  	d2[j] ~ binomial_logit(n2[j], mu[j]+theta);
  }
}
"

stanmod <- cmdstan_model(write_stan_file(stancode), 
                         compile = TRUE)

stanfit <- stanmod$sample(data=standata,
                          iter_warmup=1000,
                          iter_sampling=10000,
                          chains = 2,
                          parallel_chains = 2,
                          refresh=1000,
                          seed=2460)

print(stanfit$summary(), n=100)
standraws <- stanfit$draws(format="draws_matrix")

print(posterior::summarise_draws(standraws,
	                       mean,
	                       ~quantile(.x, probs=c(0.025,
	                       	                     0.5,
	                       	                     0.975))),
      n=100)





# ----------------- binary outcome data, RE ---------------
set.seed(4888)

risk_ctl <- 0.3
odds_ctl <- risk_ctl/(1-risk_ctl)
logodds_ctl <- log(odds_ctl)
true_or <- 0.9
true_lor <- log(true_or)
odds_int <- (risk_ctl/(1-risk_ctl)) * true_or
risk_int <- odds_int / (1+ odds_int)
print(paste0("Control risk: ", risk_ctl))
print(paste0("Control odds: ", odds_ctl))
print(paste0("Control log odds: ", logodds_ctl))
print(paste0("True log odds ratio: ", true_lor))
print(paste0("True odds ratio: ", exp(true_lor)))
true_tau <- 0.15
print(paste0("True tau: ", true_tau))
arm_n <- 80
m <- 25
theta_j <- rnorm(m, true_lor, true_tau)
logodds_int <- logodds_ctl + theta_j
risk_int <- 1 / (1 + exp(-logodds_int))

n1 <- n2 <- rep(arm_n, m)
sim_binary_re <- data.frame(n1=n1, n2=n2, theta_j=theta_j)

sim_binary_re$d1 <- rbinom(m, size=arm_n, prob=risk_ctl)
sim_binary_re$d2 <- rbinom(m, size=arm_n, prob=risk_int)
sim_binary_re$h1 <- sim_binary_re$n1 - sim_binary_re$d1
sim_binary_re$h2 <- sim_binary_re$n2 - sim_binary_re$d2
sim_binary_re$study <- 1:m
sim_binary_re$study_or <- (sim_binary_re$d2/sim_binary_re$h2) / 
                            (sim_binary_re$d1/sim_binary_re$h1)
sim_binary_re$logor = log(sim_binary_re$study_or)
sim_binary_re$se2 = (1/sim_binary_re$d2) + 
                    (1/sim_binary_re$h2) + 
                    (1/sim_binary_re$d1) + 
                    (1/sim_binary_re$h1)



# ------------ frequentist RE MA ---------------

re_freq <- metabin(d2, n2, d1, n1, 
	               data=sim_binary_re, 
	               sm="OR",
	               common=FALSE, random=TRUE,
	               method="Inverse",
	               method.tau="DL")
print(summary(re_freq))




# --------------- RE, binary, contrast-based ----------------

standata <- list(m = m,
	             logor = sim_binary_re$logor,
	             se = sqrt(sim_binary_re$se2))

stancode <- "
data {
  int m;
  array[m] real logor;
  array[m] real<lower=0> se;
}
parameters {
  real theta;
  array[m] real u;
  real<lower=0> tau;
}
transformed parameters {
	real or;
	or = exp(theta);
}
model {
  theta ~ normal(0, 3);
  tau ~ gamma(1, 1);
  for(j in 1:m) {
  	u[j] ~ normal(0, tau);
    logor[j] ~ normal(theta + u[j], se[j]);
  }
}
"

stanmod <- cmdstan_model(write_stan_file(stancode), 
                         compile = TRUE)

stanfit <- stanmod$sample(data=standata,
                          iter_warmup=1000,
                          iter_sampling=10000,
                          chains = 2,
                          parallel_chains = 2,
                          refresh=1000,
                          seed=9937)

print(stanfit$summary(), n=100)
standraws <- stanfit$draws(format="draws_matrix")

print(posterior::summarise_draws(standraws,
	                       mean,
	                       ~quantile(.x, probs=c(0.025,
	                       	                     0.5,
	                       	                     0.975))),
      n=100)




# ------------------ RE/FE, binary, arm-based, equivalent alternative code --------------------

standata <- list(m = m,
	             d1 = sim_binary_re$d1,
	             n1 = sim_binary_re$n1,
	             d2 = sim_binary_re$d2,
	             n2 = sim_binary_re$n2)

stancode <- "
data {
  int m;
  array[m] int d1;
  array[m] int n1;
  array[m] int d2;
  array[m] int n2;
}
parameters {
  array[m] real mu;
  real theta;
  array[m] real u;
  real<lower=0> tau;
}
transformed parameters {
	real or;
	or = exp(theta);
}
model {
  mu ~ normal(0, 100);
  theta ~ normal(0, 3);
  tau ~ gamma(1, 1);
  for(j in 1:m) {
  	u[j] ~ normal(0, tau);
  	d1[j] ~ binomial_logit(n1[j], mu[j]);
  	d2[j] ~ binomial_logit(n2[j], mu[j]+theta+u[j]);
  }
}
"

stanmod <- cmdstan_model(write_stan_file(stancode), 
                         compile = TRUE)

stanfit <- stanmod$sample(data=standata,
                          iter_warmup=1000,
                          iter_sampling=10000,
                          chains = 2,
                          parallel_chains = 2,
                          refresh=1000,
                          seed=2460)

print(stanfit$summary(), n=100)
standraws <- stanfit$draws(format="draws_matrix")

print(posterior::summarise_draws(standraws,
	                       mean,
	                       ~quantile(.x, probs=c(0.025,
	                       	                     0.5,
	                       	                     0.975))),
      n=100)




# ---------------- simulated normal data ----------------

set.seed(5478)

mean_ctl <- 10
true_md <- (-1)
sd_ctl <- 1
mean_int <- mean_ctl + true_md
sd_int <- 0.8
arm_n <- 80
m <- 25

n1 <- n2 <- rep(arm_n, m)
sim_normal_ce <- data.frame(n1=n1, n2=n2)
for(j in 1:m) {
	deleteme1 <- rnorm(m, mean_ctl, sd_ctl)
	deleteme2 <- rnorm(m, mean_int, sd_int)
	sim_normal_ce$mean1[j] <- mean(deleteme1)
	sim_normal_ce$sd1[j] <- sd(deleteme1)
	sim_normal_ce$mean2[j] <- mean(deleteme2)
	sim_normal_ce$sd2[j] <- sd(deleteme2)
}
sim_normal_ce$study <- 1:m
sim_normal_ce$study_md <- sim_normal_ce$mean2 - sim_normal_ce$mean1
sim_normal_ce$pooled_var <- ((sim_normal_ce$n1-1)*(sim_normal_ce$sd1^2) + 
                              (sim_normal_ce$n2-1)*(sim_normal_ce$sd2^2)) / 
                            (sim_normal_ce$n1+sim_normal_ce$n2-2)
sim_normal_ce$var_md <- (sim_normal_ce$n1+sim_normal_ce$n2) * 
                         sim_normal_ce$pooled_var / 
                         (sim_normal_ce$n1*sim_normal_ce$n2)
sim_normal_ce$se_md <- sqrt(sim_normal_ce$var_md)




# --------------- frequentist contrast-based CE MA --------------

cema <- metacont(n2, mean2, sd2, 
                 n1, mean1, sd1,
                 data=sim_normal_ce,
                 studlab=study,
                 common=TRUE, random=FALSE,
                 sm="MD",
                 pooledvar=TRUE)

summary(cema)                 
plot(cema)

sim_normal_ce_arm <- data.frame(mean=c(sim_normal_ce$mean1, sim_normal_ce$mean2),
	                            sd=c(sim_normal_ce$sd1, sim_normal_ce$sd2),
	                            n_arm=c(sim_normal_ce$n1, sim_normal_ce$n2),
	                            active=rep(c(0,1), each=m),
	                            study=rep((1:m),2))
standata <- list(m = m,
	             K = 2*m,
	             n_arm = sim_normal_ce_arm$n_arm,
	             mean = sim_normal_ce_arm$mean,
	             sd = sim_normal_ce_arm$sd,
	             active = sim_normal_ce_arm$active,
	             study = sim_normal_ce_arm$study)

stancode <- "
data {
  int m;
  int K;
  array[K] real mean;
  array[K] real sd;
  array[K] int n_arm;
  array[K] real active;
  array[K] int study;
}
parameters {
  real theta;
  array[m] real mu;
}
model {
  array[K] real pred_mean;
  array[K] real se_mean;
  mu ~ normal(8, 100);
  theta ~ normal(0, 3);
  for(k in 1:K) {
  	pred_mean[k] = mu[study[k]] + active[k]*theta;
  	se_mean[k] = sd[k] / sqrt(n_arm[k]);
    mean[k] ~ normal(pred_mean[k], se_mean[k]);
  }
}
"

stanmod <- cmdstan_model(write_stan_file(stancode), 
                         compile = TRUE)

stanfit <- stanmod$sample(data=standata,
                          iter_warmup=1000,
                          iter_sampling=10000,
                          chains = 2,
                          parallel_chains = 2,
                          refresh=1000,
                          seed=4390)

print(stanfit$summary(), n=100)
standraws <- stanfit$draws(format="draws_matrix")

print(posterior::summarise_draws(standraws,
	                       mean,
	                       ~quantile(.x, probs=c(0.025,
	                       	                     0.5,
	                       	                     0.975))),
      n=100)




# ------------------ CE/FE, arm-based, equivalent alternative code --------------------

standata <- list(m = m,
	             mean1 = sim_normal_ce$mean1,
	             sd1 = sim_normal_ce$sd1,
	             n1 = sim_normal_ce$n1,
	             mean2 = sim_normal_ce$mean2,
	             sd2 = sim_normal_ce$sd2,
	             n2 = sim_normal_ce$n2)

stancode <- "
data {
  int m;
  array[m] real mean1;
  array[m] real sd1;
  array[m] real n1;
  array[m] real mean2;
  array[m] real sd2;
  array[m] real n2;
}
parameters {
  array[m] real mu;
  real theta;
}
model {
  array[m] real se_mean1;
  array[m] real se_mean2;
  mu ~ normal(8, 100);
  theta ~ normal(0, 3);
  for(j in 1:m) {
  	se_mean1[j] = sd1[j] / sqrt(n1[j]);
  	se_mean2[j] = sd2[j] / sqrt(n2[j]);
  	mean1[j] ~ normal(mu[j], se_mean1[j]);
  	mean2[j] ~ normal(mu[j] + theta, se_mean2[j]);
  }
}
"

stanmod <- cmdstan_model(write_stan_file(stancode), 
                         compile = TRUE)

stanfit <- stanmod$sample(data=standata,
                          iter_warmup=1000,
                          iter_sampling=10000,
                          chains = 2,
                          parallel_chains = 2,
                          refresh=1000,
                          seed=2460)

print(stanfit$summary(), n=100)
standraws <- stanfit$draws(format="draws_matrix")

print(posterior::summarise_draws(standraws,
	                       mean,
	                       ~quantile(.x, probs=c(0.025,
	                       	                     0.5,
	                       	                     0.975))),
      n=100)
#stan, #repo, #stan-repo, #cmdstanr
All binary pairwise models in cmdstanr

Share this:

Like this:

Discover more from The Bayesian Meta-Analysis Network

Leave a ReplyCancel reply

Discover more from The Bayesian Meta-Analysis Network