8  Bayesian Analysis of Simulated RCT with Two Endpoints

Code
# Simulate a single trial with sample size n
sim <- function(n) {
    trt  <- c(rep('A', n / 2), rep('B', n / 2))
    sbp0 <- rnorm(n, 140, 7)
    sbp  <- sbp0 - 5 - 3 * (trt == 'B') + rnorm(n, sd=7)
    logit <- -2.6 + log(0.8) * (trt == 'B') + 0.05 * (sbp0 - 140) +
             0.05 * (sbp - 130)
    ds   <- ifelse(runif(n) <= plogis(logit), 1, 0)
    data.frame(trt, sbp0, sbp, ds)
}
Code
set.seed(1)
d <- sim(n=40000)
require(rms)
ols(sbp ~ sbp0 + trt, data=d)

Linear Regression Model

ols(formula = sbp ~ sbp0 + trt, data = d)
Model Likelihood
Ratio Test
Discrimination
Indexes
Obs 40000 LR χ2 28287.46 R2 0.507
σ 7.0270 d.f. 2 R2adj 0.507
d.f. 39997 Pr(>χ2) 0.0000 g 8.038

Residuals

       Min         1Q     Median         3Q        Max 
-3.012e+01 -4.736e+00 -7.268e-04  4.737e+00  2.748e+01 
β S.E. t Pr(>|t|)
Intercept  -4.2576  0.7026 -6.06 <0.0001
sbp0   0.9944  0.0050 198.58 <0.0001
trt=B  -2.9380  0.0703 -41.81 <0.0001
Code
lrm(ds ~ sbp0 + sbp + trt, data=d)

Logistic Regression Model

lrm(formula = ds ~ sbp0 + sbp + trt, data = d)
Model Likelihood
Ratio Test
Discrimination
Indexes
Rank Discrim.
Indexes
Obs 40000 LR χ2 2142.26 R2 0.113 C 0.717
0 36293 d.f. 3 R23,40000 0.052 Dxy 0.434
1 3707 Pr(>χ2) <0.0001 R23,10090.4 0.191 γ 0.435
max |∂log L/∂β| 2×10-9 Brier 0.079 τa 0.073
β S.E. Wald Z Pr(>|Z|)
Intercept  -16.6218  0.3847 -43.21 <0.0001
sbp0   0.0517  0.0036 14.40 <0.0001
sbp   0.0522  0.0026 20.32 <0.0001
trt=B   -0.2594  0.0367 -7.07 <0.0001
Code
f <- lrm(ds ~ sbp0 + trt, data=d)
print(f)

Logistic Regression Model

lrm(formula = ds ~ sbp0 + trt, data = d)
Model Likelihood
Ratio Test
Discrimination
Indexes
Rank Discrim.
Indexes
Obs 40000 LR χ2 1719.66 R2 0.091 C 0.697
0 36293 d.f. 2 R22,40000 0.042 Dxy 0.394
1 3707 Pr(>χ2) <0.0001 R22,10090.4 0.157 γ 0.394
max |∂log L/∂β| 3×10-11 Brier 0.080 τa 0.066
β S.E. Wald Z Pr(>|Z|)
Intercept  -16.5559  0.3807 -43.49 <0.0001
sbp0   0.1019  0.0026 38.49 <0.0001
trt=B   -0.4045  0.0358 -11.31 <0.0001
Code
btrt <- coef(f)['trt=B']
Code
set.seed(7)
d <- sim(n=1500)
re <- round(cor(d$ds, d$sbp), 3)
dd <- datadist(d); options(datadist='dd')
f <- ols(sbp ~ sbp0 + trt, data=d)
f

Linear Regression Model

ols(formula = sbp ~ sbp0 + trt, data = d)
Model Likelihood
Ratio Test
Discrimination
Indexes
Obs 1500 LR χ2 1087.47 R2 0.516
σ 7.0059 d.f. 2 R2adj 0.515
d.f. 1497 Pr(>χ2) 0.0000 g 8.177

Residuals

    Min      1Q  Median      3Q     Max 
-24.580  -4.619   0.154   4.241  24.293 
β S.E. t Pr(>|t|)
Intercept  -5.4630  3.6567 -1.49 0.1354
sbp0   1.0048  0.0260 38.62 <0.0001
trt=B  -3.1831  0.3620 -8.79 <0.0001
Code
summary(f)
Effects   Response: sbp
Low High Δ Effect S.E. Lower 0.95 Upper 0.95
sbp0 135.2 144.5 9.296 9.341 0.2419 8.867 9.816
trt --- B:A 1.0 2.0 -3.183 0.3620 -3.893 -2.473
Code
f <- lrm(ds  ~ sbp0 + trt, data=d)
f

Logistic Regression Model

lrm(formula = ds ~ sbp0 + trt, data = d)
Model Likelihood
Ratio Test
Discrimination
Indexes
Rank Discrim.
Indexes
Obs 1500 LR χ2 53.82 R2 0.075 C 0.670
0 1357 d.f. 2 R22,1500 0.034 Dxy 0.339
1 143 Pr(>χ2) <0.0001 R22,388.1 0.125 γ 0.340
max |∂log L/∂β| 2×10-5 Brier 0.083 τa 0.059
β S.E. Wald Z Pr(>|Z|)
Intercept  -15.1566  1.8989 -7.98 <0.0001
sbp0   0.0920  0.0132 6.96 <0.0001
trt=B   -0.2715  0.1807 -1.50 0.1330
Code
summary(f)
Effects   Response: ds
Low High Δ Effect S.E. Lower 0.95 Upper 0.95
sbp0 135.2 144.5 9.296 0.8549 0.1229 0.6141 1.09600
Odds Ratio 135.2 144.5 9.296 2.3510 1.8480 2.99100
trt --- B:A 1.0 2.0 -0.2715 0.1807 -0.6256 0.08268
Odds Ratio 1.0 2.0 0.7623 0.5349 1.08600

Bayesian Analysis

  • Uses Stan and R rstan package
  • Regression coefficients have multivariate normal prior with means zero
  • SD of priors computed so that
    • P(SBP reduction > 10mmHg) = 0.1
    • P(OR < 0.5) = 0.05
  • Residual SD has flat prior on the positive line
  • Stan No-U-turn sampler in 4 chains with 5,000 post-warmup iterations
  • 20,000 paired posterior draws, took 10m on 4-course machine
Code
require(rstan)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())  # 4 CPUs used

model <- "
data {
    int       n;
    vector[n] x;
    real      y1[n];
    int       y2[n];
    vector[n] treat;
    vector[2] Zero;
    vector<lower=0>[2] sigma_b;
}

parameters {
    vector[2] alpha;
    vector[2] beta;
    vector[2] mu;
    real<lower=0> sigma_y;
    cholesky_factor_corr[2] L_b;
}

transformed parameters {
    vector[n]     theta1;
    vector[n]     theta2;
        
    theta1 = mu[1] + alpha[1]*x + beta[1]*treat;
    theta2 = mu[2] + alpha[2]*x + beta[2]*treat;
}

model {
    beta ~ multi_normal_cholesky(Zero, diag_pre_multiply(sigma_b, L_b)); 
    L_b ~ lkj_corr_cholesky(1);  // correlation matrix for reg. parameters, LKJ prior
        
    y1 ~ normal(theta1, sigma_y);
    y2 ~ bernoulli_logit(theta2);
}

generated quantities {
  matrix[2,2] Omega;
  matrix[2,2] Sigma;
  Omega = multiply_lower_tri_self_transpose(L_b);
  Sigma = quad_form_diag(Omega, sigma_b); 
}
"

s <- stan(model_code = model, iter=10000, seed=7,
          data=with(d, list(x=sbp0, treat=1*(trt == 'B'),
                            y1=sbp, y2=ds,
                            sigma_b=c(-10 / qnorm(0.1),
                                      log(0.5) / qnorm(0.05)),
                            Zero=c(0,0), n=nrow(d))))
s
betas <- extract(s, pars='beta')$beta
Inference for Stan model: cd388c9aa01c1c78a612ddca57e2c5c6.
4 chains, each with iter=10000; warmup=5000; thin=1; 
post-warmup draws per chain=5000, total post-warmup draws=20000.

             mean se_mean     sd     2.5%    97.5% n_eff   Rhat
alpha[1]   1.0047  0.0002 0.0259   0.9539   1.0557 11616 1.0006
alpha[2]   0.0923  0.0001 0.0133   0.0666   0.1182 11596 1.0006
beta[1]   -3.1780  0.0027 0.3607  -3.8797  -2.4695 18285 1.0001
beta[2]   -0.2129  0.0012 0.1596  -0.5325   0.1026 16387 1.0001
mu[1]     -5.4464  0.0339 3.6436 -12.6012   1.6968 11581 1.0006
mu[2]    -15.2366  0.0177 1.9063 -18.9511 -11.5603 11583 1.0006

Samples were drawn using NUTS(diag_e) at Mon Dec 19 13:51:42 2016.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
  • Posterior densities obtained using kernel density estimators
Code
spar(mfrow=c(1,2), bty='l')
b1 <- betas[, 1]
b2 <- betas[, 2]
plot(density(b1), type='l', xlab='B-A SBP Difference', main='')
plot(density(exp(b2)), type='l', xlab='B:A OR for Death or Stroke', main='')

Posterior densities for treatment effects on two outcomes
  • Posterior means, medians, and 0.95 credible intervals are computed simply by using ordinary samples estimates on the posterior draws
Code
ci1 <- quantile(b1, c(0.025, 0.975))
ci2 <- quantile(b2, c(0.025, 0.975))
data.frame(Mean=c(mean(b1), mean(b2)), Median=c(median(b1), median(b2)),
           Lower=c(ci1[1], ci2[1]), Upper=c(ci2[1], ci2[2]), row.names=c('b1', 'b2'))
        Mean     Median      Lower      Upper
b1 -3.178025 -3.1782677 -3.8796797 -0.5324795
b2 -0.212865 -0.2117236 -0.5324795  0.1025766
  • The posterior mean and median log odds ratio is less impressive than the frequentist maximum likelihood estimate of -0.271 because of the skeptical prior
  • Variety of posterior probabilities easily calculated
  • Non-inferiority is defined by SBP increase less than 1mmHg and DS increased by an odds ratio less than 1.05
  • Similarity with respect to effect on DS is an odds ratio between 0.85 and 1/0.85
  • Final calculation is mean number of targets achieved when the targets are any SBP reduction and any reduction of odds of DS
Code
cat('Prob(SBP reduced at least 2 mmHg) = ', rmean(b1 < -2), '\n',
    'Prob(B:A OR for DS < 1)           = ', rmean(b2 < 0),  '\n',
    'Prob(SBP reduced ≥ 2 and OR < 1)  = ', rmean(b1 < -2 & b2 < 0), '\n',
    'Prob(SBP reduced ≥ 2 or  OR < 1)  = ', rmean(b1 < -2 | b2 < 0), '\n',
    'Prob(Non-inferiority)             = ', rmean(b1 < 1 & b2 < log(1.05)),'\n',
    'Prob(DS similar)                  = ', rmean(exp(b2) > 0.85 &
                                                  exp(b2) < 1 / 0.85), '\n',
    'E(# targets achieved)             = ', rmean((b1 < 0) + (b2 < 0)), '\n',
    sep='')
Prob(SBP reduced at least 2 mmHg) = 0.999
Prob(B:A OR for DS < 1)           = 0.908
Prob(SBP reduced ≥ 2 and OR < 1)  = 0.908
Prob(SBP reduced ≥ 2 or  OR < 1)  = 1.000
Prob(Non-inferiority)             = 0.948
Prob(DS similar)                  = 0.363
E(# targets achieved)             = 1.908
  • Correlation between the posterior draws is 0.024
  • 6 probabilities above are forward probabilities directly addressing clinical questions
  • Frequentist solutions for these questions are highly indirect or unavailable