bbr rmsb models
Objective
This short vignette is based on Section 6-12 of Prof Frank Harrell’s incredibly useful Biostatistics and Biomedical Research (bbr). Its goal is to re-fit the brms
Bernoulli models with Harrell’s rmsb
package.
Dataset
## Creating the dataset
set.seed(10)
<- factor(c(rep('emergency', 25), rep('other', 111)), c('other', 'emergency'))
priority <- c(rep(0, 19), rep(1, 6), rep(0, 100), rep(1, 11))
death <- data.frame(priority, death)
d <- runif(length(priority))
randomUniform $random <- ifelse(priority == 'emergency', randomUniform < 1/3,
d> 1/3) * 1
randomUniform <- datadist(d); options(datadist='dd')
dd
<- describe(d, descript="Section 6-12 database")
d_describe html(d_describe, size=100, scroll=TRUE)
3 Variables 136 Observations
priority
n | missing | distinct |
---|---|---|
136 | 0 | 2 |
Value other emergency Frequency 111 25 Proportion 0.816 0.184
death
n | missing | distinct | Info | Sum | Mean | Gmd |
---|---|---|---|---|---|---|
136 | 0 | 2 | 0.328 | 17 | 0.125 | 0.2204 |
random
n | missing | distinct | Info | Sum | Mean | Gmd |
---|---|---|---|---|---|---|
136 | 0 | 2 | 0.727 | 80 | 0.5882 | 0.488 |
Prior Specification
normsolve
Modify bbr’s useful normsolve
function which solves for the SD (of a normal distribution) such that the tail (area) probability beyond myvalue
is myprob
<- function (myvalue, myprob = 0.025, mu=0, convert2 =c("none", "log", "logodds")){
normsolve ## modified bbr's normsolve()
## normsolve() solves for SD (of a normal distribution) such that the tail (area) probability beyond myvalue is myprob
## myprob = tail area probability
## convert2() converts mu and myvalue to the scale of the normal distribution
## If mu and myvalue are OR values, use "log" to convert them to log(OR)
## If mu and myvalue are prob values, use "logodds" to convert them to log(odds)
if (missing(convert2)) {convert2 <- "none"}
if (convert2=="log" & mu==0) {
cat ("Note: mu cannot be 0 so setting an arbitrary value of 1.0")
= 1
mu
}<- switch(convert2,
mysd "none" = (myvalue-mu)/qnorm(1-myprob),
"log" = (log(myvalue)- log(mu))/qnorm(1-myprob),
"logodds" = (qlogis(myvalue)- qlogis(mu))/qnorm(1-myprob) )
return(mysd)
}
\(\beta_0\)
Specify a normal distribution on the log odds scale such that it corresponds to a mean proportion of 0.05 with a tail probability of 10% for proportions exceeding 0.20
<- normsolve(mu = 0.05,
(intercept_sd myvalue = 0.20,
myprob = 0.10,
convert2 = "logodds"))
## [1] 1.215827
\(\beta_{priority}\)
Specify a normal distribution on the log OR scale such that it corresponds to a mean OR of 1.0 with a 10% prob that OR>3
<- normsolve(mu = 1,
(priority_sd myvalue = 3,
myprob = 0.10,
convert2 = "log"))
## [1] 0.8572517
\(\beta_{random}\)
Specify a normal distribution on the log OR scale such that it corresponds to a mean OR of 1.0 with a 5% prob that OR>1.25
<- normsolve(mu = 1,
(random_sd myvalue = 1.25,
myprob = 0.05,
convert2 = "log"))
## [1] 0.1356616
Model Fitting
brms model
Fit a brms
logistic regression model and measure model fitting time. Model fitting took ~160 secs on my 5-year old laptop.
<- c(
p prior_string(glue('normal(-2.944, {intercept_sd})'),class='Intercept') ,
prior_string(glue('normal(0, {priority_sd})'), class='b', coef='priorityemergency') ,
prior_string(glue('normal(0, {random_sd})'), class='b', coef='random')
)
if(rerun_analysis) {
tic()
<- brm(death ~ priority + random,
mybrm data=d,
prior=p,
family='bernoulli',
seed=121,
refresh=FALSE)
toc() ##165 sec elapsed
saveRDS( mybrm, file= here::here("PYH Bayesian/rmsb/mybrm.Rds"))
else {
}
<- readRDS(here::here("PYH Bayesian/rmsb/mybrm.Rds"))
mybrm
}
mybrm
## Family: bernoulli
## Links: mu = logit
## Formula: death ~ priority + random
## Data: d (Number of observations: 136)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -2.16 0.31 -2.80 -1.59 1.00 3364 2606
## priorityemergency 0.71 0.50 -0.29 1.68 1.00 3088 2598
## random -0.06 0.13 -0.32 0.20 1.00 4325 3037
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
rmsb model
Fit a rmsb
logistic regression model and measure model fitting time. rmsb
took ~60 secs and both packages gave similar regression coefficients.
if(rerun_analysis) {
tic()
<- blrm(death ~ priority + random,
myblrm data=d,
keepsep=('priority|random'),
priorsd = c(priority_sd , random_sd))
toc() ##70.8 sec elapsed
saveRDS( myblrm, file= here::here("PYH Bayesian/rmsb/myblrm.Rds"))
else {
}
<- readRDS(here::here("PYH Bayesian/rmsb/myblrm.Rds"))
myblrm
}
myblrm
## Bayesian Logistic Model
##
## Dirichlet Priors With Concentration Parameter 0.541 for Intercepts
##
## blrm(formula = death ~ priority + random, keepsep = ("priority|random"),
## data = d, priorsd = c(priority_sd, random_sd))
##
## Mixed Calibration/ Discrimination Rank Discrim.
## Discrimination Indexes Indexes Indexes
## Obs136 LOO log L-51.43+/-7.56 g 0.256 [0.029, 0.519] C 0.598 [0.446, 0.659]
## 0 119 LOO IC 102.86+/-15.13 gp 0.033 [0.002, 0.07] Dxy 0.196 [-0.107, 0.319]
## 1 17 Effective p1.78+/-0.36 EV 0.018 [0, 0.066]
## Draws4000 B 0.108 [0.105, 0.112] v 0.111 [0, 0.349]
## Chains4 vp 0.002 [0, 0.007]
## p 2
##
## Mode Beta Mean Beta Median Beta S.E. Lower Upper
## Intercept -2.0752 -2.0778 -2.0712 0.2975 -2.6583 -1.5102
## priority=emergency 0.7144 0.6760 0.6895 0.4845 -0.3001 1.5920
## random -0.0585 -0.0573 -0.0566 0.1307 -0.3061 0.2126
## Pr(Beta>0) Symmetry
## Intercept 0.0000 0.92
## priority=emergency 0.9190 0.95
## random 0.3385 0.94
##
## The following parameters remained separate (where not orthogonalized) during model fitting so that prior distributions could be focused explicitly on them: priority=emergency, random
Model Checking
brms model
Run bayesplot
::pp_check to examine if our Bayesian models capture the data-generating process
pp_check(mybrm, "bars", nsamples = 1000) + ## bayesplot::pp_check()
facet_wrap (. ~"brms model") +
theme_bw()
rmsb model
Write a function to mimick bayesplot::pp_check
and apply it to our rmsb
model
<- function (modblrm, nsamples = 1000){
ppcheck_rmsb
## ppcheck examines if data-generating process is captured by blrm model
## this is done by comparing observed and simulated Y
## mimicking bayesplot::pp_check
## TODO: extend this to handle rmsb models fitted on a short (say, <10 levels) ordinal outcome
if (!any(class(modblrm) %in% Cs(blrm, rmsb))) {
stop("rms object must be of class blrm or rmsb",
call. = FALSE)
}
## Y observed
<-
Yobs $freq %>%
modblrm%>%
enframe mutate(name = as.integer(name),
yobs = "Y_obs")
## Y rep
<-
Yrep predict(modblrm,
eval(modblrm$call$data),
fun=plogis,
funint=FALSE, posterior.summary="all") %>%
%>%
as_tibble slice_sample(n = nsamples) %>%
mutate(draw = 1:n()) %>%
pivot_longer(-draw) %>%
mutate(prediction = map_int(value, ~ rbinom(1, size = 1, prob = .x))) %>%
count(draw, prediction) %>% ## draw = 1 data-set
complete(draw, prediction, fill = list(n = 0)) %>%
group_by(prediction) %>%
median_hdi(n) %>%
clean_names() %>%
mutate(
yemp = "Y_rep",
prediction = prediction)
## Generate the plot
<-
ppcheck_barplot ggplot(Yobs, aes(x = name, y = value)) +
geom_col(aes(fill=yobs)) +
scale_fill_manual(values = 'lightsteelblue') +
geom_pointrange(size = 0.7,
data = Yrep, aes(x=prediction, y = n, ymin = lower, ymax = upper, color = yemp)) +
scale_color_manual(values = 'black') +
theme_classic() +
labs(fill = "", color = "", y = "Count", x = str_glue({modblrm$terms[[2]]})) +
theme_minimal()
return(ppcheck_barplot)
}
ppcheck_rmsb (myblrm) +
facet_wrap (. ~"rmsb model") +
theme_bw()
Visualization
Use the impressive tidybayes to visualize the posterior distribution of the exponentiated \(\beta_{emergency}\) and superimpose over it its prior distribution. Shade the posterior density plot based on “watershed threshold” such as OR >1.0 or other values. (Bayesian methods do not place undue emphasis on “null” values.)
<- myblrm %>% plot("priority=emergency") %>% pluck("data")
post_emergency
ggplot(data = data.frame(x = c(0.01,10)), aes(x)) +
scale_x_log10('Odds Ratio', breaks=c(0.1, 0.2,0.33,1,3, 5, 10)) +
# posterior density plot
stat_halfeye(
data=data.frame(x=exp(post_emergency$draws)),
normalize = "none",
slab_size = 0.4,slab_color = "black", alpha=0.5,
aes(fill = stat(x > 0)), ## 0 on the logged scale = OR of 1
color= NA) +
## prior distribution
## lnorm dist: normal distribution on the log OR scale
stat_dist_halfeye(
orientation = "horizontal",
linetype = 2,
slab_size = 0.5,
slab_fill = NA,
slab_colour = "black",
normalize = "none",
alpha=0.5,
color= NA,
aes(dist = "lnorm", arg1 = 0, arg2 =priority_sd)) +
scale_fill_manual(labels = c("Other: Higher Odds", "Emergency: Higher odds"),
values = c("blue", "skyblue")) +
stat_interval(data=data.frame(x= exp(post_emergency$draws)), aes(y = 0.03),
point_interval= median_hdi,
.width = c(.95), color="black", show_point = TRUE, size=1, show.legend = FALSE) +
scale_y_continuous('Probability density', expand = c(0, 0), breaks=seq(0, 3.4, 0.2)) +
coord_cartesian(xlim = c(0.1,11), ylim = c(0, 1.8)) +
geom_vline(xintercept=1,linetype="solid") +
## labels + arrows to indicate prior, posterior
annotate("text", x = 0.2, y = 0.4, hjust = 0,
label = "Prior",
size = 5, color="#818283") +
annotate("text", x = 3, y = 1.4, hjust = 0,
label = "Posterior",
size = 5, color="#818283") +
theme_light(base_size = 13) +
theme(
legend.position=c(0.2,0.8),
text = element_text(colour = "#2E3F4F"),
plot.title = element_text(face = "bold"),
plot.caption = element_text(face = "italic"),
panel.grid.major = element_line(colour = "#E7E9E9"),
panel.grid.minor = element_line(colour = "#F4F5F5"),
strip.text = element_text(face = "bold", colour = "#2E3F4F" ),
strip.background = element_rect(colour = "#2E3F4F", fill = "grey90"),
legend.background = element_rect(fill = 'transparent'),
legend.title=element_blank())