# Emit the HTML style definitions and helper functions the report relies on.
# NOTE(review): `mu` is created outside the visible code (presumably Hmisc's
# markupSpecs$html) -- confirm against the setup chunk.
mu$styles()              # define HTML styles, functions

# Simulate the enrollment-level data for one study and register that study's
# denominators with hreport.
#
# Arguments:
#   study  study ID (e.g. 'Study 1'); stored in the `study` column and passed
#          to sethreportOption(study=)
#   n      number of enrolled participants to simulate
#
# Value: data frame with one row per enrolled participant.  `rdate` and `tx`
# are NA for participants having any exclusion (e1-e7), i.e. those who are
# not randomized.
#
# Side effect: calls sethreportOption() to store tx.var and the
# enrolled / randomized / per-treatment denominators for `study`.
#
# Random draws are made in the order the columns are listed, so the column
# order must not change if results are to stay reproducible for a given seed.
gendat <- function(study, n) {
  # n independent 0/1 indicators, each equal to 1 with probability p
  f <- function(p) rbinom(n, 1, p)

  d <-
    data.frame(study  = study,
               subjid = seq_len(n),
               site   = sample(1:10, n, TRUE),
               tx     = sample(c('A','B'), n, TRUE),
               age    = rnorm(n, 50, 10),
               sex    = sample(c('female', 'male'), n, TRUE),
               edate  = as.Date('2019-10-01') + round(runif(n, 0, 100)),
               sbp    = rnorm(n, 120, 7),
               dbp    = rnorm(n,  80, 6),
               race   = sample(c('Asian', 'Black/AA', 'White'), n, TRUE),
               meda   = f(0.3),
               medb   = f(0.2),
               td     = round(runif(n, 0, 30)),
               death  = f(0.4),
               e1     = f(.03),
               e2     = f(.03),
               e3     = f(.03),
               e4     = f(0.05),
               tested = f(.75),
               e6     = f(.03),
               e7     = f(0.04)
               )

  d <-
    upData(d,
           # e5 is only defined for participants who were tested
           e5     = ifelse(tested, f(.06), NA),
           # don't randomize if any exclusion is present; an untested
           # (NA) e5 is treated as "exclusion absent"
           rdate  = ifelse(e1|e2|e3|e4|ifelse(is.na(e5), 0, e5)|e6|e7, NA,
                           edate + round(runif(n, 0, 30))),
           rdate  = as.Date(rdate, origin='1970-01-01'),   # ifelse loses Date class
           # treatment is undefined for participants who were not randomized
           tx     = ifelse(is.na(rdate), NA, tx),
           labels = c(age   = 'Age',
                      rdate = 'Randomization Date',
                      edate = 'Enrollment Date',
                      e1    = 'Prior MI',
                      e2    = 'History of Asthma',
                      e3    = 'History of Upper GI Bleeding',
                      e4    = 'No Significant CAD',
                      e5    = 'Inadequate Renal Function',
                      e6    = 'Pneumonia within 6 weeks',
                      e7    = 'Hostile',
                      race  = 'Race', sex = 'Sex', tx = 'treatment',
                      sbp   = 'Systolic BP', dbp = 'Diastolic BP',
                      meda  = 'Concomitant medication A',
                      medb  = 'Concomitant medication B',
                      td    = 'Follow-up Time', death = 'Death'),
           units  = c(sbp='mmHg', dbp='mmHg', age='years', td='day'),
           print  = FALSE)

  # Denominators: all enrolled, all randomized, then randomized per treatment
  denom <- c(enrolled=nrow(d), randomized=sum(! is.na(d$rdate)),
             table(subset(d, ! is.na(rdate))$tx))
  sethreportOption(tx.var='tx', denom=denom, study=study)
  d
}
# Simulate both studies from a fixed seed so the report is reproducible
set.seed(1)
d1 <- gendat('Study 1', 500)
d2 <- gendat('Study 2', 250)

# Stack the two studies into the combined "Study 1+2" dataset and register
# its denominators: enrolled, randomized, and randomized per treatment
d3    <- rbind(d1, d2)
denom <- c(enrolled   = nrow(d3),
           randomized = sum(! is.na(d3$rdate)),
           table(d3$tx[! is.na(d3$rdate)]))
sethreportOption(study='Study 1+2', denom=denom, tx.var='tx')

Introduction

This is an example report using simulated data. Two studies are simulated. Variable names in the two datasets are the same here, though they needn’t be. The study IDs are Study 1 and Study 2. These IDs are used to label the tabs that can be clicked to allow the alternate study result to be viewed.

Since the variable names are the same, the two studies have also been combined to produce a third “study” labeled Study 1+2. For certain results, study is used as a stratification variable for the Study 1+2 tab. Since patient accrual is unique to each study, a combined accrual report is not presented. The exclusion criteria report is also not included for the combined trials.

For accrual and exclusion reports, the entire database is used. After that point, datasets are subsetted to include only randomized participants.

Interactive Graphs

Most of the graphs produced here are semi-interactive. One can hover over elements of graphs with the mouse to have detailed information pop up.

Figure Captions

Needles represent the fraction of observations used in the current analysis. The first needle (red) shows the fraction of enrolled patients used. If randomization was taken into account, a second needle (green) represents the fraction of randomized subjects included in the analysis. When the analyses consider treatment assignment, two more needles may be added to the display, showing, respectively, the fraction of subjects randomized to treatment A used in the analysis and the fraction of subjects on treatment B who were analyzed. The colors of these last two needles are the colors used for the two treatments throughout the report. The following table shows some examples. dNeedle uses colors in sethreportOption(tx.col=, er.col=).

# Build the example needles shown in the table below.  Results are kept in
# short variable names so the Rmarkdown table columns are no wider than needed.
dned <- function(x) {
  dNeedle(x, study='Study 1')
}
n1 <- dned(1)                           # all enrolled
n2 <- dned(c(0.75, 1))                  # 3/4 of enrolled, all randomized
n3 <- dned(c(0.25, 0.5))                # 1/4 of enrolled, 1/2 of randomized
n4 <- dned(c(0.25, 0.5, 0.75, 0.25))    # as n3, plus 3/4 of A and 1/4 of B

Signpost	Interpretation
	All enrolled subjects analyzed, randomization not considered
	Analysis uses ³⁄₄ of enrolled subjects, and all randomized subjects
	Analysis uses ¹⁄₄ of enrolled subjects, and ¹⁄₂ of randomized subjects
	Same as previous example, and in addition the analysis utilized treatment assignment, analyzing ³⁄₄ of those randomized to A and ¹⁄₄ of those randomized to B

Survival Curves

Graphs containing pairs of Kaplan-Meier survival curves show a shaded region centered at the midpoint of the two survival estimates and having a height equal to the half-width of the approximate 0.95 pointwise confidence interval for the difference of the two survival probabilities. Time points at which the two survival estimates do not touch the shaded region denote approximately significantly different survival estimates, without any multiplicity correction.

Accrual

Study 1

# Accrual for Study 1: enrollment and randomization dates by site, with
# enrollment/randomization targets given at two target dates
accrualReport(enroll(edate) + randomize(rdate) ~ site(site),
              data       = d1,
              study      = 'Study 1',
              dateRange  = c('2019-10-01', '2020-03-01'),
              targetDate = c('2020-01-15', '2020-03-01'),
              targetN    = data.frame(edate = c(250, 500),
                                      rdate = c(125, 250)),
              closeDate  = '2020-06-01')

Study Numbers
Number	Category
10	Sites
500	Participants enrolled
385	Participants randomized
38.5	Participants per site
10	Sites randomizing
38.5	Subjects randomized per randomizing site
8	Months from first subject randomized (2019-10-01) to 2020-06-01
77.4	Site-months for sites randomizing
7.7	Average months since a site first randomized
4.97	Participants randomized per site per month
14.8	Mean days from enrollment to randomization
14	Median days from enrollment to randomization

∟ Participants enrolled over time

The blue line depicts the cumulative frequency. The thick grayscale line represents targets.

Category	N	Used
Enrolled	500	500

∟ Participants randomized over time

The blue line depicts the cumulative frequency. The thick grayscale line represents targets.

Category	N	Used
Enrolled	500	385
Randomized	385	385

∟ Number of sites × number of participants

Number of sites having the given number of participants

Category	N	Used
Enrolled	500	385
Randomized	385	385

∟ Participants enrolled by site

∟ Participants randomized by site

∟ Fraction of enrolled participants randomized by site

Study 2

# Accrual for Study 2: enrollment and randomization dates by site, with
# enrollment/randomization targets given at two target dates
accrualReport(enroll(edate) + randomize(rdate) ~ site(site),
              data       = d2,
              study      = 'Study 2',
              dateRange  = c('2019-10-01', '2020-03-01'),
              targetDate = c('2020-01-15', '2020-03-01'),
              targetN    = data.frame(edate = c(125, 250),
                                      rdate = c(75, 125)),
              closeDate  = '2020-06-01')

Study Numbers
Number	Category
10	Sites
250	Participants enrolled
199	Participants randomized
19.9	Participants per site
10	Sites randomizing
19.9	Subjects randomized per randomizing site
7.9	Months from first subject randomized (2019-10-04) to 2020-06-01
75.2	Site-months for sites randomizing
7.5	Average months since a site first randomized
2.65	Participants randomized per site per month
15.6	Mean days from enrollment to randomization
16	Median days from enrollment to randomization

∟ Participants enrolled over time

The blue line depicts the cumulative frequency. The thick grayscale line represents targets.

Category	N	Used
Enrolled	250	250

∟ Participants randomized over time

The blue line depicts the cumulative frequency. The thick grayscale line represents targets.

Category	N	Used
Enrolled	250	199
Randomized	199	199

∟ Number of sites × number of participants

Number of sites having the given number of participants

Category	N	Used
Enrolled	250	199
Randomized	199	199

∟ Participants enrolled by site

∟ Participants randomized by site

∟ Fraction of enrolled participants randomized by site

Exclusions

Study 1

# Exclusion report for Study 1 on all enrolled participants.  e5 is
# conditional on `tested`; whenapp attaches the note 'CCTA done' to e4.
exReport(~ e1 + e2 + e3 + e4 + e5 + e6 + e7 + id(subjid) +
           cond(e5, 'Tested', tested),
         data    = d1,
         study   = 'Study 1',
         whenapp = c(e4='CCTA done'))

∟ All combinations of exclusions

All combinations of exclusions occurring in the data.

∟ Cumulative exclusions

Cumulative number of exclusions (\(y\)-axis) and number of additional exclusions after exclusions placed higher. Exclusions are sorted by descending number of incremental exclusions. 500 participants were enrolled and 115 participants were excluded.

◫ Exclusions

Incremental exclusions are those in addition to exclusions in earlier rows. Marginal exclusions are numbers of participants excluded for the indicated reason whether or not they were excluded for other reasons. The three Fractions are based on incremental exclusions.

Exclusions	Incremental Exclusions	Marginal Exclusions	Fraction of Enrolled	Fraction of Exclusions	Fraction Remaining

Hostile	26	26	0.052	0.226	0.948
No Significant CAD	24	25	0.048	0.209	0.900
Inadequate Renal Function / 370	17	19	0.034	0.148	0.866
¹⁷⁄₃₇₀ = 0.046 of Tested
History of Asthma	15	17	0.030	0.130	0.836
History of Upper GI Bleeding	14	18	0.028	0.122	0.808
Prior MI	10	11	0.020	0.087	0.788
Pneumonia within 6 Weeks	9	11	0.018	0.078	0.770
Total	115		0.230	1.000	0.770

Study 2

# Exclusion report for Study 2 on all enrolled participants.  e5 is
# conditional on `tested`; whenapp attaches the note 'CCTA done' to e4.
exReport(~ e1 + e2 + e3 + e4 + e5 + e6 + e7 + id(subjid) +
           cond(e5, 'Tested', tested),
         data    = d2,
         study   = 'Study 2',
         whenapp = c(e4='CCTA done'))

∟ All combinations of exclusions

All combinations of exclusions occurring in the data.

∟ Cumulative exclusions

Cumulative number of exclusions (\(y\)-axis) and number of additional exclusions after exclusions placed higher. Exclusions are sorted by descending number of incremental exclusions. 250 participants were enrolled and 51 participants were excluded.

◫ Exclusions

Exclusions	Incremental Exclusions	Marginal Exclusions	Fraction of Enrolled	Fraction of Exclusions	Fraction Remaining

Inadequate Renal Function / 185	12	12	0.048	0.235	0.952
¹²⁄₁₈₅ = 0.065 of Tested
No Significant CAD	9	10	0.036	0.176	0.916
Hostile	9	9	0.036	0.176	0.880
History of Asthma	8	8	0.032	0.157	0.848
Pneumonia within 6 Weeks	6	6	0.024	0.118	0.824
History of Upper GI Bleeding	4	7	0.016	0.078	0.808
Prior MI	3	5	0.012	0.059	0.796
Total	51		0.204	1.000	0.796

Baseline Variables

Study 1

# Restrict every dataset to randomized participants (non-missing rdate);
# all remaining sections of the report use only these rows
d1 <- d1[! is.na(d1$rdate), ]
d2 <- d2[! is.na(d2$rdate), ]
d3 <- d3[! is.na(d3$rdate), ]

# Overall (unstratified) frequencies of the categorical baseline variables
dReport(race + sex ~ 1,
        data  = d1,
        study = 'Study 1',
        head  = 'Overall frequencies of categorical demographic variables')

∟ Overall frequencies of categorical demographic variables

Overall frequencies of categorical demographic variables. N=385

Category	N	Used
Enrolled	500	385
Randomized	385	385

Variable	N
Race	385
Sex	385

# Continuous baseline variables for Study 1, stratified by treatment
dReport(dbp + sbp + age ~ tx,
        data  = d1,
        study = 'Study 1',
        sopts = list(width=800))

∟ Histograms for diastolic BP, systolic BP, and age stratified by treatment

Histograms for diastolic BP, systolic BP, and age stratified by treatment. N=385

Category	N	Used
Enrolled	500	385
Randomized	385	385
A	177	177
B	208	208

Variable	A	B
Diastolic BP	177	208
Systolic BP	177	208
Age	177	208

Study 2

# Overall (unstratified) frequencies of the categorical baseline variables
dReport(race + sex ~ 1,
        data  = d2,
        study = 'Study 2',
        head  = 'Overall frequencies of categorical demographic variables')

∟ Overall frequencies of categorical demographic variables

Overall frequencies of categorical demographic variables. N=199

Category	N	Used
Enrolled	250	199
Randomized	199	199

Variable	N
Race	199
Sex	199

# Continuous baseline variables for Study 2, stratified by treatment
dReport(dbp + sbp + age ~ tx,
        data  = d2,
        study = 'Study 2',
        sopts = list(width=800))

∟ Histograms for diastolic BP, systolic BP, and age stratified by treatment

Histograms for diastolic BP, systolic BP, and age stratified by treatment. N=199

Category	N	Used
Enrolled	250	199
Randomized	199	199
A	101	101
B	98	98

Variable	A	B
Diastolic BP	101	98
Systolic BP	101	98
Age	101	98

Study 1+2

When combining studies we don’t need to examine baseline variables stratified by treatment but rather stratified by study.

# Categorical baseline variables for the combined data, stratified by study;
# the data passed in are d3 after addMarginal(d3, study)
dReport(race + sex ~ study,
        data  = addMarginal(d3, study),
        study = 'Study 1+2',
        head  = 'Overall frequencies of categorical demographic variables')

∟ Overall frequencies of categorical demographic variables stratified by study

Overall frequencies of categorical demographic variables stratified by study. N=584

Category	N	Used
Enrolled	750	584
Randomized	584	584

Variable	N
Race	584
Sex	584

# Continuous baseline variables for the combined data, stratified by study;
# the data passed in are d3 after addMarginal(d3, study)
dReport(dbp + sbp + age ~ study,
        data  = addMarginal(d3, study),
        study = 'Study 1+2',
        sopts = list(width=800, ncols=2))

∟ Histograms for diastolic BP, systolic BP, and age stratified by study

Histograms for diastolic BP, systolic BP, and age stratified by study. N=584

Category	N	Used
Enrolled	750	584
Randomized	584	584

Variable	N
Diastolic BP	584
Systolic BP	584
Age	584

Survival

Study 1

# Time to death by treatment for randomized participants in Study 1
survReport(Surv(td, death) ~ tx,
           data  = d1,
           study = 'Study 1')

∟ Kaplan-Meier cumulative incidence estimates for death stratified by treatment

Kaplan-Meier cumulative incidence estimates for death stratified by treatment, along with half-height of 0.95 confidence limits for differences centered at estimate midpoints. \(N\)=385.

Category	N	Used
Enrolled	500	385
Randomized	385	385
A	177	177
B	208	208

Variable	A	B
Death	177	208

Study 2

# Time to death by treatment for randomized participants in Study 2
survReport(Surv(td, death) ~ tx,
           data  = d2,
           study = 'Study 2')

∟ Kaplan-Meier cumulative incidence estimates for death stratified by treatment

Kaplan-Meier cumulative incidence estimates for death stratified by treatment, along with half-height of 0.95 confidence limits for differences centered at estimate midpoints. \(N\)=199.

Category	N	Used
Enrolled	250	199
Randomized	199	199
A	101	101
B	98	98

Variable	A	B
Death	101	98

Study 1+2

# Time to death by treatment for randomized participants, both studies pooled
survReport(Surv(td, death) ~ tx,
           study = 'Study 1+2',
           data  = d3)

∟ Kaplan-Meier cumulative incidence estimates for death stratified by treatment

Kaplan-Meier cumulative incidence estimates for death stratified by treatment, along with half-height of 0.95 confidence limits for differences centered at estimate midpoints. \(N\)=584.

Category	N	Used
Enrolled	750	584
Randomized	584	584
A	278	278
B	306	306

Variable	A	B
Death	278	306

Comparative DSMB Report

Comparative DSMB Report

Introduction

Interactive Graphs

Figure Captions

Survival Curves

Accrual

Study 1

∟ Participants enrolled over time

∟ Participants randomized over time

∟ Number of sites × number of participants

∟ Participants enrolled by site

∟ Participants randomized by site

∟ Fraction of enrolled participants randomized by site

Study 2

∟ Participants enrolled over time

∟ Participants randomized over time

∟ Number of sites × number of participants

∟ Participants enrolled by site

∟ Participants randomized by site

∟ Fraction of enrolled participants randomized by site

Exclusions

Study 1

∟ All combinations of exclusions

∟ Cumulative exclusions

◫ Exclusions

Study 2

∟ All combinations of exclusions

∟ Cumulative exclusions

◫ Exclusions

Baseline Variables

Study 1

∟ Overall frequencies of categorical demographic variables

∟ Histograms for diastolic BP, systolic BP, and age stratified by treatment

Study 2

∟ Overall frequencies of categorical demographic variables

∟ Histograms for diastolic BP, systolic BP, and age stratified by treatment

Study 1+2

∟ Overall frequencies of categorical demographic variables stratified by study

∟ Histograms for diastolic BP, systolic BP, and age stratified by study

Survival

Study 1

∟ Kaplan-Meier cumulative incidence estimates for death stratified by treatment

Study 2

∟ Kaplan-Meier cumulative incidence estimates for death stratified by treatment

Study 1+2

∟ Kaplan-Meier cumulative incidence estimates for death stratified by treatment