# NOTE(review): `mu` is created outside this excerpt -- presumably an HTML
# markup helper object; confirm where it is defined before relying on this call.
mu$styles()              # define HTML styles, functions
# Function to generate data for one study
#
# Arguments:
#   study : study label; stored in the `study` column and passed on to
#           sethreportOption(study=)
#   n     : number of enrolled participants (rows) to simulate
#
# Value: a data frame with one row per enrolled participant holding site,
#   treatment, baseline variables, exclusion indicators e1-e7, enrollment and
#   randomization dates, and follow-up time / death indicator.  `rdate` and
#   `tx` are NA for participants having any exclusion.
#
# Side effect: calls sethreportOption() to register the treatment variable
#   name and the enrolled / randomized / per-treatment denominators for `study`.
#
# Note: random numbers are drawn in the order the columns are written below,
#   so reordering columns changes the simulated values obtained for a given seed.
gendat <- function(study, n) {
  # n independent 0/1 draws, each 1 with probability p
  f <- function(p) rbinom(n, 1, p)

  d <- data.frame(
    study  = study,
    subjid = seq_len(n),   # seq_len(n) rather than 1:n, which counts down when n is 0
    site   = sample(1:10, n, TRUE),
    tx     = sample(c('A', 'B'), n, TRUE),
    age    = rnorm(n, 50, 10),
    sex    = sample(c('female', 'male'), n, TRUE),
    edate  = as.Date('2019-10-01') + round(runif(n, 0, 100)),
    sbp    = rnorm(n, 120, 7),
    dbp    = rnorm(n,  80, 6),
    race   = sample(c('Asian', 'Black/AA', 'White'), n, TRUE),
    meda   = f(0.3),
    medb   = f(0.2),
    td     = round(runif(n, 0, 30)),
    death  = f(0.4),
    e1     = f(.03),
    e2     = f(.03),
    e3     = f(.03),
    e4     = f(0.05),
    tested = f(.75),
    e6     = f(.03),
    e7     = f(0.04)
  )

  d <- upData(
    d,
    # e5 is only defined for participants who were tested
    e5     = ifelse(tested, f(.06), NA),
    # don't randomize if exclusion present (a missing e5 counts as no exclusion):
    rdate  = ifelse(e1 | e2 | e3 | e4 | ifelse(is.na(e5), 0, e5) | e6 | e7, NA,
                    edate + round(runif(n, 0, 30))),
    rdate  = as.Date(rdate, origin='1970-01-01'),   # ifelse loses Date class
    # treatment is only assigned to randomized participants
    tx     = ifelse(is.na(rdate), NA, tx),
    labels = c(age='Age',
               rdate='Randomization Date',
               edate='Enrollment Date',
               e1='Prior MI', e2='History of Asthma',
               e3='History of Upper GI Bleeding',
               e4='No Significant CAD', e5='Inadequate Renal Function',
               e6='Pneumonia within 6 weeks', e7='Hostile',
               race='Race', sex='Sex', tx='treatment',
               sbp='Systolic BP', dbp='Diastolic BP',
               meda='Concomitant medication A',
               medb='Concomitant medication B',
               td='Follow-up Time', death='Death'),
    units  = c(sbp='mmHg', dbp='mmHg', age='years', td='day'),
    print  = FALSE)

  # Denominators: everyone enrolled, everyone randomized, then the count
  # randomized to each treatment (named by treatment level)
  randomized <- ! is.na(d$rdate)
  denom <- c(enrolled=nrow(d), randomized=sum(randomized),
             table(d$tx[randomized]))
  sethreportOption(tx.var='tx', denom=denom, study=study)
  d
}
# Fix the seed, then simulate the two studies (500 and 250 enrolled)
set.seed(1)
d1 <- gendat('Study 1', 500)
d2 <- gendat('Study 2', 250)

# Stack both studies and register denominators for the combined "study":
# enrolled, randomized, and the number randomized to each treatment
d3 <- rbind(d1, d2)
denom <- c(
  enrolled   = nrow(d3),
  randomized = sum(! is.na(d3$rdate)),
  with(d3, table(tx[! is.na(rdate)]))
)
sethreportOption(tx.var='tx', denom=denom, study='Study 1+2')

Introduction

This is an example report using simulated data. Two studies are simulated. Variable names in the two datasets are the same here, though they needn’t be. The study IDs are Study 1 and Study 2. These IDs are used to label the tabs that can be clicked to allow the alternate study result to be viewed.

Since the variable names are the same, the two studies have also been combined to produce a third “study” called Study 1+2. For certain results, study is used as a stratification variable for the Study 1+2 tab. Since patient accrual is unique to each study, a combined accrual report is not presented. The exclusion criteria report is also not included for the combined trials.

For accrual and exclusion reports, the entire database is used. After that point, datasets are subsetted to include only randomized participants.

Interactive Graphs

Most of the graphs produced here are semi-interactive. One can hover over elements of graphs with the mouse to have detailed information pop up.

Figure Captions

Needles represent the fraction of observations used in the current analysis. The first needle (red) shows the fraction of enrolled patients used. If randomization was taken into account, a second needle (green) represents the fraction of randomized subjects included in the analysis. When the analyses consider treatment assignment, two more needles may be added to the display, showing, respectively, the fraction of subjects randomized to treatment A used in the analysis and the fraction of subjects on treatment B who were analyzed. The colors of these last two needles are the colors used for the two treatments throughout the report. The following table shows some examples. dNeedle uses colors in sethreportOption(tx.col=, er.col=).

# Short object names keep the Rmarkdown table columns from being
# wider than they need to be
dned <- function(x) {
  dNeedle(x, study='Study 1')
}
n1 <- dned(1)                          # one needle
n2 <- dned(c(3/4, 1))                  # two needles
n3 <- dned(c(1/4, 1/2))                # two needles
n4 <- dned(c(1/4, 1/2, 3/4, 1/4))      # four needles
Signpost Interpretation
image All enrolled subjects analyzed, randomization not considered
image Analysis uses 3/4 of enrolled subjects, and all randomized subjects
image Analysis uses 1/4 of enrolled subjects, and 1/2 of randomized subjects
image Same as previous example, and in addition the analysis utilized treatment assignment, analyzing 3/4 of those randomized to A and 1/4 of those randomized to B

Survival Curves

Graphs containing pairs of Kaplan-Meier survival curves show a shaded region centered at the midpoint of the two survival estimates and having a height equal to the half-width of the approximate 0.95 pointwise confidence interval for the difference of the two survival probabilities. Time points at which the two survival estimates do not touch the shaded region denote approximately significantly different survival estimates, without any multiplicity correction.

Accrual

Study 1

# Accrual for Study 1, using the full (pre-randomization-subset) dataset d1
accrualReport(
  enroll(edate) + randomize(rdate) ~ site(site),
  data       = d1,
  study      = 'Study 1',
  dateRange  = c('2019-10-01', '2020-03-01'),
  # one target count per entry of targetDate, for enrollment and randomization
  targetN    = data.frame(edate=c(250, 500), rdate=c(125, 250)),
  targetDate = c('2020-01-15', '2020-03-01'),
  closeDate  = '2020-06-01'
)
Study Numbers
Number Category
10 Sites
500 Participants enrolled
385 Participants randomized
38.5 Participants per site
10 Sites randomizing
38.5 Subjects randomized per randomizing site
8 Months from first subject randomized (2019-10-01) to 2020-06-01
77.4 Site-months for sites randomizing
7.7 Average months since a site first randomized
4.97 Participants randomized per site per month
14.8 Mean days from enrollment to randomization
14 Median days from enrollment to randomization

Participants enrolled over time

The blue line depicts the cumulative frequency. The thick grayscale line represents targets.
Category N Used
Enrolled 500 500
image

Participants randomized over time

The blue line depicts the cumulative frequency. The thick grayscale line represents targets.
Category N Used
Enrolled 500 385
Randomized 385 385
image

Number of sites × number of participants

Number of sites having the given number of participants
Category N Used
Enrolled 500 385
Randomized 385 385
image

Participants enrolled by site


Participants randomized by site


Fraction of enrolled participants randomized by site

Study 2

# Accrual for Study 2, using the full (pre-randomization-subset) dataset d2
accrualReport(
  enroll(edate) + randomize(rdate) ~ site(site),
  data       = d2,
  study      = 'Study 2',
  dateRange  = c('2019-10-01', '2020-03-01'),
  # one target count per entry of targetDate, for enrollment and randomization
  targetN    = data.frame(edate=c(125, 250), rdate=c(75, 125)),
  targetDate = c('2020-01-15', '2020-03-01'),
  closeDate  = '2020-06-01'
)
Study Numbers
Number Category
10 Sites
250 Participants enrolled
199 Participants randomized
19.9 Participants per site
10 Sites randomizing
19.9 Subjects randomized per randomizing site
7.9 Months from first subject randomized (2019-10-04) to 2020-06-01
75.2 Site-months for sites randomizing
7.5 Average months since a site first randomized
2.65 Participants randomized per site per month
15.6 Mean days from enrollment to randomization
16 Median days from enrollment to randomization

Participants enrolled over time

The blue line depicts the cumulative frequency. The thick grayscale line represents targets.
Category N Used
Enrolled 250 250
image

Participants randomized over time

The blue line depicts the cumulative frequency. The thick grayscale line represents targets.
Category N Used
Enrolled 250 199
Randomized 199 199
image

Number of sites × number of participants

Number of sites having the given number of participants
Category N Used
Enrolled 250 199
Randomized 199 199
image

Participants enrolled by site


Participants randomized by site


Fraction of enrolled participants randomized by site

Exclusions

Study 1

# Exclusion report for Study 1: exclusions e1-e7, with subjid as the
# participant id and e5 conditioned on `tested` (labelled 'Tested')
exReport(
  ~ e1 + e2 + e3 + e4 + e5 + e6 + e7 +
    id(subjid) + cond(e5, 'Tested', tested),
  data    = d1,
  study   = 'Study 1',
  whenapp = c(e4='CCTA done')
)

All combinations of exclusions

All combinations of exclusions occurring in the data.

Cumulative exclusions

Cumulative number of exclusions (\(y\)-axis) and number of additional exclusions after exclusions placed higher. Exclusions are sorted by descending number of incremental exclusions. 500 participants were enrolled and 115 participants were excluded.

Exclusions

Incremental exclusions are those in addition to exclusions in earlier rows. Marginal exclusions are numbers of participants excluded for the indicated reason, whether or not they were also excluded for other reasons. The three Fractions are based on incremental exclusions.
Exclusions | Incremental Exclusions | Marginal Exclusions | Fraction of Enrolled | Fraction of Exclusions | Fraction Remaining
Hostile 26 26 0.052 0.226 0.948
No Significant CAD 24 25 0.048 0.209 0.900
Inadequate Renal Function / 370 17 19 0.034 0.148 0.866
17/370 = 0.046 of Tested
History of Asthma 15 17 0.030 0.130 0.836
History of Upper GI Bleeding 14 18 0.028 0.122 0.808
Prior MI 10 11 0.020 0.087 0.788
Pneumonia within 6 Weeks 9 11 0.018 0.078 0.770
Total 115 0.230 1.000 0.770

Study 2

# Exclusion report for Study 2: exclusions e1-e7, with subjid as the
# participant id and e5 conditioned on `tested` (labelled 'Tested')
exReport(
  ~ e1 + e2 + e3 + e4 + e5 + e6 + e7 +
    id(subjid) + cond(e5, 'Tested', tested),
  data    = d2,
  study   = 'Study 2',
  whenapp = c(e4='CCTA done')
)

All combinations of exclusions

All combinations of exclusions occurring in the data.

Cumulative exclusions

Cumulative number of exclusions (\(y\)-axis) and number of additional exclusions after exclusions placed higher. Exclusions are sorted by descending number of incremental exclusions. 250 participants were enrolled and 51 participants were excluded.

Exclusions

Incremental exclusions are those in addition to exclusions in earlier rows. Marginal exclusions are numbers of participants excluded for the indicated reason, whether or not they were also excluded for other reasons. The three Fractions are based on incremental exclusions.
Exclusions | Incremental Exclusions | Marginal Exclusions | Fraction of Enrolled | Fraction of Exclusions | Fraction Remaining
Inadequate Renal Function / 185 12 12 0.048 0.235 0.952
12/185 = 0.065 of Tested
No Significant CAD 9 10 0.036 0.176 0.916
Hostile 9 9 0.036 0.176 0.880
History of Asthma 8 8 0.032 0.157 0.848
Pneumonia within 6 Weeks 6 6 0.024 0.118 0.824
History of Upper GI Bleeding 4 7 0.016 0.078 0.808
Prior MI 3 5 0.012 0.059 0.796
Total 51 0.204 1.000 0.796

Baseline Variables

Study 1

# All remaining analyses are restricted to randomized participants,
# i.e. rows with a non-missing randomization date
d1 <- subset(d1, !is.na(rdate))
d2 <- subset(d2, !is.na(rdate))
d3 <- subset(d3, !is.na(rdate))

# Categorical demographics for Study 1, not stratified
dReport(
  race + sex ~ 1,
  data  = d1,
  study = 'Study 1',
  head  = 'Overall frequencies of categorical demographic variables'
)

Overall frequencies of categorical demographic variables

Overall frequencies of categorical demographic variables. N=385
Category N Used
Enrolled 500 385
Randomized 385 385
Variable N
Race 385
Sex 385
image
# Continuous baseline variables for Study 1, stratified by treatment
dReport(
  dbp + sbp + age ~ tx,
  data  = d1,
  study = 'Study 1',
  sopts = list(width=800)
)

Histograms for diastolic BP, systolic BP, and age stratified by treatment

Histograms for diastolic BP, systolic BP, and age stratified by treatment. N=385
Category N Used
Enrolled 500 385
Randomized 385 385
A 177 177
B 208 208
Variable A B
Diastolic BP 177 208
Systolic BP 177 208
Age 177 208
image

Study 2

# Categorical demographics for Study 2, not stratified
dReport(
  race + sex ~ 1,
  data  = d2,
  study = 'Study 2',
  head  = 'Overall frequencies of categorical demographic variables'
)

Overall frequencies of categorical demographic variables

Overall frequencies of categorical demographic variables. N=199
Category N Used
Enrolled 250 199
Randomized 199 199
Variable N
Race 199
Sex 199
image
# Continuous baseline variables for Study 2, stratified by treatment
dReport(
  dbp + sbp + age ~ tx,
  data  = d2,
  study = 'Study 2',
  sopts = list(width=800)
)

Histograms for diastolic BP, systolic BP, and age stratified by treatment

Histograms for diastolic BP, systolic BP, and age stratified by treatment. N=199
Category N Used
Enrolled 250 199
Randomized 199 199
A 101 101
B 98 98
Variable A B
Diastolic BP 101 98
Systolic BP 101 98
Age 101 98
image

Study 1+2

When combining studies we don’t need to examine baseline variables stratified by treatment but rather stratified by study.

# Categorical demographics for the combined data, stratified by study;
# addMarginal(d3, study) supplies the data passed to the report
dReport(
  race + sex ~ study,
  data  = addMarginal(d3, study),
  study = 'Study 1+2',
  head  = 'Overall frequencies of categorical demographic variables'
)

Overall frequencies of categorical demographic variables stratified by study

Overall frequencies of categorical demographic variables stratified by study. N=584
Category N Used
Enrolled 750 584
Randomized 584 584
Variable N
Race 584
Sex 584
image
# Continuous baseline variables for the combined data, stratified by study
dReport(
  dbp + sbp + age ~ study,
  data  = addMarginal(d3, study),
  study = 'Study 1+2',
  sopts = list(width=800, ncols=2)
)

Histograms for diastolic BP, systolic BP, and age stratified by study

Histograms for diastolic BP, systolic BP, and age stratified by study. N=584
Category N Used
Enrolled 750 584
Randomized 584 584
Variable N
Diastolic BP 584
Systolic BP 584
Age 584
image

Survival

Study 1

# Time to death by treatment for Study 1 (randomized participants only)
survReport(
  Surv(td, death) ~ tx,
  data  = d1,
  study = 'Study 1'
)

Kaplan-Meier cumulative incidence estimates for death stratified by treatment

Kaplan-Meier cumulative incidence estimates for death stratified by treatment, along with half-height of 0.95 confidence limits for differences centered at estimate midpoints. \(N\)=385.
Category N Used
Enrolled 500 385
Randomized 385 385
A 177 177
B 208 208
Variable A B
Death 177 208
image

Study 2

# Time to death by treatment for Study 2 (randomized participants only)
survReport(
  Surv(td, death) ~ tx,
  data  = d2,
  study = 'Study 2'
)

Kaplan-Meier cumulative incidence estimates for death stratified by treatment

Kaplan-Meier cumulative incidence estimates for death stratified by treatment, along with half-height of 0.95 confidence limits for differences centered at estimate midpoints. \(N\)=199.
Category N Used
Enrolled 250 199
Randomized 199 199
A 101 101
B 98 98
Variable A B
Death 101 98
image

Study 1+2

# Time to death by treatment for the combined data (randomized participants only)
survReport(
  Surv(td, death) ~ tx,
  data  = d3,
  study = 'Study 1+2'
)

Kaplan-Meier cumulative incidence estimates for death stratified by treatment

Kaplan-Meier cumulative incidence estimates for death stratified by treatment, along with half-height of 0.95 confidence limits for differences centered at estimate midpoints. \(N\)=584.
Category N Used
Enrolled 750 584
Randomized 584 584
A 278 278
B 306 306
Variable A B
Death 278 306
image