# Usage: R < sim.r > sim.lst

require(rms)

pvals <- c(.2, 7, 5, 82, 24, 35, 100, 16, 100, 81, 9, 36, 82, 6, 54, 47,
           16, 70, 58, 17, 100, 56, 39, 4, 35, 4, 77, 100, 5, 17, 32,
           1.7, 56, 5, 41, 37, 48, 58, 30, 57, 67, 6, 55, 97, 67, 46)/100

if(FALSE)
  {
    plot(ecdf(pvals), xlab='P-value', main='Cumulative Distribution of P-values')
    abline(a=0, b=1, col=gray(.93))
  }
sum(pvals <= 0.05)  #  7
length(pvals)       # 46
.05*length(pvals)   # 2.3
sum(pvals <= .1)    # 11
.1*length(pvals)    # 4.6

n <- 80
y <- c(rep(1, 49), rep(0, n - 49))
catp <- 31
conp <- 15
p <- catp + conp

set.seed(1)
ns <- 500
pus <- pfin <- integer(ns)
r2  <- roc  <- double(ns)

for(k in 1:ns)
  {
    xcat <- matrix(sample(0:1, n*catp, replace=TRUE), nrow=n)
    xcon <- matrix(rnorm(n*conp), nrow=n)
    x <- cbind(xcat, xcon)

    pval <- rep(NA, p)
    for(j in 1:catp) pval[j] <- chisq.test(xcat[,j], y)$p.value
    for(j in 1:conp) pval[j + catp] <-
      t.test(xcon[y==0,j], xcon[y==1,j])$p.value
    if(sum(pval < .1) == 0)
      {
        pus[k] <- pfin[k] <- 0
        r2[k] <- 0
        roc[k] <- .5
        next
      }
    w <- x[, pval < .1, drop=FALSE]
    pus[k] <- ncol(w)
    
    repeat
      {
        if(!length(w)) break
        f <- lrm(y ~ w)
        cof <- coef(f)[-1]
        v   <- vcov(f)[-1,-1, drop=FALSE]
        wald <- (cof^2)/diag(v)
        pv   <- 1 - pchisq(wald, 1)
        if(all(pv < .1)) break
        i <- which.max(pv)
        w <- w[, -i, drop=FALSE]
      }
    pfin[k] <- ncol(w)
    if(pfin[k] == 0)
      {
        r2[k] <- 0
        roc[k] <- .5
      }
    else
      {
        f <- lrm(y ~ w)
        s <- f$stats
        r2[k]  <- s['R2']
        roc[k] <- s['C']
      }
    cat('Sim:', k, '  Passed screen:', pus[k], '  # in final model:',
        pfin[k], '\r')
  }
cat('\n')
cat('Frequencies of # variables passing univariable screening:\n\n')
table(pus)
mean(pus)

cat('\n\nFrequencies of # variables in final logistic model:\n\n')
table(pfin)
mean(pfin)

describe(roc)
describe(r2)  # Nagelkerke R^2
