Prediction Error in Cox Models Varying Number of Predictors
This program runs Cox fits of simulated data with exponential survival and 1-30 covariables. Each covariable is U(-.5,.5) and has a true regression coefficient of .25. Estimates the average error (over simulations and over all subjects in sample) in predicted 5-yr survival compared to actual 5-year survival. Also estimates average absolute error for subjects in lowest decile of predicted survival. Intercept is chosen so that underlying survival (with all x=mean=0) is .75 at 5 years. Censoring distribution is U(0, 10y). Avg. # deaths is around 180.
This is useful for developing rules of thumb such as the "15:1 rule."
# FE Harrell 14Aug91
nsim <- 10
n <- 250
ncpts <- c(seq(1,29,by=2),30)
#nsim <- 1
#ncpts <- c(5,10)
s5 <- .75 #desired underlying 5-year survival
b0 <- log(-log(s5)/5)
xdc <- matrix(runif(n*40),nrow=n)-.5
meanabs <- NULL
meanlow <- NULL
for(ncov in ncpts) {
cat('Using first',ncov,' covariables\n')
x <- xdc[,1:ncov,drop=F]
beta <- rep(.25,ncov)
xbeta <- b0 + matxv(x, beta)
surv5 <- exp(-5*exp(xbeta)) #True 5-yr survival for each subject
describe(surv5)
#Now take beta, simulate several samples from this using
#exponential survival model, fit, and look at differences in
#5-year survival from original xbeta
meand <- single(nsim)
meanl <- meand
sumd <- 0
for(i in 1:nsim) {
#Simulate survival times with exponential survival function
#S(t) = exp(-exp(xbeta)*t)
ftime <- -log(runif(n)) /exp(xbeta)
cens <- runif(n, 0, 10) #Censoring is uniform(0, 10y)
dead <- rep(1,n)
dead[ftime>cens] <- 0
sumd <- sumd + sum(dead)
ftime <- pmin(ftime, cens)
units(ftime) <- "Year"
f <- coxph(x, ftime, dead, surv=T, time.inc=5, pr=F)
if(i==1)print(f)
b <- f$coef
newxbeta <- matxv(x, b)
pred.s5 <- (f$surv.summary[2,1,1])^exp(newxbeta-f$center)
if(i==1)describe(pred.s5)
d <- abs(pred.s5-surv5)
meand[i] <- mean(d)
k <- pred.s5 <= quantile(pred.s5, .1)
meanl[i] <- mean(d[k])
#cat(ncov,i,meand[i],meanl[i],"\n")
}
cat(ncov,mean(meand),mean(meanl),"\n")
cat("Avg. # deaths:",sumd/nsim,"\n")
meanabs <- c(meanabs,mean(meand))
meanlow <- c(meanlow,mean(meanl))
}
print(cbind(ncpts,meanabs,meanlow))
plot(ncpts,meanlow,xlab="Number of Covariables Fitted",
ylab="Average Error",type="n",ylim=range(c(meanlow,meanabs)))
points(ncpts,meanlow,type="b",lty=2)
points(ncpts,meanabs,type="b",lty=1)
title("Average Error in Cox 5-year Survival Estimates")
title(sub="Solid: all pts Dashed: pts in first decile of predicted",
cex=.7,adj=0)
title(sub="n=750",adj=1)
print(cbind(ncpts,meanabs,meanlow))