# library(Hmisc,T); library(Design,T)
attach("/home/feh3k/data/teaching/.Data")
attach(support[!is.na(support$totcst) & support$totcst>0,
               Cs(totcst,dzgroup,scoma,meanbp,age)])
store()

totcst <- totcst/1000

f <- ols(totcst ~ dzgroup + scoma +
                rcs(meanbp, 5) + rcs(age,4))
f2 <- ols(totcst ~ (dzgroup + scoma +
                rcs(meanbp, 5) + rcs(age,4))^2)
lrtest(f, f2)  # 87.7, 75df
spearman(fitted(f), totcst)  #.660
spearman(fitted(f2), totcst) #.684
abs.error.pred(lp=fitted(f), y=totcst)
abs.error.pred(lp=fitted(f2),y=totcst)

f <- ols(log(totcst) ~ dzgroup + scoma +
                rcs(meanbp, 5) + rcs(age,4))
f2 <- ols(log(totcst) ~ (dzgroup + scoma +
                rcs(meanbp, 5) + rcs(age,4))^2)
lrtest(f, f2) # 113, 75df
spearman(fitted(f), totcst)  #.673
spearman(fitted(f2), totcst) #.718
abs.error.pred(lp=exp(fitted(f)), y=totcst)
abs.error.pred(lp=exp(fitted(f2)),y=totcst)

#With no IA, median(abs(Yhat - Y)):
#original scale: 19.3K   log scale:8.0K


set.seed(123)
f.areg <- areg.boot(totcst ~ dzgroup + scoma + meanbp + age)
f.areg
s.areg <- summary(f.areg,
                  values=list(scoma=c(0,44), meanbp=c(90,20,60,130)))
s.areg
setps(areg, h=6, pointsize=14)
plot(f.areg, boot=F)
dev.off()

setps(areg.resid)
par(mfrow=c(1,2))
plot(predict(f.areg), resid(f.areg),
     xlab='Predicted Transformed Cost', ylab='Residual', type='n')
points(predict(f.areg), resid(f.areg), cex=.5)
qqnorm(resid(f.areg),
       xlab='Predicted Transformed Cost', ylab='Residual')
qqline(resid(f.areg), lty=2)
dev.off()

Function(f.areg, type='individual')

cst <- seq(1.5,160,length=100)
plot(-2.3876+.8663*log(cst),.totcst(cst),type='l'); abline(a=0,b=1,lty=2)
w <- ols(.totcst(totcst) ~ log(totcst))
plot(cst, .totcst(cst), type='l')
lines(cst, -2.3876+.8663*log(cst), col=2)
lines(cst, .totcst(cst)+2.3876-.8663*log(cst), lty=2)
text(120, 2, 'areg', col=2)
text(120, 1.35, 'log')

dd <- datadist(dzgroup,scoma,meanbp,age)
options(datadist='dd')

ols(f.areg$linear.predictor ~ .dzgroup(dzgroup) + .scoma(scoma) +
    .meanbp(meanbp) + .age(age))$stats

f.ols.approx <- ols(.totcst(totcst) ~ .dzgroup(dzgroup) + .scoma(scoma) +
                    .meanbp(meanbp) + .age(age))

pmed.areg  <- Quantile(f.areg)
pmean.areg <- Mean(f.areg)

setps(areg.nomo, h=5)
nomogram(f.ols.approx,
         meanbp=c(0,20,40,80,100,120,140,160,180,200),
         fun=list('Median ($/1000)'=pmed.areg,
                  'Mean   ($/1000)'=pmean.areg),
         fun.at=c(1,5,10,20,30,40,50,75,100,150))
dev.off()

# Show plot of meanbp vs. mean and median total cost,
# other variables set to default reference values
meanbps <- 0:140
d <- expand.grid(meanbp=meanbps, dzgroup=levels(dzgroup)[1],
                 scoma=0, age=median(age))
pmed.areg  <- predict(f.areg, d, statistic='median')
pmean.areg <- predict(f.areg, d, statistic='mean')

f.ols <- ols(log(totcst) ~ dzgroup + lsp(scoma,c(26,44)) + 
             rcs(meanbp,7) + rcs(age,5))
#par(mfrow=c(1,2))
#plot(predict(f.ols), resid(f.ols))
#qqnorm(resid(f.ols)); qqline(resid(f.ols))

pmed.ols  <- exp(predict(f.ols, d))
pmean.ols <- pmed.ols*exp(.5*(f.ols$stats['Sigma']^2))

# Model on original scale; another way to get mean
f.ols.os <- ols(totcst ~ dzgroup + lsp(scoma,c(26,44)) +
                rcs(meanbp, 7) + rcs(age,5))
meanbps2 <- seq(0,140,length=20)
d2 <- expand.grid(meanbp=meanbps2, dzgroup=levels(dzgroup)[1],
                  scoma=0, age=median(age))

pmean.ols.os <- predict(f.ols.os, d2)

f.cox <- cph(Surv(totcst) ~ dzgroup + lsp(scoma,c(26,44)) +
             rcs(meanbp,7) + rcs(age,5), surv=T)
pcox  <- predict(f.cox, d)
pquant.cox  <- Quantile(f.cox)
pmed.cox <- function(lp) pquant.cox(lp=lp)
pmean.cox <- Mean(f.cox)

if(F) {
fcox <- f.cox
fcox$coefficients <- -fcox$coefficients
fcox$center <- -fcox$center
fcox$linear.predictors <- -fcox$linear.predictors
pmed.cox.neg <- function(lp) pmed.cox(-lp)
pmean.cox.neg <- function(lp) pmean.cox(-lp)

nomogram(fcox, vnames='names',
         meanbp=c(0,20,40,80,100,120,140,160,180,200),
         fun=list('Median ($/1000)'=pmed.cox.neg,
                  'Mean   ($/1000)'=pmean.cox.neg),
         fun.at=c(1,5,10,20,30,40,50,75,100,125,150))
}

curves <- list('Mean areg'  =list(x=meanbps,y=pmean.areg),
               'Median areg'=list(x=meanbps,y=pmed.areg),
               'Mean ols'   =list(x=meanbps,y=pmean.ols),
               'Median ols' =list(x=meanbps,y=pmed.ols),
               'Mean Cox'   =list(x=meanbps,y=pmean.cox(pcox)),
               'Median Cox' =list(x=meanbps,y=pmed.cox(pcox)))
setps(comparison, h=4)
labcurve(curves, pl=T, labels=F,
         ylab='Total Cost, $/1000', xlab='Mean Arterial BP',
         col=c(1,1,1,1,2,2), lty=c(1,1,2,2,1,1),
               lwd=c(1,1,1,1,3,3))
points(meanbps2, pmean.ols.os)
scat1d(meanbp)
key(2, 68,
    text=list(c('AVAS', 'OLS log', 'Cox', 'OLS mean'),
      col=c(1,1,2,1)),
    lines=list(lty=c(1,2,1,1), col=c(1,1,2,1), lwd=c(1,1,3,1),
      type=c(rep('l',3),'p')))
text(55,23,'Median'); text(58,56,'Mean')
dev.off()


#For each subject compute predicted mean cost by ols, areg, Cox
mols.os <- predict(f.ols.os)
mols    <- exp(predict(f.ols)+.5*(f.ols$stats['Sigma']^2))
mareg   <- predict(f.areg, statistic='mean')
mcox    <- pmean.cox(predict(f.cox))

w <- cbind(mols.os, mols, mareg, mcox)
splom(~ w, varnames=c('OLS', 'OLS log', 'areg', 'Cox'))

#Compare predicted means to observed means within intervals of
#predicted means

h <- function(y, na.rm=T) {
  qu <- quantile(y, c(.25,.75), na.rm=na.rm)
  c(Mean=mean(y,na.rm=na.rm), Lower=qu[1], Upper=qu[2])
}
lim <- c(0,90)
     
Cost <- rep(totcst, 4)
pred <- c(mols.os, mols, mareg, mcox)
n <- length(totcst)
type <- rep(c('OLS','OLS log','AVAS','Cox'),each=n)
setps(meanstrat, h=6, trellis=T)
xYplot(Cost ~ pred | type, method=h,
       xlab='Predicted Mean Cost', ylab='Mean Observed Cost in Interval',
       xlim=lim, ylim=lim, abline=list(a=0,b=1,col=3),
       lty.bands=rep(2,2), aspect='fill')
dev.off()


rcorr(w, totcst, type='spear')   # Lowest .97 against each other

#Show homoscedasticity for log OLS and AVAS models
sd1 <- sqrt(var(resid(f.ols)))
sd2 <- sqrt(var(resid(f.areg)))
r   <- c(resid(f.ols)/sd1, resid(f.areg)/sd2)
pred <- c(mols, mareg)
type <- rep(c('OLS log','AVAS'), each=n)
setps(res.ols.avas, h=4, trellis=T)
xYplot(r ~ pred | type, method=smean.sdl,
       xlab='Predicted Mean Cost', ylab='Residual',
       abline=list(h=c(0,-2,2), col=3, lwd=.2),
       xlim=c(0,90), ylim=c(-3,3))
dev.off()

