load(url('http://hbiostat.org/data/safety.rda'))
treats <- levels(All$trx)
week   <- All$week
source('http://hbiostat.org/R/talks/plot.rpart.s')
# version that uses grayscale for lines, no |,
# and defines prpart for pretty plotting of trees


omit  <- Cs(amylase,aty.lymph,glucose.fasting,neutrophil.bands)

## Make a list that separates variables into major categories
vars <- list(baseline=Cs(age,sex,race,height,weight,bmi,smoking,pack.yrs),
             ae  =Cs(headache, ab.pain, nausea, dyspepsia, diarrhea,
                     upper.resp.infect, coad),
             vital=Cs(sbp,dbp,hr),
             ekg =names(All)[50:54],
             chem=setdiff(names(All)[16:48],
               c(omit, Cs(lymphocytes.abs,monocytes.abs,
                          neutrophils.seg,eosinophils.abs,basophils.abs)))) 

sform <- function(formula) {
  ## expands abbreviations such as y ~ baseline + ekg + x1
  f <- deparse(formula)
  n <- names(vars)
  v <- sapply(vars, paste, collapse='+')
  f <- sedit(f, n, v)
  as.formula(f)
}

#abl <- function(dframe) {
#  a <- sapply(dframe, label)
#  abbreviate(ifelse(a=='', names(dframe), a), minlength=8)
#}

# Analysis of AEs alone

setps(varclus.ae1n, h=5, sublines=3, toplines=1)
par(mfrow=c(1,2))
par(mar=c(4,4,2,1))
par(mgp=c(2.5,.5,0))
w <- 1

for(tr in treats) {
    v <- varclus(sform(~ ae),
                 sim='bothpos',  
                 data=All,
                 subset=week==w & trx==tr)
    plot(v, ylim=c(0,.01))  #, labels=lbls)
    title(paste('Week',w,tr))
  }
dev.off()

wks <- sort(unique(week))
s <- array(NA, c(length(vars$ae),length(vars$ae),length(wks)),
           dimnames=list(vars$ae,vars$ae,NULL))

setps(multsim.aen,h=7)map
opar <- par(mar=c(0,2.1,0,0)) # was 2.1
mapmap
for(tr in treats) {
	
  for(i in 1:length(wks)) {
	 
	w <- wks[i]
	
    s[,,i] <- varclus(sform(~ ae),
                      sim='ccbothpos',  
                      data=All,
                      subset=week==w & trx==tr)$sim

  }
  plotMultSim(s, wks, slimds=T, # vname=lbls,
              add=tr=='Placebo', slim=c(0,1),
              lty=1+(tr=='Placebo'))
  
}
dev.off()

#-----------------------------------------------------------------
## Lab analysis

na <- naclus(All[vars$chem])
naplot(na,which='na per var')

numna <- function(x) sum(is.na(x))
for(w in sort(unique(labsumm$week))) {
  cat(w,'\n')
  print(sapply(labsumm,numna))
}

wks <- c(8)

setps(varclus.lab8n, h=5, sublines=3, toplines=1)
par(mfrow=c(length(wks),2))
for(w in wks) {
  for(tr in treats) {
    v <- varclus(sform(~chem), data=All,
                 subset=trx==tr)
    plot(v, ylim=c(0,1.0))
    title(paste('Week',w,tr))
  }
}
dev.off()

#------------------------------------------------------------------
##  Lab analysis - plotMultSim
##

wks <- setdiff(sort(unique(week)), 1)
#lbls <- abl(labsumm)[1+c(1:8,10,13:16,18,20:23,26,30:34)]
#lbls <- abl(labsumm)[1+c(3,7,8,13,14,16,20,21,23,30,31,32,34)]
#trt <- tx[labsumm$pid]

s <- array(NA, c(length(vars$chem),length(vars$chem),length(wks)),
           dimnames=list(vars$chem,vars$chem,NULL))

setps(multsim.labn, h=7)
opar <- par(mar=c(0,2,0,0))
par('mar')
for(tr in treats) {
	
  for(i in 1:length(wks)) {
	 
	w <- wks[i]
    s[,,i] <- varclus(sform(~ chem),
                      data=All,
                      subset=week==w & trx==tr)$sim
  }
  if(tr=='Drug') s.drug <- s else s.placebo <- s
  plotMultSim(s, wks, #vname=lbls,
              add=tr=='Placebo', slim=c(0,1),
              lty=1+(tr=='Placebo'), labelx=FALSE, xspace=1)
}
par(opar)
dev.off()

sc <- function(s) {
  d <- dim(s)
  r <- matrix(NA, nrow=d[1], ncol=d[2],
              dimnames=dimnames(s)[1:2])
  for(i in 1:d[1])
    for(j in 1:d[2])
      r[i,j] <- lsfit(wks, s[i,j,])$coef[2]
  r
}

d <- sc(s.drug) - sc(s.placebo)
p <- dim(d)[1]
setps(labcorcor,h=7)
par(mar=c(2,3,0,0),xpd=NA)
plot(c(-.35,p+.5),c(.5,p+.25), type='n', axes=FALSE, xlab='',ylab='')
v <- dimnames(d)[[1]]
text(rep(.5,p), 1:p, v, adj=1)
w <- max(abs(d))
for(i in 1:p) {
  for(j in 1:p) {
    if(i>=j) next
    lines(c(i,i),c(j,j+d[i,j]/w/2), lwd=3)
    lines(c(i-.2,i+.2),c(j,j),col=gray(.7))
  }
  text(i,i,v[i],srt=-45,adj=0)
}
dev.off()

#--------------------------------------------------------------------
##  ECG analysis

setps(varclus.ecg8n, h=5, sublines=3, toplines=1)
par(mfrow=c(1,2))
#par(mar=c(4,4,2,1))
#par(mgp=c(2.5,.5,0))
w <- 8

for(tr in treats) {
    v <- varclus(sform(~ ekg),
                 data=All,
                 subset=week==w & trx==tr)
    plot(v) #, labels=lbls)
    title(paste('Week',w,tr))
  }
dev.off()

wks <- setdiff(sort(unique(weeks)), 1)
v <- vars$ekg
s <- array(NA, c(length(v),length(v),length(wks)),
           dimnames=list(v,v,NULL))
opar <- par(mar=c(0,0,4.1,0))

for(tr in treats) {
	
  for(i in 1:length(wks)) {
	 
	w <- wks[i]; cat(w)
	
   s[,,i] <- varclus(sform(~ ekg),
                      data=All,
                      subset=week==w & trx==tr)$sim

    }
  plotMultSim(s, wks, #vname=lbls,
              add=tr=='Placebo', slim=c(0,1),
              lty=1+(tr=='Placebo'))
}
# No trends seen, don't use graph

#------------------------------------------------------------------
# Clustering of vitals, AEs, EKG, chem lab at 8 weeks

setps(varclus.all, h=5, sublines=3, toplines=1)
par(mfrow=c(1,2))
#par(mar=c(4,4,2,1))
#par(mgp=c(2.5,.5,0))
w <- 8

for(tr in treats) {
    v <- varclus(sform(~ vital + coad + ekg + chem),
                 data=All,
                 subset=week==w & trx==tr)
    plot(v) #, labels=lbls)
    title(paste('Week',w,tr))
  }
dev.off()



#------------------------------------------------------------------

library(rpart)

r <- rpart(sform(coad ~ trx + baseline + vital + ekg + chem),
           data=All, subset=week==8,
           control=rpart.control(minbucket=25))

setps(rpart.coad.week8n, h=4, w=4, toplines=2, sublines=1)
prpart(r, 'Chronic Obstructive Airways Disease\nWeek 8')
dev.off()


r <- rpart(sform(coad ~ week + trx + baseline + vital + ekg + chem),
           control=rpart.control(minbucket=25,cp=.0029),
           data=All)   # subset(All,week!=1))
setps(rpart.coadn, h=5, w=5, toplines=1)
prpart(r, 'Chronic Obstructive Airways Disease\nAll Weeks')
dev.off()


drug <- 1*(All$trx[week > 4]=='Drug')

r <- rpart(sform(drug ~ week + ae + vital + ekg + chem),
           control=rpart.control(minbucket=150,cp=.0015),
           data=subset(All, week > 4))
setps(rpart.drugn, h=7, w=6, toplines=1)
prpart(r, "Regression Tree for Prob[drug]\nWeek 8-20 ")
dev.off()

rm(week)
f <- lrm(trx ~ week + rcs(sbp,4) + rcs(dbp,4) +
         rcs(lymphocytes,4) + rcs(albumin,4) + rcs(alk.phos,4) +
         rcs(bilirubin,4) +
         rcs(bun,4) + rcs(creatinine,4) + rcs(glucose,4) +
         rcs(hematocrit,4) + rcs(potassium,4) + 
         rcs(platelets,4) + rcs(protein,4) + rcs(rbc,4) + rcs(wbc,4) +
         rcs(axis,4) + rcs(corr.qt,4) +
         rcs(pr,4) + rcs(qrs,4) + rcs(rr,4) + 
         headache + diarrhea + ab.pain + dyspepsia + nausea +
         upper.resp.infect + coad,
         data=subset(All, week > 2)) # !=1 & week<=12)
# Used 1115 obs



setps(anova.drug,h=5,toplines=1)
plot(anova(f))
title('Independent Predictors of Drug\nPost 2 Weeks')
dev.off()

d <- data.frame(lymphocytes,albumin,alk.phos,hematocrit,bilirubin,
                creatinine,bilirubin,WBC,RBC,platelets,
                corr.qt,qrs,all.tx)[all$week==8,]
setps(ecdf.wk8,h=6,pointsize=12)
par(mfrow=c(3,4))
ecdf(d, group=d$all.tx, label.curve=F)
dev.off()



rm(WBC,HR,uncorr.qt,nausea,diarrhea,creatinine,platelets,protein,lymphocytes,
   BUN, axis, pr, headache, ab.pain, dyspepsia, upper.resp.infect,
   coad, albumin, alk.phos, bilirubin, hematocrit, potassium, RBC,
   atrial.rate, corr.qt, pr, qrs, rr, glucose, d)
   


## For each subject determine if had coad after week 8 and no coad
## week 8 or earlier
coad <- All$coad
id   <- All$id
g <- function(x) {
  before <- x[,1] <= 8
  1*(any(x[!before,2] > 0) & !any(x[before,2] > 0))
}
s <- summarize(cbind(week,coad), id, g, stat.name='newcoad')
## Get week 8 lab results
w8 <- subset(All, week==8 & id %in% s$id)  # summarize drops NA results 
table(w8$id-s$id)
w8$newcoad <- s$newcoad
dd <- datadist(subset(w8,select=-aty.lymph))
options(datadist='dd')
f <- lrm(newcoad ~ age + sex + smoking + 
         rcs(lymphocytes,5) + rcs(wbc,5) +
         rcs(hr,3) + trx + rcs(bmi,3), x=T, y=T, data=w8)
validate(f, B=100)  # App C: .739  Boot corr: .660 Dxy=.319

h <- lrm(newcoad ~ age + sex + smoking + 
         rcs(wbc,5) +
         rcs(hr,3) + trx + rcs(bmi,3), x=T, y=T, data=w8)

setps(wbcsex, h=4, sublines=1)
plot(f, wbc=NA, sex=NA, conf.int=F, fun=plogis,
     ylab='Prob[New COAD]')
dev.off()

p <- perimeter(w8$wbc, w8$lymphocytes)

setps(wbclymph,h=4,w=4,sublines=1)
plot(f, wbc=seq(4,10,length=40), lymphocytes=seq(1,3,length=40),
     method='image', perim=p)
dev.off()

r <- rpart(sform(newcoad ~ trx + baseline + vital + ekg + chem),
           control=rpart.control(minbucket=25, cp=.001),
           data=w8)
r$cptable    # no splits that validated found

## Prediction of GI problems at 8 weeks based on data at week 4,
## excluding AEs
gi <- 1*evalq(ab.pain | nausea | dyspepsia | diarrhea, All)
describe(gi)

g <- function(y) {
  wk <- y[,1]
  gi <- y[,2]
  if(!any(wk >= 8 & !is.na(gi))) return(NA)
  any(wk >= 8 & gi == 1)
}
s <- mApply(cbind(week,gi), All$id, g)
w4 <- subset(All, week==4)
w4$gi8 <- s
f <- rpart(sform(gi8 ~ trx + baseline + vital + ekg + chem),
           data=w4,
           control=rpart.control(minbucket=50,cp=.0068))
# No splits
f <- rpart(gi8 ~ trx + age + sex + bmi + smoking +
           dbp + sbp + hr + wbc, data=w4,
           control=rpart.control(minbucket=50,cp=.001))
# No splits
           

f <- lrm(gi8 ~ trx + rcs(age,3) + sex + rcs(bmi,3) + smoking +
         rcs(dbp,3)+rcs(sbp,3)+rcs(hr,3)+rcs(wbc,4),
         data=w4, x=T,y=T)
validate(f, B=120)
# Apparent: C=.64 val: .58
f$stats
setps(anovagi8, toplines=1)
plot(anova(f))
title('Partial Associations with GI AEs')
dev.off()

meanbp <- (2*w4$dbp + w4$sbp)/3
f <- lrm(gi8 ~ trx + rcs(age,3) + sex + rcs(bmi,3) + smoking +
         rcs(meanbp,3)+rcs(hr,3),
         data=w4)
# aic=-3.61, was -3.15; stick with separate bp
