load(url('http://hbiostat.org/data/safety.rda')) treats <- levels(All$trx) week <- All$week source('http://hbiostat.org/R/talks/plot.rpart.s') # version that uses grayscale for lines, no |, # and defines prpart for pretty plotting of trees omit <- Cs(amylase,aty.lymph,glucose.fasting,neutrophil.bands) ## Make a list that separates variables into major categories vars <- list(baseline=Cs(age,sex,race,height,weight,bmi,smoking,pack.yrs), ae =Cs(headache, ab.pain, nausea, dyspepsia, diarrhea, upper.resp.infect, coad), vital=Cs(sbp,dbp,hr), ekg =names(All)[50:54], chem=setdiff(names(All)[16:48], c(omit, Cs(lymphocytes.abs,monocytes.abs, neutrophils.seg,eosinophils.abs,basophils.abs)))) sform <- function(formula) { ## expands abbreviations such as y ~ baseline + ekg + x1 f <- deparse(formula) n <- names(vars) v <- sapply(vars, paste, collapse='+') f <- sedit(f, n, v) as.formula(f) } #abl <- function(dframe) { # a <- sapply(dframe, label) # abbreviate(ifelse(a=='', names(dframe), a), minlength=8) #} # Analysis of AEs alone setps(varclus.ae1n, h=5, sublines=3, toplines=1) par(mfrow=c(1,2)) par(mar=c(4,4,2,1)) par(mgp=c(2.5,.5,0)) w <- 1 for(tr in treats) { v <- varclus(sform(~ ae), sim='bothpos', data=All, subset=week==w & trx==tr) plot(v, ylim=c(0,.01)) #, labels=lbls) title(paste('Week',w,tr)) } dev.off() wks <- sort(unique(week)) s <- array(NA, c(length(vars$ae),length(vars$ae),length(wks)), dimnames=list(vars$ae,vars$ae,NULL)) setps(multsim.aen,h=7)map opar <- par(mar=c(0,2.1,0,0)) # was 2.1 mapmap for(tr in treats) { for(i in 1:length(wks)) { w <- wks[i] s[,,i] <- varclus(sform(~ ae), sim='ccbothpos', data=All, subset=week==w & trx==tr)$sim } plotMultSim(s, wks, slimds=T, # vname=lbls, add=tr=='Placebo', slim=c(0,1), lty=1+(tr=='Placebo')) } dev.off() #----------------------------------------------------------------- ## Lab analysis na <- naclus(All[vars$chem]) naplot(na,which='na per var') numna <- function(x) sum(is.na(x)) for(w in sort(unique(labsumm$week))) { cat(w,'\n') print(sapply(labsumm,numna)) } wks <- c(8) setps(varclus.lab8n, h=5, sublines=3, toplines=1) par(mfrow=c(length(wks),2)) for(w in wks) { for(tr in treats) { v <- varclus(sform(~chem), data=All, subset=trx==tr) plot(v, ylim=c(0,1.0)) title(paste('Week',w,tr)) } } dev.off() #------------------------------------------------------------------ ## Lab analysis - plotMultSim ## wks <- setdiff(sort(unique(week)), 1) #lbls <- abl(labsumm)[1+c(1:8,10,13:16,18,20:23,26,30:34)] #lbls <- abl(labsumm)[1+c(3,7,8,13,14,16,20,21,23,30,31,32,34)] #trt <- tx[labsumm$pid] s <- array(NA, c(length(vars$chem),length(vars$chem),length(wks)), dimnames=list(vars$chem,vars$chem,NULL)) setps(multsim.labn, h=7) opar <- par(mar=c(0,2,0,0)) par('mar') for(tr in treats) { for(i in 1:length(wks)) { w <- wks[i] s[,,i] <- varclus(sform(~ chem), data=All, subset=week==w & trx==tr)$sim } if(tr=='Drug') s.drug <- s else s.placebo <- s plotMultSim(s, wks, #vname=lbls, add=tr=='Placebo', slim=c(0,1), lty=1+(tr=='Placebo'), labelx=FALSE, xspace=1) } par(opar) dev.off() sc <- function(s) { d <- dim(s) r <- matrix(NA, nrow=d[1], ncol=d[2], dimnames=dimnames(s)[1:2]) for(i in 1:d[1]) for(j in 1:d[2]) r[i,j] <- lsfit(wks, s[i,j,])$coef[2] r } d <- sc(s.drug) - sc(s.placebo) p <- dim(d)[1] setps(labcorcor,h=7) par(mar=c(2,3,0,0),xpd=NA) plot(c(-.35,p+.5),c(.5,p+.25), type='n', axes=FALSE, xlab='',ylab='') v <- dimnames(d)[[1]] text(rep(.5,p), 1:p, v, adj=1) w <- max(abs(d)) for(i in 1:p) { for(j in 1:p) { if(i>=j) next lines(c(i,i),c(j,j+d[i,j]/w/2), lwd=3) lines(c(i-.2,i+.2),c(j,j),col=gray(.7)) } text(i,i,v[i],srt=-45,adj=0) } dev.off() #-------------------------------------------------------------------- ## ECG analysis setps(varclus.ecg8n, h=5, sublines=3, toplines=1) par(mfrow=c(1,2)) #par(mar=c(4,4,2,1)) #par(mgp=c(2.5,.5,0)) w <- 8 for(tr in treats) { v <- varclus(sform(~ ekg), data=All, subset=week==w & trx==tr) plot(v) #, labels=lbls) title(paste('Week',w,tr)) } dev.off() wks <- setdiff(sort(unique(weeks)), 1) v <- vars$ekg s <- array(NA, c(length(v),length(v),length(wks)), dimnames=list(v,v,NULL)) opar <- par(mar=c(0,0,4.1,0)) for(tr in treats) { for(i in 1:length(wks)) { w <- wks[i]; cat(w) s[,,i] <- varclus(sform(~ ekg), data=All, subset=week==w & trx==tr)$sim } plotMultSim(s, wks, #vname=lbls, add=tr=='Placebo', slim=c(0,1), lty=1+(tr=='Placebo')) } # No trends seen, don't use graph #------------------------------------------------------------------ # Clustering of vitals, AEs, EKG, chem lab at 8 weeks setps(varclus.all, h=5, sublines=3, toplines=1) par(mfrow=c(1,2)) #par(mar=c(4,4,2,1)) #par(mgp=c(2.5,.5,0)) w <- 8 for(tr in treats) { v <- varclus(sform(~ vital + coad + ekg + chem), data=All, subset=week==w & trx==tr) plot(v) #, labels=lbls) title(paste('Week',w,tr)) } dev.off() #------------------------------------------------------------------ library(rpart) r <- rpart(sform(coad ~ trx + baseline + vital + ekg + chem), data=All, subset=week==8, control=rpart.control(minbucket=25)) setps(rpart.coad.week8n, h=4, w=4, toplines=2, sublines=1) prpart(r, 'Chronic Obstructive Airways Disease\nWeek 8') dev.off() r <- rpart(sform(coad ~ week + trx + baseline + vital + ekg + chem), control=rpart.control(minbucket=25,cp=.0029), data=All) # subset(All,week!=1)) setps(rpart.coadn, h=5, w=5, toplines=1) prpart(r, 'Chronic Obstructive Airways Disease\nAll Weeks') dev.off() drug <- 1*(All$trx[week > 4]=='Drug') r <- rpart(sform(drug ~ week + ae + vital + ekg + chem), control=rpart.control(minbucket=150,cp=.0015), data=subset(All, week > 4)) setps(rpart.drugn, h=7, w=6, toplines=1) prpart(r, "Regression Tree for Prob[drug]\nWeek 8-20 ") dev.off() rm(week) f <- lrm(trx ~ week + rcs(sbp,4) + rcs(dbp,4) + rcs(lymphocytes,4) + rcs(albumin,4) + rcs(alk.phos,4) + rcs(bilirubin,4) + rcs(bun,4) + rcs(creatinine,4) + rcs(glucose,4) + rcs(hematocrit,4) + rcs(potassium,4) + rcs(platelets,4) + rcs(protein,4) + rcs(rbc,4) + rcs(wbc,4) + rcs(axis,4) + rcs(corr.qt,4) + rcs(pr,4) + rcs(qrs,4) + rcs(rr,4) + headache + diarrhea + ab.pain + dyspepsia + nausea + upper.resp.infect + coad, data=subset(All, week > 2)) # !=1 & week<=12) # Used 1115 obs setps(anova.drug,h=5,toplines=1) plot(anova(f)) title('Independent Predictors of Drug\nPost 2 Weeks') dev.off() d <- data.frame(lymphocytes,albumin,alk.phos,hematocrit,bilirubin, creatinine,bilirubin,WBC,RBC,platelets, corr.qt,qrs,all.tx)[all$week==8,] setps(ecdf.wk8,h=6,pointsize=12) par(mfrow=c(3,4)) ecdf(d, group=d$all.tx, label.curve=F) dev.off() rm(WBC,HR,uncorr.qt,nausea,diarrhea,creatinine,platelets,protein,lymphocytes, BUN, axis, pr, headache, ab.pain, dyspepsia, upper.resp.infect, coad, albumin, alk.phos, bilirubin, hematocrit, potassium, RBC, atrial.rate, corr.qt, pr, qrs, rr, glucose, d) ## For each subject determine if had coad after week 8 and no coad ## week 8 or earlier coad <- All$coad id <- All$id g <- function(x) { before <- x[,1] <= 8 1*(any(x[!before,2] > 0) & !any(x[before,2] > 0)) } s <- summarize(cbind(week,coad), id, g, stat.name='newcoad') ## Get week 8 lab results w8 <- subset(All, week==8 & id %in% s$id) # summarize drops NA results table(w8$id-s$id) w8$newcoad <- s$newcoad dd <- datadist(subset(w8,select=-aty.lymph)) options(datadist='dd') f <- lrm(newcoad ~ age + sex + smoking + rcs(lymphocytes,5) + rcs(wbc,5) + rcs(hr,3) + trx + rcs(bmi,3), x=T, y=T, data=w8) validate(f, B=100) # App C: .739 Boot corr: .660 Dxy=.319 h <- lrm(newcoad ~ age + sex + smoking + rcs(wbc,5) + rcs(hr,3) + trx + rcs(bmi,3), x=T, y=T, data=w8) setps(wbcsex, h=4, sublines=1) plot(f, wbc=NA, sex=NA, conf.int=F, fun=plogis, ylab='Prob[New COAD]') dev.off() p <- perimeter(w8$wbc, w8$lymphocytes) setps(wbclymph,h=4,w=4,sublines=1) plot(f, wbc=seq(4,10,length=40), lymphocytes=seq(1,3,length=40), method='image', perim=p) dev.off() r <- rpart(sform(newcoad ~ trx + baseline + vital + ekg + chem), control=rpart.control(minbucket=25, cp=.001), data=w8) r$cptable # no splits that validated found ## Prediction of GI problems at 8 weeks based on data at week 4, ## excluding AEs gi <- 1*evalq(ab.pain | nausea | dyspepsia | diarrhea, All) describe(gi) g <- function(y) { wk <- y[,1] gi <- y[,2] if(!any(wk >= 8 & !is.na(gi))) return(NA) any(wk >= 8 & gi == 1) } s <- mApply(cbind(week,gi), All$id, g) w4 <- subset(All, week==4) w4$gi8 <- s f <- rpart(sform(gi8 ~ trx + baseline + vital + ekg + chem), data=w4, control=rpart.control(minbucket=50,cp=.0068)) # No splits f <- rpart(gi8 ~ trx + age + sex + bmi + smoking + dbp + sbp + hr + wbc, data=w4, control=rpart.control(minbucket=50,cp=.001)) # No splits f <- lrm(gi8 ~ trx + rcs(age,3) + sex + rcs(bmi,3) + smoking + rcs(dbp,3)+rcs(sbp,3)+rcs(hr,3)+rcs(wbc,4), data=w4, x=T,y=T) validate(f, B=120) # Apparent: C=.64 val: .58 f$stats setps(anovagi8, toplines=1) plot(anova(f)) title('Partial Associations with GI AEs') dev.off() meanbp <- (2*w4$dbp + w4$sbp)/3 f <- lrm(gi8 ~ trx + rcs(age,3) + sex + rcs(bmi,3) + smoking + rcs(meanbp,3)+rcs(hr,3), data=w4) # aic=-3.61, was -3.15; stick with separate bp