# Function to start a verbatim quote if results='asis' in effect
# Works for all output formats in R markdown
squote <- function() {    # start quote
  r <- knitr::opts_current$get('results')
  if(length(r) && r == 'asis') cat('\n```')
  invisible()
}

# Function to close the quote if needed
equote <- function() {
  r <- knitr::opts_current$get('results')
  if(length(r) && r == 'asis') cat('```\n\n')
  invisible()
}

# Function to print an object or inline text or both,
# verbatim quoting if needed (results='asis') in effect in chunk
# Inline text is printed with cat()
# Fractional numbers are rounded to the nearest dec digits for data frames
pr <- function(x='', obj=NULL, inline=NULL, dec=3) {
  r <- knitr::opts_current$get('results')
  asis <- length(r) && r == 'asis'
  if(asis) cat('\n```')
  if(any(x != '') || length(inline))
    cat('\n', x, if(any(x != '')) ' ', inline, '\n\n', sep='')
  if(length(obj)) {
    if(is.data.frame(obj))
      for(i in 1 : length(obj)) {
        x <- obj[[i]]
        if(is.numeric(x)) obj[[i]] <- round(x, dec)
      }
    print(obj, quote=FALSE)
    }
  if(asis) cat('```\n\n')
	invisible()
}
# This function is now in Hmisc so just do:
# pr <- markupSpecs$markdown$pr

pst <- function(x) paste(x, collapse=', ')

rn <- function(x) round(x, 2)

relfreq <- function(x) table(x) / length(x)

# Randomly tries different starting values given to intMarkovOrd
# This is needed because intMarkovOrd can get lost if a single set of
# starting values is used and these values are far from the optima.
findstart <- function(ntry=50, seed=8, g, intercepts, extra, constraints=NULL,
                      ftarget=NULL, absorb=4, initial=2, y=1:4, times) {
	set.seed(seed)
	m    <- numeric(ntry)
  ints <- matrix(0, nrow=ntry, ncol=length(intercepts),
								 dimnames=list(NULL, names(intercepts)))
	ex   <- matrix(0, nrow=ntry, ncol=length(extra),
								 dimnames=list(NULL, names(extra)))
	t    <- as.numeric(row.names(target))
	for(i in 1 : ntry) {
    ui <- runif(length(intercepts), -1, 1) # random increments to initial ints
		u  <- runif(length(extra),      -1, 1) # random increments to initial values
    ints[i, ] <- intercepts + ui
		ex[i, ] <- extra + u
		m[i] <- intMarkovOrd(y, times, initial=initial, absorb=absorb,
                         intercepts=ints[i, ], g=g, target=target,
                         t=t, ftarget=ftarget, extra=ex[i, ],
                         onlycrit=TRUE, constraints=constraints)
	}
	pr('Minimum sum of absolute errors:', inline=min(m))
	extra <- ex[which.min(m), ]
	pr('Extra achieving this minimum:', extra)

  squote()
	z <- intMarkovOrd(y, times, initial=initial, absorb=absorb,
                    intercepts=intercepts, g=g, target=target,
                    t=t, ftarget=ftarget, extra=extra, constraints=constraints,
                    printsop=FALSE)
  equote()
  z
}

# Function to print or plot state occupancy probabilities for groups
# 1 and 2.  If a pdf report is being produced, prints and produces
# regular ggplot2 graphic.  If an html report is produced, does not
# print, and produces a plotly interactive graphics
# gtype='combined' to create pairs of bars for groups 1, 2 or
# gtype='sep' to make separate panels for 1, 2
# Assumes that extra has been updated inside g or is passed
# as extra=... to this function
sop12 <- function(y, times, initial, absorb=NULL, intercepts, g,
                  which=12, gtype=c('combined', 'sep'),
                  pri=outfmt != 'html', ...) {

  gtype <- match.arg(gtype)

 	# Convert from matrix to tall and thin data frame
  cnv <- function(z)
    data.frame(p    = as.vector(z),
               time = as.numeric(rownames(z)[as.vector(row(z))]),
               y    = y[as.vector(col(z))])

  if(which != 2) {
    s1 <- soprobMarkovOrd(y, times, initial=initial, absorb=absorb,
                          intercepts=intercepts, g=g, X=1, ...)
    if(pri)
      pr('Occupancy probabilities for group 1:', round(s1, 3))

    z1 <- cnv(s1)
    if(which == 1) {
      z1$txt <- with(z1, paste0(y, '<br>t=', time, '<br>p=', round(p, 3)))
      g <- ggplot(z1, aes(x=factor(time), y=p, fill=factor(y), label=txt)) +
        geom_col() + guides(fill = guide_legend(title='')) +
        xlab('Time') + ylab('Probability')
      return(ggp(g))  # ggp defined in sim.Rmd
      }
  }
    
  if(which != 1) {
    s2 <- soprobMarkovOrd(y, times, initial=initial, absorb=absorb,
                          intercepts=intercepts, g=g, X=2, ...)
    if(pri) pr('Occupancy probabilities for group 2:', round(s2, 3))

    z2 <- cnv(s2)
    if(which == 2) {
      z2$txt <- with(z2, paste0(y, '<br>t=', time, '<br>p=', round(p, 3)))
      g <- ggplot(z2, aes(x=factor(time), y=p, fill=factor(y), label=txt)) +
        geom_col() + guides(fill = guide_legend(title='')) +
        xlab('Time') + ylab('Probability') +
      return(ggp(g))  # ggp defined in sim.Rmd
      }
  }

  # which=12: make combined plot for groups 1 and 2

  z <- rbind(data.frame(z1, group=1), data.frame(z2, group=2))
  z$group <- factor(z$group)
  z$txt <- with(z, paste0('Group ', group, '<br>State:', y,
                          '<br>t=', time, '<br>p=', round(p, 3)))
  g <- if(gtype == 'sep')
         ggplot(z, aes(x=factor(time), y=p, fill=factor(y), label=txt)) +
           geom_col() + facet_wrap(~ group) + xlab('Time')
       else
         ggplot(z, aes(x=group, y=p, fill=factor(y), label=txt)) +
           geom_col() + facet_wrap(~ time, nrow=1) + xlab('') +
           theme(axis.text.x=element_blank())
  g <- g + guides(fill = guide_legend(title='')) + ylab('Probability')
  
  ggp(g)
  }

# Plot single result from soprobMarkovOrd, along the lines of sop12
# Also prints if using pdf output
plotsop <- function(s, pri=outfmt != 'html') {

 	# Convert from matrix to tall and thin data frame
  cnv <- function(z)
    data.frame(p    = as.vector(z),
               time = as.numeric(rownames(z)[as.vector(row(z))]),
               y    = dimnames(z)[[2]][as.vector(col(z))])

  if(pri) pr('Occupancy probabilities for group 1:', round(s, 3))

  z <- cnv(s)
  z$txt <- with(z, paste0('State:', y, '<br>t=', time, '<br>p=', round(p, 3)))
  g <- ggplot(z, aes(x=factor(time), y=p, fill=factor(y), label=txt)) +
    geom_col() + guides(fill = guide_legend(title='')) +
    xlab('Time') + ylab('Probability')
  ggp(g)
}

# Check non-proportional-odds induced intercepts to see if in descending order
# Only need to vary time since it's the only thing we interact with y
chkints <- function(times, intercepts, extra, more=NULL) {
	x <- NULL
	for(t in times) {
		talpha <- intercepts + as.vector(g(1, t=t, gap=2, X=1, extra=extra))
		if(any(diff(talpha) > 0.))
			x <- rbind(x, data.frame(t=t, alpha=paste(round(talpha, 3), collapse=' '), .more.=more))
	}
	if(length(x)) {
		names(x) <- ifelse(names(x) == '.more.', names(more), names(x))
		pr('Intercepts out of order at the following times due to magnitude of departure from PO:', x)
	}
	invisible(length(x) == 0)
	}

# For a given set of objects defining the simulation model, run nsim
# simulations for each odds ratio in ors and save the results in
# file.  Don't re-run the simulations unless one of the arguments to
# dosim changed since the last time the file was stored, or
# code for one of the two called Hmisc functions has changed.
# This is accomplished by comparing hashes from the current call
# and the previously stored fit if there was one.
# The function assumes that the extra parameter values are stored
# in the default value of g function arguments 'extra'.
# The nsim simulations are split into batches, with 'cores' batches
# run in parallel.  The 'runParallel' function in
# https://github.com/harrelfe/rscripts uses all cores but one.
#
# If estcut and/or vestcut are given, after initial results are
# printed, any records with Markov point estimate greater in absolute value
# than estcut or with variance estimate > vestcut will be set to missing
# and basic calculations will be repeated.
dosim <- function(y=1:4, times,
                  g, absorb=4, initial,
                  intercepts=intercepts,
                  ors, formula, ppo=NULL,
                  yprevfactor=TRUE, groupContrast=NULL,
                  timecriterion=NULL, cscov=FALSE,
                  coxzph=length(timecriterion) > 0,
                  sstat=NULL,
                  rdsample=NULL,
                  looks=600, nsim=1000, seed=1,
                  maxest=NULL, maxvest=NULL, file) {

	# See if previous fit had identical inputs and Hmisc code

  hashobj  <-
    hashCheck(estSeqMarkovOrd, simMarkovOrd,
      y, times, g, absorb, initial, intercepts, ors, formula, ppo,
      yprevfactor, groupContrast, timecriterion, cscov,
      coxzph, sstat, rdsample, looks, nsim, seed, maxest, maxvest, file=file)
  hash <- hashobj$hash
  R    <- hashobj$result
  if(! length(R)) {
    # Run the simulations since something changed or file didn't exist
    # f runs simulations for one core

    f <- function(reps, showprogress, core) {
      pfile <- showprogress(core=core, pr=FALSE) # just get file name
      r <-
        estSeqMarkovOrd(y, times=times, initial=initial, absorb=absorb,
                        intercepts=intercepts,
                        parameter=log(ors), looks=looks, g=g,
                        formula=formula, ppo=ppo, cscov=cscov,
                        yprevfactor=yprevfactor, groupContrast=groupContrast,
                        timecriterion=timecriterion, sstat=sstat, nsim=reps,
                        rdsample=rdsample,
                        maxest=maxest, maxvest=maxvest,
                        progress=TRUE, pfile=pfile)

      sim       <- r
      ## Make simulation numbers distinct for later across-core combining
      sim$sim <- paste(core, sim$sim)
      attr(sim, 'etimefreq') <- attr(sim, 'sstat') <-
        attr(sim, 'lrmcoef') <- NULL
      R <- list(sim=sim)
      etimefreq <- attr(r, 'etimefreq')
      if(length(etimefreq)) R$etimefreq <- etimefreq
      sstat     <- attr(r, 'sstat')
      if(length(sstat)) {
        dimnames(sstat)[[1]] <- paste(core, dimnames(sstat)[[1]])
        R$sstat <- sstat
      }
      lrmcoef   <- attr(r, 'lrmcoef')
      if(length(lrmcoef)) R$lrmcoef <- lrmcoef
      failures  <- attr(r, 'failures')
      if(length(failures)) R$failures <- failures
      R
  }

    R <- runParallel(f, reps=nsim, seed=seed,
                     along=c(etimefreq=1, sstat=1, lrmcoef=2, failures=1))

    sim <- R$sim
    setDT(sim)
    sim[, OR := exp(parameter)]
    setkey(sim, OR)
    R$sim <- sim
    ## Condense failure frequencies
    fa <- R$failures
    if(length(fa)) {
      setDT(fa, key='failure')
      fa <- fa[, .(Freq=sum(Freq)), by=failure]
      fa <- fa[Freq > 0, ]
      R$failures <- fa
      }
    setattr(R, 'hash', hash)
    saveRDS(R, file, compress='xz')
	}

  fa  <- R$failures
  if(length(fa) && nrow(fa) > 0) pr('Failed simulations', fa)
  sim <- R$sim
  cof <- R$lrmcoef
  cof <- round(apply(cof, c(1, 3), median, na.rm=TRUE), 4)
  rownames(cof) <- as.character(round(exp(as.numeric(row.names(cof))), 2))
  pr(paste('Median of', nsim,
           'logistic regression coefficients for each OR'), cof)

  w <- sim[, .(SD    = round(sd (est, na.rm=TRUE), 3),
               MAD   = round(mad(est, na.rm=TRUE), 3),
               SDest = round(sqrt(median(vest, na.rm=TRUE)), 3)), by=.(OR,look)]
  pr('Standard deviation of simulated group effects, scaled MAD, and square root of median of estimated variances',
     w)
  R
}

# Simulate a sample of size n from an ordinal Markov model
# testsamp assumes that values of extra are stored as default argument values
# for g.  Returns the correlation matrix.
testsamp <- function(n=10000, initial=2, absorb=4,
                     intercepts=intercepts, y=1:4, times) {
  s <- simMarkovOrd(n=n, y, times, initial=initial, X=c(group=1),
                    absorb=absorb, intercepts=intercepts, g=g, carry=TRUE)
  # Remember: vector extra is now predefined in g
  w  <- with(s, tapply(y, time, relfreq))
  pr(paste('State occupancy proportions from', n, 'samples'),
     round(do.call(rbind, w), 3))
  setDT(s, key=c('id', 'time'))
  w <- dcast(s, id ~ time, value.var='y')
  r <- cor(as.matrix(w)[, -1])
  p <- dim(r)[1]
  if(p < 11 && outfmt != 'html') {	
    w <- round(r, 2)
    w[lower.tri(w)] <- NA
    w[] <- ifelse(is.na(w), '', format(w))
    pr('Correlation matrix', w)
  }
  invisible(r)
}

# Examine and analyze the simulated regression coefficients, inluding
# making various graphical displays
examSim <- function(R, timedist=TRUE, ORvsHR=TRUE,
                    hist=TRUE, scatter=TRUE, timeevent='Y=1', desc='') {
  sim <- R$sim
  setkey(sim, OR, look)
	cox <- 'lrchisq' %in% names(sim)
	ph  <- 'phchisq' %in% names(sim)

  caps <- character(0); pobj <- list(); no <- 0
  
  bar <- function(x) mean(x, na.rm=TRUE)

  nsim <- min(sim[, .(n=length(est)), by=.(OR, look)]$n)
  pr(paste('Number of successful simulations out of', nsim),
     sim[, .(n=sum(! is.na(est))), by=.(OR, look)])

  ## Check: at last look should equal this:
  ## apply(R$lrmcoef[,,1], 1, function(x) sum(! is.na(x)))
  
	et <- R$etimefreq
	if(timedist && length(et)) {
		et <- apply(et, 2:5, sum)
    dn <- dimnames(et)
    et <-
      data.frame(n=as.vector(et),
                 OR    = exp(as.numeric(dn[[1]])[as.vector(slice.index(et, 1))]),
                 group = paste('Group', dn[[2]][as.vector(slice.index(et, 2))]),
                 event = dn[[3]][as.vector(slice.index(et, 3))],
                 t     = as.numeric(dn[[4]][as.vector(slice.index(et, 4))],
                                    levels=dn[[4]]))
    ang <- if(length(unique(et$t)) > 10) 90 else 0
    et$txt <- with(et, paste0(event, '<br>Group:', group, '<br>OR=', OR,
                           '<br>n=', n, '<br>t=', t))
    gp <- ggplot(et, aes(x=factor(t), y=n, fill=event, label=txt)) +
      geom_col() + facet_grid(OR ~ group) + xlab('t') +
      theme(axis.text.x=element_text(angle=ang))

    no <- no + 1
    pobj[[no]] <- ggp(gp, tooltip='text')
    caps <- c(caps, paste('Time to ', timeevent,
                    'by group and OR, over simulations'))

    ORs <- sort(unique(et$OR))
    m <- length(ORs)
    d <- NULL
    for(or in ORs) {
      ets <- subset(et, OR == or)
      km  <- survfit(Surv(t, event=='event') ~ group, weights=n, data=ets)
      d   <- rbind(d, data.frame(survfit2df(km), OR=or))
    }
    d$txt <- with(d, paste0('Group:',       group,
                            '<br>OR:',      OR,
                            '<br>t:',       time,
                            '<br>At risk:', nrisk,
                            '<br>',         round(1. - surv, 3)))
    g <- ggplot(d, aes(x=time, y=1. - surv, color=group,
                       group=group, label=txt)) +
      geom_step(direction='hv') +
      facet_wrap(~ OR) + xlab('Day') + ylab('Cumulative Incidence')
    no   <- no + 1
    pobj[[no]] <- ggp(g)
    caps <- c(caps, paste('Cumulative incidence of', timeevent))

    # Old way using base graphics
    if(FALSE) {
    par(mar=c(2.5, 2.4, 1, 1), mgp=c(1.5, 0.5, 0), bty='l')
    ## When multiple graphs are produced in one chunk, knitr will
    ## not allow you to vary the plot sizes, so break into groups
    ## of 4 paneled plots
    ORgroups <- if(m > 8) list(ORs[1 : 4], ORs[5 : 8], ORs[-(1 : 8)])
                else
                  if(m > 4) list(ORs[1 : 4], ORs[-(1 : 4)]) else list(ORs)
    for(ors in ORgroups) {
      m <- length(ors)
      par(mfrow = c(1 + (m > 1), 1 + (m > 2)))
      for(or in ors) {
        ets <- subset(et, OR == or) 
        km <- survfit(Surv(t, event=='event') ~ group, weights=n, data=ets)
        plot(km, fun='event', col=1:2, xlab='Day', ylab='Cumulative Incidence')
        if(or == ors[1])
          title(sub='Black: group 1; Red: group 2', adj=0, cex.sub=0.7,
                line=1.5, col.sub='blue')
        title(sub=paste('OR:', or), line=1.5, cex.sub=0.7, adj=1,
              col.sub='blue')
      }
    }
    }
	
if(FALSE) {
  pr('Frequency distribution of times to Y=1 by group')
    for(pm in dimnames(et)[[1]]) {
      pr('\nOR:', inline=exp(as.numeric(pm)))
      print(et[pm, ,])
    }
}
  }
	if(ORvsHR && 'loghr' %in% names(sim))
		pr('Comparison of OR and HR',
       rn(sim[, .(HR=exp(- bar(loghr))), by=OR]))

	if(hist && cox && ph) {
		z <- rbind(data.frame(x=sim[, lrchisq], Test='Cox model group comparison'),
							 data.frame(x=sim[, phchisq], Test='Cox Model PH test'))
    no <- no + 1
		pobj[[no]] <- ggp(
      ggplot(z, aes(x=x)) + geom_histogram(bins=50) + facet_wrap(~ Test) +
			xlab(expression(chi^2)))
    caps <- c(caps, 'Histograms of Cox 2-sample and PH test statistics')
	}
	if(cox)
		pr(paste('Power of Cox test at last look for time until', timeevent),
			 rn(sim[, .(power=bar(lrchisq > 3.8415)), by=OR]))
	if(ph)
		pr('Proportion of simulations with chi-sq for PH > 3.84',
			 rn(sim[, .(rejectPH=bar(phchisq > 3.8415)), by=OR]))
	
	sim[, tchi := est * est / vest]
	pr('Power of Markov proportional odds model test',
		 sim[, .(power=round(bar(tchi > 3.8415), 2)), by=.(OR, look)] )
	if(cox) {
		rho <- sim[, spearman(tchi, lrchisq)]
		pr('Spearman rho correlation between Markov and Cox model chi-square:',
			 inline=rn(rho))
		}
	gr <- function(x, y, z) 
		ggfreqScatter(x, y, ylab='PO Model Z Statistic',
									xlab='Cox Model Z Statistic',
									by=z, bins=25) +
		  geom_abline(intercept=0, slope=1, col='red', alpha=0.3) +
      theme(legend.position='bottom', axis.text.x = element_text(angle = 90))
	
  if(scatter && cox) {
    g <- with(sim, gr(pmin(10, sqrt(lrchisq)), pmin(10, sqrt(tchi)),
                      exp(parameter)))
    no <- no + 1
    pobj[[no]] <- ggp(g, remove='Freq: ')
    caps <- c(caps, 'Scatter plot of Cox and PO model Z statistics')
	}

  s <- R$sstat
  if(length(s)) {
    group <- s[, , , 'group']
    y     <- s[, , , 'y']
    dn    <- dimnames(y)
    ss <- data.table(
      sim = dn[[1]][as.vector(slice.index(y, 1))],
      OR  = exp(as.numeric(dn[[2]])[as.vector(slice.index(y, 2))]),
      group = as.vector(group),
      y     = as.vector(y),
      key   = c('sim', 'OR', 'group'))
    tab <- with(ss, as.data.frame(table(group, y, OR)))
    tab$txt <- with(tab, paste0('Group:', group, '<br>y:', y, '<br>', Freq))
    g <- ggplot(tab, aes(x=group, y=Freq, label=txt)) +
      geom_col() + facet_grid(OR ~ y) +
      xlab('Ventilator/ARDS - Free Days') +
      theme(axis.text.x=element_blank()) +
      guides(fill = guide_legend(title='')) + ylab('Frequency')
    no <- no + 1
    pobj[[no]] <- ggp(g)
    caps <- c(caps, 'Distribution of ventilator/ARDS-free days by group and OR.  Left bar of each pair is group 1, right bar is group 2.  -1 represents death.')

    ## For each simulation and true OR compute the p-value from the
    ## Wilcoxon test, then compute the power of this test for vent-free days
    h <- function(group, y) wilcox.test(y[group == 1], y[group == 2])$p.value
    s <- ss[, .(p = h(group, y)), by=.(sim, OR)]
    pr('Power of Wilcoxon test on vent/ards-free days',
		 s[, .(power=bar(p < 0.05)), by=OR] )
    }

  # All the ggplot2 or plotly plots are saved in list pobj
  # Render all the plots, each with its own chunk and caption
  if(no > 0) {
    if(desc != '') caps <- paste0(caps, '. ', desc, '.')
    chunkname <- knitr::opts_current$get('label')
    if(! length(chunkname)) stop('no chunk name')
    cnames <- paste0(chunkname, 'examsim', letters[1 : no])
    markupSpecs$html$mdchunk(robj=pobj, caption=caps, cnames=cnames)
    }
  return(invisible())
  }

# Function to create a data frame suitable for ggplot2 for plotting
# Kaplan-Meier estimates from output of survfit (fit)
# Put in a data frame suitable for ggplot2/plotly
survfit2df <- function(fit) {
  n     <- fit$strata
  sn    <- sub('^.*=', '', names(n))
  sl    <- rep(sn, n)
  r <- function(x, prefix) {
    X <- split(x, sl)
    X <- lapply(X, prefix)
    unlist(X)
  }
  time  <- r(fit$time,   function(x) c(0.,   x))
  surv  <- r(fit$surv,   function(x) c(1.,   x))
  nrisk <- r(fit$n.risk, function(x) c(x[1], x))
  group <- r(sl,         function(x) c(x[1], x))

  data.frame(group=group, time=time, surv=surv, nrisk=nrisk)
}

