## See http://stackoverflow.com/questions/24480031/roll-join-with-start-end-window
## library() stops with an error if data.table is missing; require() would
## only warn and let the script fail later with a less helpful message.
library(data.table)

## Intervals: one [start, end] range per group x.
d1 <- data.table(
  x     = letters[1:5],
  start = c(1, 5, 19, 30, 7),
  end   = c(3, 11, 22, 39, 25)
)

## Points: zero or more positions per group x.
d2 <- data.table(
  x   = letters[c(1, 1, 2, 2, 3:5)],
  pos = c(2, 3, 3, 12, 20, 52, 10)
)

## foverlaps() needs its second table (d1) to be keyed, with the interval
## columns last.  Keying on all columns gives the key (x, start, end).
setkey(d1)

## foverlaps() joins intervals to intervals, so duplicate pos to represent
## each point as the zero-length interval [pos, pos2].
d2[, pos2 := pos]

## Inner join (nomatch = 0L): keep only the points that lie within the
## interval of their group; points with no match are dropped.
d <- foverlaps(
  d2, d1,
  by.x = c("x", "pos", "pos2"),
  type = "within", mult = "all", nomatch = 0L
)
## Explicit print() so the tables are shown under source() as well as at
## the prompt.
print(d1)
print(d2)
print(d)

## Left join (nomatch = NA): keep every point; points with no enclosing
## interval get NA in the columns coming from d1.
d <- foverlaps(
  d2, d1,
  by.x = c("x", "pos", "pos2"),
  type = "within", mult = "all", nomatch = NA
)
print(d1)
print(d2)
print(d)
## Example where there is an "events" dataset (events) with 0 or more rows
## per subject, each containing start and end times and a measurement dose
## representing a daily dose of something given to the subject from start
## to end.  The base dataset (base) has one record per subject with times
## low and hi.  We want to compute the total dose of drug received between
## low and hi for the subject.  This is done by finding all records in
## events for the subject such that the interval [low,hi] has any overlap
## with the interval [start,end].  For each match compute the number of
## days in the interval [start,end] that are also in [low,hi].  This is
## given by min(end,hi) + 1 - max(start,low).  Multiply this duration by
## dose to get the total dose given in [low,hi].  For multiple records
## with intervals touching [low,hi] add these products.
## One row per subject: the window [low, hi] over which the dose is totalled.
base <- data.table(id = c("a", "b", "c"), low = 10, hi = 20)

## Zero or more dosing records per subject: a daily dose given over
## [start, end].
events <- data.table(
  id    = c("a", "b", "b", "b", "k"),
  start = c(8, 7, 12, 19, 99),
  end   = c(9, 8, 14, 88, 99),
  dose  = c(13, 17, 19, 23, 29)
)

setkey(base, id, low, hi)
## foverlaps() needs its second table (events) to be keyed, with the
## interval columns last.
setkey(events, id, start, end)

## Left join from base (nomatch = NA): every base row is kept, repeated once
## per overlapping event (mult = "all"); subjects with no overlapping event
## get NA in the event columns.  Events whose id is not in base (here "k")
## do not appear in the result.
d <- foverlaps(
  base, events,
  by.x = c("id", "low", "hi"),
  by.y = c("id", "start", "end"),
  type = "any", mult = "all", nomatch = NA
)

## Number of days common to [start, end] and [low, hi], counting both
## endpoints; NA for subjects without a matching event.
d[, elapsed := pmin(end, hi) + 1 - pmax(start, low)]

## Total dose per subject.  na.rm = TRUE drops the NA products of unmatched
## subjects, so their total is 0.  Explicit print() so the result is shown
## under source() as well as at the prompt.
print(d[, list(total.dose = sum(dose * elapsed, na.rm = TRUE)), by = id])