## Prediction interval example for a GAM # based on https://stat.ethz.ch/pipermail/r-help/2011-April/275632.html library(mgcv) ## simulate some data... set.seed(8) dat <- gamSim() # just want one predictor here to make y just f2 + noise dat$y <- dat$f2 + rnorm(400, 0, 1) ## fit smooth model to x, y data... b <- gam(y~s(x2,k=20), method="REML", data=dat) ## simulate replicate beta vectors from posterior... n.rep <- 10000 br <- rmvn(n.rep, coef(b), vcov(b)) ## turn these into replicate smooths xp <- seq(0, 1, length.out=200) # this is the "Lp matrix" which maps the coefficients to the predictors # you can think of it like the design matrix but for the predictions Xp <- predict(b, newdata=data.frame(x2=xp), type="lpmatrix") # note that we should apply the link function here if we have one # this is now a matrix with n.rep replicate smooths over length(xp) locations fv <- Xp%*%t(br) # now simulate from normal deviates with mean as in fv # and estimated scale... # (can replace rnorm at this point with the distribution of the data we're # using in a given example) yr <- matrix(rnorm(length(fv), fv, b$scale), nrow(fv), ncol(fv)) # now make some plots # plot the replicates in yr plot(rep(xp, n.rep), yr, pch=".") # and the data we used to fit the model points(dat$x2, dat$y, pch=19, cex=.5) # compute 95% prediction interval # since yr is a matrix where each row is a data location along x2 and # each column is a replicate we want the quantiles over the rows, summarizing # the replicates each time PI <- apply(yr, 1, quantile, prob=c(.025,0.975)) # the result has the 2.5% bound in the first row and the 97.5% bound # in the second # we can then plot those two bounds lines(xp, PI[1,], col=2, lwd=2) lines(xp, PI[2,], col=2, lwd=2) # and optionally add the confidence interval for comparison... pred <- predict(b, newdata=data.frame(x2=xp), se=TRUE) lines(xp, pred$fit, col=3, lwd=2) u.ci <- pred$fit + 2*pred$se.fit l.ci <- pred$fit - 2*pred$se.fit lines(xp, u.ci, col=3, lwd=2) lines(xp, l.ci, col=3, lwd=2)