Like the model evaluation measures calculated by the threshMeasures function, the area under the ROC curve (AUC) assesses the discrimination performance of a model; but unlike them, it does not require the choice of a particular threshold above which to consider that a model predicts species presence, but rather averages discrimination performance over all possible thresholds at a given interval. Mind that there has been criticism of the AUC (Lobo et al. 2008, Jiménez-Valverde et al. 2013), although it is still one of the most widely used metrics in model evaluation. It is highly correlated with species prevalence, so this value is also provided by the AUC function (if simplif = FALSE, the default) for reference.
Although there are functions to calculate the AUC in other R packages (such as ROCR, PresenceAbsence and verification), the AUC function below is more compatible with the remaining functions in this blog and in the modEvA package where it is included (Barbosa et al. 2014), and can be applied not only to a set of presence-absence versus predicted values, but also directly to a model object of class glm.
AUC <- function(model = NULL, obs = NULL, pred = NULL, simplif = FALSE, interval = 0.01, FPR.limits = c(0, 1), curve = "ROC", method = "rank", plot = TRUE, diag = TRUE, diag.col = "grey", diag.lty = 1, curve.col = "black", curve.lty = 1, curve.lwd = 2, plot.values = TRUE, plot.digits = 3, plot.preds = FALSE, grid = FALSE, xlab = "auto", ylab = "auto", ...) {
# version 2.2 (17 Jan 2020)
# Computes the area under the ROC (or precision-recall) curve, either from a
# binomial-logit 'glm' object ('model') or from paired vectors of observed
# binary (0/1) values ('obs') and predicted values ('pred').
# method: "rank" (Mann-Whitney U; exact, ROC only), "trapezoid" (on the curve
# sampled every 'interval'), or "integrate" (numerical integration of an
# interpolation of that curve).
# Returns just the numeric AUC if simplif = TRUE; otherwise a list with the
# per-threshold measures table, N, prevalence, AUC and AUCratio (= AUC / 0.5).

  # 'all.equal' can return a character vector describing the mismatch,
  # so test it with isTRUE() rather than '!= TRUE':
  if (!isTRUE(all.equal(FPR.limits, c(0, 1)))) stop ("Sorry, 'FPR.limits' not yet implemented. Please use default values.")
  if (length(obs) != length(pred)) stop ("'obs' and 'pred' must be of the same length (and in the same order).")

  if (!is.null(model)) {
    if (!("glm" %in% class(model) && model$family$family == "binomial" && model$family$link == "logit")) stop ("'model' must be an object of class 'glm' with 'binomial' family and 'logit' link.")
    if (!is.null(obs)) message("Argument 'obs' ignored in favour of 'model'.")
    if (!is.null(pred)) message("Argument 'pred' ignored in favour of 'model'.")
    obs <- model$y
    pred <- model$fitted.values
  }  # end if model

  # drop observation pairs with missing data (warning how many were removed):
  dat <- data.frame(obs, pred)
  n.in <- nrow(dat)
  dat <- na.omit(dat)
  n.out <- nrow(dat)
  if (n.out < n.in) warning (n.in - n.out, " observations removed due to missing data; ", n.out, " observations actually evaluated.")
  obs <- dat$obs
  pred <- dat$pred

  stopifnot(
    obs %in% c(0, 1),
    interval > 0,
    interval < 1,
    curve %in% c("ROC", "PR"),
    method %in% c("rank", "trapezoid", "integrate")
  )

  n1 <- sum(obs == 1)  # number of observed presences
  n0 <- sum(obs == 0)  # number of observed absences

  # the rank method is only equivalent to the area under the ROC curve,
  # so fall back on the trapezoid rule for other curves:
  if (curve != "ROC" && method == "rank") {
    method <- "trapezoid"
  }

  if (method == "rank") {
    # Mann-Whitney U statistic rescaled to [0, 1]
    # (next 3 lines from Wintle et al. 2005 supp. mat. "roc" function):
    xy <- c(pred[obs == 0], pred[obs == 1])
    rnk <- rank(xy)
    AUC <- ((n0 * n1) + ((n0 * (n0 + 1))/2) - sum(rnk[seq_len(n0)])) / (n0 * n1)
    if (simplif && !plot) return(AUC)
  }

  if (any(pred < 0) || any(pred > 1)) warning("Some of your predicted values are outside the [0, 1] interval within which thresholds are calculated.")

  N <- length(obs)
  thresholds <- seq(0, 1, by = interval)
  Nthresh <- length(thresholds)

  # confusion-matrix-derived measures at each threshold:
  true.positives <- true.negatives <- sensitivity <- specificity <- precision <- false.pos.rate <- n.preds <- prop.preds <- numeric(Nthresh)
  for (t in seq_len(Nthresh)) {
    true.positives[t] <- sum(obs == 1 & pred >= thresholds[t])
    true.negatives[t] <- sum(obs == 0 & pred < thresholds[t])
    sensitivity[t] <- true.positives[t] / n1
    specificity[t] <- true.negatives[t] / n0
    precision[t] <- true.positives[t] / sum(pred >= thresholds[t], na.rm = TRUE)
    false.pos.rate[t] <- 1 - specificity[t]
    # predictions falling exactly on this threshold, rounded to the
    # number of decimal places implied by 'interval':
    n.preds[t] <- sum(round(pred, nchar(Nthresh) - 1) == thresholds[t])
    prop.preds[t] <- n.preds[t] / length(pred)
  }

  # curve coordinates:
  if (curve == "ROC") {
    xx <- false.pos.rate
    yy <- sensitivity
  } else if (curve == "PR") {
    xx <- sensitivity
    yy <- precision
  } else {
    stop ("'curve' must be either 'ROC' or 'PR'.")  # unreachable (checked above); kept as a safeguard
  }

  if (method == "trapezoid") {
    xy <- na.omit(data.frame(xx, yy))
    xx <- xy$xx
    yy <- xy$yy
    # trapezoid rule (adapted from https://stackoverflow.com/a/22418496);
    # xx decreases as the threshold increases, so the signed sum is
    # negative and must be negated:
    AUC <- -sum(diff(xx) * (yy[-1] + yy[-length(yy)]) / 2)
  }

  if (method == "integrate") {
    xx.interp <- stats::approx(x = thresholds, y = xx, n = length(thresholds))
    yy.interp <- stats::approx(x = thresholds, y = yy, n = length(thresholds))
    f <- approxfun(x = xx.interp$y, y = yy.interp$y)
    AUC <- integrate(f, lower = min(thresholds), upper = max(thresholds))$value
  }

  if (plot) {
    # identical() (rather than '==') so a user-supplied length-2 label
    # does not break the scalar 'if':
    if (curve == "ROC") {
      if (identical(xlab, "auto")) xlab <- c("False positive rate", "(1-specificity)")
      if (identical(ylab, "auto")) ylab <- c("True positive rate", "(sensitivity)")
    }
    if (curve == "PR") {
      if (identical(xlab, "auto")) xlab <- c("Recall", "(sensitivity)")
      if (identical(ylab, "auto")) ylab <- c("Precision", "(positive predictive value)")
    }
    d <- if (diag) "l" else "n"  # to plot the 0.5 diagonal (or not if diag=FALSE)
    if (curve == "ROC") plot(x = c(0, 1), y = c(0, 1), type = d, xlab = xlab, ylab = ylab, col = diag.col, lty = diag.lty, ...)
    if (curve == "PR") plot(x = c(0, 1), y = c(1, 0), type = d, xlab = xlab, ylab = ylab, col = diag.col, lty = diag.lty, ...)
    if (grid) abline(h = thresholds, v = thresholds, col = "lightgrey")
    lines(x = xx, y = yy, col = curve.col, lty = curve.lty, lwd = curve.lwd)
    if (isTRUE(plot.preds)) plot.preds <- c("curve", "bottom")  # for back-compatibility
    if ("bottom" %in% plot.preds) {
      # circle size proportional to the proportion of predictions at each threshold:
      points(x = thresholds, y = rep(0, Nthresh), cex = 100 * prop.preds, col = "darkgrey")
    }
    if ("curve" %in% plot.preds) {
      points(x = xx, y = yy, cex = 100 * prop.preds, col = "darkgrey")
    }
    if (plot.values) {
      place <- if (curve == "ROC") 0.4 else 1
      text(1, place, adj = 1, substitute(paste(AUC == a), list(a = round(AUC, plot.digits))))
    }  # end if plot.values
  }  # end if plot

  if (simplif) return (AUC)

  thresholds.df <- data.frame(thresholds, true.positives, true.negatives, sensitivity, specificity, precision, false.pos.rate, n.preds, prop.preds)
  rownames(thresholds.df) <- thresholds
  # prevalence is only needed in the full output, so it is computed here
  # (not earlier), keeping the simplif = TRUE path independent of it:
  return (list(thresholds = thresholds.df, N = N, prevalence = prevalence(obs), AUC = AUC, AUCratio = AUC / 0.5))
}
EDIT: since modEvA version 1.7 (and as per updated code above), the AUC function can also compute the precision-recall curve.
Usage examples:
AUC(obs = mydata$myspecies, pred = mydata$myspecies_P, simplif = TRUE)
AUC(obs = mydata$myspecies, pred = mydata$myspecies_P)
AUC(model = mymodel, curve = "PR")
If you want the AUC values for a list of models produced e.g. by the multGLM function, you can do:
mymodels.auc <- vector("numeric", length(mymodels$models))
names(mymodels.auc) <- names(mymodels$models)
for (i in 1:length(mymodels$models)) {
mymodels.auc[i] <- AUC(model = mymodels$models[[i]], simplif = TRUE)
}
mymodels.auc
References:
Barbosa A.M., Brown J.A. & Real R. (2014) modEvA – an R package for model evaluation and analysis. R package, version 0.1.
Lobo, J.M., Jiménez-Valverde, A. & Real, R. (2008). AUC: a misleading measure of the performance of predictive distribution models. Global Ecology and Biogeography 17: 145-151
Jiménez-Valverde, A., Acevedo, P., Barbosa, A.M., Lobo, J.M. & Real, R. (2013). Discrimination capacity in species distribution models depends on the representativeness of the environmental domain. Global Ecology and Biogeography 22: 508–516
