This tutorial demonstrates how to fit the TTR model as a species distribution model using the R package TTR.PGM. It assumes the user is familiar with R. The tutorial is designed to accompany the manuscript “TTR.PGM: A R package for modelling the distributions and dynamics of plants using the Thornley transport resistance plant growth model”. This manuscript and the TTR.PGM help provide additional information.
If necessary install the TTR.PGM package using e.g.
install.packages("TTR.PGM_1.0.0.tar.gz",repos=NULL, type="source")
setwd(tempdir())
Load the required packages.
library(TTR.PGM)
library(LaplacesDemon)
library(DEoptim)
The model is written using LaplacesDemon style, which helps provide a predictable structure. In this case it is a logistic regression where the biomass predicted by the TTR model is the predictor variable.
model_sdm <- function(parm, Data)
{
### Parameters
ttr_params <- TTR.PGM::get_parms(parm, Data)
parm <- TTR.PGM::interval_parms(parm,Data)
names(parm)<-Data$parm.names
### Process model
x <- TTR.PGM::run_ttr(ttr_params,Data)
# explicitly name the indices of the components of x we want to use:
i_step <- Data$out.dim["steps"] # the steps dimension is the last time step in x
i_species <- Data$out.dim["species"] # placebo just to clarify that this dimension exists
i_sites <- 1:Data$out.dim["sites"] # placebo just to clarify that this dimension exists
### Log-likelihood
MsMr <- (x["Ms",i_species,i_step,i_sites,drop=F] + x["Mr",i_species,i_step,i_sites,drop=F])
mu <- as.vector( MsMr*parm["scale"] + 1e-22)
p01 <- invcloglog(log(mu))
yhat <- rbern(Data$n.sites,p01)
obs <- Data$y
LL <- sum(dbern(obs,p01,log=TRUE))
### Log-Prior
# undefined in this example: equivalent to having completely non-informative priors
Prior <- 0
### Log-Posterior
LP <- LL + Prior
### Return
if (Data$options$optim == "DEoptim")
return(-1 * LP)
if (Data$options$optim == "LaplacesDemon")
return(list(LP = LP, Dev = -2 * LL, Monitor = LP, yhat = yhat, parm = parm))
if (Data$options$optim == "no")
return(list(yhat=yhat, p01 = p01, mu = mu))
}
Load a data.frame that contains the presence, (pseudo-) absence data for a species and the associated environmental data. Here using a prepared data set for Terminalia sericea (see help(data_input_space_Terminalia_sericea)). In practice producing such data sets requires considerable thought, but this is a generic problem that is not particular to TTR.PGM. The manuscript contains a brief description of how this data set was assembled.
spname="Terminalia_sericea"
data(data_input_space_Terminalia_sericea)
in.df <- data_input_space_Terminalia_sericea
Now use the get_input function to format in.df for the TTR.PGM package. For more detailed information on options see help(get_input).
seconds.month = 60*60*24*30.44
seconds.day = 60*60*24
input_species <- TTR.PGM::get_input(observations=matrix(in.df$obs,nrow=length(in.df$obs),ncol=1),
lon=in.df$pts.lon,
lat=in.df$pts.lat,
tcur=t(in.df[,grep("tmax",colnames(in.df))]),
tnur=t(in.df[,grep("tmin",colnames(in.df))]),
tgrowth=t(in.df[,grep("tavg",colnames(in.df))]),
tloss =t(in.df[,grep("tavg",colnames(in.df))]),
seconds = seconds.month,
p3=p3, p4=p4,
catm=array(data=33.8, dim=c(12,length(in.df$obs))),
pet=t(in.df[,grep("pet",colnames(in.df))]) / seconds.month,
rain=t(in.df[,grep("rain",colnames(in.df))]) / seconds.month,
rsds=t(in.df[,grep("rsds",colnames(in.df))]) / seconds.day*1e06, #1e06 from MJ to J
wp=in.df$wp,
fc=in.df$fc)
Saving the input data objects can be helpful if the analyses are to be repeated or varied.
saveRDS(input_species,file=paste0("input_",spname,".rds"))
input_species <- readRDS(paste0("input_",spname,".rds"))
Specify some options (see help(standard_options) for an example and help(options) for more information). In this case the “oak” variant is specified.
options <- TTR.PGM::standard_options
options$species <- spname
options$photo <- "c3"
options$steps <- 300
options$initial_mass <- 0.1
options$ve <- "oak"
Set the bounds; if these are not set the defaults are used. See help(bounds) for more information. If the user defined statistical model requires additional parameters that are not in the defaults, they need to be defined here with their bounds.
MyData <- TTR.PGM::make_data(input_species, options=options)
#See which default bounds are associated with this model.
MyData$bounds
## lower upper
## CU_Ns_1 0 1
## CU_Ns_2 0 1
## CU_swc_1 0 100
## CU_swc_2 0 100
## L_mix_1 0 100
## L_mix_2 0 100
## L_mix_3 0 100
## L_swc_1 0 100
## L_swc_2 0 100
## L_tloss_1 -50 50
## L_tloss_2 0 50
## NU_Nsoil_1 0 1
## NU_Nsoil_2 0 1
## NU_swc_1 0 100
## NU_swc_2 0 100
## NU_swc_3 0 100
## NU_swc_4 0 100
## NU_tnur_1 -50 50
## NU_tnur_2 0 50
## g_swc_1 0 100
## g_swc_2 0 100
## g_tgrowth_1 -50 50
## g_tgrowth_2 0 50
## g_tgrowth_3 0 50
## g_tgrowth_4 0 50
Use this information to reset the bounds as desired. In this example, the oak model is simplified by removing some environmental factors by setting their bounds to NA.
oak_beta_bounds = list(
CU_Ns_1 = c(NA,NA),
CU_Ns_2 = c(NA,NA),
CU_swc_1 = c(0,100),
CU_swc_2 = c(0,100),
g_swc_1 = c(0,100),
g_swc_2 = c(0,100),
g_tgrowth_1 = c(0,50),
g_tgrowth_2 = c(0,50),
g_tgrowth_3 = c(0,50),
g_tgrowth_4 = c(0,50),
L_mix_1 = c(100,100),
L_mix_2 = c(0,0),
L_mix_3 = c(0,0),
L_swc_1 = c(NA,NA),
L_swc_2 = c(NA,NA),
L_tloss_1 = c(NA,NA),
L_tloss_2 = c(NA,NA),
NU_Nsoil_1 = c(NA,NA),
NU_Nsoil_2 = c(NA,NA),
NU_swc_1 = c(0,100),
NU_swc_2 = c(0,100),
NU_swc_3 = c(0,100),
NU_swc_4 = c(0,100),
NU_tnur_1 = c(0,50),
NU_tnur_2 = c(0,50)
)
Set some of the global parameters; see help(standard_globals).
globals = TTR.PGM::standard_globals
globals["gmax"] = 40
globals["KM"] = 2.5
globals["mmax"] = 0.1
Using input_species, options, bounds and globals make the object needed to run and fit the model. See help(make_data).
MyData <- TTR.PGM::make_data(input_species,
options=options,
bounds=list(alpha=list(scale=c(0.1,10)), beta=oak_beta_bounds),
globals=globals)
MyData$bounds
## lower upper
## scale 0.1 10
## CU_swc_1 0.0 100
## CU_swc_2 0.0 100
## NU_swc_1 0.0 100
## NU_swc_2 0.0 100
## NU_swc_3 0.0 100
## NU_swc_4 0.0 100
## NU_tnur_1 0.0 50
## NU_tnur_2 0.0 50
## g_swc_1 0.0 100
## g_swc_2 0.0 100
## g_tgrowth_1 0.0 50
## g_tgrowth_2 0.0 50
## g_tgrowth_3 0.0 50
## g_tgrowth_4 0.0 50
By calling MyData$bounds one can check how the bounds set in oak_beta_bounds have been interpreted by make_data. One can also use MyData$PGF(MyData) to generate parameter values and TTR.PGM::get_parms(parm, MyData) to check how the values are set for parameters that were excluded from the statistical model when defining oak_beta_bounds.
parm <- MyData$PGF(MyData)
TTR.PGM::get_parms(parm, MyData)
## $alpha
## scale
## 5.181213
##
## $beta
## Terminalia_sericea
## CU_Ns_1 -1.000000e+20
## CU_Ns_2 -1.000000e+20
## CU_swc_1 5.000992e+01
## CU_swc_2 9.994267e+01
## L_mix_1 1.000000e+02
## L_mix_2 0.000000e+00
## L_mix_3 0.000000e+00
## L_swc_1 -1.000000e+20
## L_swc_2 -1.000000e+20
## L_tloss_1 -1.000000e+20
## L_tloss_2 -1.000000e+20
## NU_Nsoil_1 -1.000000e+20
## NU_Nsoil_2 -1.000000e+20
## NU_swc_1 4.993581e+01
## NU_swc_2 9.990711e+01
## NU_swc_3 1.498811e+02
## NU_swc_4 1.999264e+02
## NU_tnur_1 2.503306e+01
## NU_tnur_2 5.022165e+01
## g_swc_1 5.020912e+01
## g_swc_2 1.002140e+02
## g_tgrowth_1 2.502363e+01
## g_tgrowth_2 5.001333e+01
## g_tgrowth_3 7.490011e+01
## g_tgrowth_4 9.970184e+01
Fit the model using DEoptim. See help(DEoptim) for how to use DEoptim. This will take several minutes depending on the DEoptim settings selected and the size of MyData. For production analyses the DEoptim documentation should be followed to make sensible decisions on which values to use for deoptim_pop and deoptim_iter.
set.seed(2024)
MyData$options$optim <- "DEoptim"
deoptim_pop = 50
deoptim_iter = 200
deoptim_trace = 5
ft.DE <-DEoptim(model_sdm,
upper=MyData$bounds[,"upper"],
lower=MyData$bounds[,"lower"],
control=DEoptim.control(
NP = deoptim_pop,
itermax = deoptim_iter,
trace = deoptim_trace,
CR=0.9, F=0.8, c= 0.75,
initialpop = NULL,
strategy = 2,
parallelType=0,
packages = list("LaplacesDemon","TTR.PGM")),
Data=MyData)
## Warning in DEoptim(model_sdm, upper = MyData$bounds[, "upper"], lower = MyData$bounds[, : For many problems it is best to set 'NP' (in 'control') to be at least ten times the length of the parameter vector.
We now have parameter estimates inside the ft.DE object. See the DEoptim documentation for guidelines on assessing whether a model has converged. We extract the parameter estimates for later use.
parm.DE <- ft.DE$optim$bestmem
We can also fit the model using LaplacesDemon. See help(LaplacesDemon) for how to use LaplacesDemon. This will take several minutes depending on the LaplacesDemon settings selected and the size of MyData. In this example we use the DEoptim solution to define the Initial.Values used by LaplacesDemon. We then run multiple chains in serial, using the covariance matrix from previous chains in subsequent chains.
MyData$options$optim <- "LaplacesDemon"
iter<-2e4
status<-1e3
thin<-10
Initial.Values <- as.numeric(parm.DE)
ft.LD.DRAM <- LaplacesDemon(model_sdm, Data=MyData, Initial.Values,
Covar=NULL, Iterations=iter, Status=status, Thinning=thin,
Algorithm="DRAM", Specs=list(Adaptive=iter/10, Periodicity=10))
Initial.Values <- as.initial.values(ft.LD.DRAM)
ft.LD.DRAM <- LaplacesDemon(model_sdm, Data=MyData, Initial.Values,
Covar=ft.LD.DRAM$covar, Iterations=iter, Status=status, Thinning=thin,
Algorithm="DRAM", Specs=list(Adaptive=iter/10, Periodicity=10))
Model evaluation is its own topic and the TTR.PGM package does not provide any tools for model evaluation. Here we perform a few simple model evaluations, mostly to provide insights on how to work with the data objects produced by TTR.PGM.
First, we write a wrapper function for calculating some model evaluation statistics using the R package ROCR.
library(ROCR)
fit_eval <- function(parm,Data,Model, PLOT=TRUE) {
# opt.cut from
# https://www.r-bloggers.com/2014/12/a-small-introduction-to-the-rocr-package/
# see https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3755824/
# To determine the optimal cut off values the square of distance between the point (0, 1)
# on the upper left hand corner of the ROC space and any point on the ROC curve is minimized
opt.cut = function(perf, pred){
cut.ind = mapply(FUN=function(x, y, p){
d = (x - 0)^2 + (y-1)^2
ind = which(d == min(d))
c(sensitivity = y[[ind]], specificity = 1-x[[ind]],
cutoff = p[[ind]], ind=ind)
}, perf@x.values, perf@y.values, pred@cutoffs)
}
# generate the TTR.PGM model predictions
p01 <- TTR.PGM::predict_ttr(parm,Model,Data)$p01
pred <- ROCR::prediction(as.numeric(p01),as.numeric(unlist(Data$y)))
roc.perf <- ROCR::performance(pred, measure = "tpr", x.measure = "fpr")
stats <- opt.cut(roc.perf, pred)
auc.perf <- ROCR::performance(pred, measure = "auc")
if(PLOT==TRUE) {
plot(roc.perf, main = paste0("AUC=",round(unlist(auc.perf@y.values),3) ) )
abline(a=0, b= 1,lty=2)
}
ret <- c ( auc = unlist(auc.perf@y.values),
cutoff = as.numeric(unlist(stats["cutoff",1])),
sensitivity = as.numeric(unlist(stats["sensitivity",1])),
specificity = as.numeric(unlist(stats["specificity",1])),
tp = unlist(pred@tp)[stats["ind",1]],
tn = unlist(pred@tn)[stats["ind",1]],
fp = unlist(pred@fp)[stats["ind",1]],
fn = unlist(pred@fn)[stats["ind",1]],
tss = NULL)
ret["tss"] <- ret["sensitivity"] - (1-ret["specificity"])
return( ret )
}
A function for calculating the Maximum a Posteriori. Note that by default the upper 0.75 quantile is used to estimate this rather than the maximum.
myMAPmean <- function(ft,MyData,q=0.75) {
LP<-ft$Monitor[,"LP"]
n <- length(LP)
LP <- LP[(n/2):n] # discard first half of chain
wLP <- which(LP > quantile(LP,q) )
MAPmeans <- apply(ft$Posterior1[wLP,MyData$parm.names],2,mean)
return(MAPmeans)
}
Calculate some evaluation statistics by calling the fit_eval function. Then use the predict_ttr function to predict from the fitted model using a global environmental data set, data_map, which was produced using the get_input function. It can be accessed using data(data_map).
data(data_map)
ft.DE.eval <- fit_eval(parm.DE ,MyData,model_sdm,PLOT=FALSE)
ft.DE.proj <- TTR.PGM::predict_ttr(parm.DE,model_sdm,MyData, data_map)
Save some of the information from the DEoptim model in a list.
ft.DE <-list(proj = ft.DE.proj,
parm = parm.DE,
eval = ft.DE.eval,
spname = MyData$options$species,
MyData = MyData)
saveRDS(ft.DE ,file = paste0("ft_DE_",MyData$options$species,".rds"))
Now evaluate the LaplacesDemon model. Use the LaplacesDemon plot method to view the MCMC chains. The chains can be useful diagnostic tools to assess model convergence.
plot(ft.LD.DRAM, BurnIn=1000, MyData, PDF=TRUE, Parms=NULL,
FileName=paste0("MCMC_",MyData$options$species,".pdf"))
## png
## 2
As was the case for the DEoptim model, we can also calculate some AUC and TSS statistics, this time plotting the AUC curve.
parm.LD <- myMAPmean(ft.LD.DRAM,MyData)
ft.LD.eval <- fit_eval(parm.LD ,MyData,model_sdm,PLOT=TRUE)
ft.LD.proj <- TTR.PGM::predict_ttr(parm.LD,model_sdm,MyData, data_map)
Save some of the information from the LaplacesDemon model in a list.
ft.LD.oak <-list(proj = ft.LD.proj,
parm = parm.LD,
eval = ft.LD.eval,
spname = MyData$options$species,
MyData = MyData)
saveRDS(ft.LD.oak ,file = paste0("ft_LD_",MyData$options$species,".rds"))
First load the R packages we will use when plotting maps.
library(terra)
## terra 1.8.15
library(rnaturalearth)
library(pals)
library(plotrix)
##
## Attaching package: 'plotrix'
## The following object is masked from 'package:terra':
##
## rescale
Load a map of countries at 1:110 million scale and then remove Antarctica.
land <- rnaturalearth::ne_countries(scale = 110, returnclass = "sv")
landc <- land[land$continent != "Antarctica"]
And create a raster template for plotting, this raster is compatible with the list saved in data(data_map) and therefore with ft.DE.proj and ft.LD.proj.
rWRLD <- terra::rast( terra::ext(c(-180, 180, -90, 90)),
nrows=90, ncols=180)
To make things more convenient we write a wrapper function to do the plotting. This uses the landc and rWRLD objects as well as the projection of the species suitability, which is stored in the ft.LD.proj object. See the terra package help for more information on working with spatial data in R.
plot_map <- function(ft,input,rWRLD,land,TEXT) {
crs.latlon <- "+proj=longlat +datum=WGS84"
crs.robinson <- "+proj=robin +datum=WGS84"
d <- terra::vect(ft$MyData$lonlat,crs=crs.latlon)
d <- terra::project(d,crs.robinson)
land <- terra::project(land,crs.robinson)
p01.raster <- terra::rasterize(input$lonlat, rWRLD,ft$proj$p01)
p01.raster <- terra::project(p01.raster,crs.robinson)
p01.raster <- terra::crop(p01.raster,land)
m = c(0,0,0,0)
plot(land,main=paste(TEXT,ft$spname,"AUC=",round(ft$eval["auc"],3)),axes=F,mar=m)
plot(p01.raster,range=c(0,1),add=T,legend=F,col=pals::ocean.speed(10)[1:8])
plot(land,add=T)
plot(d[MyData$y==1],col="white",cex=0.5,add=T)
}
Plot the map in a pdf file.
pdf(file=paste0("map_all_",MyData$options$species,".pdf"),width=8,height=4)
oldpar <- par(mfrow=c(1,1),mar=c(0,0,2,0))
plot_map(ft.LD.oak,data_map,rWRLD,landc,"")
col.labels<-c("Suitable","Unsuitable")
plotrix::color.legend(-15503940,0,-15003940,-2898757,col.labels,
rev(pals::ocean.speed(10)[1:8]),
align="rb",gradient="y")
dev.off()
## png
## 2
par(oldpar)
Plot the map here in the document.
oldpar <- par(mfrow=c(1,1),mar=c(0,0,2,0))
plot_map(ft.LD.oak,data_map,rWRLD,landc,"")
col.labels<-c("Suitable","Unsuitable")
plotrix::color.legend(-15503940,0,-15003940,-2898757,
col.labels,
rev(pals::ocean.speed(10)[1:8]),
align="rb",
gradient="y")
par(oldpar)