Flimo (Fixed Landscape Inference MethOd) is a likelihood-free inference method for stochastic models. It is based on simple simulations of the process under study with a specific management of the randomness: the random draws are fixed through their quantiles, which makes it possible to use deterministic optimization algorithms to find the parameters that are optimal in the sense of the summary statistics.
This document presents two small examples to illustrate how the method works. You can find more details on the git page of the project: https://metabarcoding.org/flimo.
Inference is possible in R but is more efficient in the Julia language for non-trivial models. This Julia mode uses the ‘Jflimo’ package available on the git page of the project: https://metabarcoding.org/flimo.
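To make the principle concrete, here is a minimal self-contained sketch (an illustration only, not part of the flimo API; the names obs, quantiles and toy_objective are introduced just for this example): once the uniform quantiles feeding the simulator are drawn and fixed, the objective becomes a deterministic function of the parameter and can be handed to a standard optimizer such as optimize.
#Fixed-landscape principle: minimal sketch (illustration only, not the flimo API)
set.seed(123)
obs <- rpois(5, lambda = 100)              #toy observed data
quantiles <- runif(5)                      #randomness drawn once, then kept fixed
toy_objective <- function(theta){
  sim <- qpois(quantiles, lambda = theta)  #deterministic given the fixed quantiles
  (mean(sim) - mean(obs))^2                #squared distance between summary statistics
}
optimize(toy_objective, interval = c(0, 200)) #deterministic 1D optimization
Because qpois is integer-valued, this toy objective is piecewise constant in theta; the normal approximation used below smooths it out.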
Five Poisson variables with parameter \(\theta = 100\) are drawn. We try to recover this value by comparing the mean of 10 simulated Poisson samples with the mean of the observed data. The distance between the summary statistics is:
\[dsumstats(\theta) =\left( \widehat{\mathbb{E}[X|\theta]}-\overline{X^{data}}\right)^2=\left(\frac{1}{n_{sim}}\sum_{i=1}^{n_{sim}}X_i^\theta - \frac{1}{n_{data}}\sum_{i=1}^{n_{data}}X_i^{data}\right)^2\]
With the normal approximation, the Poisson distribution is replaced by a normal distribution with the same mean and variance:
\[\mathcal{P}(\theta) \leftarrow \mathcal{N}(\mu = \theta, \sigma^2 = \theta)\]
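The effect of this substitution can be seen directly on the quantile functions. The short sketch below (the quantile vector q is just an arbitrary example, not reused later) compares the two for the same fixed quantiles.
#Poisson vs. normal quantiles for the same fixed quantiles (sketch)
q <- c(0.1, 0.5, 0.9)
qpois(q, lambda = 100)               #integer-valued, piecewise constant in the parameter
qnorm(q, mean = 100, sd = sqrt(100)) #real-valued, varies smoothly with the parameter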
#Setup
set.seed(1)
#Create data
Theta_true1 <- 100 #data parameter
n1 <- 5 #data size

simulator1 <- function(Theta, n){
  #classical random simulator
  rpois(n, lambda = Theta)
}

data1 <- simulator1(Theta_true1, n1)
#Simulations with quantiles
#See README to know how to build this simulator
ndraw1 <- n1 #number of random draws for one simulation

check_simulator(simulator1, ndraw1, 0, 200)
#> [1] FALSE

simulatorQ1 <- function(Theta, quantiles){
  qpois(quantiles, lambda = Theta)
}

check_simulator(simulatorQ1, ndraw1, 0, 200)
#> [1] TRUE
#With Normal approximation
simulatorQ1N <- function(Theta, quantiles){
  qnorm(quantiles, mean = Theta, sd = sqrt(Theta))
}
check_simulator(simulatorQ1N, ndraw1, 0, 200)
#> [1] TRUE
#Quantile-simulator with Normal approximation
#First simulations
Theta11 <- 50
Theta21 <- 200

nsim1 <- 10
quantiles1 <- matrix(runif(ndraw1*nsim1), nrow = nsim1)
#Just one simulation
simulatorQ1(Theta11, quantiles1[1,])
#> [1] 49 43 52 45 49
#No random effect:
simulatorQ1(Theta11, quantiles1[1,])
#> [1] 49 43 52 45 49
#Independent values:
simulatorQ1(Theta11, quantiles1[2,])
#> [1] 43 55 61 54 50
#Matrix of nsim simulations
simu11 <- simulatorQ1(Theta11, quantiles1)
simu21 <- simulatorQ1(Theta21, quantiles1)
#Sample Comparison: summary statistics
dsumstats1 <- function(simulations, data){
  #simulations : 2D array
  #data : 1D array
  mean_simu <- mean(rowMeans(simulations))
  mean_data <- mean(data)
  (mean_simu - mean_data)^2
}
Plot the objective function:
#Objective by parameter:
plot_objective(ndraw1, data1, dsumstats1, simulatorQ1,
               nsim = nsim1, lower = 0, upper = 200)

#Objective with Normal approximation:
plot_objective(ndraw1, data1, dsumstats1, simulatorQ1N,
               nsim = nsim1, lower = 0, upper = 200)
#Both plots
#We use the same quantiles for both
quantiles1 <- matrix(runif(ndraw1*nsim1), nrow = nsim1)
p <- plot_objective(data = data1,
                    dsumstats = dsumstats1,
                    simulatorQ = simulatorQ1,
                    lower = 0, upper = 200, quantiles = quantiles1)

plot_objective(data = data1,
               dsumstats = dsumstats1,
               simulatorQ = simulatorQ1N,
               lower = 0, upper = 200,
               visualize_min = FALSE,
               add_to_plot = p, quantiles = quantiles1)
Locally, the Poisson quantiles, and therefore the objective function, are piecewise constant. Let's compare this with the normal approximation.
p <- plot_objective(data = data1,
                    dsumstats = dsumstats1,
                    simulatorQ = simulatorQ1,
                    lower = 71, upper = 72,
                    visualize_min = FALSE, quantiles = quantiles1)

plot_objective(ndraw1, data1, dsumstats1, simulatorQ1N,
               lower = 71, upper = 72,
               nsim = nsim1,
               visualize_min = FALSE, add_to_plot = p,
               quantiles = quantiles1)
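The piecewise constancy can also be checked numerically. The small sketch below (with arbitrary parameter values, not part of the example above) evaluates the Poisson quantile function at nearby values of \(\theta\) and its normal counterpart at the same points.
#Numerical check of the piecewise constancy (sketch, arbitrary parameter values)
theta_grid <- c(71.2, 71.4, 71.6)
qpois(0.5, lambda = theta_grid)                      #integer-valued: constant over intervals of theta
qnorm(0.5, mean = theta_grid, sd = sqrt(theta_grid)) #real-valued: varies continuously with theta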
In R, there are optimization issues for the normalized process when Theta is close to 0, so the lower bound is set to 1.
#Optimization with normal approximation of Poisson distribution
#Default mode: full R
system.time(optim1R <- flimoptim(ndraw1, data1, dsumstats1, simulatorQ1N,
                                 ninfer = 10,
                                 nsim = nsim1,
                                 lower = 1, upper = 1000,
                                 randomTheta0 = TRUE))
#> user system elapsed
#> 0.014 0.000 0.014
optim1R
#> optimization Result
#> Mode : R
#> method : L-BFGS-B
#> Number of inferences : 10
#> Convergence : 10/10
#> Mean of minimizer :
#> 100.659429936366
#> Best minimizer :
#> 101.125271999801
#> Reached minima : from 8.27180612553028e-25 to 6.61968703244348e-20
#> Median time by inference 0.00103199481964111
summary(optim1R)
#> $Mode
#> [1] "R"
#>
#> $method
#> [1] "L-BFGS-B"
#>
#> $number_inferences
#> [1] 10
#>
#> $number_converged
#> [1] 10
#>
#> $minimizer
#> par1
#> Min. : 99.29
#> 1st Qu.: 99.76
#> Median :100.72
#> Mean :100.66
#> 3rd Qu.:101.09
#> Max. :102.62
#>
#> $minimum
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 8.300e-25 5.614e-23 3.429e-22 6.947e-21 5.661e-22 6.620e-20
#>
#> $median_time_inference
#> [1] 0.001031995
attributes(optim1R)
#> $names
#> [1] "mode" "method" "AD" "minimizer" "minimum" "converged"
#> [7] "initial_x" "f_calls" "g_calls" "message" "time_run"
#>
#> $class
#> [1] "flimo_result"
plot(optim1R)
#Version with objective function provided
obj1 <- function(Theta, quantiles, data = data1){
  simulations <- simulatorQ1N(Theta, quantiles)
  dsumstats1(simulations, data)
}
system.time(optim1Rbis <- flimoptim(ndraw1,
                                    obj = obj1,
                                    ninfer = 10,
                                    nsim = nsim1,
                                    lower = 1, upper = 1000,
                                    randomTheta0 = TRUE))
#> user system elapsed
#> 0.007 0.000 0.007
To use the Julia mode, see the README on the git page of the project.
Five normal variables with mean = 3 and sd = 2 are drawn. We try to recover these mean/sd values by comparing the mean and standard deviation of 10 simulated normal samples with those of the observed data. The distance between the summary statistics is:
\[dsumstats(\theta) =\left( \widehat{\mathbb{E}[X|\theta]}-\overline{X^{data}}\right)^2 + \left( \widehat{\sigma(X|\theta)}-\sigma(X^{data})\right)^2\]
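Written out in code, this distance adds a squared gap between means and a squared gap between standard deviations. The sketch below is only an ad-hoc illustration with toy data (the names obs, quantiles and sims are not reused later); the dsumstats2 function defined below implements the same computation for the actual example.
#Sketch: the two components of the distance, evaluated at Theta = c(3, 2) (illustration only)
set.seed(2)
obs <- rnorm(5, mean = 3, sd = 2)             #toy observed data
quantiles <- matrix(runif(10 * 5), nrow = 10) #10 fixed simulations of size 5
sims <- qnorm(quantiles, mean = 3, sd = 2)    #simulated samples, one per row
(mean(rowMeans(sims)) - mean(obs))^2 +        #squared gap between means
  (mean(apply(sims, 1, sd)) - sd(obs))^2      #squared gap between standard deviations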
#Setup
set.seed(1)
#Create data
Theta_true2 <- c(3, 2) #data parameter
n2 <- 5 #data size

simulator2 <- function(Theta, n){
  #classical random simulator
  rnorm(n, mean = Theta[1], sd = Theta[2])
}

data2 <- simulator2(Theta_true2, n2)
#Simulations with quantiles
#See README to know how to build this simulator
simulatorQ2 <- function(Theta, quantiles){
  qnorm(quantiles, mean = Theta[1], sd = Theta[2])
}

ndraw2 <- 5
check_simulator(simulatorQ2, ndraw2, c(0, 0), c(10, 10))
#> [1] TRUE
check_simulator(simulator2, ndraw2, c(0, 0), c(10, 10))
#> [1] FALSE
dsumstats2 <- function(simulations, data){
  mean_simu <- mean(rowMeans(simulations))
  mean_data <- mean(data)
  sd_simu <- mean(apply(simulations, 1, sd))
  sd_data <- sd(data)
  (mean_simu - mean_data)^2 + (sd_simu - sd_data)^2
}

nsim2 <- 10
plot_objective(ndraw2, data2, dsumstats2, simulatorQ2,
               index = 1, other_param = c(1, 2, 10),
               nsim = nsim2,
               lower = -5, upper = 10)
#Optimization
#Default mode: full R
optim2R <- flimoptim(ndraw2, data2, dsumstats2, simulatorQ2,
                     ninfer = 10, nsim = nsim2,
                     lower = c(-5, 0), upper = c(10, 10),
                     randomTheta0 = TRUE)
optim2R
#> optimization Result
#> Mode : R
#> method : L-BFGS-B
#> Number of inferences : 10
#> Convergence : 10/10
#> Mean of minimizer :
#> 3.20567648680977 2.11520187608635
#> Best minimizer :
#> 3.14574674163785 1.97643436033205
#> Reached minima : from 0 to 3.8290401805314e-14
#> Median time by inference 0.006988525390625
summary(optim2R)
#> $Mode
#> [1] "R"
#>
#> $method
#> [1] "L-BFGS-B"
#>
#> $number_inferences
#> [1] 10
#>
#> $number_converged
#> [1] 10
#>
#> $minimizer
#> par1 par2
#> Min. :2.975 Min. :1.681
#> 1st Qu.:3.044 1st Qu.:1.948
#> Median :3.130 Median :2.116
#> Mean :3.206 Mean :2.115
#> 3rd Qu.:3.211 3rd Qu.:2.276
#> Max. :3.820 Max. :2.536
#>
#> $minimum
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0.000e+00 0.000e+00 0.000e+00 3.830e-15 1.000e-18 3.829e-14
#>
#> $median_time_inference
#> [1] 0.006988525
plot(optim2R, pairwise_par = TRUE, hist = TRUE, par_minimum = TRUE)
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Warning: Transformation introduced infinite values in continuous y-axis