Given the non-linear utility specification of WTP space models, these models often diverge during estimation and can be highly sensitive to starting parameters. While many other packages support the estimation of WTP space models, in practice these packages often fail to find solutions.
Compared to most other packages, {logitr} tends to perform better and converge more often. It also has a built-in, parallelized multi-start optimization loop for searching for different local minima from different random starting points when minimizing the negative log-likelihood, which improves the odds of converging to a solution for at least some of the multi-start iterations.
This vignette illustrates the convergence issues, comparing the performance of {logitr} to the following R packages that support WTP space models:
For the same mixed logit WTP space model, only {logitr} is able to consistently converge to a solution.
First load the required packages:
library(logitr)
library(mlogit)
library(gmnl)
library(apollo)
library(mixl)
library(dplyr)
library(tidyr)
set.seed(1234)
Now set the starting parameters for each package as well as the number of draws to use for the simulated log-likelihood:
numDraws_wtp <- 50
start_wtp <- c(
scalePar = 1,
feat = 0,
brandhiland = 0,
brandweight = 0,
brandyoplait = 0,
sd_feat = 0.1,
sd_brandhiland = 0.1,
sd_brandweight = 0.1,
sd_brandyoplait = 0.1
)
Take only half of the yogurt data to speed things up:
Both {apollo} and {mixl} require that the user hand-specify the model to be estimated as well as several other settings. These settings are provided here.
Convert the yogurt data for into the format required for {gmnl}
data_gmnl <- mlogit.data(
data = yogurt,
shape = "long",
choice = "choice",
id.var = 'id',
alt.var = 'alt',
chid.var = 'obsID'
)
Format the yogurt
data to a “wide” format for
{apollo}
yogurt_price <- yogurt %>%
select(id, obsID, price, brand) %>%
mutate(price = -1*price) %>%
pivot_wider(
names_from = 'brand',
values_from = 'price') %>%
rename(
price_dannon = dannon,
price_hiland = hiland,
price_weight = weight,
price_yoplait = yoplait)
yogurt_feat <- yogurt %>%
select(id, obsID, feat, brand) %>%
pivot_wider(
names_from = 'brand',
values_from = 'feat') %>%
rename(
feat_dannon = dannon,
feat_hiland = hiland,
feat_weight = weight,
feat_yoplait = yoplait)
yogurt_choice <- yogurt %>%
filter(choice == 1) %>%
select(id, obsID, choice = alt)
data_apollo <- yogurt_price %>%
left_join(yogurt_feat, by = c('id', 'obsID')) %>%
left_join(yogurt_choice, by = c('id', 'obsID')) %>%
arrange(id, obsID) %>%
mutate(
av_dannon = 1,
av_hiland = 1,
av_weight = 1,
av_yoplait = 1
)
Define the {apollo} probabilities function
apollo_probabilities_wtp <- function(
apollo_beta, apollo_inputs, functionality = "estimate"
) {
### Attach inputs and detach after function exit
apollo_attach(apollo_beta, apollo_inputs)
on.exit(apollo_detach(apollo_beta, apollo_inputs))
### Create list of probabilities P
P <- list()
### List of utilities: these must use the same names as in mnl_settings,
# order is irrelevant
V <- list()
V[["dannon"]] <- scalePar * (b_feat * feat_dannon - price_dannon)
V[["hiland"]] <- scalePar * (b_brandhiland + b_feat * feat_hiland - price_hiland)
V[["weight"]] <- scalePar * (b_brandweight + b_feat * feat_weight - price_weight)
V[["yoplait"]] <- scalePar * (b_brandyoplait + b_feat * feat_yoplait - price_yoplait)
### Define settings for MNL model component
mnl_settings <- list(
alternatives = c(dannon = 1, hiland = 2, weight = 3, yoplait = 4),
avail = list(
dannon = av_dannon,
hiland = av_hiland,
weight = av_weight,
yoplait = av_yoplait),
choiceVar = choice,
utilities = V
)
### Compute probabilities using MNL model
P[["model"]] <- apollo_mnl(mnl_settings, functionality)
### Take product across observation for same individual
P <- apollo_panelProd(P, apollo_inputs, functionality)
### Average across inter-individual draws
P = apollo_avgInterDraws(P, apollo_inputs, functionality)
### Prepare and return outputs of function
P <- apollo_prepareProb(P, apollo_inputs, functionality)
return(P)
}
Define random parameters function
apollo_randCoeff <- function(apollo_beta, apollo_inputs) {
randcoeff <- list()
randcoeff[['b_feat']] <- feat + d_feat * sd_feat
randcoeff[['b_brandhiland']] <- brandhiland + d_brandhiland * sd_brandhiland
randcoeff[['b_brandweight']] <- brandweight + d_brandweight * sd_brandweight
randcoeff[['b_brandyoplait']] <- brandyoplait + d_brandyoplait * sd_brandyoplait
return(randcoeff)
}
Main control settings for {apollo}
apollo_control_wtp <- list(
modelName = "MXL_WTP_space",
modelDescr = "MXL model on yogurt choice SP data, in WTP space",
indivID = "id",
mixing = TRUE,
analyticGrad = TRUE,
panelData = TRUE,
nCores = 1
)
# Set parameters for generating draws
apollo_draws_n <- list(
interDrawsType = "halton",
interNDraws = numDraws_wtp,
interUnifDraws = c(),
interNormDraws = c(
"d_feat", "d_brandhiland", "d_brandweight", "d_brandyoplait"),
intraDrawsType = "halton",
intraNDraws = 0,
intraUnifDraws = c(),
intraNormDraws = c()
)
# Set input
apollo_inputs_wtp <- apollo_validateInputs(
apollo_beta = start_wtp,
apollo_fixed = NULL,
database = data_apollo,
apollo_draws = apollo_draws_n,
apollo_randCoeff = apollo_randCoeff,
apollo_control = apollo_control_wtp
)
Format the yogurt
data to a “wide” format for {mixl}
data_mixl <- data_apollo # Uses the same "wide" format as {apollo}
data_mixl$ID <- data_mixl$id
data_mixl$CHOICE <- data_mixl$choice
Define the {mixl} utility function
mixl_wtp <- "
feat_RND = @feat + draw_1 * @sd_feat;
brandhiland_RND = @brandhiland + draw_2 * @sd_brandhiland;
brandweight_RND = @brandweight + draw_3 * @sd_brandweight;
brandyoplait_RND = @brandyoplait + draw_4 * @sd_brandyoplait;
U_1 = @scalePar * (feat_RND * $feat_dannon - $price_dannon);
U_2 = @scalePar * (brandhiland_RND + feat_RND * $feat_hiland - $price_hiland);
U_3 = @scalePar * (brandweight_RND + feat_RND * $feat_weight - $price_weight);
U_4 = @scalePar * (brandyoplait_RND + feat_RND * $feat_yoplait - $price_yoplait);
"
mixl_spec_wtp <- specify_model(mixl_wtp, data_mixl)
availabilities <- generate_default_availabilities(data_mixl, 4)
The same model is now estimated using all five packages.
model_logitr <- logitr(
data = yogurt,
outcome = 'choice',
obsID = 'obsID',
panelID = 'id',
pars = c('feat', 'brand'),
scalePar = 'price',
randPars = c(feat = "n", brand = "n"),
startVals = start_wtp,
numDraws = numDraws_wtp
)
{logitr} converges, even without running a multi-start:
summary(model_logitr)
#> =================================================
#>
#> Model estimated on: Sat Oct 01 16:46:57 2022
#>
#> Using logitr version: 0.8.0
#>
#> Call:
#> logitr(data = yogurt, outcome = "choice", obsID = "obsID", pars = c("feat",
#> "brand"), scalePar = "price", randPars = c(feat = "n", brand = "n"),
#> panelID = "id", startVals = start_wtp, numDraws = numDraws_wtp,
#> numCores = numCores)
#>
#> Frequencies of alternatives:
#> 1 2 3 4
#> 0.415721 0.029984 0.170989 0.383306
#>
#> Exit Status: 3, Optimization stopped because ftol_rel or ftol_abs was reached.
#>
#> Model Type: Mixed Logit
#> Model Space: Willingness-to-Pay
#> Model Run: 1 of 1
#> Iterations: 85
#> Elapsed Time: 0h:0m:4s
#> Algorithm: NLOPT_LD_LBFGS
#> Weights Used?: FALSE
#> Panel ID: id
#> Robust? FALSE
#>
#> Model Coefficients:
#> Estimate Std. Error z-value Pr(>|z|)
#> scalePar 0.388098 0.050706 7.6538 1.954e-14 ***
#> feat 1.738832 0.725107 2.3980 0.01648 *
#> brandhiland -13.536714 1.810701 -7.4760 7.661e-14 ***
#> brandweight -4.510068 1.015856 -4.4397 9.010e-06 ***
#> brandyoplait 6.663869 0.929658 7.1681 7.605e-13 ***
#> sd_feat 0.491225 1.072013 0.4582 0.64679
#> sd_brandhiland 5.730571 1.125803 5.0902 3.577e-07 ***
#> sd_brandweight 11.420500 1.727799 6.6099 3.847e-11 ***
#> sd_brandyoplait 8.872470 1.366376 6.4934 8.390e-11 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Log-Likelihood: -732.2132421
#> Null Log-Likelihood: -1710.6872416
#> AIC: 1482.4264842
#> BIC: 1528.4886000
#> McFadden R2: 0.5719771
#> Adj McFadden R2: 0.5667161
#> Number of Observations: 1234.0000000
#>
#> Summary of 10k Draws for Random Coefficients:
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> feat -Inf 1.4071720 1.738457 1.738145 2.069629 Inf
#> brandhiland -Inf -17.4039453 -13.538553 -13.542027 -9.674388 Inf
#> brandweight -Inf -12.2189602 -4.519436 -4.527690 3.182253 Inf
#> brandyoplait -Inf 0.6670773 6.648678 6.641070 12.632059 Inf
Including a multi-start helps build confidence in the solution reached:
model_logitr <- logitr(
data = yogurt,
outcome = 'choice',
obsID = 'obsID',
panelID = 'id',
pars = c('feat', 'brand'),
scalePar = 'price',
randPars = c(feat = "n", brand = "n"),
startVals = start_wtp,
numDraws = numDraws_wtp,
numMultiStarts = 10
)
summary(model_logitr)
#> =================================================
#>
#> Model estimated on: Sat Oct 01 16:46:57 2022
#>
#> Using logitr version: 0.8.0
#>
#> Call:
#> logitr(data = yogurt, outcome = "choice", obsID = "obsID", pars = c("feat",
#> "brand"), scalePar = "price", randPars = c(feat = "n", brand = "n"),
#> panelID = "id", startVals = start_wtp, numDraws = numDraws_wtp,
#> numCores = numCores)
#>
#> Frequencies of alternatives:
#> 1 2 3 4
#> 0.415721 0.029984 0.170989 0.383306
#>
#> Exit Status: 3, Optimization stopped because ftol_rel or ftol_abs was reached.
#>
#> Model Type: Mixed Logit
#> Model Space: Willingness-to-Pay
#> Model Run: 1 of 1
#> Iterations: 85
#> Elapsed Time: 0h:0m:4s
#> Algorithm: NLOPT_LD_LBFGS
#> Weights Used?: FALSE
#> Panel ID: id
#> Robust? FALSE
#>
#> Model Coefficients:
#> Estimate Std. Error z-value Pr(>|z|)
#> scalePar 0.388098 0.050706 7.6538 1.954e-14 ***
#> feat 1.738832 0.725107 2.3980 0.01648 *
#> brandhiland -13.536714 1.810701 -7.4760 7.661e-14 ***
#> brandweight -4.510068 1.015856 -4.4397 9.010e-06 ***
#> brandyoplait 6.663869 0.929658 7.1681 7.605e-13 ***
#> sd_feat 0.491225 1.072013 0.4582 0.64679
#> sd_brandhiland 5.730571 1.125803 5.0902 3.577e-07 ***
#> sd_brandweight 11.420500 1.727799 6.6099 3.847e-11 ***
#> sd_brandyoplait 8.872470 1.366376 6.4934 8.390e-11 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Log-Likelihood: -732.2132421
#> Null Log-Likelihood: -1710.6872416
#> AIC: 1482.4264842
#> BIC: 1528.4886000
#> McFadden R2: 0.5719771
#> Adj McFadden R2: 0.5667161
#> Number of Observations: 1234.0000000
#>
#> Summary of 10k Draws for Random Coefficients:
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> feat -Inf 1.4071720 1.738457 1.738145 2.069629 Inf
#> brandhiland -Inf -17.4039453 -13.538553 -13.542027 -9.674388 Inf
#> brandweight -Inf -12.2189602 -4.519436 -4.527690 3.182253 Inf
#> brandyoplait -Inf 0.6670773 6.648678 6.641070 12.632059 Inf
First attempt using same starting points as {logitr}
model_mixl <- estimate(
mixl_spec_wtp, start_wtp,
data_mixl, availabilities,
nDraws = numDraws_wtp
)
{mixl} converges to a local minimum:
#> [1] -732.2132 -1544.8531
#> [,1] [,2]
#> scalePar 0.3880976 -2270.55731
#> feat 1.7388324 42.24281
#> brandhiland -13.5367140 24.36731
#> brandweight -4.5100684 110.99687
#> brandyoplait 6.6638687 -492.36959
#> sd_feat 0.4912255 10.81470
#> sd_brandhiland 5.7305708 10.23414
#> sd_brandweight 11.4204997 334.92761
#> sd_brandyoplait 8.8724698 915.14924
Second attempt using {logitr} solution as starting points
model_mixl <- estimate(
mixl_spec_wtp, coef(model_logitr),
data_mixl, availabilities,
nDraws = numDraws_wtp
)
Again, {mixl} converges to a local minimum:
#> [1] -732.2132 -1544.8531
#> [,1] [,2]
#> scalePar 0.3880976 -2270.55731
#> feat 1.7388324 42.24281
#> brandhiland -13.5367140 24.36731
#> brandweight -4.5100684 110.99687
#> brandyoplait 6.6638687 -492.36959
#> sd_feat 0.4912255 10.81470
#> sd_brandhiland 5.7305708 10.23414
#> sd_brandweight 11.4204997 334.92761
#> sd_brandyoplait 8.8724698 915.14924
First attempt using same starting points as {logitr}. Note that additional starting parameters must be added as the {gmnl} approach to estimating WTP is a slightly different model.
model_gmnl <- gmnl(
data = data_gmnl,
formula = choice ~ price + feat + brand | 0 | 0 | 0 | 1,
ranp = c(
feat = "n", brandhiland = "n", brandweight = "n",
brandyoplait = "n"),
fixed = c(TRUE, rep(FALSE, 10), TRUE),
model = "gmnl",
method = "bfgs",
haltons = NA,
panel = TRUE,
start = c(start_wtp, 0.1, 0.1, 0),
R = numDraws_wtp
)
{gmnl} converges to a local minimum:
#> [1] -732.2132 -1710.6872
#> [,1] [,2]
#> feat 0.3880976 8.1540692
#> brandhiland 1.7388324 4.4266391
#> brandweight -13.5367140 20.9581382
#> brandyoplait -4.5100684 -93.0969112
#> het.(Intercept) 6.6638687 -440.5183170
#> sd.feat 0.4912255 2.4208585
#> sd.brandhiland 5.7305708 0.1085233
#> sd.brandweight 11.4204997 53.2376327
#> sd.brandyoplait 8.8724698 95.0008933
#> tau 0.3880976 653.9956379
Second attempt using {logitr} solution as starting points:
model_gmnl <- gmnl(
data = data_gmnl,
formula = choice ~ price + feat + brand | 0 | 0 | 0 | 1,
ranp = c(
feat = "n", brandhiland = "n", brandweight = "n",
brandyoplait = "n"),
fixed = c(TRUE, rep(FALSE, 10), TRUE),
model = "gmnl",
method = "bfgs",
haltons = NA,
panel = TRUE,
start = c(coef(model_logitr), 0.1, 0.1, 0),
R = numDraws_wtp
)
#> Error in optim(par = c(feat = 1.74, brandhiland = -13.54, brandweight = -4.51, :
#> initial value in 'vmmin' is not finite
In this case, {gmnl} fails due to an error with the starting values provided.
First attempt using same starting points as {logitr}:
model_apollo <- apollo_estimate(
apollo_beta = start_wtp,
apollo_fixed = NULL,
apollo_probabilities = apollo_probabilities_wtp,
apollo_inputs = apollo_inputs_wtp,
estimate_settings = list(printLevel = 0)
)
{apollo} converges to a local minimum:
#> [1] -732.2132 -776.5525
#> [,1] [,2]
#> scalePar 0.3880976 0.02264706
#> feat 1.7388324 52.92881692
#> brandhiland -13.5367140 -129.27886303
#> brandweight -4.5100684 -104.76878544
#> brandyoplait 6.6638687 23.77062341
#> sd_feat 0.4912255 11.06432598
#> sd_brandhiland 5.7305708 -86.57222932
#> sd_brandweight 11.4204997 164.38217567
#> sd_brandyoplait 8.8724698 156.45076147
Second attempt using {logitr} solution as starting points:
model_apollo <- apollo_estimate(
apollo_beta = coef(model_logitr),
apollo_fixed = NULL,
apollo_probabilities = apollo_probabilities_wtp,
apollo_inputs = apollo_inputs_wtp,
estimate_settings = list(printLevel = 0)
)
Again, {apollo} converges to a local minimum:
#> [1] -732.2132 -772.6268
#> [,1] [,2]
#> scalePar 0.3880976 2.180707e-03
#> feat 1.7388324 7.365495e+02
#> brandhiland -13.5367140 -2.378832e+03
#> brandweight -4.5100684 -1.382908e+03
#> brandyoplait 6.6638687 1.571897e+02
#> sd_feat 0.4912255 -3.484523e+02
#> sd_brandhiland 5.7305708 9.805109e+02
#> sd_brandweight 11.4204997 1.899130e+03
#> sd_brandyoplait 8.8724698 1.211976e+03