# 17: MTC Expanded MNL Mode Choice

In [None]:
# TEST
import larch.numba as lx
import larch
import pandas as pd
# Raise the column limit and disable frame wrapping so wide tables display in full.
pd.set_option("display.max_columns", 999)
pd.set_option('expand_frame_repr', False)
# Display floats with a precision of 3.
pd.set_option('display.precision', 3)
# Flag set on the larch package; presumably stabilizes displayed output for
# documentation tests -- TODO confirm against larch.
larch._doctest_mode_ = True

For this example, we're going to re-create model 17 from the
[Self Instructing Manual](http://www.caee.utexas.edu/prof/Bhat/COURSES/LM_Draft_060131Final-060630.pdf) (p. 128).

In [None]:
import larch.numba as lx
# Load the MTC example data in 'dataset' format.
d = lx.examples.MTC(format='dataset')
# Build the model on that data; `m` is configured step by step in the cells below.
m = lx.Model(d)

We will use the usual choice and availability variables.

In [None]:
# Name the data variables that hold alternative availability and the observed choice.
m.availability_var = 'avail'
m.choice_ca_var = 'chose'

In [None]:
from larch.roles import P, X

# Utility terms shared across alternatives: each X(...) data expression is
# multiplied by the P(...) parameter named beside it.
m.utility_ca = (
    + X("totcost/hhinc") * P("costbyincome")
    # (altnum <= 4) keeps a term only for the motorized alternatives;
    # (altnum >= 5) keeps it only for the remaining (non-motorized) ones.
    + X("tottime * (altnum <= 4)") * P("motorized_time")
    + X("tottime * (altnum >= 5)") * P("nonmotorized_time")
    + X("ovtt/dist * (altnum <= 4)") * P("motorized_ovtbydist")
)

The "totcost/hhinc" data is computed once as a new variable when loading the model data.
The same applies for tottime filtered by motorized modes (we harness the convenient fact
that all the motorized modes have identifying numbers 4 or less), and "ovtt/dist".

In [None]:
# Household income enters the utility of alternatives 4, 5 and 6 only, each
# with its own parameter (hhinc#4, hhinc#5, hhinc#6).
for alt_id in (4, 5, 6):
    income_param = P("hhinc#{}".format(alt_id))
    m.utility_co[alt_id] += X("hhinc") * income_param

Since the model we want to create groups together DA, SR2 and SR3+ jointly as
reference alternatives with respect to income, we can simply omit all of these alternatives
from the block that applies to **hhinc**.

For vehicles per worker, the preferred model includes a joint parameter on SR2 and SR3+,
but not including DA and not fixed at zero.  Here we might use a shadow_parameter (also
called an alias in some places), which allows
us to specify one or more parameters that are simply a fixed proportion of another parameter.
For example, we can say that vehbywrk_SR2 will be equal to vehbywrk_SR.  In the code below
we get the same effect more directly, by using the single parameter vehbywrk_SR in the
utility functions of both SR2 and SR3+.

In [None]:
# Alternative-specific utility for every alternative after the first one.
# The alternatives at positions 1 and 2 share the single "vehbywrk_SR"
# parameter; each later alternative gets its own vehicles-per-worker parameter.
for position, alt in enumerate(d['alt_names'][1:], start=1):
    alt_name = str(alt.values)
    alt_id = int(alt.altid)
    if position < 3:
        vehbywrk_param = "vehbywrk_SR"
    else:
        vehbywrk_param = "vehbywrk_" + alt_name
    m.utility_co[alt_id] += (
        + X("vehbywrk") * P(vehbywrk_param)
        + X("wkccbd+wknccbd") * P("wkcbd_" + alt_name)
        + X("wkempden") * P("wkempden_" + alt_name)
        + P("ASC_" + alt_name)
    )

We didn't explicitly define our parameters first, which is fine; Larch will
find them in the utility functions (or elsewhere in more complex models).
But they may be found in a weird order that is hard to read in reports.
We can define an ordering scheme by assigning to the model's `ordering` attribute,
like this:

In [None]:
# Reporting order for parameters: each tuple is a group label followed by one
# or more regular expressions that are matched against the parameter names.
m.ordering = (
    ('LOS', ".*cost.*", ".*time.*", ".*dist.*",),
    ('Zonal', "wkcbd.*", "wkempden.*",),
    ('Household', "hhinc.*", "vehbywrk.*",),
    ('ASCs', "ASC.*",),
)

Each item in the `ordering` is a tuple, with a label and one or more regular expressions,
which will be compared against
all the parameter names.  Any names that match will be pulled out and put into the
reporting order sequentially.  Thus if a parameter name would match more than one
regex, it will appear in the ordering only for the first match.


Having created this model, we can then estimate it:

In [None]:
# Estimate the model.  The call is left as the cell's last expression so its
# result is the cell output; the following test cell reads it through `_`.
m.maximize_loglike()

In [None]:
# TEST
# `_` is IPython's last-output variable, so this cell only works when it runs
# directly after the estimation cell, whose output is the result object.
r = _
from pytest import approx
# Regression checks on the estimation result: final log likelihood, number of
# cases, and a successful optimizer message.
assert r.loglike == approx(-3444.185105027836)
assert r.n_cases == 5029
assert 'success' in r.message.lower()
# Estimated parameter values, compared with pytest's default approx tolerance.
assert r.x.to_dict() == approx({
    'ASC_Bike': -1.6288174781480145,
    'ASC_SR2': -1.8077821796310174,
    'ASC_SR3+': -3.4336998987834213,
    'ASC_Transit': -0.6850205869302504,
    'ASC_Walk': 0.06826615821030824,
    'costbyincome': -0.05239236004239274,
    'hhinc#4': -0.0053231144110710265,
    'hhinc#5': -0.008643179890815506,
    'hhinc#6': -0.005997795266774085,
    'motorized_ovtbydist': -0.1328389672470942,
    'motorized_time': -0.02018676908268187,
    'nonmotorized_time': -0.04544467417768392,
    'vehbywrk_Bike': -0.7021221804213855,
    'vehbywrk_SR': -0.31664078667048384,
    'vehbywrk_Transit': -0.9462364952409247,
    'vehbywrk_Walk': -0.7218049107571212,
    'wkcbd_Bike': 0.48936706067828845,
    'wkcbd_SR2': 0.25986035009653136,
    'wkcbd_SR3+': 1.069304378606234,
    'wkcbd_Transit': 1.308896887615559,
    'wkcbd_Walk': 0.10177663194876692,
    'wkempden_Bike': 0.0019282498545339284,
    'wkempden_SR2': 0.0015778182187284415,
    'wkempden_SR3+': 0.002257039208670294,
    'wkempden_Transit': 0.003132740135033535,
    'wkempden_Walk': 0.0028906014986955593,
})

In [None]:
# Compute the parameter covariance first, then show the parameter summary as
# the cell output.
m.calculate_parameter_covariance()
m.parameter_summary()

In [None]:
# TEST
# Regression checks on the t-statistics stored in m.pf; `approx` is the name
# imported from pytest in the earlier test cell.
assert m.pf.t_stat.to_dict() == approx({
    'ASC_Bike': -3.8110051632761968,
    'ASC_SR2': -17.03471916394958,
    'ASC_SR3+': -22.610264384635116,
    'ASC_Transit': -2.764269785206984,
    'ASC_Walk': 0.19617043561070976,
    'costbyincome': -5.0360570040949515,
    'hhinc#4': -2.6923847354101915,
    'hhinc#5': -1.676857732750138,
    'hhinc#6': -1.9049215648409885,
    'motorized_ovtbydist': -6.763234843764025,
    'motorized_time': -5.291965825624687,
    'nonmotorized_time': -7.878190061966541,
    'vehbywrk_Bike': -2.7183965402594508,
    'vehbywrk_SR': -4.751992210976383,
    'vehbywrk_Transit': -7.999145737275119,
    'vehbywrk_Walk': -4.261234830020787,
    'wkcbd_Bike': 1.3552321494507682,
    'wkcbd_SR2': 2.1066605695091867,
    'wkcbd_SR3+': 5.590372196382326,
    'wkcbd_Transit': 7.899400934474615,
    'wkcbd_Walk': 0.40370690248331875,
    'wkempden_Bike': 1.5864614051558108,
    'wkempden_SR2': 4.042074989321517,
    'wkempden_SR3+': 4.993778175062689,
    'wkempden_Transit': 8.684498489531592,
    'wkempden_Walk': 3.8952326996888065,
})
# Same check for the robust t-statistics.
assert m.pf.robust_t_stat.to_dict() == approx({
    'ASC_Bike': -3.350788895379893,
    'ASC_SR2': -15.450849978191432,
    'ASC_SR3+': -22.047875467016553,
    'ASC_Transit': -2.546641253284614,
    'ASC_Walk': 0.19546387137430002,
    'costbyincome': -3.927312777634008,
    'hhinc#4': -2.6000468880002883,
    'hhinc#5': -1.448502844590286,
    'hhinc#6': -1.7478834622063846,
    'motorized_ovtbydist': -5.512721233692836,
    'motorized_time': -5.1781560789822985,
    'nonmotorized_time': -7.890366874224642,
    'vehbywrk_Bike': -2.26956809717166,
    'vehbywrk_SR': -4.1884543094363345,
    'vehbywrk_Transit': -6.907359588761182,
    'vehbywrk_Walk': -3.552049105845569,
    'wkcbd_Bike': 1.3353508709464412,
    'wkcbd_SR2': 2.1061572488997933,
    'wkcbd_SR3+': 5.629597757231176,
    'wkcbd_Transit': 8.258699769521979,
    'wkcbd_Walk': 0.3932045643537346,
    'wkempden_Bike': 1.640126229774069,
    'wkempden_SR2': 3.8222350454916496,
    'wkempden_SR3+': 4.974652568010134,
    'wkempden_Transit': 8.178299823852544,
    'wkempden_Walk': 4.06724937563278,
})

In [None]:
# TEST
# model also works for IDCE
# Read the example CSV with (casenum, altnum) as a two-level index.
df = pd.read_csv(lx.example_file("MTCwork.csv.gz"), index_col=['casenum','altnum'])
# Rename the second index level to 'altid' ...
df.index = df.index.rename('altid', level=1)
# ... and also expose its values as an ordinary 'altnum' column, which the
# utility expressions reference.
df['altnum'] = df.index.get_level_values(1)
# Replace the model's data with a dataset constructed from the idce-format frame.
m.datatree = lx.Dataset.construct.from_idce(df)
# Availability is now given by the constant expression '1'.
m.availability_var = '1'
# Log likelihood and case count must match the values asserted for the earlier estimation.
assert m.loglike() == approx(-3444.185105027836)
assert m.n_cases == 5029
# The loaded data has no 'ca' entry, and 'ce_data' has the expected shape.
assert 'ca' not in m.dataset
assert m.dataset['ce_data'].shape == (22033,4)