### Multinomial Logistic Regression

# Load required libraries
library(yarrr)  # pirateplot
library(nnet)   # multinom
source("http://www.math.montana.edu/shancock/courses/stat439/R/summ.mfit.R")   # summ.mfit


### Data: Greene (1995)
## Travel Choices:
# Measure travelers' decisions regarding travel mode
# between Sydney and Melbourne, Australia.
# Modes of travel (choices):
# mode = 0 --> air
# mode = 1 --> train
# mode = 2 --> bus
# mode = 3 --> car
# (Note that this is a nominal response variable -
# the numbers have no quantitative meaning)

## Explanatory variables of interest:
# hinc = household income ($k)
# psize = number of individuals traveling in a party
# Can you think of other unmeasured confounders?

## Read in data and attach:
travel <- read.table("http://www.math.montana.edu/shancock/data/TravelChoices.txt",header=T)
travel$mode.fac <- factor(travel$mode,levels=c(0:3),labels=c("Air","Train","Bus","Car"))
head(travel)
dim(travel)
# Sample size = 210

##
#### Some descriptive statistics:
##

# What proportion are in each travel category?
with(travel,
   cbind(Frequency = table(mode.fac), 
         Proportion = table(mode.fac)/sum(table(mode.fac)))
)
barplot(table(travel$mode.fac), ylab = "Frequency",
        xlab = "Mode of Travel")
# --> travel by train most common (30%), then
# car (28%)
# air (27.6%)
# bus (14.2%)

# Travel mode vs. party size?
my.tab <- with(travel, table(mode.fac,psize))
my.tab

pirateplot(psize ~ mode.fac, data = travel, avg.line.fun = median,
           inf.method = "iqr")

barplot(prop.table(my.tab, 2), legend = TRUE,
        xlab = "Party Size")

mosaicplot(psize ~ mode.fac, data = travel)

# As psize increases, probability of traveling by air decreases, and probability of traveling by car increases.
# Note sparse data for large party sizes (only one observation with psize = 6)

# Travel mode vs. hinc?
pirateplot(hinc ~ mode.fac, ylab="Household Income ($1000)",
           xlab = "Mode of Travel",
           data = travel, inf.method = "iqr",
           avg.line.fun = median)
lapply( split(travel$hinc,travel$mode.fac), FUN=summary)
# Median household income among travelers choosing air or car appears
# to be higher than those traveling by train or bus.

##
#### Fit multinomial logistic regression model
##
# Use multinom function in nnet library

mfit <- multinom(mode.fac ~ hinc + psize, data=travel)
summary(mfit) # Not too helpful

summ.mfit(mfit)
# rrr = "relative risk ratio" = odds of level ___ to level ___
# Interpretations of rrr for psize in each equation?

# Plot fitted probabilities vs. hinc for given party size:
co <- coef(mfit) # Note is a 3x3 matrix
is.matrix(co)
dim(co)

# Function to calculate probabilities
my.prob <- function(c, ps, x, mode){
    # c = 3x3 matrix of coefficients
    # ps = party size
    # x = hinc
    # mode = "Air","Train","Bus", or "Car"
    denom <- 1+exp(c[1,1]+c[1,2]*x+c[1,3]*ps) +
                       exp(c[2,1]+c[2,2]*x+c[2,3]*ps) +
                       exp(c[3,1]+c[3,2]*x+c[3,3]*ps)
    if(mode == "Air"){
        rslt <- 1/denom
    }
    if(mode == "Train"){
        rslt <- exp(c[1,1]+c[1,2]*x+c[1,3]*ps)/denom
    }
    if(mode == "Bus"){
        rslt <- exp(c[2,1]+c[2,2]*x+c[2,3]*ps)/denom
    }
    if(mode == "Car"){
        rslt <- exp(c[3,1]+c[3,2]*x+c[3,3]*ps)/denom
    }
    return(rslt)
}

# For party size = 2
curve(my.prob(co, 2, x, "Air"), from=0, to=80,
      xlab="Household Income ($k)", ylab="Fitted Probabilities",
      main="Fitted Probabilities of Travel Type for Parties of Size 2",
      col="red", lty=1)
curve(my.prob(co, 2, x, "Train"), add=T, col="blue",lty=2)
curve(my.prob(co, 2, x, "Bus"), add=T, col="darkgreen",lty=3)
curve(my.prob(co, 2, x, "Car"), add=T, col="purple",lty=4)
legend("topleft",c("Air","Train","Bus","Car"),
	col=c("red","blue","darkgreen","purple"),
	lty=c(1,2,3,4))

# Any interaction between psize and hinc on mode?
mfit.int <- multinom( mode.fac ~ hinc * psize, data=travel)
summ.mfit(mfit.int)

anova(mfit,mfit.int)
# Conclusion?

### Fitting separate logistic models to each (less efficient):
# Compare to multinomial coefs.
# Train vs. Air:
mod.TA <- glm( (mode==1) ~ hinc+psize, data=travel, family=binomial,
			subset = (mode==0 | mode==1))
# The logical argument "mode==0 | mode==1" tells R to only take observations
# from the data set that have mode 0 OR (|) mode 1.

# Bus vs. Air:
mod.BA <- glm( (mode==2) ~ hinc+psize, data=travel, family=binomial,
			subset = (mode==0 | mode==2))

# Car vs. Air:
mod.CA <- glm( (mode==3) ~ hinc+psize, data=travel, family=binomial,
			subset = (mode==0 | mode==3))

mod.TA
mod.BA
mod.CA

## Only the intercept term for Bus vs. Air seems to differ from
## the multinomial fit.