\documentclass[a4paper]{article}
\usepackage{amsmath}%the AMS math extension of LaTeX.
\usepackage{amssymb}%the extended AMS math symbols.
%% \usepackage{amsthm}
\usepackage{bm}%Use 'bm.sty' to get `bold math' symbols
\usepackage{natbib}
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage{Sweave}
\usepackage{url}
\usepackage{float}%Use `float.sty'
\usepackage[left=3.5cm,right=3.5cm]{geometry}
\usepackage{algorithmic}
\usepackage[amsmath,thmmarks,standard,thref]{ntheorem}

%%\VignetteIndexEntry{clmm2 tutorial}
%%\VignetteDepends{ordinal, xtable}

\title{A Tutorial on fitting Cumulative Link Mixed Models with
  \texttt{clmm2} from the \textsf{ordinal} Package}
\author{Rune Haubo B Christensen}

%% \numberwithin{equation}{section}
\setlength{\parskip}{2mm}%.8\baselineskip}
\setlength{\parindent}{0in}

%% \DefineVerbatimEnvironment{Sinput}{Verbatim}%{}
%% {fontshape=sl, xleftmargin=1em}
%% \DefineVerbatimEnvironment{Soutput}{Verbatim}%{}
%% {xleftmargin=1em}
%% \DefineVerbatimEnvironment{Scode}{Verbatim}%{}
%% {fontshape=sl, xleftmargin=1em}

\fvset{listparameters={\setlength{\topsep}{0pt}}}
%% \fvset{listparameters={\setlength{\botsep}{0pt}}}
\renewenvironment{Schunk}{\vspace{-1mm}}{\vspace{-1mm}}

%RE-DEFINE marginpar
\setlength{\marginparwidth}{1in}
\let\oldmarginpar\marginpar
\renewcommand\marginpar[1]{\oldmarginpar[\-\raggedleft\tiny #1]%
{\tiny #1}}
%uncomment to _HIDE_MARGINPAR_:
%\renewcommand\marginpar[1]{}

\newcommand{\var}{\textup{var}}
\newcommand{\I}{\mathcal{I}}
\newcommand{\bta}{\bm \theta}
\newcommand{\ta}{\theta}
\newcommand{\tah}{\hat \theta}
\newcommand{\di}{~\textup{d}}
\newcommand{\td}{\textup{d}}
\newcommand{\Si}{\Sigma}
\newcommand{\si}{\sigma}
\newcommand{\bpi}{\bm \pi}
\newcommand{\bmeta}{\bm \eta}
\newcommand{\tdots}{\hspace{10mm} \texttt{....}}
\newcommand{\FL}[1]{\fvset{firstline= #1}}
\newcommand{\LL}[1]{\fvset{lastline= #1}}
\newcommand{\s}{\square}
\newcommand{\bs}{\blacksquare}

% figures at the end of the article
%% \usepackage[tablesfirst, nolists]{endfloat}
%% \renewcommand{\efloatseparator}{\vspace{.5cm}}

\theoremstyle{plain} %% {break}
\theoremseparator{:}
\theoremsymbol{{\tiny $\square$}}
%%\theoremstyle{plain}
\theorembodyfont{\small}
\theoremindent5mm
\renewtheorem{example}{Example}
%% \newtheoremstyle{example}{\topsep}{\topsep}%
%% {}% Body font
%% {}% Indent amount (empty = no indent, \parindent = para indent)
%% {\bfseries}% Thm head font
%% {}% Punctuation after thm head
%% {\newline}% Space after thm head (\newline = linebreak)
%% {\thmname{#1}\thmnumber{ #2}\thmnote{ #3}}% Thm head spec
%%
%% \theoremstyle{example}
%%
%% \newtheorem{example}{Example}[subsection]
%% \newtheorem{example}{Example}[section]

\usepackage{lineno}
% \linenumbers
\newcommand*\patchAmsMathEnvironmentForLineno[1]{%
  \expandafter\let\csname old#1\expandafter\endcsname\csname #1\endcsname
  \expandafter\let\csname oldend#1\expandafter\endcsname\csname end#1\endcsname
  \renewenvironment{#1}%
  {\linenomath\csname old#1\endcsname}%
  {\csname oldend#1\endcsname\endlinenomath}}%
\newcommand*\patchBothAmsMathEnvironmentsForLineno[1]{%
  \patchAmsMathEnvironmentForLineno{#1}%
  \patchAmsMathEnvironmentForLineno{#1*}}%
\AtBeginDocument{%
  \patchBothAmsMathEnvironmentsForLineno{equation}%
  \patchBothAmsMathEnvironmentsForLineno{align}%
  \patchBothAmsMathEnvironmentsForLineno{flalign}%
  \patchBothAmsMathEnvironmentsForLineno{alignat}%
  \patchBothAmsMathEnvironmentsForLineno{gather}%
  \patchBothAmsMathEnvironmentsForLineno{multline}%
}

\begin{document}
\bibliographystyle{chicago}
\maketitle
\begin{abstract}
It is shown by example how a cumulative link mixed model is fitted
with the \texttt{clmm2} function in package \textsf{ordinal}. Model
interpretation and inference are briefly discussed. A tutorial for the
more recent \texttt{clmm} function is work in progress.
\end{abstract}

%% \newpage
%% \tableofcontents
%% \newpage

\SweaveOpts{echo=TRUE, results=verb, width=4.5, height=4.5}
\SweaveOpts{prefix.string=figs}
\fvset{listparameters={\setlength{\topsep}{0pt}}, gobble=0, fontsize=\small}
%% \fvset{gobble=0, fontsize=\small}
\setkeys{Gin}{width=.49\textwidth}

<<echo=FALSE, results=hide>>=
## Load common packages, functions and set settings:
library(ordinal)
library(xtable)
## RUN <- FALSE #redo computations and write .RData files

## Change options:
op <- options() ## To be able to reset settings
options("digits" = 7)
options(help_type = "html")
## options("width" = 75)
options("SweaveHooks" = list(fig=function() par(mar=c(4,4,.5,0)+.5)))
options(continue=" ")
@

We will consider the data on the bitterness of wine from
\citet{randall89} presented in Table~\ref{tab:winedata} and available
as the object \texttt{wine} in package \textsf{ordinal}. The data were
also analyzed with mixed effects models by \citet{tutz96}. The
following gives an impression of the wine data object:
<<>>=
data(wine)
head(wine)
str(wine)
@
The data represent a factorial experiment on factors determining the
bitterness of wine with 1 = ``least bitter'' and 5 = ``most bitter''.
Two treatment factors (temperature and contact) each have two levels.
Temperature and contact between juice and skins can be controlled when
crushing grapes during wine production. Nine judges each assessed wine
from two bottles from each of the four treatment conditions, hence
there are 72 observations in all. For more information see the manual
entry for the wine data: \texttt{help(wine)}.

\begin{table}
  \centering
  \caption{Ratings of the bitterness of some white wines. Data are
    adapted from \citet{randall89}.}
  \label{tab:winedata}
\begin{tabular}{lllrrrrrrrrr}
  \hline
  & & & \multicolumn{9}{c}{Judge} \\
  \cline{4-12}
<<echo=FALSE, results=tex>>=
data(wine)
temp.contact.bottle <- with(wine, temp:contact:bottle)[drop=TRUE]
tab <- xtabs(as.numeric(rating) ~ temp.contact.bottle + judge,
             data=wine)
class(tab) <- "matrix"
attr(tab, "call") <- NULL
mat <- cbind(rep(c("cold", "warm"), each = 4),
             rep(rep(c("no", "yes"), each=2), 2),
             1:8, tab)
colnames(mat) <- c("Temperature", "Contact", "Bottle", 1:9)
xtab <- xtable(mat)
print(xtab, only.contents=TRUE, include.rownames=FALSE,
      sanitize.text.function = function(x) x)
@
\end{tabular}
\end{table}

We will fit the following cumulative link mixed model to the wine
data:
\begin{equation}
  \label{eq:mixedModel}
  \begin{array}{c}
    \textup{logit}(P(Y_i \leq j)) = \theta_j - \beta_1 (\mathtt{temp}_i)
    - \beta_2(\mathtt{contact}_i) - u(\mathtt{judge}_i) \\
    i = 1,\ldots, n, \quad j = 1, \ldots, J-1
  \end{array}
\end{equation}
This is a model for the cumulative probability of the $i$th rating
falling in the $j$th category or below, where $i$ indexes all
observations and $j = 1, \ldots, J$ indexes the response categories
($J = 5$). The $\{\theta_j\}$ are known as threshold parameters or
cut-points. We take the judge effects to be random and assume that
they are IID normal: $u(\mathtt{judge}_i) \sim N(0, \sigma_u^2)$.

We fit this model with the \texttt{clmm2} function in package
\textsf{ordinal}.
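Before turning to the model-based results, a quick cross-tabulation of
the raw ratings against the two treatment factors gives a first
impression of the treatment effects. The chunk below is a small
exploratory sketch added for illustration (it is not part of the
original analysis) and uses only the base-R functions \texttt{xtabs}
and \texttt{ftable}:
<<>>=
## Exploratory sketch (added for illustration): how the bitterness
## ratings distribute over the four treatment combinations.
ftable(xtabs(~ temp + contact + rating, data = wine))
@
If temperature and contact increase bitterness as hypothesized, higher
ratings should be more frequent in the warm and contact rows of this
table, anticipating the model-based conclusions below.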
Here we save the fitted \texttt{clmm2} model in the object
\texttt{fm1} (short for \texttt{f}itted \texttt{m}odel \texttt{1}) and
\texttt{print} the model by simply typing its name:
<<>>=
fm1 <- clmm2(rating ~ temp + contact, random=judge, data=wine)
fm1
@
Maximum likelihood estimates of the parameters are provided using the
Laplace approximation to compute the likelihood function. A more
accurate approximation is provided by the adaptive Gauss-Hermite
quadrature method. Here we use 10 quadrature nodes and use the
\texttt{summary} method to display additional information:
<<>>=
fm2 <- clmm2(rating ~ temp + contact, random=judge, data=wine,
             Hess=TRUE, nAGQ=10)
summary(fm2)
@
The small changes in the parameter estimates show that the Laplace
approximation was in fact rather accurate in this case. Observe that
we set the option \texttt{Hess = TRUE}. This is needed if we want to
use the \texttt{summary} method since the Hessian is needed to compute
standard errors of the model coefficients.

The results contain the maximum likelihood estimates of the
parameters:
\begin{equation}
  \label{eq:parameters}
  \hat\beta_1 = 3.06, ~~\hat\beta_2 = 1.83, ~~\hat\sigma_u^2 = 1.29 =
  1.13^2, ~~\{\hat\theta_j\} = [-1.62,~ 1.51,~ 4.23,~ 6.09].
\end{equation}
Observe that the number under \texttt{Std.Dev} for the random effect
is \textbf{not} the standard error of the random effects variance,
\texttt{Var}. Rather, it is the standard deviation of the random
effects, i.e., the square root of the variance. In our example
$\sqrt{1.29} \simeq 1.13$.

The condition number of the Hessian measures the empirical
identifiability of the model. High numbers, say larger than $10^4$ or
$10^6$, indicate that the model is ill defined. This would indicate
that the model can be simplified, that possibly some parameters are
not identifiable, and that optimization of the model can be
difficult. In this case the condition number of the Hessian does not
indicate a problem with the model.

The coefficients for \texttt{temp} and \texttt{contact} are positive,
indicating that higher temperature and more contact increase the
bitterness of wine, i.e., ratings in higher categories are more
likely. The odds ratio of the event $Y \geq j$ is
$\exp(\beta_{\textup{treatment}})$, thus the odds ratio of bitterness
being rated in category $j$ or above at warm relative to cold
temperatures is
<<>>=
exp(coef(fm2)[5])
@

The $p$-values for the location coefficients provided by the
\texttt{summary} method are based on the so-called Wald
statistic. More accurate tests are provided by likelihood ratio
tests. These can be obtained with the \texttt{anova} method; for
example, the likelihood ratio test of \texttt{contact} is
<<>>=
fm3 <- clmm2(rating ~ temp, random=judge, data=wine, nAGQ=10)
anova(fm3, fm2)
@
which in this case is slightly more significant. The Wald test is not
reliable for variance parameters, so the \texttt{summary} method does
not provide a test of $\sigma_u$, but a likelihood ratio test can be
obtained with \texttt{anova}:
<<>>=
fm4 <- clm2(rating ~ temp + contact, data=wine)
anova(fm4, fm2)
@
showing that the judge term is significant. Since this test of
$\sigma_u = 0$ is on the boundary of the parameter space (a variance
cannot be negative), it is often argued that a more correct $p$-value
is obtained by halving the $p$-value produced by the conventional
likelihood ratio test. In this case halving the $p$-value is of little
relevance.
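To make the halving argument concrete, the boundary-corrected
$p$-value can be computed by hand from the two log-likelihoods. The
chunk below is a small sketch added for illustration; it assumes that
\texttt{logLik} methods are available for the fitted
\texttt{clm2}/\texttt{clmm2} objects (the same log-likelihoods are
reported by \texttt{anova} above):
<<>>=
## Sketch (added for illustration): the "halved", boundary-corrected
## p-value for the likelihood ratio test of sigma_u = 0.
LR <- 2 * (as.numeric(logLik(fm2)) - as.numeric(logLik(fm4)))
0.5 * pchisq(LR, df = 1, lower.tail = FALSE)
@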
A profile likelihood confidence interval of $\sigma_u$ is obtained
with:
<<>>=
pr2 <- profile(fm2, range=c(.1, 4), nSteps=30, trace=0)
confint(pr2)
@
The profile likelihood can also be plotted:
<<profilePlot, eval=FALSE>>=
plot(pr2)
@
The result is shown in Fig.~\ref{fig:PRsigma_u} where horizontal lines
indicate 95\% and 99\% confidence bounds. Clearly the profile
likelihood function is asymmetric and symmetric confidence intervals
would be inaccurate.

\begin{figure}
  \centering
<<fig=TRUE, echo=FALSE>>=
<<profilePlot>>
@
\caption{Profile likelihood of $\sigma_u$.}
  \label{fig:PRsigma_u}
\end{figure}

The judge effects, $u(\mathtt{judge}_i)$, are not parameters, so they
cannot be \emph{estimated} in the conventional sense, but a ``best
guess'' is provided by the \emph{conditional modes}. Similarly the
\emph{conditional variance} provides an uncertainty measure of the
conditional modes. These quantities are included in \texttt{clmm2}
objects as the \texttt{ranef} and \texttt{condVar} components. The
following code generates the plot in Fig.~\ref{fig:ranef} illustrating
the judge effects via conditional modes with 95\% confidence intervals
based on the conditional variance:
<<ranefPlot, eval=FALSE>>=
ci <- fm2$ranef + qnorm(0.975) * sqrt(fm2$condVar) %o% c(-1, 1)
ord.re <- order(fm2$ranef)
ci <- ci[ord.re, ]
plot(1:9, fm2$ranef[ord.re], axes=FALSE, ylim=range(ci),
     xlab="Judge", ylab="Judge effect")
axis(1, at=1:9, labels = ord.re)
axis(2)
for(i in 1:9) segments(i, ci[i,1], i, ci[i, 2])
abline(h = 0, lty=2)
@
The seventh judge gave the lowest ratings of bitterness while the
first judge gave the highest ratings of bitterness. The significant
judge effect indicates that the judges rated the bitterness of the
wines differently. Two natural interpretations are that either a
bitterness of, say, 3 means different things to different judges, or
the judges actually perceived the bitterness of the wines
differently. Possibly both effects play their part.

\begin{figure}
  \centering
<<fig=TRUE, echo=FALSE>>=
<<ranefPlot>>
@
\caption{Judge effects given by conditional modes with 95\%
  confidence intervals based on the conditional variance.}
  \label{fig:ranef}
\end{figure}

The fitted or predicted probabilities can be obtained with the judge
effects at their conditional modes or for an average judge ($u = 0$).
The former are available with \texttt{fitted(fm)} or with
\texttt{predict(fm)}, where \texttt{fm} is a \texttt{f}itted
\texttt{m}odel object. In our example we get
<<>>=
head(cbind(wine, fitted(fm2)))
@
Predicted probabilities for an average judge can be obtained by
including the data used to fit the model in the \texttt{newdata}
argument of \texttt{predict}:
<<>>=
head(cbind(wine, pred=predict(fm2, newdata=wine)))
@

Model~\eqref{eq:mixedModel} says that for an average judge at cold
temperature the cumulative probability of a bitterness rating in
category $j$ or below is
\begin{equation*}
  P(Y_i \leq j) = \textup{logit}^{-1} [ \theta_j -
  \beta_2(\mathtt{contact}_i) ]
\end{equation*}
since $u$ is set to zero and $\beta_1(\mathtt{temp}_i) = 0$ at cold
conditions. Further, $\textup{logit}^{-1}(\eta) = 1 / [1 +
\exp(-\eta)]$ is the cumulative distribution function of the logistic
distribution, available as the \texttt{plogis} function.
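As a small check, added here for illustration, the following chunk
verifies numerically that \texttt{plogis} agrees with the inverse
logit formula just given, using the linear predictor $\theta_3 -
\beta_2$ that also appears in the computation below:
<<>>=
## Sketch (added for illustration): plogis() is the inverse logit,
## 1 / (1 + exp(-eta)); check at eta = theta_3 - beta_contact.
eta <- as.numeric(fm2$Theta[3] - fm2$beta[2])
c(plogis = plogis(eta), by.hand = 1 / (1 + exp(-eta)))
@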
The (non-cumulative) probability of a bitterness rating in category
$j$ is $\pi_j = P(Y_i \leq j) - P(Y_i \leq j-1)$, for instance, the
probability of a bitterness rating in the third category at these
conditions can be computed as
<<>>=
plogis(fm2$Theta[3] - fm2$beta[2]) - plogis(fm2$Theta[2] - fm2$beta[2])
@
This corresponds to the third entry of
\texttt{predict(fm2, newdata=wine)} given above.

Judge effects are random and normally distributed, so an average judge
effect is 0. Extreme judge effects, say the 5th and 95th percentile
judge effects, are given by
<<>>=
qnorm(0.95) * c(-1, 1) * fm2$stDev
@
At the baseline experimental conditions (cold and no contact) the
probabilities of bitterness ratings in the five categories for a 5th
percentile judge are
<<>>=
pred <- function(eta, theta, cat = 1:(length(theta)+1),
                 inv.link = plogis) {
  Theta <- c(-1e3, theta, 1e3)
  sapply(cat, function(j)
    inv.link(Theta[j+1] - eta) - inv.link(Theta[j] - eta))
}
pred(qnorm(0.05) * fm2$stDev, fm2$Theta)
@
We can compute these probabilities for average, 5th and 95th
percentile judges at the four experimental conditions. The following
code plots these probabilities and the results are shown in
Fig.~\ref{fig:ratingProb}.
<<ratingProbPlot, fig=TRUE, include=FALSE>>=
mat <- expand.grid(judge = qnorm(0.95) * c(-1, 0, 1) * fm2$stDev,
                   contact = c(0, fm2$beta[2]),
                   temp = c(0, fm2$beta[1]))
pred.mat <- pred(eta=rowSums(mat), theta=fm2$Theta)
lab <- paste("contact=", rep(levels(wine$contact), 2), ", ",
             "temp=", rep(levels(wine$temp), each=2), sep="")
par(mfrow=c(2, 2))
for(k in c(1, 4, 7, 10)) {
  plot(1:5, pred.mat[k,], lty=2, type = "l", ylim=c(0,1),
       xlab="Bitterness rating scale", axes=FALSE,
       ylab="Probability", main=lab[ceiling(k/3)], las=1)
  axis(1); axis(2)
  lines(1:5, pred.mat[k+1, ], lty=1)
  lines(1:5, pred.mat[k+2, ], lty=3)
  legend("topright",
         c("avg. judge", "5th %-tile judge", "95th %-tile judge"),
         lty=1:3, bty="n")
}
@

\begin{figure}
  \centering
<<fig=TRUE, echo=FALSE>>=
k <- 1
plot(1:5, pred.mat[k,], lty=2, type = "l", ylim=c(0,1),
     xlab="Bitterness rating scale", axes=FALSE,
     ylab="Probability", main=lab[ceiling(k/3)], las=1)
axis(1); axis(2)
lines(1:5, pred.mat[k+1, ], lty=1)
lines(1:5, pred.mat[k+2, ], lty=3)
legend("topright",
       c("avg. judge", "5th %-tile judge", "95th %-tile judge"),
       lty=1:3, bty="n")
@
<<fig=TRUE, echo=FALSE>>=
k <- 4
plot(1:5, pred.mat[k,], lty=2, type = "l", ylim=c(0,1),
     xlab="Bitterness rating scale", axes=FALSE,
     ylab="Probability", main=lab[ceiling(k/3)], las=1)
axis(1); axis(2)
lines(1:5, pred.mat[k+1, ], lty=1)
lines(1:5, pred.mat[k+2, ], lty=3)
legend("topright",
       c("avg. judge", "5th %-tile judge", "95th %-tile judge"),
       lty=1:3, bty="n")
@
<<fig=TRUE, echo=FALSE>>=
k <- 7
plot(1:5, pred.mat[k,], lty=2, type = "l", ylim=c(0,1),
     xlab="Bitterness rating scale", axes=FALSE,
     ylab="Probability", main=lab[ceiling(k/3)], las=1)
axis(1); axis(2)
lines(1:5, pred.mat[k+1, ], lty=1)
lines(1:5, pred.mat[k+2, ], lty=3)
legend("topright",
       c("avg. judge", "5th %-tile judge", "95th %-tile judge"),
       lty=1:3, bty="n")
@
<<fig=TRUE, echo=FALSE>>=
k <- 10
plot(1:5, pred.mat[k,], lty=2, type = "l", ylim=c(0,1),
     xlab="Bitterness rating scale", axes=FALSE,
     ylab="Probability", main=lab[ceiling(k/3)], las=1)
axis(1); axis(2)
lines(1:5, pred.mat[k+1, ], lty=1)
lines(1:5, pred.mat[k+2, ], lty=3)
legend("topright",
       c("avg. judge", "5th %-tile judge", "95th %-tile judge"),
       lty=1:3, bty="n")
judge", "5th %-tile judge", "95th %-tile judge"), lty=1:3, bty="n") @ \caption{Rating probabilities for average and extreme judges at different experimental conditions.} \label{fig:ratingProb} \end{figure} At constant experimental conditions the odds ratio for a bitterness rating in category $j$ or above for a 95th percentile judge relative to a 5th percentile judge is <<>>= exp(2*qnorm(0.95) * fm2$stDev) @ The differences between judges can also be expressed in terms of the interquartile range: the odds ratio for a bitterness rating in category $j$ or above for a third quartile judge relative to a first quartile judge is <<>>= exp(2*qnorm(0.75) * fm2$stDev) @ \newpage \bibliography{ordinal} %% \newpage \end{document} <>= @