@article{choi_maximum_1969,
	author = {Choi, S. C. and Wette, R.},
	title = {Maximum Likelihood Estimation of the Parameters of the Gamma Distribution and Their Bias},
	journal = {Technometrics},
	volume = {11},
	number = {4},
	pages = {683--690},
	month = nov,
	year = {1969},
	issn = {0040-1706},
	doi = {10.2307/1266892},
	url = {http://www.jstor.org/stable/1266892},
	abstract = {The numerical technique of the maximum likelihood method to estimate the parameters of Gamma distribution is examined. A convenient table is obtained to facilitate the maximum likelihood estimation of the parameters and the estimates of the variance-covariance matrix. The bias of the estimates is investigated numerically. The empirical result indicates that the bias of both parameter estimates produced by the maximum likelihood method is positive.},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/UWWXXDPI/Choi and Wette - 1969 - Maximum Likelihood Estimation of the Parameters of.pdf:application/pdf}
},


@article{white_multiple_2011,
	author = {White, Ian R and Royston, Patrick and Wood, Angela M},
	title = {Multiple imputation using chained equations: Issues and guidance for practice},
	shorttitle = {Multiple imputation using chained equations},
	journal = {Statistics in Medicine},
	volume = {30},
	number = {4},
	pages = {377--399},
	month = feb,
	year = {2011},
	issn = {1097-0258},
	doi = {10.1002/sim.4067},
	url = {http://onlinelibrary.wiley.com/doi/10.1002/sim.4067/abstract},
	abstract = {Multiple imputation by chained equations is a flexible and practical approach to handling missing data. We describe the principles of the method and show how to impute categorical and quantitative variables, including skewed variables. We give guidance on how to specify the imputation model and how many imputations are needed. We describe the practical analysis of multiply imputed data, including model building and model checking. We stress the limitations of the method and discuss the possible pitfalls. We illustrate the ideas using a data set in mental health, giving Stata code fragments. Copyright © 2010 John Wiley \& Sons, Ltd.},
	keywords = {fully conditional specification, missing data, multiple imputation}
},

@article{kvalseth_cautionary_1985,
	title = {Cautionary Note about {$R^2$}},
	volume = {39},
	issn = {0003-1305},
	url = {http://www.jstor.org/stable/2683704},
	doi = {10.2307/2683704},
	abstract = {The coefficient of determination ($R^2$) is perhaps the single most extensively used measure of goodness of fit for regression models. It is also widely misused. The primary source of the problem is that except for linear models with an intercept term, the several alternative $R^2$ statistics are not generally equivalent. This article discusses various considerations and potential pitfalls in using the $R^2$'s. Specific points are exemplified by means of empirical data. A new resistant statistic is also introduced.},
	number = {4},
	journal = {The American Statistician},
	author = {Kvalseth, Tarald O.},
	month = nov,
	year = {1985},
	pages = {279--285},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/8CS2HP4W/Kvalseth - 1985 - Cautionary Note about R2.pdf:application/pdf}
},

@article{fisher_interpretation_1922,
	title = {On the Interpretation of {$\chi^2$} from Contingency Tables, and the Calculation of {P}},
	volume = {85},
	issn = {0952-8385},
	url = {http://www.jstor.org/stable/2340521},
	number = {1},
	journal = {Journal of the Royal Statistical Society},
	author = {Fisher, R. A.},
	month = jan,
	year = {1922},
	pages = {87--94}
},

@book{albert_bayesian_2007,
	address = {New York, {NY}},
	title = {Bayesian Computation with R},
	isbn = {978-0-387-71384-7},
	url = {http://www.springerlink.com/content/q44075/},
	publisher = {Springer New York},
	author = {Albert, Jim},
	year = {2007},
	file = {SpringerLink - Fulltext:/home/pauljohn/Documents/Zotero/storage/4MQ7CR8H/q44075.html:text/html}
},

@book{jackman_bayesian_2009,
	author = {Jackman, Simon},
	title = {Bayesian Analysis for the Social Sciences},
	publisher = {John Wiley and Sons},
	address = {Chichester, {UK}},
	month = oct,
	year = {2009},
	isbn = {9780470686621},
	file = {Wiley InterScience: Book Home - Bayesian Analysis for the Social Sciences:/home/pauljohn/Documents/Zotero/storage/BGDGC3AS/122648004.html:text/html}
},

@article{okunade_comparative_1993,
	author = {Okunade, Albert A. and Chang, Cyril F. and Evans, Richard D.},
	title = {Comparative Analysis of Regression Output Summary Statistics in Common Statistical Packages},
	journal = {The American Statistician},
	volume = {47},
	number = {4},
	pages = {298--303},
	month = nov,
	year = {1993},
	issn = {0003-1305},
	doi = {10.2307/2685291},
	url = {http://www.jstor.org/stable/2685291},
	abstract = {An important question in applied statistics is whether statistical regression programs output distorted summary statistics {(F} and R2) for a no-intercept regression model. Uyar and Erdem recently confirmed this problem using one of the premier statistical packages. This article extends the Uyar-Erdem experiment to eight common statistical and econometric regression packages. Only four of them warn users that the R2 has been redefined for a model without an explicit intercept. Consequently, the problem Uyar and Erdem identified extends in varying degrees, ranging from mild to severe, to many other regression packages.},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/AXBWR4MS/Okunade et al. - 1993 - Comparative Analysis of Regression Output Summary .pdf:application/pdf}
},

@article{hirji_computing_1987,
	title = {Computing Distributions for Exact Logistic Regression},
	volume = {82},
	issn = {0162-1459},
	url = {http://www.jstor.org/stable/2289388},
	doi = {10.2307/2289388},
	abstract = {Logistic regression is a commonly used technique for the analysis of retrospective and prospective epidemiological and clinical studies with binary response variables. Usually this analysis is performed using large sample approximations. When the sample size is small or the data structure sparse, the accuracy of the asymptotic approximations is in question. On other occasions, singularity of the covariance matrix of parameter estimates precludes asymptotic analysis. Under these circumstances, use of exact inferential procedures would seem to be a prudent alternative. Cox (1970) showed that exact inference on the parameters of a logistic model with binary response requires consideration of the distribution of sufficient statistics for these parameters. To date, however, resorting to the exact method has not been computationally feasible except in a few special situations. This article presents an efficient recursive algorithm that generates the joint and conditional distributions of the sufficient statistics and thus makes it feasible to perform exact inference for a much wider range of situations. Various methods of improving the efficiency of the basic algorithm, such as the application of appropriate criteria to delete infeasible vectors, recording covariates, sorting observations by covariate values, and use of a two-step recursive procedure, are also described. The algorithm given in this article enables the data analyst to perform exact inference for models with or without interaction terms and for matched as well as unmatched designs. Exact analysis proposed by Cox (1970) was restricted to a single parameter. Since our algorithm can be used to generate any combination of joint and conditional distributions of the sufficient statistics, it paves the way for multiparametric exact inference. Further, this algorithm also provides a tool for comparing exact and asymptotic inferential procedures. Such comparisons would, it is hoped, provide statisticians with guidelines stating when each of the procedures should be preferred.},
	number = {400},
	journal = {Journal of the American Statistical Association},
	author = {Hirji, Karim F. and Mehta, Cyrus R. and Patel, Nitin R.},
	month = dec,
	year = {1987},
	pages = {1110--1117}
},

@article{mehta_exact_1995,
	title = {Exact logistic regression: Theory and examples},
	volume = {14},
	issn = {1097-0258},
	url = {http://dx.doi.org/10.1002/sim.4780141908},
	doi = {10.1002/sim.4780141908},
	number = {19},
	journal = {Statistics in Medicine},
	author = {Mehta, Cyrus R and Patel, Nitin R},
	year = {1995},
	pages = {2143--2160}
},

@book{hacker_end_1970,
	address = {New York},
	edition = {First},
	title = {The End of the American Era},
	publisher = {Atheneum},
	author = {Hacker, Andrew},
	year = {1970},
	keywords = {1960-1980, 1970-, Civilization, Social conditions, United States}
},

@book{lancaster_introduction_2004,
	author = {Lancaster, Tony},
	title = {Introduction to Modern Bayesian Econometrics},
	publisher = {Wiley-Blackwell},
	month = jun,
	year = {2004},
	isbn = {1405117206}
},

@article{fienberg_when_2006,
	title = {When did {Bayesian} inference become ``{Bayesian}''?},
	url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.124.8632},
	journal = {Bayesian Analysis},
	author = {Fienberg, Stephen E},
	year = {2006},
	pages = {1--41}
},

@article{anderson-sprecher_model_1994,
	title = {Model Comparisons and {$R^2$}},
	volume = {48},
	issn = {0003-1305},
	url = {http://www.jstor.org/stable/2684259},
	doi = {10.2307/2684259},
	abstract = {Much of the confusion surrounding interpretation and application of the coefficient of determination, $R^2$, can be alleviated if it is defined explicitly as a comparison of a given model to the null model $EY = \beta_0$. The model-comparison definition allows $R^2$ to be easily generalized, and standard extensions such as coefficients of partial determination are seen to be special cases of this generalization. Formulas become simpler, more unified, and more easily understood. Commonly cited problem areas such as $R^2$ for the no-intercept model and model comparisons using different values of $R^2$ are also clarified by this perspective.},
	number = {2},
	journal = {The American Statistician},
	author = {Anderson-Sprecher, Richard},
	month = may,
	year = {1994},
	pages = {113--117},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/S899WKWT/Anderson-Sprecher - 1994 - Model Comparisons and R2.pdf:application/pdf}
},

@article{mullahy_instrumental-variable_1997,
	author = {Mullahy, John},
	title = {Instrumental-Variable Estimation of Count Data Models: Applications to Models of Cigarette Smoking Behavior},
	shorttitle = {Instrumental-Variable Estimation of Count Data Models},
	journal = {{SSRN} {eLibrary}},
	month = nov,
	year = {1997},
	url = {http://papers.ssrn.com/sol3/papers.cfm?abstract_id=61248},
	abstract = {st analyses involving microdata, applications of count data models must somehow account for unobserved heterogeneity. The count model literature has generally assumed that unobservables and observed covariates are statistically independent. Yet for many applications this independence assumption is clearly tenuous. When the unobservables are omitted variables correlated with included regressors, standard estimation methods will generally be inconsistent. Though alternative consistent estimators may exist in special circumstances, it is suggested here that a nonlinear instrumental- variable strategy offers a reasonably general solution to such estimation problems. This approach is applied in two examples that focus on cigarette smoking behavior.},
	file = {SSRN Snapshot:/home/pauljohn/Documents/Zotero/storage/JDXMZZWF/papers.html:text/html}
},

@book{degroot_optimal_2004,
	edition = {First},
	title = {Optimal Statistical Decisions},
	isbn = {047168029X},
	publisher = {Wiley-Interscience},
	author = {{DeGroot}, Morris H.},
	month = apr,
	year = {2004}
},

@book{berger_statistical_1985,
	edition = {Second},
	title = {Statistical Decision Theory and Bayesian Analysis},
	isbn = {0387960988},
	publisher = {Springer},
	author = {Berger, James O.},
	month = aug,
	year = {1985}
},

@book{harrell_regression_2010,
	author = {Harrell, Frank E.},
	title = {Regression Modeling Strategies: With Applications to Linear Models, Logistic Regression, and Survival Analysis},
	shorttitle = {Regression Modeling Strategies},
	publisher = {Springer},
	month = dec,
	year = {2010},
	isbn = {1441929185}
},

@article{godambe_parameters_1986,
	author = {Godambe, V. P. and Thompson, M. E.},
	title = {Parameters of Superpopulation and Survey Population: Their Relationships and Estimation},
	shorttitle = {Parameters of Superpopulation and Survey Population},
	journal = {International Statistical Review / Revue Internationale de Statistique},
	volume = {54},
	number = {2},
	pages = {127--138},
	year = {1986},
	issn = {0306-7734},
	doi = {10.2307/1403139},
	url = {http://www.jstor.org/stable/1403139},
	abstract = {Utilizing the theory of estimating functions {(Godambe}, 1960; Godambe \& Thompson, 1978, 1984), this paper relates superpopulation parameters with those of a survey population under study Further it establishes optimal estimation, simultaneously, of both types of parameters. /// Cet article étudie les relations entre les paramètres d'une population et de la 'superpopulation' correspondante. Le concept de fonctions définies par un paramètre {(Godambe} \& Thompson, 1984) est utilisé à cette fin. De la théorie des fonctions d'estimation {(Godambe}, 1960, Godambe \& Thompson, 1978), un processus optimal d'estimation simultanée pour chaque type de paramètre est introduit.},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/W8QFBTAZ/Godambe and Thompson - 1986 - Parameters of Superpopulation and Survey Populatio.pdf:application/pdf}
},

@book{mccullagh_generalized_1983,
	author = {{McCullagh}, P. and Nelder, John A},
	title = {Generalized Linear Models},
	series = {Monographs on statistics and applied probability},
	number = {37},
	publisher = {Chapman and Hall},
	address = {London},
	year = {1983},
	isbn = {0412238500},
	keywords = {Linear models {(Statistics)}}
},

@book{gill_bayesian_2007,
	edition = {Second},
	title = {Bayesian Methods: A Social and Behavioral Sciences Approach},
	isbn = {1584885629},
	shorttitle = {Bayesian Methods},
	publisher = {Chapman and {Hall/CRC}},
	author = {Gill, Jeff},
	month = nov,
	year = {2007}
},

@article{kolassa_algorithms_2003,
	title = {Algorithms for approximate conditional inference},
	volume = {13},
	issn = {0960-3174},
	url = {http://dx.doi.org/10.1023/A:1023252308207},
	doi = {10.1023/A:1023252308207},
	journal = {Statistics and Computing},
	author = {Kolassa, John E},
	year = {2003},
	pages = {121--126}
},

@book{pinheiro_mixed_2000,
	author = {Pinheiro, Jose C. and Bates, Douglas M.},
	title = {Mixed Effects Models in S and S-Plus},
	publisher = {Springer},
	month = may,
	year = {2000},
	isbn = {0387989579}
},

@article{roy_convergence_2007,
	title = {Convergence rates and asymptotic standard errors for {Markov} chain {Monte Carlo} algorithms for {Bayesian} probit regression},
	volume = {69},
	issn = {1369-7412},
	doi = {10.1111/j.1467-9868.2007.00602.x},
	abstract = {Consider a probit regression problem in which Y1, ..., Y n are independent Bernoulli random variables such that where x i is a p-dimensional vector of known covariates that are associated with Y i, β is a p-dimensional vector of unknown regression coefficients and Φ(·) denotes the standard normal distribution function. We study Markov chain Monte Carlo algorithms for exploring the intractable posterior density that results when the probit regression likelihood is combined with a flat prior on β. We prove that Albert and Chib's data augmentation algorithm and Liu and Wu's {PX-DA} algorithm both converge at a geometric rate, which ensures the existence of central limit theorems for ergodic averages under a second-moment condition. Although these two algorithms are essentially equivalent in terms of computational complexity, results of Hobert and Marchev imply that the {PX-DA} algorithm is theoretically more efficient in the sense that the asymptotic variance in the central limit theorem under the {PX-DA} algorithm is no larger than that under Albert and Chib's algorithm. We also construct minorization conditions that allow us to exploit regenerative simulation techniques for the consistent estimation of asymptotic variances. As an illustration, we apply our results to van Dyk and Meng's lupus data. This example demonstrates that huge gains in efficiency are possible by using the {PX-DA} algorithm instead of Albert and Chib's algorithm.},
	number = {4},
	journal = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
	author = {Roy, Vivekananda and Hobert, James P.},
	year = {2007},
	keywords = {{ALGORITHMS}, {ASYMPTOTIC} distribution {(Probability} theory), Asymptotic variance, {CENTRAL} limit theorem, Data augmentation algorithm, Geometric ergodicity, {GEOMETRIC} measure theory, {LIMIT} theorems {(Probability} theory), {MARKOV} processes, Minorization condition, {PX-DA} algorithm, Regeneration, Reversible Markov chain},
	pages = {607--623}
},

@book{dalgaard_introductory_2002,
	author = {Dalgaard, Peter},
	title = {Introductory Statistics with R},
	series = {Statistics and computing},
	publisher = {Springer},
	address = {New York},
	year = {2002},
	isbn = {0387954759},
	lccn = {{QA276.4} {.D33} 2002},
	keywords = {Data processing, R {(Computer} program language), Statistics}
},

@article{stigler_who_1983,
	title = {Who Discovered {Bayes's} Theorem?},
	volume = {37},
	issn = {0003-1305},
	url = {http://www.jstor.org/stable/2682766},
	number = {4},
	journal = {The American Statistician},
	author = {Stigler, Stephen M.},
	month = nov,
	year = {1983},
	pages = {290--296}
},

@techreport{celeux_stochastic_????,
	title = {On Stochastic Versions of the {EM} Algorithm},
	type = {Research Report},
	number = {RR-2514},
	institution = {INRIA},
	year = {1995},
	url = {http://hal.archives-ouvertes.fr/inria-00074164/PDF/RR-2514.pdf},
	abstract = {We compare three different stochastic versions of the {EM} algorithm: The {SEM} algorithm, the {SAEM} algorithm and the {MCEM} algorithm. We suggest that the most relevant contribution of the {MCEM} methodology is what we call the simulated annealing {MCEM} algorithm, which turns out to be very close to {SAEM.} We focus particularly on the mixture of distributions problem. In this context, we review the available theoretical results on the convergence of these algorithms and on the behavior of {SEM} as the sample size tends to infinity. The second part is devoted to intensive Monte Carlo numerical simulations and a real data study. We show that, for some particular mixture situations, the {SEM} algorithm is almost always preferable to the {EM} and simulated annealing versions {SAEM} and {MCEM.} For some very intricate mixtures, however, none of these algorithms can be confidently used. Then, {SEM} can be used as an efficient data exploratory tool for locating significant maxima of the likelihood function. In the real data case, we show that the {SEM} stationary distribution provides a contrasted view of the loglikelihood by emphasizing sensible maxima.},
	author = {Celeux, Gilles and Chauveau, Didier and Diebolt, Jean},
	keywords = {/, {ALGORITHMS}, {DATA}, {DISTRIBUTION}, {EXPERIMENTS}, {INCOMPLETE}, {MIXTURE}, {MODELS}, {MONTE-CARLO}, {OF}, {STOCHASTIC}}
},

@article{gromping_variable_????,
	title = {Variable Importance Assessment in Regression: Linear Regression versus Random Forest},
	volume = {63},
	shorttitle = {Variable Importance Assessment in Regression},
	doi = {10.1198/tast.2009.08199},
	number = {4},
	journal = {The American Statistician},
	author = {Gr{\"o}mping, Ulrike},
	year = {2009},
	pages = {308--319}
},

@article{wong_hierarchical_1985,
	title = {The Hierarchical Logistic Regression Model for Multilevel Analysis},
	volume = {80},
	issn = {0162-1459},
	url = {http://www.jstor.org/stable/2288464},
	abstract = {A hierarchical logistic regression model is proposed for studying data with group structure and a binary response variable. The group structure is defined by the presence of micro observations embedded within contexts (macro observations), and the specification is at both of these levels. At the first (micro) level, the usual logistic regression model is defined for each context. The same regressors are used in each context, but the micro regression coefficients are free to vary over contexts. At the second level, the micro coefficients are treated as functions of macro regressors. An empirical Bayes estimation procedure is proposed for estimating the micro and macro coefficients. Explicit formulas are provided that are computationally feasible for large-scale data analyses; these include an algorithm for finding the maximum likelihood estimates of the covariance components representing within- and between-macro-equation error variability. The methodology is applied to World Fertility Survey data, with individuals viewed as micro observations and countries as macro observations.},
	number = {391},
	journal = {Journal of the American Statistical Association},
	author = {Wong, George Y. and Mason, William M.},
	month = sep,
	year = {1985},
	pages = {513--524}
},

@article{cai_metropolis-hastings_2010,
	title = {{Metropolis-Hastings} {Robbins-Monro} Algorithm for Confirmatory Item Factor Analysis},
	volume = {35},
	url = {http://jeb.sagepub.com/content/35/3/307.abstract},
	doi = {10.3102/1076998609353115},
	abstract = {Item factor analysis {(IFA)}, already well established in educational measurement, is increasingly applied to psychological measurement in research settings. However, high-dimensional confirmatory {IFA} remains a numerical challenge. The current research extends the Metropolis-Hastings Robbins-Monro {(MH-RM)} algorithm, initially proposed for exploratory {IFA}, to the case of maximum likelihood estimation under user-defined linear restrictions for confirmatory {IFA.} {MH-RM} naturally integrates concepts such as the missing data formulation, data augmentation, the Metropolis algorithm, and stochastic approximation. In a limited simulation study, the accuracy of the {MH-RM} algorithm is checked against the standard Bock-Aitkin expectation-maximization {(EM)} algorithm. To demonstrate the efficiency and flexibility of the {MH-RM} algorithm, it is applied to the {IFA} of real data from pediatric quality-of-life {(QOL)} research in comparison with adaptive quadrature-based {EM} algorithm. The particular data set required a confirmatory item factor model with eight factors and a variety of equality and fixing constraints to implement the hypothesized factor pattern. {MH-RM} converged in less than 3 minutes to the maximum likelihood solution while the {EM} algorithm spent well over 4 hours.},
	number = {3},
	journal = {Journal of Educational and Behavioral Statistics},
	author = {Cai, Li},
	month = jun,
	year = {2010},
	pages = {307--335},
	file = {Snapshot:/home/pauljohn/Documents/Zotero/storage/E8PA58DA/307.html:text/html}
},

@article{kirby_using_2009,
	title = {Using Instrumental Variable {(IV)} Tests to Evaluate Model Specification in Latent Variable Structural Equation Models},
	volume = {39},
	issn = {0081-1750},
	abstract = {Structural Equation Modeling with latent variables {(SEM)} is a powerful tool for social and behavioral scientists, combining many of the strengths of psychometrics and econometrics into a single framework. The most common estimator for {SEM} is the full-information maximum likelihood estimator {(ML)}, but there is continuing interest in limited information estimators because of their distributional robustness and their greater resistance to structural specification errors. However, the literature discussing model fit for limited information estimators for latent variable models is sparse compared to that for full information estimators. We address this shortcoming by providing several specification tests based on the {2SLS} estimator for latent variable structural equation models developed by Bollen (1996). We explain how these tests can be used to not only identify a misspecified model, but to help diagnose the source of misspecification within a model. We present and discuss results from a Monte Carlo experiment designed to evaluate the finite sample properties of these tests. Our findings suggest that the {2SLS} tests successfully identify most misspecified models, even those with modest misspecification, and that they provide researchers with information that can help diagnose the source of misspecification.},
	number = {1},
	journal = {Sociological Methodology},
	author = {Kirby, James B. and Bollen, Kenneth A.},
	month = jul,
	year = {2009},
	pages = {327--355}
},

@book{raudenbush_hierarchical_2002,
	title = {Hierarchical Linear Models: Applications and Data Analysis Methods},
	isbn = {9780761919049},
	shorttitle = {Hierarchical Linear Models},
	publisher = {{SAGE}},
	author = {Raudenbush, Stephen W. and Bryk, Anthony S.},
	year = {2002}
},

@article{zamar_elrm:_2007,
	author = {Zamar, David and {McNeney}, Brad and Graham, Jinko},
	title = {elrm: Software Implementing Exact-like Inference for Logistic Regression Models},
	journal = {Journal of Statistical Software},
	volume = {21},
	number = {3},
	year = {2007},
	url = {http://www.jstatsoft.org/}
},

@book{congdon_bayesian_2006,
	title = {Bayesian Statistical Modelling},
	isbn = {9780470018750},
	publisher = {John Wiley \& Sons},
	author = {Congdon, P.},
	year = {2006}
},

@article{fan_variable_2001,
	author = {Fan, Jianqing and Li, Runze},
	title = {Variable Selection via Nonconcave Penalized Likelihood and its Oracle Properties},
	journal = {Journal of the American Statistical Association},
	volume = {96},
	number = {456},
	pages = {1348--1360},
	month = dec,
	year = {2001},
	issn = {0162-1459},
	doi = {10.1198/016214501753382273},
	url = {http://pubs.amstat.org/doi/abs/10.1198/016214501753382273},
	file = {American Statistical Association - Journal of the American Statistical Association - 96(456):1348:/home/pauljohn/Documents/Zotero/storage/2QSAUNU8/016214501753382273.html:text/html}
},

@article{albert_bayesian_1992,
	title = {Bayesian Estimation of Normal Ogive Item Response Curves Using {Gibbs} Sampling},
	volume = {17},
	url = {http://jeb.sagepub.com/content/17/3/251.abstract},
	doi = {10.3102/10769986017003251},
	abstract = {The problem of estimating item parameters from a two-parameter normal ogive model is considered. Gibbs sampling {(Gelfand} \& Smith, 1990) is used to simulate draws from the joint posterior distribution of the ability and item parameters. This method gives marginal posterior density estimates for any parameter of interest; these density estimates can be used to judge the accuracy of normal approximations based on maximum likelihood estimates. This simulation technique is illustrated using data from a mathematics placement exam.},
	number = {3},
	journal = {Journal of Educational and Behavioral Statistics},
	author = {Albert, James H.},
	year = {1992},
	pages = {251--269},
	file = {Snapshot:/home/pauljohn/Documents/Zotero/storage/M5BWZQIT/251.html:text/html}
},

@book{venables_modern_2002,
	address = {New York},
	edition = {Fourth},
	series = {Statistics and computing},
	title = {Modern Applied Statistics with S},
	isbn = {0387954570},
	lccn = {{QA276.4} {.V46} 2002},
	publisher = {Springer},
	author = {Venables, W. N and Ripley, Brian D},
	year = {2002},
	keywords = {Data processing, Mathematical statistics, S {(Computer} system), Statistics}
},

@article{tam_optimality_1987,
	title = {Optimality of {Royall's} predictor under a {Gaussian} superpopulation model},
	volume = {74},
	url = {http://biomet.oxfordjournals.org/content/74/3/659.abstract},
	doi = {10.1093/biomet/74.3.659},
	abstract = {This note shows that under the assumption of a Gaussian superpopulation model with a general symmetric and positive-definite covariance matrix, the model-based predictor of Royall (1976) is the unbiased minimum mean squared error predictor of the total of a given finite population. This somewhat anticipated result is a generalization of recent published work in which it was derived under more restrictive assumptions.},
	number = {3},
	journal = {Biometrika},
	author = {Tam, S. M.},
	year = {1987},
	pages = {659--660},
	file = {Snapshot:/home/pauljohn/Documents/Zotero/storage/EAG8D629/659.html:text/html}
},

@article{king_how_1986,
	author = {King, Gary},
	title = {How Not to Lie with Statistics: Avoiding Common Mistakes in Quantitative Political Science},
	shorttitle = {How Not to Lie with Statistics},
	journal = {American Journal of Political Science},
	volume = {30},
	number = {3},
	pages = {666--687},
	year = {1986},
	issn = {0092-5853},
	doi = {10.2307/2111095},
	url = {http://www.jstor.org/stable/2111095},
	abstract = {This article identifies a set of serious theoretical mistakes appearing with troublingly high frequency throughout the quantitative political science literature. These mistakes are all based on faulty statistical theory or on erroneous statistical analysis. Through algebraic and interpretive proofs, some of the most commonly made mistakes are explicated and illustrated. The theoretical problem underlying each is highlighted, and suggested solutions are provided throughout. It is argued that closer attention to these problems and solutions will result in more reliable quantitative analyses and more useful theoretical contributions},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/PJEDN8MT/King - 1986 - How Not to Lie with Statistics Avoiding Common Mi.pdf:application/pdf}
},

@article{king_analyzing_2001,
	title = {Analyzing Incomplete Political Science Data: An Alternative Algorithm for Multiple Imputation},
	volume = {95},
	issn = {0003-0554},
	shorttitle = {Analyzing Incomplete Political Science Data},
	url = {http://www.jstor.org/stable/3117628},
	abstract = {We propose a remedy for the discrepancy between the way political scientists analyze data with missing values and the recommendations of the statistics community. Methodologists and statisticians agree that "multiple imputation" is a superior approach to the problem of missing data scattered through one's explanatory and dependent variables than the methods currently used in applied data analysis. The discrepancy occurs because the computational algorithms used to apply the best multiple imputation models have been slow, difficult to implement, impossible to run with existing commercial statistical packages, and have demanded considerable expertise. We adapt an algorithm and use it to implement a general-purpose, multiple imputation model for missing data. This algorithm is considerably faster and easier to use than the leading method recommended in the statistics literature. We also quantify the risks of current missing data practices, illustrate how to use the new procedure, and evaluate this alternative through simulated data as well as actual empirical examples. Finally, we offer easy-to-use software that implements all methods discussed.},
	number = {1},
	journal = {The American Political Science Review},
	author = {King, Gary and Honaker, James and Joseph, Anne and Scheve, Kenneth},
	month = mar,
	year = {2001},
	pages = {49--69}
},

@article{mccullough_accuracy_2008,
	title = {On the accuracy of statistical procedures in Microsoft Excel 2007},
	volume = {52},
	issn = {0167-9473},
	url = {http://www.sciencedirect.com/science/article/B6V8V-4S1S6FC-5/2/ce3a208e9c596cee263a6f90b7ffe936},
	doi = {10.1016/j.csda.2008.03.004},
	abstract = {Excel 2007, like its predecessors, fails a standard set of intermediate-level accuracy tests in three areas: statistical distributions, random number generation, and estimation. Additional errors in specific Excel procedures are discussed. Microsoft's continuing inability to correctly fix errors is discussed. No statistical procedure in Excel should be used until Microsoft documents that the procedure is correct; it is not safe to assume that Microsoft Excel's statistical procedures give the correct answer. Persons who wish to conduct statistical analyses should use some other package.},
	number = {10},
	journal = {Computational Statistics \& Data Analysis},
	author = {McCullough, B. D. and Heiser, David A.},
	month = jun,
	year = {2008},
	pages = {4570--4578},
	file = {ScienceDirect Full Text PDF:/home/pauljohn/Documents/Zotero/storage/QGFE7T66/McCullough and Heiser - 2008 - On the accuracy of statistical procedures in Micro.pdf:application/pdf;ScienceDirect Snapshot:/home/pauljohn/Documents/Zotero/storage/V96MKF2P/science.html:text/html}
},

@article{carlin_case_2001,
	title = {A case study on the choice, interpretation and checking of multilevel models for longitudinal binary outcomes},
	volume = {2},
	url = {http://biostatistics.oxfordjournals.org/content/2/4/397.abstract},
	doi = {10.1093/biostatistics/2.4.397},
	abstract = {Recent advances in statistical software have led to the rapid diffusion of new methods for modelling longitudinal data. Multilevel (also known as hierarchical or random effects) models for binary outcomes have generally been based on a logistic–normal specification, by analogy with earlier work for normally distributed data. The appropriate application and interpretation of these models remains somewhat unclear, especially when compared with the computationally more straightforward semiparametric or ‘marginal’ modelling {(GEE)} approaches. In this paper we pose two interrelated questions. First, what limits should be placed on the interpretation of the coefficients and inferences derived from random‐effect models involving binary outcomes? Second, what diagnostic checks are appropriate for evaluating whether such random‐effect models provide adequate fits to the data? We address these questions by means of an extended case study using data on adolescent smoking from a large cohort study. Bayesian estimation methods are used to fit a discrete‐mixture alternative to the standard logistic–normal model, and posterior predictive checking is used to assess model fit. Surprising parallels in the parameter estimates from the logistic–normal and mixture models are described and used to question the interpretability of the so-called ‘subject‐specific’ regression coefficients from the standard multilevel approach. Posterior predictive checks suggest a serious lack of fit of both multilevel models. The results do not provide final answers to the two questions posed, but we expect that lessons learned from the case study will provide general guidance for further investigation of these important issues.},
	number = {4},
	journal = {Biostatistics},
	author = {Carlin, John B. and Wolfe, Rory and Brown, C. Hendricks and Gelman, Andrew},
	month = dec,
	year = {2001},
	pages = {397--416},
	file = {Snapshot:/home/pauljohn/Documents/Zotero/storage/JDDCTHFT/397.html:text/html}
},

@book{efron_introduction_1993,
	author = {Efron, Bradley and Tibshirani, Robert},
	title = {An Introduction to the Bootstrap},
	series = {Monographs on statistics and applied probability},
	number = {57},
	publisher = {Chapman \& Hall},
	address = {New York},
	year = {1993},
	isbn = {0412042312},
	keywords = {Bootstrap {(Statistics)}}
},

@article{huberty_note_1994,
	title = {A Note on Interpreting an {$R^2$} Value},
	volume = {19},
	issn = {1076-9986},
	url = {http://www.jstor.org/stable/1165398},
	doi = {10.2307/1165398},
	abstract = {It is suggested that the standard traditional null hypothesis of $\rho^2 = 0$ be reconsidered in favor of $\rho^2 = E(R^2)$. The test of the latter hypothesis is discussed. Also discussed is an effect-size index that is the difference between an adjusted $R^2$ and $E(R^2)$.},
	number = {4},
	journal = {Journal of Educational and Behavioral Statistics},
	author = {Huberty, Carl J.},
	month = dec,
	year = {1994},
	pages = {351--356},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/4JJI8V3A/Huberty - 1994 - A Note on Interpreting an R2 Value.pdf:application/pdf}
},

@article{dempster_maximum_1977,
	title = {Maximum likelihood from incomplete data via the {EM} algorithm},
	volume = {39},
	url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.133.4884},
	number = {1},
	journal = {Journal of the Royal Statistical Society, Series B},
	author = {Dempster, A. P. and Laird, N. M. and Rubin, D. B.},
	year = {1977},
	pages = {1--38}
},

@article{zuur_protocol_2010,
	title = {A protocol for data exploration to avoid common statistical problems},
	volume = {1},
	issn = {2041-210X},
	url = {http://onlinelibrary.wiley.com/doi/10.1111/j.2041-210X.2009.00001.x/abstract},
	doi = {10.1111/j.2041-210X.2009.00001.x},
	abstract = {1. While teaching statistics to ecologists, the lead authors of this paper have noticed common statistical problems. If a random sample of their work (including scientific papers) produced before doing these courses were selected, half would probably contain violations of the underlying assumptions of the statistical techniques employed.},
	number = {1},
	journal = {Methods in Ecology and Evolution},
	author = {Zuur, Alain F. and Ieno, Elena N. and Elphick, Chris S.},
	month = mar,
	year = {2010},
	keywords = {collinearity, data exploration, independence, transformations, type I and {II} errors, zero inflation},
	pages = {3--14}
},

@article{wang_variable_2009,
	title = {Variable selection in spatial regression via penalized least squares},
	volume = {37},
	issn = {0319-5724},
	url = {http://onlinelibrary.wiley.com/doi/10.1002/cjs.10032/full},
	doi = {10.1002/cjs.10032},
	number = {4},
	journal = {Canadian Journal of Statistics},
	author = {Wang, Haonan and Zhu, Jun},
	month = dec,
	year = {2009},
	pages = {607--624},
	file = {Variable selection in spatial regression via penalized least squares - Wang - 2009 - Canadian Journal of Statistics - Wiley Online Library:/home/pauljohn/Documents/Zotero/storage/ZV5BUAWV/full.html:text/html}
},

@article{geyer_practical_1992,
	title = {Practical Markov Chain Monte Carlo},
	volume = {7},
	issn = {0883-4237},
	url = {http://www.jstor.org/stable/2246094},
	abstract = {Markov chain Monte Carlo using the Metropolis-Hastings algorithm is a general method for the simulation of stochastic processes having probability densities known up to a constant of proportionality. Despite recent advances in its theory, the practice has remained controversial. This article makes the case for basing all inference on one long run of the Markov chain and estimating the Monte Carlo error by standard nonparametric methods well-known in the time-series and operations research literature. In passing it touches on the Kipnis-Varadhan central limit theorem for reversible Markov chains, on some new variance estimators, on judging the relative efficiency of competing Monte Carlo schemes, on methods for constructing more rapidly mixing Markov chains and on diagnostics for Markov chain Monte Carlo.},
	number = {4},
	journal = {Statistical Science},
	author = {Geyer, Charles J.},
	month = nov,
	year = {1992},
	pages = {473--483}
},

@article{menard_coefficients_2000,
	author = {Menard, Scott},
	title = {Coefficients of Determination for Multiple Logistic Regression Analysis},
	journal = {The American Statistician},
	volume = {54},
	number = {1},
	pages = {17--24},
	month = feb,
	year = {2000},
	issn = {0003-1305},
	doi = {10.2307/2685605},
	url = {http://www.jstor.org/stable/2685605},
	abstract = {Coefficients of determination for continuous predicted values {(R2} analogs) in logistic regression are examined for their conceptual and mathematical similarity to the familiar R2 statistic from ordinary least squares regression, and compared to coefficients of determination for discrete predicted values (indexes of predictive efficiency). An example motivated by substantive concerns and using empirical data from a national household probability sample is presented to illustrate the behavior of the different coefficients of determination in the evaluation of models including dependent variables with different base rates-that is, different proportions of cases or observations with "positive" outcomes. One R2 analog appears to be preferable to the others both in terms of conceptual similarity to the ordinary least squares coefficient of determination, and in terms of its relative independence from the base rate. In addition, base rate should also be considered when selecting an index of predictive efficiency. As expected, the conclusions based on R2 analogs are not necessarily consistent with conclusions based on predictive efficiency, with respect to which of several outcomes is better predicted by a given model.},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/XMT25XT9/Menard - 2000 - Coefficients of Determination for Multiple Logisti.pdf:application/pdf}
},

@article{becker_examining_2001,
	title = {Examining Theoretical Models through Research Synthesis},
	volume = {24},
	url = {http://ehp.sagepub.com/content/24/2/190.abstract},
	doi = {10.1177/01632780122034876},
	abstract = {Quantitative research synthesis methods can be used to examine theoretical models for substantive research questions. Model-driven syntheses can address more complex questions than have typically been addressed in quantitative reviews in a more systematic and organized manner than in a traditional narrative review. A rationale and set of objectives for syntheses involving models are reviewed. The author describes how to incorporate models into each stage of a review, outlining problems and limitations that may arise at each stage. Examples from several model-driven syntheses illustrate the points made in the article.},
	number = {2},
	journal = {Evaluation \& the Health Professions},
	author = {Becker, Betsy Jane},
	month = jun,
	year = {2001},
	pages = {190--217},
	file = {Full Text PDF:/home/pauljohn/Documents/Zotero/storage/6XSF65Q9/Becker - 2001 - Examining Theoretical Models through Research Synt.pdf:application/pdf;Snapshot:/home/pauljohn/Documents/Zotero/storage/5DTIB42R/190.html:text/html}
},

@article{meng_using_1991,
	author = {Meng, Xiao-Li and Rubin, Donald B.},
	title = {Using {EM} to Obtain Asymptotic Variance-Covariance Matrices: The {SEM} Algorithm},
	shorttitle = {Using {EM} to Obtain Asymptotic Variance-Covariance Matrices},
	journal = {Journal of the American Statistical Association},
	volume = {86},
	number = {416},
	pages = {899--909},
	month = dec,
	year = {1991},
	issn = {0162-1459},
	doi = {10.2307/2290503},
	url = {http://www.jstor.org/stable/2290503},
	abstract = {The expectation maximization {(EM)} algorithm is a popular, and often remarkably simple, method for maximum likelihood estimation in incomplete-data problems. One criticism of {EM} in practice is that asymptotic variance-covariance matrices for parameters (e.g., standard errors) are not automatic byproducts, as they are when using some other methods, such as Newton-Raphson. In this article we define and illustrate a procedure that obtains numerically stable asymptotic variance-covariance matrices using only the code for computing the complete-data variance-covariance matrix, the code for {EM} itself, and code for standard matrix operations. The basic idea is to use the fact that the rate of convergence of {EM} is governed by the fractions of missing information to find the increased variability due to missing information to add to the complete-data variance-covariance matrix. We call this supplemented {EM} algorithm the {SEM} algorithm. Theory and particular examples reinforce the conclusion that the {SEM} algorithm can be a practically important supplement to {EM} in many problems. {SEM} is especially useful in multiparameter problems where only a subset of the parameters are affected by missing information and in parallel computing environments. {SEM} can also be used as a tool for monitoring whether {EM} has converged to a (local) maximum.},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/8M2ETDEQ/Meng and Rubin - 1991 - Using EM to Obtain Asymptotic Variance-Covariance .pdf:application/pdf}
},

@article{bock_high-dimensional_1996,
	title = {High-Dimensional Multivariate Probit Analysis},
	volume = {52},
	issn = {0006-341X},
	url = {http://www.jstor.org/stable/2532834},
	doi = {10.2307/2532834},
	abstract = {A computationally practical form of probit analysis for multiple response variables based on an assumed common factor model for the latent tolerances is proposed. Numerical integration over the factor space provides maximum likelihood estimation of the probit regression parameters and of the probabilities of response combinations under the model. The procedure is applied to five variables from the Pneumoconiosis Field Trial, two variables of which were previously analyzed by Ashford and Sowden (1970, Biometrics 26, 535-546).},
	number = {4},
	journal = {Biometrics},
	author = {Bock, R. Darrell and Gibbons, Robert D.},
	month = dec,
	year = {1996},
	pages = {1183--1194},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/VRE8H7EB/Bock and Gibbons - 1996 - High-Dimensional Multivariate Probit Analysis.pdf:application/pdf}
},

@article{lesaffre_existence_1992,
	title = {Existence and Uniqueness of the Maximum Likelihood Estimator for a Multivariate Probit Model},
	volume = {87},
	issn = {0162-1459},
	url = {http://www.jstor.org/stable/2290218},
	doi = {10.2307/2290218},
	abstract = {The multivariate probit model {(MPM)} is a particular case of the class of correlated prediction models. A correlated prediction model is especially useful when prediction or classification is envisaged into diagnostic classes that are combinations of binary responses. The parameter vector consists of a "location" part and an "association" part. The location part accounts for the effect the regressors have on the marginal probabilities of the binary responses. The association part corrects these probabilities, taking into account that the responses are related. This article investigates conditions for the existence and unicity of the maximum likelihood estimator {(MLE)} of the parameter vector. It turns out that the existence and uniqueness of the {MLE} for the location parameters when the association parameters are known are related to those of the multigroup logistic model. Necessary and sufficient conditions are given for the existence of the {MLE} of the association part. On the other hand the conditions for the unicity of the {MLEs} of the association parameters are much more complicated and not yet established. Finally, the article shows that for an {MPM} the estimates of the regression parameters for the location part exist and are unique if and only if they exist and are unique for each marginal univariate probit model. This result provides practical guidelines to detect early divergence. Good starting values are essential; this problem is touched on briefly. The theoretical results are illustrated by a medical example.},
	number = {419},
	journal = {Journal of the American Statistical Association},
	author = {Lesaffre, Emmanuel and Kaufmann, Heinz},
	year = {1992},
	pages = {805--811},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/JW94VIK2/Lesaffre and Kaufmann - 1992 - Existence and Uniqueness of the Maximum Likelihood.pdf:application/pdf}
},

@article{williamson_analyzing_1995,
	author = {Williamson, John M. and Kim, {KyungMann} and Lipsitz, Stuart R.},
	title = {Analyzing Bivariate Ordinal Data Using a Global Odds Ratio},
	journal = {Journal of the American Statistical Association},
	volume = {90},
	number = {432},
	pages = {1432--1437},
	month = dec,
	year = {1995},
	issn = {0162-1459},
	doi = {10.2307/2291535},
	url = {http://www.jstor.org/stable/2291535},
	abstract = {A moment method is proposed for regression analysis of bivariate ordered categorical data using the global odds ratio as the measure of association. For modeling the margins, this method utilizes the stochastic ordering implicit in the data. This method allows for covariate effects in modeling the association between responses. An application of the proposed model is illustrated using the ophthalmological data from the Wisconsin Epidemiologic Study of Diabetic Retinopathy for identifying risk factors among younger onset diabetics. This model is also extended to a longitudinal data setting with more than two repeated measures.},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/RMTTTVFP/Williamson et al. - 1995 - Analyzing Bivariate Ordinal Data Using a Global Od.pdf:application/pdf}
},

@article{liao_adjusted_2003,
	author = {Liao, J. G. and {McGee}, Dan},
	title = {Adjusted Coefficients of Determination for Logistic Regression},
	journal = {The American Statistician},
	volume = {57},
	number = {3},
	pages = {161--165},
	year = {2003},
	issn = {0003-1305},
	url = {http://www.jstor.org/stable/30037263},
	abstract = {This article proposes two adjusted coefficients of determination for logistic regression for measuring the strength of association between the outcome variable and the predictors as an extension of a similar concept in linear regression. The proposed adjusted coefficients correct the overestimation problem of the unadjusted coefficients and are especially useful when the sample size is small or the number of predictors is large. An R program is provided on the Internet.},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/D3B9ENBD/Liao and McGee - 2003 - Adjusted Coefficients of Determination for Logisti.pdf:application/pdf}
},

@article{song_joint_2009,
	title = {Joint Regression Analysis of Correlated Data Using Gaussian Copulas},
	volume = {65},
	issn = {0006-341X},
	url = {http://www.jstor.org/stable/25502244},
	abstract = {This article concerns a new joint modeling approach for correlated data analysis. Utilizing Gaussian copulas, we present a unified and flexible machinery to integrate separate one-dimensional generalized linear models {(GLMs)} into a joint regression analysis of continuous, discrete, and mixed correlated outcomes. This essentially leads to a multivariate analogue of the univariate {GLM} theory and hence an efficiency gain in the estimation of regression coefficients. The availability of joint probability models enables us to develop a full maximum likelihood inference. Numerical illustrations are focused on regression models for discrete correlated data, including multidimensional logistic regression models and a joint model for mixed normal and binary outcomes. In the simulation studies, the proposed copula-based joint model is compared to the popular generalized estimating equations, which is a moment-based estimating equation method to join univariate {GLMs.} Two real-world data examples are used in the illustration.},
	number = {1},
	journal = {Biometrics},
	author = {Song, Peter X.-K. and Li, Mingyao and Yuan, Ying},
	month = mar,
	year = {2009},
	pages = {60--68}
},

@article{echambadi_mean-centering_2007,
	title = {Mean-Centering Does Not Alleviate Collinearity Problems in Moderated Multiple Regression Models},
	volume = {26},
	url = {http://mktsci.journal.informs.org/cgi/content/abstract/26/3/438},
	doi = {10.1287/mksc.1060.0263},
	abstract = {The cross-product term in moderated regression may be collinear with its constituent parts, making it difficult to detect main, simple, and interaction effects. The literature shows that mean-centering can reduce the covariance between the linear and the interaction terms, thereby suggesting that it reduces collinearity. We analytically prove that mean-centering neither changes the computational precision of parameters, the sampling accuracy of main effects, simple effects, interaction effects, nor the R2. We also show that the determinants of the cross product matrix X' X are identical for uncentered and mean-centered data, so the collinearity problem in the moderated regression is unchanged by mean-centering. Many empirical marketing researchers commonly mean-center their moderated regression data hoping that this will improve the precision of estimates from ill conditioned, collinear data, but unfortunately, this hope is futile. Therefore, researchers using moderated regression models should not mean-center in a specious attempt to mitigate collinearity between the linear and the interaction terms. Of course, researchers may wish to mean-center for interpretive purposes and other reasons.},
	number = {3},
	journal = {Marketing Science},
	author = {Echambadi, Raj and Hess, James D.},
	month = may,
	year = {2007},
	pages = {438--445},
	file = {HighWire Full Text PDF:/home/pauljohn/Documents/Zotero/storage/5TK33WZI/Echambadi and Hess - 2007 - Mean-Centering Does Not Alleviate Collinearity Pro.pdf:application/pdf;HighWire Snapshot:/home/pauljohn/Documents/Zotero/storage/7NIN5943/438.html:text/html}
},

@article{mccullagh_sampling_2008,
	title = {Sampling Bias and Logistic Models},
	volume = {70},
	issn = {1369-7412},
	url = {http://www.jstor.org/stable/20203849},
	abstract = {In a regression model, the joint distribution for each finite sample of units is determined by a function $p_{\mathbf{x}}(\mathbf{y})$ depending only on the list of covariate values $\mathbf{x}=(x(u_{1}),\ldots,x(u_{n}))$ on the sampled units. No random sampling of units is involved. In biological work, random sampling is frequently unavoidable, in which case the joint distribution $p(y,x)$ depends on the sampling scheme. Regression models can be used for the study of dependence provided that the conditional distribution $p(y \mid x)$ for random samples agrees with $p_{\mathbf{x}}(\mathbf{y})$ as determined by the regression model for a fixed sample having a non-random configuration x. The paper develops a model that avoids the concept of a fixed population of units, thereby forcing the sampling plan to be incorporated into the sampling distribution. For a quota sample having a predetermined covariate configuration x, the sampling distribution agrees with the standard logistic regression model with correlated components. For most natural sampling plans such as sequential or simple random sampling, the conditional distribution $p(y \mid x)$ is not the same as the regression distribution unless $p_{\mathbf{x}}(\mathbf{y})$ has independent components. In this sense, most natural sampling schemes involving binary random-effects models are biased. The implications of this formulation for subject-specific and population-averaged procedures are explored.},
	number = {4},
	journal = {Journal of the Royal Statistical Society. Series B (Statistical Methodology)},
	author = {{McCullagh}, Peter},
	year = {2008},
	pages = {643--677}
},

@article{kamenetzky_estimating_1982,
	title = {Estimating Need and Demand for Prehospital Care},
	volume = {30},
	issn = {0030-364X},
	url = {http://www.jstor.org/stable/170204},
	abstract = {Models estimating demand and need for emergency transportation services are developed. These models can provide reliable estimates which can be used for planning purposes, by complementing and/or substituting for historical data. The model estimating demand requires only four independent variables: population in the area, employment in the area, and two indicators of socioeconomic status which can be obtained from census data. The model can be used to estimate demand according to 4 operational categories and 11 clinical categories. The parameters of the model are calibrated with 1979 data from 82 ambulance services covering over 200 minor civil divisions in Southwestern Pennsylvania. This model was tested with data from another 55 minor civil divisions, also in Southwestern Pennsylvania, and it provided good estimates of total demand. The model to estimate need evolves from the demand model. It enables planners to estimate unmet need occurring in the region. The effect of emergency transportation service {(ETS)} provider characteristics on demand was also investigated. Statistical tests show that, for purposes of forecasting demand, when the sociodemographic factors are taken into account, provider characteristics are not significant.},
	number = {6},
	journal = {Operations Research},
	author = {Kamenetzky, Ricardo D. and Shuman, Larry J. and Wolfe, Harvey},
	month = nov,
	year = {1982},
	pages = {1148--1167},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/RQH5Q2WK/Kamenetzky et al. - 1982 - Estimating Need and Demand for Prehospital Care.pdf:application/pdf}
},

@article{kromrey_mean_1998,
	title = {Mean Centering in Moderated Multiple Regression: Much Ado about Nothing},
	volume = {58},
	shorttitle = {Mean Centering in Moderated Multiple Regression},
	url = {http://epm.sagepub.com/content/58/1/42.abstract},
	doi = {10.1177/0013164498058001005},
	abstract = {Centering variables prior to the analysis of moderated multiple regression equations has been advocated for reasons both statistical (reduction of multicollinearity) and substantive (improved interpretation of the resulting regression equations). This article provides a comparison of centered and raw score analyses in least squares regression. The two methods are demonstrated to be equivalent, yielding identical hypothesis tests associated with the moderation effect and regression equations that are functionally equivalent.},
	number = {1},
	journal = {Educational and Psychological Measurement},
	author = {Kromrey, Jeffrey D. and Foster-Johnson, Lynn},
	month = feb,
	year = {1998},
	pages = {42--67},
	file = {Snapshot:/home/pauljohn/Documents/Zotero/storage/UQRM7J28/42.html:text/html}
},

@article{king_statistical_1988,
	author = {King, Gary},
	title = {Statistical Models for Political Science Event Counts: Bias in Conventional Procedures and Evidence for the Exponential Poisson Regression Model},
	shorttitle = {Statistical Models for Political Science Event Counts},
	journal = {American Journal of Political Science},
	volume = {32},
	number = {3},
	pages = {838--863},
	year = {1988},
	issn = {0092-5853},
	doi = {10.2307/2111248},
	url = {http://www.jstor.org/stable/2111248},
	abstract = {This paper presents analytical, Monte Carlo, and empirical evidence on models for event count data. Event counts are dependent variables that measure the number of times some event occurs. Counts of international events are probably the most common, but numerous examples exist in every empirical field of the discipline. The results of the analysis below strongly suggest that the way event counts have been analyzed in hundreds of important political science studies have produced statistically and substantively unreliable results. Misspecification, inefficiency, bias, inconsistency, insufficiency, and other problems result from the unknowing application of two common methods that are without theoretical justification or empirical utility in this type of data. I show that the exponential Poisson regression {(EPR)} model provides analytically, in large samples, and empirically, in small, finite samples, a far superior model and optimal estimator. I also demonstrate the advantage of this methodology in an application to nineteenth-century party switching in the {U.S.} Congress. Its use by political scientists is strongly encouraged.},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/G5GEHHWM/King - 1988 - Statistical Models for Political Science Event Cou.pdf:application/pdf}
},

@article{schimek_non-_1997,
	author = {Schimek, Michael G.},
	title = {Non- and Semiparametric Alternatives to Generalized Linear Models},
	journal = {{SSRN} {eLibrary}},
	month = mar,
	year = {1997},
	url = {http://papers.ssrn.com/sol3/papers.cfm?abstract_id=4847},
	abstract = {Additive and Generalized Additive Models {(GAM)} are discussed as completely nonparametric alternatives to Generalized Linear Models {(GLM).} Single Index Models {(SIM)} are reviewed as a means of nonparametrically specifying the link function in {GLMs.} Semiparametric models with a single as well as a multiple nonparametric component are considered in some detail. The penalized least squares technique is compared to Speckman's approach to partial linear models with one unparameterized explanatory variable. Further Generalized Partial Linear Models {(GPLM)} are briefly mentioned. For a multiple nonparametric component a thin plate spline approach and for a dependent vector variable a vector spline approach is discussed.},
	file = {SSRN Snapshot:/home/pauljohn/Documents/Zotero/storage/MD8BAX8M/papers.html:text/html}
},

@article{newton_bayesian_1996,
	author = {Newton, Michael A. and Czado, Claudia and Chappell, Rick},
	title = {Bayesian Inference for Semiparametric Binary Regression},
	journal = {Journal of the American Statistical Association},
	volume = {91},
	number = {433},
	pages = {142--153},
	month = mar,
	year = {1996},
	issn = {0162-1459},
	doi = {10.2307/2291390},
	url = {http://www.jstor.org/stable/2291390},
	abstract = {We propose a regression model for binary response data that places no structural restrictions on the link function except monotonicity and known location and scale. Predictors enter linearly. We demonstrate Bayesian inference calculations in this model. By modifying the Dirichlet process, we obtain a natural prior measure over this semiparametric model, and we use Polya sequence theory to formulate this measure in terms of a finite number of unobserved variables. We design a Markov chain Monte Carlo algorithm for posterior simulation and apply the methodology to data on radiotherapy treatments for cancer.}
},

@article{hubbard_why_2008,
	title = {Why P Values Are Not a Useful Measure of Evidence in Statistical Significance Testing},
	volume = {18},
	url = {http://tap.sagepub.com/content/18/1/69.abstract},
	doi = {10.1177/0959354307086923},
	abstract = {Reporting p values from statistical significance tests is common in psychology's empirical literature. Sir Ronald Fisher saw the p value as playing a useful role in knowledge development by acting as an `objective' measure of inductive evidence against the null hypothesis. We review several reasons why the p value is an unobjective and inadequate measure of evidence when statistically testing hypotheses. A common theme throughout many of these reasons is that p values exaggerate the evidence against H0. This, in turn, calls into question the validity of much published work based on comparatively small, including .05, p values. Indeed, if researchers were fully informed about the limitations of the p value as a measure of evidence, this inferential index could not possibly enjoy its ongoing ubiquity. Replication with extension research focusing on sample statistics, effect sizes, and their confidence intervals is a better vehicle for reliable knowledge development than using p values. Fisher would also have agreed with the need for replication research.},
	number = {1},
	journal = {Theory \& Psychology},
	author = {Hubbard, Raymond and Lindsay, R. Murray},
	month = feb,
	year = {2008},
	pages = {69--88},
	file = {Snapshot:/home/pauljohn/Documents/Zotero/storage/APDBST3M/69.html:text/html}
},

@article{groemping_relative_2006,
	title = {Relative Importance for Linear Regression in {R}: The Package relaimpo},
	volume = {17},
	issn = {1548-7660},
	url = {http://www.jstatsoft.org/v17/i01},
	number = {1},
	journal = {Journal of Statistical Software},
	author = {Groemping, Ulrike},
	year = {2006},
	pages = {1--27}
},

@article{gelman_difference_2006,
	title = {The Difference Between {``Significant''} and {``Not Significant''} is not Itself Statistically Significant},
	volume = {60},
	issn = {0003-1305, 1537-2731},
	url = {http://pubs.amstat.org/doi/abs/10.1198/000313006X152649},
	doi = {10.1198/000313006X152649},
	number = {4},
	journal = {The American Statistician},
	author = {Gelman, Andrew and Stern, Hal},
	month = nov,
	year = {2006},
	pages = {328--331},
	file = {American Statistical Association - The American Statistician - 60(4):328:/home/pauljohn/Documents/Zotero/storage/CG8APAT7/000313006X152649.html:text/html}
},

@article{marra_practical_2011,
	author = {Marra, Giampiero and Wood, Simon N.},
	title = {Practical variable selection for generalized additive models},
	journal = {Computational Statistics \& Data Analysis},
	year = {2011},
	month = jul,
	volume = {55},
	number = {7},
	pages = {2372--2387},
	issn = {0167-9473},
	doi = {10.1016/j.csda.2011.02.004},
	url = {http://www.sciencedirect.com/science/article/pii/S0167947311000491},
	keywords = {Generalized additive model, Nonnegative garrote estimator, Penalized thin plate regression spline, Practical variable selection, Shrinkage smoother},
	abstract = {The problem of variable selection within the class of generalized additive models, when there are many covariates to choose from but the number of predictors is still somewhat smaller than the number of observations, is considered. Two very simple but effective shrinkage methods and an extension of the nonnegative garrote estimator are introduced. The proposals avoid having to use nonparametric testing methods for which there is no general reliable distributional theory. Moreover, component selection is carried out in one single step as opposed to many selection procedures which involve an exhaustive search of all possible models. The empirical performance of the proposed methods is compared to that of some available techniques via an extensive simulation study. The results show under which conditions one method can be preferred over another, hence providing applied researchers with some practical guidelines. The procedures are also illustrated analysing data on plasma beta-carotene levels from a cross-sectional study conducted in the United States.},
	file = {ScienceDirect Full Text PDF:/home/pauljohn/Documents/Zotero/storage/T4UATDVG/Marra and Wood - 2011 - Practical variable selection for generalized addit.pdf:application/pdf;ScienceDirect Snapshot:/home/pauljohn/Documents/Zotero/storage/REX6RWBB/S0167947311000491.html:text/html}
},

@article{white_heteroskedasticity-consistent_1980,
	author = {White, Halbert},
	title = {A Heteroskedasticity-Consistent Covariance Matrix Estimator and a Direct Test for Heteroskedasticity},
	journal = {Econometrica},
	year = {1980},
	month = may,
	volume = {48},
	number = {4},
	pages = {817--838},
	issn = {0012-9682},
	doi = {10.2307/1912934},
	url = {http://www.jstor.org/stable/1912934},
	abstract = {This paper presents a parameter covariance matrix estimator which is consistent even when the disturbances of a linear regression model are heteroskedastic. This estimator does not depend on a formal model of the structure of the heteroskedasticity. By comparing the elements of the new estimator to those of the usual covariance estimator, one obtains a direct test for heteroskedasticity, since in the absence of heteroskedasticity, the two estimators will be approximately equal, but will generally diverge otherwise. The test has an appealing least squares interpretation.},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/5URHUFUJ/White - 1980 - A Heteroskedasticity-Consistent Covariance Matrix .pdf:application/pdf}
},

@book{miles_applying_2000,
	edition = {First},
	title = {Applying Regression and Correlation: A Guide for Students and Researchers},
	isbn = {0761962301},
	shorttitle = {Applying Regression and Correlation},
	publisher = {Sage Publications Ltd},
	author = {Miles, Jeremy and Shevlin, Mark},
	month = nov,
	year = {2000}
},

@article{mood_logistic_2010,
	title = {Logistic Regression: Why We Cannot Do What We Think We Can Do, and What We Can Do About It},
	volume = {26},
	shorttitle = {Logistic Regression},
	url = {http://esr.oxfordjournals.org/content/26/1/67.abstract},
	doi = {10.1093/esr/jcp006},
	abstract = {Logistic regression estimates do not behave like linear regression estimates in one important respect: They are affected by omitted variables, even when these variables are unrelated to the independent variables in the model. This fact has important implications that have gone largely unnoticed by sociologists. Importantly, we cannot straightforwardly interpret log-odds ratios or odds ratios as effect measures, because they also reflect the degree of unobserved heterogeneity in the model. In addition, we cannot compare log-odds ratios or odds ratios for similar models across groups, samples, or time points, or across models with different independent variables in a sample. This article discusses these problems and possible ways of overcoming them.},
	number = {1},
	journal = {European Sociological Review},
	author = {Mood, Carina},
	month = feb,
	year = {2010},
	pages = {67--82},
	file = {Snapshot:/home/pauljohn/Documents/Zotero/storage/G8WB82TK/67.html:text/html}
},

@article{caffo_flexible_2007,
	author = {Caffo, Brian and An, Ming-Wen and Rohde, Charles},
	title = {Flexible random intercept models for binary outcomes using mixtures of normals},
	journal = {Computational Statistics \& Data Analysis},
	year = {2007},
	month = jul,
	volume = {51},
	number = {11},
	pages = {5220--5235},
	issn = {0167-9473},
	doi = {10.1016/j.csda.2006.09.031},
	url = {http://www.sciencedirect.com/science/article/pii/S0167947306003574},
	keywords = {Logit-normal, Marginalized multilevel models, Probit-normal},
	abstract = {Random intercept models for binary data are useful tools for addressing between-subject heterogeneity. Unlike linear models, the non-linearity of link functions used for binary data force a distinction between marginal and conditional interpretations. This distinction is blurred in probit models with a normally distributed random intercept because the resulting model implies a probit marginal link as well. That is, this model is closed in the sense that the distribution associated with the marginal and conditional link functions and the random effect distribution are all of the same family. It is shown that the closure property is also attained when the distributions associated with the conditional and marginal link functions and the random effect distribution are mixtures of normals. The resulting flexible family of models is demonstrated to be related to several others present in the literature and can be used to synthesize several seemingly disparate modeling approaches. In addition, this family of models offers considerable computational benefits. A diverse series of examples is explored that illustrates the wide applicability of this approach.},
	file = {ScienceDirect Full Text PDF:/home/pauljohn/Documents/Zotero/storage/AVR94VNE/science.html:text/html;ScienceDirect Snapshot:/home/pauljohn/Documents/Zotero/storage/T8STDIDI/S0167947306003574.html:text/html}
},

@article{liang_variable_2009,
	title = {Variable Selection for Partially Linear Models with Measurement Errors},
	volume = {104},
	issn = {0162-1459},
	doi = {10.1198/jasa.2009.0127},
	abstract = {This article focuses on variable selection for partially linear models when the covariates are measured with additive errors. We propose two classes of variable selection procedures, penalized least squares and penalized quantile regression, using the nonconvex penalized principle. The first procedure corrects the bias in the loss function caused by the measurement error by applying the so-called correction-for-attenuation approach, whereas the second procedure corrects the bias by using orthogonal regression. The sampling properties for the two procedures are investigated. The rate of convergence and the asymptotic normality of the resulting estimates are established. We further demonstrate that, with proper choices of the penalty functions and the regularization parameter, the resulting estimates perform asymptotically as well as an oracle procedure (). Choice of smoothing parameters is also discussed. Finite sample performance of the proposed variable selection procedures is assessed by Monte Carlo simulation studies. We further illustrate the proposed procedures by an application.},
	number = {485},
	journal = {Journal of the American Statistical Association},
	author = {Liang, Hua and Li, Runze},
	year = {2009},
	note = {PMID: 20046976; PMCID: PMC2697854},
	pages = {234--248},
	file = {PubMed Central Full Text PDF:/home/pauljohn/Documents/Zotero/storage/GT4MBREP/LIANG and LI - 2009 - Variable Selection for Partially Linear Models wit.pdf:application/pdf}
},

@book{cohen_applied_2002,
	author = {Cohen, Jacob and Cohen, Patricia and West, Stephen G. and Aiken, Leona S.},
	title = {Applied Multiple {Regression/Correlation} Analysis for the Behavioral Sciences},
	edition = {Third},
	publisher = {Routledge Academic},
	year = {2002},
	month = aug,
	isbn = {0805822232}
},

@article{coull_random_2000,
	title = {Random Effects Modeling of Multiple Binomial Responses Using the Multivariate Binomial Logit-Normal Distribution},
	volume = {56},
	issn = {0006-341X},
	url = {http://www.jstor.org/stable/2677105},
	abstract = {The multivariate binomial logit-normal distribution is a mixture distribution for which, (i) conditional on a set of success probabilities and sample size indices, a vector of counts is independent binomial variates, and (ii) the vector of logits of the parameters has a multivariate normal distribution. We use this distribution to model multivariate binomial-type responses using a vector of random effects. The vector of logits of parameters has a mean that is a linear function of explanatory variables and has an unspecified or partly specified covariance matrix. The model generalizes and provides greater flexibility than the univariate model that uses a normal random effect to account for positive correlations in clustered data. The multivariate model is useful when different elements of the response vector refer to different characteristics, each of which may naturally have its own random effect. It is also useful for repeated binary measurement of a single response when there is a nonexchangeable association structure, such as one often expects with longitudinal data or when negative association exists for at least one pair of responses. We apply the model to an influenza study with repeated responses in which some pairs are negatively associated and to a developmental toxicity study with continuation-ratio logits applied to an ordinal response with clustered observations.},
	number = {1},
	journal = {Biometrics},
	author = {Coull, Brent A. and Agresti, Alan},
	month = mar,
	year = {2000},
	pages = {73--80},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/TWDKDX3F/Coull and Agresti - 2000 - Random Effects Modeling of Multiple Binomial Respo.pdf:application/pdf}
},

@article{rabe-hesketh_parameterization_2001,
	title = {Parameterization of multivariate random effects models for categorical data},
	volume = {57},
	issn = {0006-341X},
	url = {http://www.ncbi.nlm.nih.gov/pubmed/11764269},
	abstract = {Alternative parameterizations and problems of identification and estimation of multivariate random effects models for categorical responses are investigated. The issues are illustrated in the context of the multivariate binomial logit-normal {(BLN)} model introduced by Coull and Agresti (2000, Biometrics 56, 73-80). We demonstrate that the {BLN} model is poorly identified unless proper restrictions are imposed on the parameters. Moreover, estimation of {BLN} models is unduly computationally complex. In the first application considered by Coull and Agresti, an identification problem results in highly unstable, highly correlated parameter estimates and large standard errors. A probit-normal version of the specified {BLN} model is demonstrated to be underidentified, whereas the {BLN} model is empirically underidentified. Identification can be achieved by constraining one of the parameters. We show that a one-factor probit model is equivalent to the probit version of the specified {BLN} model and that a one-factor logit model is empirically equivalent to the {BLN} model. Estimation is greatly simplified by using a factor model.},
	number = {4},
	journal = {Biometrics},
	author = {Rabe-Hesketh, S. and Skrondal, A.},
	month = dec,
	year = {2001},
	keywords = {Biometry, Data Interpretation, Statistical, Disease Outbreaks, Humans, Influenza, Human, Models, Statistical, Multivariate analysis},
	pages = {1256--1264}
},

@article{mulder_multidimensional_2008,
	title = {Multidimensional Adaptive Testing with Optimal Design Criteria for Item Selection},
	volume = {74},
	issn = {0033-3123, 1860-0980},
	url = {http://www.springerlink.com/content/31110wh62u57v179/},
	doi = {10.1007/s11336-008-9097-5},
	number = {2},
	journal = {Psychometrika},
	author = {Mulder, Joris and van der Linden, Wim J.},
	month = dec,
	year = {2008},
	pages = {273--296},
	file = {SpringerLink - Psychometrika, Volume 74, Number 2:/home/pauljohn/Documents/Zotero/storage/G8A3C4JE/31110wh62u57v179.html:text/html}
},

@article{colombi_multivariate_1998,
	title = {A multivariate logit model with marginal canonical association},
	volume = {27},
	issn = {0361-0926, 1532-415X},
	url = {http://www.tandfonline.com/doi/abs/10.1080/03610929808832266},
	doi = {10.1080/03610929808832266},
	number = {12},
	journal = {Communications in Statistics - Theory and Methods},
	author = {Colombi, Roberto},
	month = jan,
	year = {1998},
	pages = {2953--2971},
	file = {Taylor & Francis Online :: A multivariate logit model with marginal canonical association - Communications in Statistics - Theory and Methods - Volume 27, Issue 12:/home/pauljohn/Documents/Zotero/storage/BUNZ6VQQ/03610929808832266.html:text/html}
},

@article{bergsma_marginal_2002,
	title = {Marginal models for categorical data},
	volume = {30},
	issn = {0090-5364},
	url = {http://projecteuclid.org/euclid.aos/1015362188},
	doi = {10.1214/aos/1015362188},
	abstract = {Statistical models defined by imposing restrictions on marginal distributions of contingency tables have received considerable attention recently. This paper introduces a general definition of marginal log-linear parameters and describes conditions for a marginal log-linear parameter to be a smooth parameterization of the distribution and to be variation independent. Statistical models defined by imposing affine restrictions on the marginal log-linear parameters are investigated. These models generalize ordinary log-linear and multivariate logistic models. Sufficient conditions for a log-affine marginal model to be nonempty and to be a curved exponential family are given. Standard large-sample theory is shown to apply to maximum likelihood estimation of log-affine marginal models for a variety of sampling procedures.},
	number = {1},
	journal = {The Annals of Statistics},
	author = {Bergsma, Wicher P. and Rudas, Tam{\'a}s},
	month = feb,
	year = {2002},
	pages = {140--159},
	file = {Euclid Project PDF:/home/pauljohn/Documents/Zotero/storage/X56EJQQE/DPubS.html:text/html}
},

@article{heagerty_marginal_1996,
	author = {Heagerty, Patrick J. and Zeger, Scott L.},
	title = {Marginal Regression Models for Clustered Ordinal Measurements},
	journal = {Journal of the American Statistical Association},
	year = {1996},
	volume = {91},
	number = {435},
	pages = {1024--1036},
	issn = {0162-1459},
	doi = {10.2307/2291722},
	url = {http://www.jstor.org/stable/2291722},
	abstract = {This article constructs statistical models for clustered ordinal measurements. We specify two regression models: one for the marginal means and one for the marginal pairwise global odds ratios. Of particular interest are problems in which the odds ratio regression is a focus. Simple assumptions about higher-order conditional moments give a quadratic exponential likelihood function with second-order estimating equations {(GEE2)} as score equations. But computational difficulty can arise for large clusters when both the mean response and the association between measures is of interest. First, we present {GEE1} as an alternative estimation strategy. Second, we extend to repeated ordinal measurements the method developed by Carey et al. for binary observations that is based on alternating logistic regressions {(ALR)} for the marginal mean parameters and the pairwise log-odds ratio parameters. We study the efficiency of {GEE1} and {ALR} relative to full maximum likelihood. We demonstrate the utility of our regression methods for ordinal data by applying the methods to a surgical follow-up study.},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/TH8MGWAE/Heagerty and Zeger - 1996 - Marginal Regression Models for Clustered Ordinal M.pdf:application/pdf}
},

@article{ekholm_marginal_1995,
	title = {Marginal regression analysis of a multivariate binary response},
	volume = {82},
	url = {http://biomet.oxfordjournals.org/content/82/4/847.abstract},
	doi = {10.1093/biomet/82.4.847},
	abstract = {We propose the use of the mean parameter for regression analysis of a multivariate binary response. We model the association using dependence ratios defined in terms of the mean parameter, the components of which are the joint success probabilities of all orders. This permits flexible modelling of higher-order associations, using maximum likelihood estimation. We reanalyse two data sets, one with variable cluster size and the other a longitudinal data set with constant cluster size.},
	number = {4},
	journal = {Biometrika},
	author = {Ekholm, Anders and Smith, Peter W. F. and {McDonald}, John W.},
	month = dec,
	year = {1995},
	pages = {847--854},
	file = {Snapshot:/home/pauljohn/Documents/Zotero/storage/56QPVNHP/847.html:text/html}
},

@article{ekholm_association_2000,
	title = {Association Models for a Multivariate Binary Response},
	volume = {56},
	issn = {0006-341X},
	url = {http://eprints.ioe.ac.uk/5446/},
	doi = {10.1111/j.0006-341X.2000.00712.x},
	journal = {Biometrics},
	author = {Ekholm, Anders and {McDonald}, John W. and Smith, Peter W. F.},
	month = sep,
	year = {2000},
	pages = {712--718},
	file = {Association models for a multivariate binary response - IOE Eprints:/home/pauljohn/Documents/Zotero/storage/E5XT8CXP/5446.html:text/html}
},

@article{ananth_modeling_2004,
	title = {Modeling Multivariate Binary Responses with Multiple Levels of Nesting Based on Alternating Logistic Regressions: an Application to Caries Aggregation},
	volume = {83},
	shorttitle = {Modeling Multivariate Binary Responses with Multiple Levels of Nesting Based on Alternating Logistic Regressions},
	url = {http://jdr.sagepub.com/content/83/10/776.abstract},
	doi = {10.1177/154405910408301008},
	abstract = {Clustered binary responses are commonly encountered in dental research. Data analysis may include modeling both the marginal response probabilities (i.e., risk) and the dependence structure between pairs of responses (i.e., aggregation). While second-order generalized estimating equations {(GEE2)} is a well-known approach for such data, alternating logistic regressions {(ALR)} is a computationally efficient alternative method, especially for large clusters. We illustrate {ALR} with an application to caries aggregation using a dataset with 3 levels of nesting: tooth surfaces within an interproximal {(IP)} region, {IP} regions within a jaw, and jaws within a subject. Caries lesions appear to aggregate strongly within subjects with a spatially distributed risk. The minimum within-{IP-region} odds ratio {(OR)} was 2.25 (95\% confidence interval 1.15, 4.41), and the within-{IP-region} {ORs} were always greater than the between-{IP-region} {ORs.} {ALR} is a convenient and useful regression technique for explicit modeling of the dependence structure, and may be applicable to other dental research problems involving clustered or nested responses.},
	number = {10},
	journal = {Journal of Dental Research},
	author = {Ananth, C. V. and Kantor, M. L.},
	month = oct,
	year = {2004},
	pages = {776--781},
	file = {Full Text PDF:/home/pauljohn/Documents/Zotero/storage/TN5TMPRG/Ananth and Kantor - 2004 - Modeling Multivariate Binary Responses with Multip.pdf:application/pdf;Snapshot:/home/pauljohn/Documents/Zotero/storage/AI5DJWRJ/776.html:text/html}
},

@article{prentice_correlated_1988,
	title = {Correlated Binary Regression with Covariates Specific to Each Binary Observation},
	volume = {44},
	issn = {0006-341X},
	url = {http://www.jstor.org/stable/2531733},
	doi = {10.2307/2531733},
	abstract = {Regression methods are considered for the analysis of correlated binary data when each binary observation may have its own covariates. It is argued that binary response models that condition on some or all binary responses in a given "block" are useful for studying certain types of dependencies, but not for the estimation of marginal response probabilities or pairwise correlations. Fully parametric approaches to these latter problems appear to be unduly complicated except in such special cases as the analysis of paired binary data. Hence, a generalized estimating equation approach is advocated for inference on response probabilities and correlations. Illustrations involving both small and large block sizes are provided.},
	number = {4},
	journal = {Biometrics},
	author = {Prentice, Ross L.},
	month = dec,
	year = {1988},
	pages = {1033--1048},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/A9CGJTAI/Prentice - 1988 - Correlated Binary Regression with Covariates Speci.pdf:application/pdf}
},

@article{basu_marginal_2003,
	title = {Marginal Likelihood and {Bayes} Factors for {Dirichlet} Process Mixture Models},
	volume = {98},
	issn = {0162-1459},
	url = {http://www.jstor.org/stable/30045209},
	abstract = {We present a method for comparing semiparametric Bayesian models, constructed under the Dirichlet process mixture {(DPM)} framework, with alternative semiparameteric or parameteric Bayesian models. A distinctive feature of the method is that it can be applied to semiparametric models containing covariates and hierarchical prior structures, and is apparently the first method of its kind. Formally, the method is based on the marginal likelihood estimation approach of Chib (1995) and requires estimation of the likelihood and posterior ordinates of the {DPM} model at a single high-density point. An interesting computation is involved in the estimation of the likelihood ordinate, which is devised via collapsed sequential importance sampling. Extensive experiments with synthetic and real data involving semiparametric binary data regression models and hierarchical longitudinal mixed-effects models are used to illustrate the implementation, performance, and applicability of the method.},
	number = {461},
	journal = {Journal of the American Statistical Association},
	author = {Basu, Sanjib and Chib, Siddhartha},
	month = mar,
	year = {2003},
	pages = {224--235},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/69A8TPVD/Basu and Chib - 2003 - Marginal Likelihood and Bayes Factors for Dirichle.pdf:application/pdf}
},

@article{corcoran_exact_2001,
	title = {An Exact Trend Test for Correlated Binary Data},
	volume = {57},
	issn = {0006-341X},
	url = {http://www.jstor.org/stable/3068436},
	abstract = {The problem of testing a dose-response relationship in the presence of exchangeably correlated binary data has been addressed using a variety of models. Most commonly used approaches are derived from likelihood or generalized estimating equations and rely on large-sample theory to justify their inferences. However, while earlier work has determined that these methods may perform poorly for small or sparse samples, there are few alternatives available to those faced with such data. We propose an exact trend test for exchangeably correlated binary data when groups of correlated observations are ordered. This exact approach is based on an exponential model derived by Molenberghs and Ryan (1999) and Ryan and Molenberghs (1999) and provides natural analogues to Fisher's exact test and the binomial trend test when the data are correlated. We use a graphical method with which one can efficiently compute the exact tail distribution and apply the test to two examples.},
	number = {3},
	journal = {Biometrics},
	author = {Corcoran, Chris and Ryan, Louise and Senchaudhuri, Pralay and Mehta, Cyrus and Patel, Nitin and Molenberghs, Geert},
	year = {2001},
	pages = {941--948},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/4AMHAWRA/Corcoran et al. - 2001 - An Exact Trend Test for Correlated Binary Data.pdf:application/pdf}
},

@article{webster_patents_2011,
	title = {Do Patents Matter for Commercialization?},
	volume = {54},
	issn = {0022-2186},
	url = {http://www.jstor.org/stable/10.1086/658487},
	abstract = {This paper estimates the effect of a patent grant on the likelihood that an invention will progress to different commercialization stages, using survey data on 3,162 inventions that were the subject of a patent application. We find that about 40 percent of all inventions advanced to the point of market launch and mass production. Although a patent grant had no effect on the decision to proceed with the commercialization process, being refused a patent reduced the probability of attempting market launch and mass production by about 13 percentage points. Over and above this, having protection from several other complementary patents increased the probability of commercialization by an additional 3--5 percentage points.},
	number = {2},
	journal = {Journal of Law and Economics},
	author = {Webster, Elizabeth and Jensen, Paul H.},
	month = may,
	year = {2011},
	pages = {431--453},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/TC2W6QK3/Webster and Jensen - 2011 - Do Patents Matter for Commercialization.pdf:application/pdf}
},

@article{ge_womens_2011,
	title = {Women's College Decisions: How Much Does Marriage Matter?},
	volume = {29},
	issn = {0734-306X},
	shorttitle = {Women's College Decisions},
	url = {http://www.jstor.org/stable/10.1086/660774},
	abstract = {This article investigates the sequential college attendance decision of young women and quantifies the effect of marriage expectations on their decision to attend and graduate from college. A dynamic choice model of college attendance, labor supply, and marriage is formulated and structurally estimated using panel data from the {NLSY79.} The model is used to simulate the effects of no marriage benefits and finds that the predicted college enrollment rate will drop from 58.0\% to 50.5\%. Using the estimated model, the college attendance behavior for a younger cohort from the {NLSY97} is predicted and used to validate the behavioral model.},
	number = {4},
	journal = {Journal of Labor Economics},
	author = {Ge, Suqin},
	month = oct,
	year = {2011},
	pages = {773--818},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/FCBSA973/Ge - 2011 - Women’s College Decisions How Much Does Marriage .pdf:application/pdf}
},

@article{damon_j._phillips_jazz_2011,
	title = {Jazz and the Disconnected: City Structural Disconnectedness and the Emergence of a Jazz Canon, 1897--1933},
	volume = {117},
	issn = {0002-9602},
	shorttitle = {Jazz and the Disconnected},
	url = {http://www.jstor.org/stable/10.1086/661757},
	abstract = {The study of organizations and markets suffers from the underdevelopment of disconnected producers. This article emphasizes the imputed identities of sources to argue that difficult-to-categorize outputs were appealing when associated with a source high in disconnectedness. Worldwide data on recordings and mobility with detailed data on Midwest recordings provide evidence that jazz from cities high in disconnectedness was rerecorded more often by musicians over time. Moreover, recordings with difficult-to-categorize elements were more likely to be rerecorded when coming from cities high in disconnectedness, despite evidence that original music was paradoxically less likely to come from these cities.},
	number = {2},
	journal = {American Journal of Sociology},
	author = {Phillips, Damon J.},
	month = sep,
	year = {2011},
	pages = {420--483},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/26DTPMIQ/Damon J. Phillips - 2011 - Jazz and the Disconnected City Structural Disconn.pdf:application/pdf}
},

@article{koenig_variable_2011,
	title = {Variable Helper Effects, Ecological Conditions, and the Evolution of Cooperative Breeding in the Acorn Woodpecker},
	volume = {178},
	issn = {0003-0147},
	url = {http://www.jstor.org/stable/10.1086/660832},
	doi = {10.1086/660832},
	abstract = {The ecological conditions leading to delayed dispersal and helping behavior are generally thought to follow one of two contrasting scenarios: that conditions are stable and predictable, resulting in young being ecologically forced to remain as helpers (extrinsic constraints and the habitat saturation hypothesis), or that conditions are highly variable and unpredictable, leading to the need for helpers to raise young, at least when conditions are poor (intrinsic constraints and the hard life hypothesis). We investigated how variability in ecological conditions influences the degree to which helpers augment breeder fitness in the cooperatively breeding acorn woodpecker ({Melanerpes} formicivorus), a species in which the acorn crop, territory quality, and prior breeding experience all vary in ways that have important effects on fitness. We found that the relationship between ecological conditions and the probability that birds would remain as helpers was variable but that helpers generally yielded greater fitness benefits when ecological conditions were favorable, rather than unfavorable, for breeding. These results affirm the importance of extrinsic constraints to delayed dispersal and cooperative breeding in this species, despite its dependence on a highly variable and unpredictable acorn crop. Our findings also confirm that helpers can have very different fitness effects, depending on conditions, but that those effects are not necessarily greater when breeding conditions are unfavorable.},
	number = {2},
	journal = {The American Naturalist},
	author = {Koenig, Walter D. and Walters, Eric L. and Haydock, Joseph},
	year = {2011},
	pages = {145--158},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/64M4ZCGX/Koenig et al. - 2011 - Variable Helper Effects, Ecological Conditions, an.pdf:application/pdf}
},

@article{hinkkanen_sentencing_2011,
	title = {Sentencing Theory, Policy, and Research in the Nordic Countries},
	volume = {40},
	issn = {0192-3234},
	url = {http://www.jstor.org/stable/10.1086/661182},
	doi = {10.1086/661182},
	abstract = {Sentencing in the Nordic countries follows the civil law tradition. Sentencing councils and advisory boards have no role, as the guidance for the courts is given mainly in the form of legislative sentencing principles and court precedents. Legislative sentencing guidance has undergone changes both in form and substance. The 1970s decline of the rehabilitative ideal was mirrored in sentencing reforms that emphasized proportionality and predictability in sentencing. This was the case especially in Finland and Sweden. But unlike in many other countries, these ideological changes did not result to increased severity—for Finland the result was the opposite. While the Nordic legislators have shown increased interest in more detailed guidance of sentencing, courts are still left with a wide range of sentencing options and fairly broad penal latitudes. At the same time the sentencing systems place high value in consistency and uniformity in sentencing. To reconcile these aims, further devices are needed for the structuring of the sentencing decision, including legal-theoretical analyses and empirical research. The paper discusses the latter point in more detail with more recent research examples from Finland.},
	number = {1},
	journal = {Crime and Justice},
	author = {Hinkkanen, Ville and Lappi-Seppälä, Tapio},
	year = {2011},
	pages = {349--404},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/K9HUDUFS/Hinkkanen and Lappi-Seppälä - 2011 - Sentencing Theory, Policy, and Research in the Nor.pdf:application/pdf}
},

@article{delavande_criminal_2010,
	title = {Criminal Prosecution and Human Immunodeficiency {Virus–Related} Risky Behavior},
	volume = {53},
	issn = {0022-2186},
	url = {http://www.jstor.org/stable/10.1086/655806},
	doi = {10.1086/655806},
	abstract = {We examine the consequences of prosecuting people who are human immunodeficiency virus ({HIV}) positive and expose others to the infection. We show that the effect of such prosecutions on the spread of {HIV} is a priori ambiguous. The prosecutions deter unsafe sex. However, they also create incentives for having sex with partners who are more promiscuous, which consequently increases the spread of {HIV}. We test these predictions and find that such prosecutions are associated with a reduction in the number of partners, an increase in safe sex, and an increase in sex with prostitutes. We estimate that doubling the prosecution rate could decrease the total number of new {HIV} infections by one-third over a 10-year period.},
	number = {4},
	journal = {Journal of Law and Economics},
	author = {Delavande, Adeline and Goldman, Dana and Sood, Neeraj},
	month = nov,
	year = {2010},
	annote = {{ArticleType:} research-article / Full publication date: November 2010 / Copyright © 2010 The University of Chicago},
	pages = {741--782},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/UUICACJV/Delavande et al. - 2010 - Criminal Prosecution and Human Immunodeficiency Vi.pdf:application/pdf}
},

@article{freedman_endogeneity_2010,
	title = {Endogeneity in Probit Response Models},
	volume = {18},
	issn = {1047-1987},
	url = {http://www.jstor.org/stable/25792001},
	abstract = {We look at conventional methods for removing endogeneity bias in regression models, including the linear model and the probit model. It is known that the usual Heckman two-step procedure should not be used in the probit model: from a theoretical perspective, it is unsatisfactory, and likelihood methods are superior. However, serious numerical problems occur when standard software packages try to maximize the biprobit likelihood function, even if the number of covariates is small. We draw conclusions for statistical practice. Finally, we prove the conditions under which parameters in the model are identifiable. The conditions for identification are delicate; we believe these results are new.},
	number = {2},
	journal = {Political Analysis},
	author = {Freedman, David A. and Sekhon, Jasjeet S.},
	month = apr,
	year = {2010},
	annote = {{ArticleType:} research-article / Full publication date: Spring 2010 / Copyright © 2010 Society for Political Methodology},
	pages = {138--150}
},

@article{fiocco_new_2009,
	title = {A new serially correlated gamma-frailty process for longitudinal count data},
	volume = {10},
	url = {http://biostatistics.oxfordjournals.org/content/10/2/245.abstract},
	doi = {10.1093/biostatistics/kxn031},
	abstract = {We describe a new multivariate gamma distribution and discuss its implication in a Poisson-correlated gamma-frailty model. This model is introduced to account for between-subjects correlation occurring in longitudinal count data. For likelihood-based inference involving distributions in which high-dimensional dependencies are present, it may be useful to approximate likelihoods based on the univariate or bivariate marginal distributions. The merit of composite likelihood is to reduce the computational complexity of the full likelihood. A 2-stage composite-likelihood procedure is developed for estimating the model parameters. The suggested method is applied to a meta-analysis study for survival curves.},
	number = {2},
	journal = {Biostatistics},
	author = {Fiocco, M. and Putter, H. and Van Houwelingen, J. C.},
	month = apr,
	year = {2009},
	pages = {245--257},
	file = {Full Text PDF:/home/pauljohn/Documents/Zotero/storage/TJJ8UQ5Q/Fiocco et al. - 2009 - A new serially correlated gamma-frailty process fo.pdf:application/pdf;Snapshot:/home/pauljohn/Documents/Zotero/storage/MTASXIF8/245.html:text/html}
},

@article{behncke_unemployed_2010,
	title = {Unemployed and Their Caseworkers: Should They Be Friends or Foes?},
	volume = {173},
	issn = {0964-1998},
	shorttitle = {Unemployed and Their Caseworkers},
	url = {http://www.jstor.org/stable/20622579},
	abstract = {In many countries, caseworkers in public employment offices have dual roles of counselling and monitoring unemployed people. These roles often conflict, which results in important caseworker heterogeneity: some consider providing services to their clients and satisfying their demands as their primary task. However, others may pursue their own strategies, even against the will of the unemployed person. They may assign jobs and labour market programmes without the consent of the unemployed person. On the basis of a very detailed linked jobseeker—caseworker data set for Switzerland, we investigate the effects of caseworkers' co-operativeness on the probabilities of employment of their clients. Modified statistical matching methods reveal that caseworkers who place less emphasis on a co-operative and harmonic relationship with their clients increase their chances of employment in the short and medium term.},
	number = {1},
	journal = {Journal of the Royal Statistical Society. Series A (Statistics in Society)},
	author = {Behncke, Stefanie and Frölich, Markus and Lechner, Michael},
	month = jan,
	year = {2010},
	pages = {67--92}
},

@article{zelner_using_2009,
	author = {Zelner, Bennet A.},
	title = {Using Simulation to Interpret Results from Logit, Probit, and Other Nonlinear Models},
	journal = {Strategic Management Journal},
	volume = {30},
	number = {12},
	pages = {1335--1348},
	month = dec,
	year = {2009},
	issn = {0143-2095},
	url = {http://www.jstor.org/stable/27735494},
	abstract = {In a recent issue of this journal, Glenn Hoetker proposes that researchers improve the interpretation and presentation of logit and probit results by reporting the marginal effects of key independent variables at theoretically interesting or empirically relevant values of the other independent variables in the model, and also by presenting results graphically {(Hoetker}, 2007: 335, 337). In this research note, I suggest an alternative approach for achieving this objective: reporting differences in predicted probabilities associated with discrete changes in key independent variable values. This intuitive approach to interpretation is especially useful when the theoretically interesting or empirically relevant changes in independent variables values are not very small, and also for models that contain interaction terms (or higher-order terms such as quadratics). Although the graphical presentations recommended by Hoetker implicitly embody this approach, they typically fail to include appropriate measures of statistical significance, and may therefore lead to erroneous conclusions. In order to calculate such measures, I recommend and demonstrate an intuitive simulation-based approach to statistical interpretation, developed by King et al. (2000), that has gained widespread adherence in the field of political science. Throughout the article, I provide a running example based on research that has previously appeared in the Strategic Management Journal.}
},

@article{gelabert_does_2009,
	author = {Gelabert, Liliana and Fosfuri, Andrea and Tribó, Josep A.},
	title = {Does the Effect of Public Support for {R\&D} Depend on the Degree of Appropriability?},
	journal = {The Journal of Industrial Economics},
	volume = {57},
	number = {4},
	pages = {736--767},
	month = dec,
	year = {2009},
	issn = {0022-1821},
	url = {http://www.jstor.org/stable/27750733},
	abstract = {We explore the interaction between public support for {R\&D} and appropriability using a dataset constructed from the Spanish Community Innovation Survey, for the period 2000–2005. We find that public support policy is less able to stimulate privately financed internal {R\&D} in firms where appropriability mechanisms are more effective. On average, the effect of public support for {R\&D} is three times larger for those firms reporting a level of appropriability below the median vis-à-vis those firms for which appropriability is above the median level. Furthermore, for supported firms with the highest degree of appropriability, crowding out cannot be ruled out.}
},

@article{smithson_better_2006,
	title = {A better lemon squeezer? Maximum-likelihood regression with beta-distributed dependent variables},
	volume = {11},
	issn = {1082-989X},
	shorttitle = {A better lemon squeezer?},
	url = {http://www.ncbi.nlm.nih.gov/pubmed/16594767},
	doi = {10.1037/1082-989X.11.1.54},
	abstract = {Uncorrectable skew and heteroscedasticity are among the "lemons" of psychological data, yet many important variables naturally exhibit these properties. For scales with a lower and upper bound, a suitable candidate for models is the beta distribution, which is very flexible and models skew quite well. The authors present maximum-likelihood regression models assuming that the dependent variable is conditionally beta distributed rather than Gaussian. The approach models both means (location) and variances (dispersion) with their own distinct sets of predictors (continuous and/or categorical), thereby modeling heteroscedasticity. The location sub-model link function is the logit and thereby analogous to logistic regression, whereas the dispersion sub-model is log linear. Real examples show that these models handle the independent observations case readily. The article discusses comparisons between beta regression and alternative techniques, model selection and interpretation, practical estimation, and software.},
	number = {1},
	journal = {Psychological Methods},
	author = {Smithson, Michael and Verkuilen, Jay},
	month = mar,
	year = {2006},
	keywords = {Analysis of Variance, Bias {(Epidemiology)}, Child, Data Interpretation, Statistical, Dyslexia, Humans, Least-Squares Analysis, Likelihood Functions, Linear Models, Models, Statistical, Normal Distribution, Regression analysis, Reproducibility of Results},
	pages = {54--71}
},

@article{buzkova_permutation_2011,
	author = {Bůžková, Petra and Lumley, Thomas and Rice, Kenneth},
	title = {Permutation and parametric bootstrap tests for gene-gene and gene-environment interactions},
	journal = {Annals of Human Genetics},
	volume = {75},
	number = {1},
	pages = {36--45},
	month = jan,
	year = {2011},
	issn = {1469-1809},
	doi = {10.1111/j.1469-1809.2010.00572.x},
	url = {http://www.ncbi.nlm.nih.gov/pubmed/20384625},
	keywords = {Animals, Computer Simulation, Environment, Epistasis, Genetic, Genetic Association Studies, Humans, Models, Genetic, Models, Statistical, Polymorphism, Genetic},
	abstract = {Permutation tests are widely used in genomic research as a straightforward way to obtain reliable statistical inference without making strong distributional assumptions. However, in this paper we show that in genetic association studies it is not typically possible to construct exact permutation tests of gene-gene or gene-environment interaction hypotheses. We describe an alternative to the permutation approach in testing for interaction, a parametric bootstrap approach. Using simulations, we compare the finite-sample properties of a few often-used permutation tests and the parametric bootstrap. We consider interactions of an exposure with single and multiple polymorphisms. Finally, we address when permutation tests of interaction will be approximately valid in large samples for specific test statistics.}
},

@article{balch_hierarchic_2000,
	title = {Hierarchic Social Entropy: An Information Theoretic Measure of Robot Group Diversity},
	volume = {8},
	issn = {0929-5593},
	shorttitle = {Hierarchic Social Entropy},
	url = {http://dx.doi.org/10.1023/A:1008973424594},
	doi = {10.1023/A:1008973424594},
	number = {3},
	journal = {Autonomous Robots},
	author = {Balch, Tucker},
	month = jun,
	year = {2000},
	keywords = {behavioral diversity, heterogeneity, multi-robot systems},
	pages = {209--238}
},

@article{preacher_computational_2006,
	author = {Preacher, Kristopher J. and Curran, Patrick J. and Bauer, Daniel J.},
	title = {Computational Tools for Probing Interactions in Multiple Linear Regression, Multilevel Modeling, and Latent Curve Analysis},
	journal = {Journal of Educational and Behavioral Statistics},
	volume = {31},
	number = {4},
	pages = {437--448},
	month = dec,
	year = {2006},
	issn = {1076-9986},
	url = {http://www.jstor.org/stable/4122453},
	abstract = {Simple slopes, regions of significance, and confidence bands are commonly used to evaluate interactions in multiple linear regression {(MLR)} models, and the use of these techniques has recently been extended to multilevel or hierarchical linear modeling {(HLM)} and latent curve analysis {(LCA).} However, conducting these tests and plotting the conditional relations is often a tedious and error-prone task. This article provides an overview of methods used to probe interaction effects and describes a unified collection of freely available online resources that researchers can use to obtain significance tests for simple slopes, compute regions of significance, and obtain confidence bands for simple slopes across the range of the moderator in the {MLR}, {HLM}, and {LCA} contexts. Plotting capabilities are also provided.},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/U4US525I/Preacher et al. - 2006 - Computational Tools for Probing Interactions in Mu.pdf:application/pdf}
},

@book{shannon_mathematical_1949,
	author = {Shannon, Claude Elwood and Weaver, Warren},
	title = {The Mathematical Theory of Communication},
	publisher = {University of Illinois Press},
	address = {Urbana},
	year = {1949},
	keywords = {Mathematical physics, Telecommunication}
},

@book{aiken_multiple_1991,
	author = {Aiken, Leona S. and West, Stephen G.},
	title = {Multiple Regression: Testing and Interpreting Interactions},
	shorttitle = {Multiple Regression},
	publisher = {Sage Publications, Inc},
	month = jan,
	year = {1991},
	isbn = {0761907122}
},

@article{liao_estimated_2000,
	title = {Estimated Precision for Predictions from Generalized Linear Models in Sociological Research},
	volume = {34},
	issn = {0033-5177},
	url = {http://www.springerlink.com/content/p71102h877436h26/abstract/},
	doi = {10.1023/A:1004798429785},
	abstract = {In this paper I present a general method for constructing confidence intervals for predictions from the generalized linear model in sociological research. I demonstrate that the method used for constructing confidence intervals for predictions in classical linear models is indeed a special case of the method for generalized linear models. I examine four such models – the binary logit, the binary probit, the ordinal logit, and the Poisson regression model – to construct confidence intervals for predicted values in the form of probability, odds, Z score, or event count. The estimated confidence interval for an event prediction, when applied judiciously, can give the researcher useful information and an estimated measure of precision for the prediction so that interpretation of estimates from the generalized linear model becomes easier.},
	number = {2},
	journal = {Quality \& Quantity},
	author = {Liao, Tim Futing},
	year = {2000},
	keywords = {Humanities, Social Sciences and Law},
	pages = {137--152},
	file = {SpringerLink Snapshot:/home/pauljohn/Documents/Zotero/storage/68NCKMBV/Liao - 2000 - Estimated Precision for Predictions from Generaliz.html:text/html}
},

@article{lin_comparison_2010,
	author = {Lin, Ting},
	title = {A comparison of multiple imputation with {EM} algorithm and {MCMC} method for quality of life missing data},
	journal = {Quality \& Quantity},
	volume = {44},
	number = {2},
	pages = {277--287},
	year = {2010},
	issn = {0033-5177},
	doi = {10.1007/s11135-008-9196-5},
	url = {http://www.springerlink.com/content/mh5353g766075586/abstract/},
	keywords = {Humanities, Social Sciences and Law},
	abstract = {This study investigated the performance of multiple imputations with Expectation-Maximization {(EM)} algorithm and Monte Carlo Markov chain {(MCMC)} method in missing data imputation. We compared the accuracy of imputation based on some real data and set up two extreme scenarios and conducted both empirical and simulation studies to examine the effects of missing data rates and number of items used for imputation. In the empirical study, the scenario represented item of highest missing rate from a domain with fewest items. In the simulation study, we selected a domain with most items and the item imputed has lowest missing rate. In the empirical study, the results showed there was no significant difference between {EM} algorithm and {MCMC} method for item imputation, and number of items used for imputation has little impact, either. Compared with the actual observed values, the middle responses of 3 and 4 were over-imputed, and the extreme responses of 1, 2 and 5 were under-represented. The similar patterns occurred for domain imputation, and no significant difference between {EM} algorithm and {MCMC} method and number of items used for imputation has little impact. In the simulation study, we chose environmental domain to examine the effect of the following variables: {EM} algorithm and {MCMC} method, missing data rates, and number of items used for imputation. Again, there was no significant difference between {EM} algorithm and {MCMC} method. The accuracy rates did not significantly reduce with increase in the proportions of missing data. Number of items used for imputation has some contribution to accuracy of imputation, but not as much as expected.},
	file = {SpringerLink Snapshot:/home/pauljohn/Documents/Zotero/storage/THZQT2PQ/Lin - 2010 - A comparison of multiple imputation with EM algori.html:text/html}
},

@article{zheng_summarizing_2000,
	title = {Summarizing the predictive power of a generalized linear model},
	volume = {19},
	issn = {0277-6715},
	url = {http://www.ncbi.nlm.nih.gov/pubmed/10861777},
	abstract = {This paper studies summary measures of the predictive power of a generalized linear model, paying special attention to a generalization of the multiple correlation coefficient from ordinary linear regression. The population value is the correlation between the response and its conditional expectation given the predictors, and the sample value is the correlation between the observed response and the model predicted value. We compare four estimators of the measure in terms of bias, mean squared error and behaviour in the presence of overparameterization. The sample estimator and a jack-knife estimator usually behave adequately, but a cross-validation estimator has a large negative bias with large mean squared error. One can use bootstrap methods to construct confidence intervals for the population value of the correlation measure and to estimate the degree to which a model selection procedure may provide an overly optimistic measure of the actual predictive power.},
	number = {13},
	journal = {Statistics in Medicine},
	author = {Zheng, B. and Agresti, A.},
	month = jul,
	year = {2000},
	note = {{PMID:} 10861777},
	keywords = {Bias {(Epidemiology)}, Confidence Intervals, Data Interpretation, Statistical, Humans, Infant, Low Birth Weight, Infant, Newborn, Likelihood Functions, Linear Models, Logistic Models, Predictive Value of Tests},
	pages = {1771--1781}
},

@article{xu_improved_2006,
	title = {Improved confidence regions for a mean vector under general conditions},
	volume = {51},
	issn = {0167-9473},
	url = {http://dx.doi.org/10.1016/j.csda.2005.10.011},
	doi = {10.1016/j.csda.2005.10.011},
	number = {2},
	journal = {Computational Statistics \& Data Analysis},
	author = {Xu, Jin and Gupta, Arjun K.},
	month = nov,
	year = {2006},
	keywords = {{62E20}, Confidence region, Coverage probability, Edgeworth expansion, Normalization transformation, primary {62G15}, secondary {62H10}},
	pages = {1051--1062}
},

@article{sun_confidence_2000,
	title = {Confidence bands in generalized linear models},
	volume = {28},
	issn = {0090-5364},
	url = {http://projecteuclid.org/euclid.aos/1016218225},
	doi = {10.1214/aos/1016218225},
	abstract = {Generalized linear models ({GLM}) include many useful models. This paper studies simultaneous confidence regions for the mean response function in these models. The coverage probabilities of these regions are related to tail probabilities of maxima of Gaussian random fields, asymptotically, and hence, the so-called tube formula is applicable without any modification. However, in the generalized linear models, the errors are often nonadditive and non-Gaussian and may be discrete. This poses a challenge to the accuracy of the approximation by the tube formula in the moderate sample situation. Here two alternative approaches are considered. These approaches are based on an Edgeworth expansion for the distribution of a maximum likelihood estimator and a version of Skorohod’s representation theorem, which are used to convert an error term (which is of order $n^{-1/2}$ in one-sided confidence regions and of $n^{-1}$ in two-sided confidence regions) from the Edgeworth expansion to a “bias” term. The bias is then estimated and corrected in two ways to adjust the approximation formula. Examples and simulations show that our methods are viable and complementary to existing methods. An application to insect data is provided. Code for implementing our procedures is available via the software parfit.},
	number = {2},
	journal = {The Annals of Statistics},
	author = {Sun, Jiayang and Loader, Catherine and McCormick, William P.},
	month = apr,
	year = {2000},
	pages = {429--460}
},

% NOTE(review): year taken from the DOI and PII, which both embed 2012; volume, issue and pages are still missing -- confirm against the publisher record.
@article{withers_improved_????,
	title = {Improved confidence regions based on Edgeworth expansions},
	issn = {0167-9473},
	url = {http://www.sciencedirect.com/science/article/pii/S0167947312001454},
	doi = {10.1016/j.csda.2012.03.019},
	abstract = {Let $\hat{w}$ be a consistent estimate of $w$ in $R^p$ satisfying the standard cumulant expansion in powers of $n^{-1}$ with asymptotic covariance $n^{-1} V$. Then $n^{1/2} (\hat{w} - w)$ has the standard Edgeworth expansion about $N_p(0, V)$. We obtain from this the Edgeworth expansions for $T_n(V) = n (\hat{w} - w)' V^{-1} (\hat{w} - w)$ about $\chi_p^2$ and for its Studentized version, $T_n(\hat{V})$. So, we obtain a confidence region for $w$ of level $\alpha + O(n^{-2})$.},
	journal = {Computational Statistics \& Data Analysis},
	author = {Withers, Christopher S. and Nadarajah, Saralees},
	year = {2012},
	keywords = {Cumulants, Edgeworth expansions, Ellipsoidal confidence regions},
	file = {ScienceDirect Full Text PDF:/home/pauljohn/Documents/Zotero/storage/B9MUEMX9/Withers and Nadarajah - Improved confidence regions based on Edgeworth exp.pdf:application/pdf;ScienceDirect Snapshot:/home/pauljohn/Documents/Zotero/storage/GS5ZMQGA/S0167947312001454.html:text/html}
},

@article{ni_automatic_2009,
	title = {Automatic Model Selection for Partially Linear Models},
	volume = {100},
	issn = {0047-259X},
	doi = {10.1016/j.jmva.2009.06.009},
	abstract = {We propose and study a unified procedure for variable selection in partially linear models. A new type of double-penalized least squares is formulated, using the smoothing spline to estimate the nonparametric part and applying a shrinkage penalty on parametric components to achieve model parsimony. Theoretically we show that, with proper choices of the smoothing and regularization parameters, the proposed procedure can be as efficient as the oracle estimator (). We also study the asymptotic properties of the estimator when the number of parametric effects diverges with the sample size. Frequentist and Bayesian estimates of the covariance and confidence intervals are derived for the estimators. One great advantage of this procedure is its linear mixed model {(LMM)} representation, which greatly facilitates its implementation by using standard statistical software. Furthermore, the {LMM} framework enables one to treat the smoothing parameter as a variance component and hence conveniently estimate it together with other regression coefficients. Extensive numerical studies are conducted to demonstrate the effective performance of the proposed procedure.},
	number = {9},
	journal = {Journal of Multivariate Analysis},
	author = {Ni, Xiao and Zhang, Hao Helen and Zhang, Daowen},
	month = oct,
	year = {2009},
	pages = {2100--2111},
	file = {PubMed Central Full Text PDF:/home/pauljohn/Documents/Zotero/storage/R6ZCI3GB/Ni et al. - 2009 - Automatic Model Selection for Partially Linear Mod.pdf:application/pdf}
},

@article{ma_variable_2010,
	title = {Variable Selection in Measurement Error Models},
	volume = {16},
	issn = {1350-7265},
	abstract = {Measurement error data or errors-in-variable data are often collected in many studies. Natural criterion functions are often unavailable for general functional measurement error models due to the lack of information on the distribution of the unobservable covariates. Typically, the parameter estimation is via solving estimating equations. In addition, the construction of such estimating equations routinely requires solving integral equations, hence the computation is often much more intensive compared with ordinary regression models. Because of these difficulties, traditional best subset variable selection procedures are not applicable, and in the measurement error model context, variable selection remains an unsolved issue. In this paper, we develop a framework for variable selection in measurement error models via penalized estimating equations. We first propose a class of selection procedures for general parametric measurement error models and for general semiparametric measurement error models, and study the asymptotic properties of the proposed procedures. Then, under certain regularity conditions and with a properly chosen regularization parameter, we demonstrate that the proposed procedure performs as well as an oracle procedure. We assess the finite sample performance via Monte Carlo simulation studies and illustrate the proposed methodology through the empirical analysis of a familiar data set.},
	number = {1},
	journal = {Bernoulli},
	author = {Ma, Yanyuan and Li, Runze},
	year = {2010},
	pages = {274--300},
	file = {PubMed Central Full Text PDF:/home/pauljohn/Documents/Zotero/storage/XDSJSDT2/Ma and Li - 2010 - Variable Selection in Measurement Error Models.pdf:application/pdf}
},

@article{garcia_variable_2010,
	author   = {Garcia, Ramon I. and Ibrahim, Joseph G. and Zhu, Hongtu},
	title    = {Variable Selection for Regression Models with Missing Data},
	journal  = {Statistica Sinica},
	year     = {2010},
	month    = jan,
	volume   = {20},
	number   = {1},
	pages    = {149--165},
	issn     = {1017-0405},
	abstract = {We consider the variable selection problem for a class of statistical models with missing data, including missing covariate and/or response data. We investigate the smoothly clipped absolute deviation penalty {(SCAD)} and adaptive {LASSO} and propose a unified model selection and estimation procedure for use in the presence of missing data. We develop a computationally attractive algorithm for simultaneously optimizing the penalized likelihood function and estimating the penalty parameters. Particularly, we propose to use a model selection criterion, called the {ICQ} statistic, for selecting the penalty parameters. We show that the variable selection procedure based on {ICQ} automatically and consistently selects the important covariates and leads to efficient estimates with oracle properties. The methodology is very general and can be applied to numerous situations involving missing data, from covariates missing at random in arbitrary regression models to nonignorably missing longitudinal responses and/or covariates. Simulations are given to demonstrate the methodology and examine the finite sample performance of the variable selection procedures. Melanoma data from a cancer clinical trial is presented to illustrate the proposed methodology.},
	file     = {PubMed Central Full Text PDF:/home/pauljohn/Documents/Zotero/storage/HH2BBF3T/Garcia et al. - 2010 - VARIABLE SELECTION FOR REGRESSION MODELS WITH MISS.pdf:application/pdf}
},

@article{gibbons_mixed-effects_2008,
	author   = {Gibbons, Robert D. and Segawa, Eisuke and Karabatsos, George and Amatya, Anup K. and Bhaumik, Dulal K. and Brown, C. Hendricks and Kapur, Kush and Marcus, Sue M. and Hur, Kwan and Mann, J. John},
	title    = {Mixed-effects {Poisson} regression analysis of adverse event reports},
	journal  = {Statistics in Medicine},
	year     = {2008},
	month    = may,
	volume   = {27},
	number   = {11},
	pages    = {1814--1833},
	issn     = {0277-6715},
	doi      = {10.1002/sim.3241},
	abstract = {A new statistical methodology is developed for the analysis of spontaneous adverse event {(AE)} reports from post-marketing drug surveillance data. The method involves both empirical Bayes {(EB)} and fully Bayes estimation of rate multipliers for each drug within a class of drugs, for a particular {AE}, based on a mixed-effects Poisson regression model. Both parametric and semiparametric models for the random-effect distribution are examined. The method is applied to data from Food and Drug Administration {(FDA)’s} Adverse Event Reporting System {(AERS)} on the relationship between antidepressants and suicide. We obtain point estimates and 95 per cent confidence (posterior) intervals for the rate multiplier for each drug (e.g. antidepressants), which can be used to determine whether a particular drug has an increased risk of association with a particular {AE} (e.g. suicide). Confidence (posterior) intervals that do not include 1.0 provide evidence for either significant protective or harmful associations of the drug and the adverse effect. We also examine {EB}, parametric Bayes, and semiparametric Bayes estimators of the rate multipliers and associated confidence (posterior) intervals. Results of our analysis of the {FDA} {AERS} data revealed that newer antidepressants are associated with lower rates of suicide adverse event reports compared with older antidepressants. We recommend improvements to the existing {AERS} system, which are likely to improve its public health value as an early warning system.},
	file     = {PubMed Central Full Text PDF:/home/pauljohn/Documents/Zotero/storage/2RPEXKQ9/Gibbons et al. - 2008 - Mixed-effects Poisson regression analysis of adver.pdf:application/pdf}
},

@article{gibbons_random_1997,
	author   = {Gibbons, Robert D. and Hedeker, Donald},
	title    = {Random Effects Probit and Logistic Regression Models for Three-Level Data},
	journal  = {Biometrics},
	year     = {1997},
	month    = dec,
	volume   = {53},
	number   = {4},
	pages    = {1527--1537},
	issn     = {0006-341X},
	doi      = {10.2307/2533520},
	url      = {http://www.jstor.org/stable/2533520},
	abstract = {In analysis of binary data from clustered and longitudinal studies, random effect models have been recently developed to accommodate two-level problems such as subjects nested within clusters or repeated classifications within subjects. Unfortunately, these models cannot be applied to three-level problems that occur frequently in practice. For example, multicenter longitudinal clinical trials involve repeated assessments within individuals and individuals are nested within study centers. This combination of clustered and longitudinal data represents the classic three-level problem in biometry. Similarly, in prevention studies, various educational programs designed to minimize risk taking behavior (e.g., smoking prevention and cessation) may be compared where randomization to various design conditions is at the level of the school and the intervention is performed at the level of the classroom. Previous statistical approaches to the three-level problem for binary response data have either ignored one level of nesting, treated it as a fixed effect, or used first- and second-order Taylor series expansions of the logarithm of the conditional likelihood to linearize these models and estimate model parameters using more conventional procedures for measurement data. Recent studies indicate that these approximate solutions exhibit considerable bias and provide little advantage over use of traditional logistic regression analysis ignoring the hierarchical structure. In this paper, we generalize earlier results for two-level random effects probit and logistic regression models to the three-level case. Parameter estimation is based on full-information maximum marginal likelihood estimation {(MMLE)} using numerical quadrature to approximate the multiple random effects. The model is illustrated using data from 135 classrooms from 28 schools on the effects of two smoking cessation interventions.},
	file     = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/UAHXIUJD/Gibbons and Hedeker - 1997 - Random Effects Probit and Logistic Regression Mode.pdf:application/pdf}
},

@article{rossi_nonparametric_2002,
	author   = {Rossi, Natasha and Wang, Xiaohui and Ramsay, James O.},
	title    = {Nonparametric Item Response Function Estimates with the {EM} Algorithm},
	journal  = {Journal of Educational and Behavioral Statistics},
	year     = {2002},
	month    = oct,
	volume   = {27},
	number   = {3},
	pages    = {291--317},
	issn     = {1076-9986},
	url      = {http://www.jstor.org/stable/3648139},
	abstract = {The methods of functional data analysis are used to estimate item response functions {(IRFs)} nonparametrically. The {EM} algorithm is used to maximize the penalized marginal likelihood of the data. The penalty controls the smoothness of the estimated {IRFs}, and is chosen so that, as the penalty is increased, the estimates converge to shapes closely represented by the three-parameter logistic family. The one-dimensional latent trait model is recast as a problem of estimating a space curve or manifold, and, expressed in this way, the model no longer involves any latent constructs, and is invariant with respect to choice of latent variable. Some results from differential geometry are used to develop a data-anchored measure of ability and a new technique for assessing item discriminability. Functional data-analytic techniques are used to explore the functional variation in the estimated {IRFs.} Applications involving simulated and actual data are included.},
	file     = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/6IUCZZXG/Rossi et al. - 2002 - Nonparametric Item Response Function Estimates wit.pdf:application/pdf}
},

@article{patz_straightforward_1999,
	author   = {Patz, Richard J. and Junker, Brian W.},
	title    = {A Straightforward Approach to {Markov} Chain {Monte Carlo} Methods for Item Response Models},
	journal  = {Journal of Educational and Behavioral Statistics},
	year     = {1999},
	month    = jul,
	volume   = {24},
	number   = {2},
	pages    = {146--178},
	issn     = {1076-9986},
	doi      = {10.2307/1165199},
	url      = {http://www.jstor.org/stable/1165199},
	abstract = {This paper demonstrates Markov chain Monte Carlo {(MCMC)} techniques that are particularly well-suited to complex models with item response theory {(IRT)} assumptions. {MCMC} may be thought of as a successor to the standard practice of first calibrating the items using E-M methods and then taking the item parameters to be known and fixed at their calibrated values when proceeding with inference regarding the latent trait. In contrast to this two-stage E-M approach, {MCMC} methods treat item and subject parameters at the same time; this allows us to incorporate standard errors of item estimates into trait inferences, and vice versa. We develop a {MCMC} methodology, based on Metropolis-Hastings sampling, that can be routinely implemented to fit novel {IRT} models, and we compare the algorithmic features of the Metropolis-Hastings approach to other approaches based on Gibbs sampling. For concreteness we illustrate the methodology using the familiar two-parameter logistic {(2PL)} {IRT} model; more complex models are treated in a subsequent paper {(Patz} \& Junker, in press).},
	file     = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/2G68936D/Patz and Junker - 1999 - A Straightforward Approach to Markov Chain Monte C.pdf:application/pdf}
},

@article{aitkin_general_1999,
	author   = {Aitkin, Murray},
	title    = {A General Maximum Likelihood Analysis of Variance Components in Generalized Linear Models},
	journal  = {Biometrics},
	year     = {1999},
	month    = mar,
	volume   = {55},
	number   = {1},
	pages    = {117--128},
	issn     = {0006-341X},
	url      = {http://www.jstor.org/stable/2533902},
	abstract = {This paper describes an {EM} algorithm for nonparametric maximum likelihood {(ML)} estimation in generalized linear models with variance component structure. The algorithm provides an alternative analysis to approximate {MQL} and {PQL} analyses {(McGilchrist} and Aisbett, 1991, Biometrical Journal 33, 131-141; Breslow and Clayton, 1993; Journal of the American Statistical Association 88, 9-25; {McGilchrist}, 1994, Journal of the Royal Statistical Society, Series B 56, 61-69; Goldstein, 1995, Multilevel Statistical Models) and to {GEE} analyses {(Liang} and Zeger, 1986, Biometrika 73, 13-22). The algorithm, first given by Hinde and Wood (1987, in Longitudinal Data Analysis, 110-126), is a generalization of that for random effect models for overdispersion in generalized linear models, described in Aitkin (1996, Statistics and Computing 6, 251-262). The algorithm is initially derived as a form of Gaussian quadrature assuming a normal mixing distribution, but with only slight variation it can be used for a completely unknown mixing distribution, giving a straightforward method for the fully nonparametric {ML} estimation of this distribution. This is of value because the {ML} estimates of the {GLM} parameters can be sensitive to the specification of a parametric form for the mixing distribution. The nonparametric analysis can be extended straightforwardly to general random parameter models, with full {NPML} estimation of the joint distribution of the random parameters. This can produce substantial computational saving compared with full numerical integration over a specified parametric distribution for the random parameters. A simple method is described for obtaining correct standard errors for parameter estimates when using the {EM} algorithm. Several examples are discussed involving simple variance component and longitudinal models, and small-area estimation.},
	file     = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/RI2T7V6M/Aitkin - 1999 - A General Maximum Likelihood Analysis of Variance .pdf:application/pdf}
},

@article{harwell_item_1988,
	author     = {Harwell, Michael R. and Baker, Frank B. and Zwarts, Michael},
	title      = {Item Parameter Estimation Via Marginal Maximum Likelihood and an {EM} Algorithm: A Didactic},
	shorttitle = {Item Parameter Estimation Via Marginal Maximum Likelihood and an {EM} Algorithm},
	journal    = {Journal of Educational Statistics},
	year       = {1988},
	month      = oct,
	volume     = {13},
	number     = {3},
	pages      = {243--271},
	issn       = {0362-9791},
	doi        = {10.2307/1164654},
	url        = {http://www.jstor.org/stable/1164654},
	abstract   = {The Bock and Aitkin (1981) Marginal Maximum {Likelihood/EM} approach to item parameter estimation is an alternative to the classical joint maximum likelihood procedure of item response theory. Unfortunately, the complexity of the underlying mathematics and the terse nature of the existing literature has made understanding of the approach difficult. To make the approach accessible to a wider audience, the present didactic paper provides the essential mathematical details of a marginal maximum {likelihood/EM} solution and shows how it can be used to obtain consistent item parameter estimates. For pedagogical purposes, a short {BASIC} computer program is used to illustrate the underlying simplicity of the method.},
	file       = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/C4HBTT43/Harwell et al. - 1988 - Item Parameter Estimation Via Marginal Maximum Lik.pdf:application/pdf}
},

@article{rigdon_estimation_1987,
	author   = {Rigdon, Steven E. and Tsutakawa, Robert K.},
	title    = {Estimation for the {Rasch} Model When Both Ability and Difficulty Parameters Are Random},
	journal  = {Journal of Educational Statistics},
	year     = {1987},
	month    = apr,
	volume   = {12},
	number   = {1},
	pages    = {76--86},
	issn     = {0362-9791},
	doi      = {10.2307/1164629},
	url      = {http://www.jstor.org/stable/1164629},
	abstract = {Estimation of the parameters of the Rasch model, a one-parameter item response model, is considered when both the item parameters and the ability parameters are considered random quantities. It is assumed that the item parameters are drawn from a $N(\gamma, \tau^2)$ distribution, and the abilities are drawn from a $N(0, \sigma^2)$ distribution. A variation of the {EM} algorithm is used to find approximate maximum likelihood estimates of $\gamma$, $\tau$, and $\sigma$. A second approach assumes that the difficulty parameters are drawn from a uniform distribution over part of the real line. Real and simulated data sets are discussed for illustration.},
	file     = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/HU52Z322/Rigdon and Tsutakawa - 1987 - Estimation for the Rasch Model When Both Ability a.pdf:application/pdf}
},

@article{kamata_item_2001,
	author   = {Kamata, Akihito},
	title    = {Item Analysis by the Hierarchical Generalized Linear Model},
	journal  = {Journal of Educational Measurement},
	year     = {2001},
	month    = apr,
	volume   = {38},
	number   = {1},
	pages    = {79--93},
	issn     = {0022-0655},
	url      = {http://www.jstor.org/stable/1435439},
	abstract = {The hierarchical generalized linear model {(HGLM)} is presented as an explicit, two-level formulation of a multilevel item response model. In this paper, it is shown that the {HGLM} is equivalent to the Rasch model and that, characteristic of the {HGLM}, person ability can be expressed in the form of random effects rather than parameters. The two-level item analysis model is presented as a latent regression model with person-characteristic variables. Furthermore, it is shown that the two-level {HGLM} model can be extended to a three-level latent regression model that permits investigation of the variation of students' performance across groups, such as is found in classrooms and schools, and of the interactive effect of person-and group-characteristic variables.},
	file     = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/UZWCX4QT/Kamata - 2001 - Item Analysis by the Hierarchical Generalized Line.pdf:application/pdf}
},

@article{liu_mixed-effects_2006,
	author   = {Liu, Li C. and Hedeker, Donald},
	title    = {A Mixed-Effects Regression Model for Longitudinal Multivariate Ordinal Data},
	journal  = {Biometrics},
	year     = {2006},
	month    = mar,
	volume   = {62},
	number   = {1},
	pages    = {261--268},
	issn     = {0006-341X},
	url      = {http://www.jstor.org/stable/3695729},
	abstract = {A mixed-effects item response theory model that allows for three-level multivariate ordinal outcomes and accommodates multiple random subject effects is proposed for analysis of multivariate ordinal outcomes in longitudinal studies. This model allows for the estimation of different item factor loadings (item discrimination parameters) for the multiple outcomes. The covariates in the model do not have to follow the proportional odds assumption and can be at any level. Assuming either a probit or logistic response function, maximum marginal likelihood estimation is proposed utilizing multidimensional Gauss-Hermite quadrature for integration of the random effects. An iterative Fisher scoring solution, which provides standard errors for all model parameters, is used. An analysis of a longitudinal substance use data set, where four items of substance use behavior (cigarette use, alcohol use, marijuana use, and getting drunk or high) are repeatedly measured over time, is used to illustrate application of the proposed model.},
	file     = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/MV7BRTTN/Liu and Hedeker - 2006 - A Mixed-Effects Regression Model for Longitudinal .pdf:application/pdf}
},

@article{hedeker_random-effects_1994,
	author   = {Hedeker, Donald and Gibbons, Robert D.},
	title    = {A Random-Effects Ordinal Regression Model for Multilevel Analysis},
	journal  = {Biometrics},
	year     = {1994},
	month    = dec,
	volume   = {50},
	number   = {4},
	pages    = {933--944},
	issn     = {0006-341X},
	doi      = {10.2307/2533433},
	url      = {http://www.jstor.org/stable/2533433},
	abstract = {A random-effects ordinal regression model is proposed for analysis of clustered or longitudinal ordinal response data. This model is developed for both the probit and logistic response functions. The threshold concept is used, in which it is assumed that the observed ordered category is determined by the value of a latent unobservable continuous response that follows a linear regression model incorporating random effects. A maximum marginal likelihood {(MML)} solution is described using Gauss-Hermite quadrature to numerically integrate over the distribution of random effects. An analysis of a dataset where students are clustered or nested within classrooms is used to illustrate features of random-effects analysis of clustered ordinal data, while an analysis of a longitudinal dataset where psychiatric patients are repeatedly rated as to their severity is used to illustrate features of the random-effects approach for longitudinal ordinal data.},
	file     = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/IT7S3TAC/Hedeker and Gibbons - 1994 - A Random-Effects Ordinal Regression Model for Mult.pdf:application/pdf}
},

@article{im_mixed_1988,
	author   = {Im, S. and Gianola, D.},
	title    = {Mixed Models for Binomial Data with an Application to Lamb Mortality},
	journal  = {Journal of the Royal Statistical Society. Series C (Applied Statistics)},
	year     = {1988},
	month    = jan,
	volume   = {37},
	number   = {2},
	pages    = {196--204},
	issn     = {0035-9254},
	doi      = {10.2307/2347339},
	url      = {http://www.jstor.org/stable/2347339},
	abstract = {The simplex method, a derivative-free function maximisation algorithm, is used as an alternative to the {EM} algorithm in computing maximum likelihood estimates in mixed probit and logit models with binomial data. The models are used to estimate heritability and to predict sire effects when analysing a lamb mortality data set.},
	file     = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/K4RWRRBN/Im and Gianola - 1988 - Mixed Models for Binomial Data with an Application.pdf:application/pdf}
},

% month comes from the exporter's "Full publication date: Sep., 1994" note; the JSTOR boilerplate note itself was dropped so it no longer prints in the bibliography.
@article{gibbons_random-effects_1994,
	author   = {Gibbons, Robert D. and Hedeker, Donald and Charles, Sara C. and Frisch, Paul},
	title    = {A Random-Effects Probit Model for Predicting Medical Malpractice Claims},
	journal  = {Journal of the American Statistical Association},
	year     = {1994},
	month    = sep,
	volume   = {89},
	number   = {427},
	pages    = {760--767},
	issn     = {0162-1459},
	doi      = {10.2307/2290901},
	url      = {http://www.jstor.org/stable/2290901},
	abstract = {We use Oregon state data (1981-1990) on medical malpractice claims to develop a random-effects probit model for vulnerability to a medical malpractice claim in practice year k(k = 1, 2, ..., $n_i$) for physician i(i = 1, 2, ..., N physicians in the sample) conditional on an $n_i \times p$ covariate matrix $W_i$ that contains a mixture of p time-varying and time-variant covariates. In this application, time-invariant covariates were physician sex and specialty (surgical versus nonsurgical). Time-varying covariates were age, the cumulative amount of risk management education (i.e., number of courses) taken by physician i to year k, and prior claim history. In addition, the model incorporates a random effect of "claim vulnerability" assumed to be normally distributed in the population of physicians. This random effect represents unobservable and/or unmeasured characteristics that place one physician at greater risk for experiencing a medical malpractice claim than another physician. In addition, we also determine if the effects of risk management training on claim vulnerability differ before and after the physician's first malpractice claim. Results of the analysis reveal that (1) there is a sizable random physician effect; (2) risk increases between age 40 to 60; (3) physicians in a surgical specialty are at increased risk; (4) male physicians are at greater risk than female physicians; (5) risk increases following an initial claim, particularly in the year subsequent to the initial claim, and (6) some beneficial effects of risk management education are observed in physicians with a prior claim history, particularly those in anesthesiology and obstetrics and gynecology.},
	file     = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/WZQ82FRX/Gibbons et al. - 1994 - A Random-Effects Probit Model for Predicting Medic.pdf:application/pdf}
},

@article{gibbons_applications_2000,
	author   = {Gibbons, Robert D. and Hedeker, Donald},
	title    = {Applications of Mixed-Effects Models in Biostatistics},
	journal  = {Sankhy{\=a}: The Indian Journal of Statistics, Series B},
	year     = {2000},
	month    = apr,
	volume   = {62},
	number   = {1},
	pages    = {70--103},
	issn     = {0581-5738},
	url      = {http://www.jstor.org/stable/25053120},
	abstract = {We present recent developments in mixed-effects models relevant to application in biostatistics. The major focus is on application of mixed-effects models to analysis of longitudinal data in general and longitudinal controlled clinical trials in detail. We present application of mixed-effects models to the case of unbalanced longitudinal data with complex residual error structures for continuous, binary and ordinal outcome measures for data with two and three levels of nesting (e.g., a multi-center longitudinal clinical trial). We also examine other applications of mixed-effects models in the biological and behavioral sciences, such as analysis of clustered data, and simultaneous assessment of multiple biologic endpoints (e.g., multivariate probit analysis). We describe the general statistical theory and then present relevant examples of these models to problems in the biological sciences.},
	file     = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/WVHF4QR2/Gibbons and Hedeker - 2000 - Applications of Mixed-Effects Models in Biostatist.pdf:application/pdf}
},

@article{goldstein_improved_1996,
	author   = {Goldstein, Harvey and Rasbash, Jon},
	title    = {Improved Approximations for Multilevel Models with Binary Responses},
	journal  = {Journal of the Royal Statistical Society. Series A (Statistics in Society)},
	year     = {1996},
	month    = jan,
	volume   = {159},
	number   = {3},
	pages    = {505--513},
	issn     = {0964-1998},
	doi      = {10.2307/2983328},
	url      = {http://www.jstor.org/stable/2983328},
	abstract = {This paper discusses the use of improved approximations for the estimation of generalized linear multilevel models where the response is a proportion. Simulation studies by Rodriguez and Goldman have shown that in extreme situations large biases can occur, most notably when the response is binary, the number of level 1 units per level 2 unit is small and the underlying random parameter values are large. An improved approximation is introduced which largely eliminates the biases in the situation described by Rodriguez and Goldman.},
	file     = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/JGN94I3I/Goldstein and Jon Rasbash - 1996 - Improved Approximations for Multilevel Models with.pdf:application/pdf}
},

% JSTOR boilerplate note dropped; it gave the publication date as just "2000", so the placeholder month was removed too.
@article{agresti_random-effects_2000,
	author   = {Agresti, Alan and Booth, James G. and Hobert, James P. and Caffo, Brian},
	title    = {Random-Effects Modeling of Categorical Response Data},
	journal  = {Sociological Methodology},
	year     = {2000},
	volume   = {30},
	pages    = {27--80},
	issn     = {0081-1750},
	url      = {http://www.jstor.org/stable/271130},
	abstract = {In many applications observations have some type of clustering, with observations within clusters tending to be correlated. A common instance of this occurs when each subject in the sample undergoes repeated measurement, in which case a cluster consists of the set of observations for the subject. One approach to modeling clustered data introduces cluster-level random effects into the model. The use of random effects in linear models for normal responses is well established. By contrast, random effects have only recently seen much use in models for categorical data. This chapter surveys a variety of potential social science applications of random effects modeling of categorical data. Applications discussed include repeated measurement for binary or ordinal responses, shrinkage to improve multiparameter estimation of a set of proportions or rates, multivariate latent variable modeling, hierarchically structured modeling, and cluster sampling. The models discussed belong to the class of generalized linear mixed models {(GLMMs)}, an extension of ordinary linear models that permits non-normal response variables and both fixed and random effects in the predictor term. The models are {GLMMs} for either binomial or Poisson response variables, although we also present extensions to multicategory (nominal or ordinal) responses. We also summarize some of the technical issues of model-fitting that complicate the fitting of {GLMMs} even with existing software.},
	file     = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/MZZPQN4X/Agresti et al. - 2000 - Random-Effects Modeling of Categorical Response Da.pdf:application/pdf}
},

% Author list corrected: "Jose Ramon G. Albert" is one person, previously split into two names. Citation key left unchanged so existing citations still resolve.
@article{ramon_applications_1995,
	author   = {Albert, Jose Ramon G. and Baxter, Laurence A.},
	title    = {Applications of the {EM} Algorithm to the Analysis of Life Length Data},
	journal  = {Journal of the Royal Statistical Society. Series C (Applied Statistics)},
	year     = {1995},
	month    = jan,
	volume   = {44},
	number   = {3},
	pages    = {323--341},
	issn     = {0035-9254},
	doi      = {10.2307/2986040},
	url      = {http://www.jstor.org/stable/2986040},
	abstract = {The parameters of the life length distribution of a given component are to be estimated. The observations on which inference is to be based are field data which are incomplete in some fashion. Thus, for example, the reported life length may include a period of unknown duration during which the component is not in use, the life length distribution may be affected by an unobserved environmental factor or the component may be part of a larger system, and failure mode analysis reveals only the module containing the failed component, not its identity. It is shown how the {EM} algorithm can be used to calculate the maximum likelihood estimates of the parameters of interest in these instances. The methodology is applied to some data on the life lengths of electronic components used in the telecommunications industry, yielding values that are similar to those obtained from complete observations on comparable components.},
	file     = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/RAQX97X5/Ramon et al. - 1995 - Applications of the EM Algorithm to the Analysis o.pdf:application/pdf}
},

@article{schoenberg_latent_1985,
	author  = {Schoenberg, Ronald},
	title   = {Latent Variables in the Analysis of Limited Dependent Variables},
	journal = {Sociological Methodology},
	year    = {1985},
	month   = jan,
	volume  = {15},
	pages   = {212--241},
	issn    = {0081-1750},
	doi     = {10.2307/270851},
	url     = {http://www.jstor.org/stable/270851},
	file    = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/2GU3RTCF/Schoenberg - 1985 - Latent Variables in the Analysis of Limited Depend.pdf:application/pdf}
},

@article{raudenbush_multilevel_1991,
	author   = {Raudenbush, Stephen W. and Rowan, Brian and Kang, Sang Jin},
	title    = {A Multilevel, Multivariate Model for Studying School Climate with Estimation Via the {EM} Algorithm and Application to {U. S.} High-School Data},
	journal  = {Journal of Educational Statistics},
	year     = {1991},
	month    = dec,
	volume   = {16},
	number   = {4},
	pages    = {295--330},
	issn     = {0362-9791},
	doi      = {10.2307/1165105},
	url      = {http://www.jstor.org/stable/1165105},
	abstract = {In many studies of school climate, researchers ask teachers a series of questions, and the responses to related questions are averaged or summed to create a scale score for each teacher on each dimension of climate under investigation. Researchers have disagreed, however, about the analysis of such data: Some have utilized the teacher as the analytic unit, and some have utilized the school as the unit. In this article, we propose a three-level, multivariate statistical modeling strategy that resolves the unit-of-analysis dilemma and unifies thinking about the analysis in such studies. A reanalysis of U. S. high-school data illustrates how to estimate and interpret: (a) the level of interteacher agreement on each climate dimension; (b) the internal consistency of measurement at the teacher and school levels; and (c) the correlations among "true" climate scores at each level. A linear model analysis utilized teacher control over school and classroom policy and teacher morale as bivariate latent outcomes to be predicted by school-level variables (e. g., sector, size, composition) and by teacher-level variables (e. g., education, race, sex, subject matter). Implications for conceptualization, design, analysis, and interpretation in future studies of school climate are considered.},
	file     = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/SWVXUKNC/Raudenbush et al. - 1991 - A Multilevel, Multivariate Model for Studying Scho.pdf:application/pdf}
},

@article{vermunt_multilevel_2003,
	author   = {Vermunt, Jeroen K.},
	title    = {Multilevel Latent Class Models},
	journal  = {Sociological Methodology},
	year     = {2003},
	month    = jan,
	volume   = {33},
	pages    = {213--239},
	issn     = {0081-1750},
	url      = {http://www.jstor.org/stable/1519857},
	abstract = {The latent class {(LC)} models that have been developed so far assume that observations are independent. Parametric and non-parametric random-coefficient {LC} models are proposed here, which will make it possible to modify this assumption. For example, the models can be used for the analysis of data collected with complex sampling designs, data with a multilevel structure, and multiple-group data for more than a few groups. An adapted {EM} algorithm is presented that makes maximum-likelihood estimation feasible. The new model is illustrated with examples from organizational, educational, and cross-national comparative research.},
	file     = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/I9ITCTNZ/Vermunt - 2003 - Multilevel Latent Class Models.pdf:application/pdf}
},

@article{skrondal_prediction_2009,
	title = {Prediction in Multilevel Generalized Linear Models},
	volume = {172},
	issn = {0964-1998},
	url = {http://www.jstor.org/stable/20622529},
	abstract = {We discuss prediction of random effects and of expected responses in multilevel generalized linear models. Prediction of random effects is useful for instance in small area estimation and disease mapping, effectiveness studies and model diagnostics. Prediction of expected responses is useful for planning, model interpretation and diagnostics. For prediction of random effects, we concentrate on empirical Bayes prediction and discuss three different kinds of standard errors; the posterior standard deviation and the marginal prediction error standard deviation (comparative standard errors) and the marginal sampling standard deviation (diagnostic standard error). Analytical expressions are available only for linear models and are provided in an appendix. For other multilevel generalized linear models we present approximations and suggest using parametric bootstrapping to obtain standard errors. We also discuss prediction of expectations of responses or probabilities for a new unit in a hypothetical cluster, or in a new (randomly sampled) cluster or in an existing cluster. The methods are implemented in gllamm and illustrated by applying them to survey data on reading proficiency of children nested in schools. Simulations are used to assess the performance of various predictions and associated standard errors for logistic random-intercept models under a range of conditions.},
	number = {3},
	journal = {Journal of the Royal Statistical Society. Series A (Statistics in Society)},
	author = {Skrondal, Anders and Rabe-Hesketh, Sophia},
	month = jun,
	year = {2009},
	pages = {659--687}
},

@article{lee_maximum_2001,
	title = {Maximum Likelihood Estimation of Two-Level Latent Variable Models with Mixed Continuous and Polytomous Data},
	volume = {57},
	issn = {0006-341X},
	url = {http://www.jstor.org/stable/3068417},
	abstract = {Two-level data with hierarchical structure and mixed continuous and polytomous data are very common in biomedical research. In this article, we propose a maximum likelihood approach for analyzing a latent variable model with these data. The maximum likelihood estimates are obtained by a Monte Carlo {EM} algorithm that involves the Gibbs sampler for approximating the E-step and the M-step and the bridge sampling for monitoring the convergence. The approach is illustrated by a two-level data set concerning the development and preliminary findings from an {AIDS} preventative intervention for Filipina commercial sex workers where the relationship between some latent quantities is investigated.},
	number = {3},
	journal = {Biometrics},
	author = {Lee, Sik-Yum and Shi, Jian-Qing},
	year = {2001},
	pages = {787--794},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/UNF7HWX9/Lee and Shi - 2001 - Maximum Likelihood Estimation of Two-Level Latent .pdf:application/pdf}
},

@article{meng_em_1997,
	title = {The {EM} Algorithm---An Old Folk-Song Sung to a Fast New Tune},
	volume = {59},
	issn = {0035-9246},
	url = {http://www.jstor.org/stable/2346009},
	abstract = {Celebrating the 20th anniversary of the presentation of the paper by Dempster, Laird and Rubin which popularized the {EM} algorithm, we investigate, after a brief historical account, strategies that aim to make the {EM} algorithm converge faster while maintaining its simplicity and stability (e.g. automatic monotone convergence in likelihood). First we introduce the idea of a `working parameter' to facilitate the search for efficient data augmentation schemes and thus fast {EM} implementations. Second, summarizing various recent extensions of the {EM} algorithm, we formulate a general alternating expectation-conditional maximization algorithm {AECM} that couples flexible data augmentation schemes with model reduction schemes to achieve efficient computations. We illustrate these methods using multivariate t-models with known or unknown degrees of freedom and Poisson models for image reconstruction. We show, through both empirical and theoretical evidence, the potential for a dramatic reduction in computational time with little increase in human effort. We also discuss the intrinsic connection between {EM-type} algorithms and the Gibbs sampler, and the possibility of using the techniques presented here to speed up the latter. The main conclusion of the paper is that, with the help of statistical considerations, it is possible to construct algorithms that are simple, stable and fast.},
	number = {3},
	journal = {Journal of the Royal Statistical Society. Series B (Methodological)},
	author = {Meng, Xiao-Li and van Dyk, David},
	year = {1997},
	pages = {511--567},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/BFF8WKH6/Meng and Dyk - 1997 - The EM Algorithm--An Old Folk-Song Sung to a Fast .pdf:application/pdf}
},

@article{pendergast_survey_1996,
	title = {A Survey of Methods for Analyzing Clustered Binary Response Data},
	volume = {64},
	issn = {0306-7734},
	url = {http://www.jstor.org/stable/1403425},
	doi = {10.2307/1403425},
	abstract = {A comprehensive survey of regression-type models for clusters of correlated binary outcomes, including longitudinal data, is presented. In particular, we focus on models which can accommodate both between- and within-cluster categorical and continuous covariates. Emphasis is given to motivation of the model specification, interrelationships among models, parameter testing and interpretation, estimation methods (including both likelihood and non-likelihood approaches), computational issues, availability of software and other implementation issues, and to the advantages and disadvantages of the various approaches. Models discussed include naïve and response feature models, conditionally specified models, marginal models, and cluster-specific models. Extensions to ordinal data and relationships to graphical representations of models are also discussed. /// Une étude exhaustive sur les problèmes de classification basées sur des modèles de régression appliqués à des données binaires corrélées (y inclus le cas de données longitudinales) est présenté. En particulier, on se concentre sur les modèles pouvant incorporer à la fois des variables explicatives à l'intérieur des classes et entre les classes que ces variables soient catégorielles ou continues. Une emphase est mise sur les thèmes suivants: la motivation associées aux spécifications du modèle, les connexions parmi les modèles, les tests concernant les paramètres et leurs interprétations, les méthodes d'estimation (incluant les méthodes basées sur la vraisemblance et celles qui ne le sont pas), les problèmes reliés aux calculs, la disponibilité de logiciels, les avantages et les défauts de certaines approches. Les modèles discutés comprennent ceux où les réponses sont naïves et ceux dont les réponses sont prédéterminées, les modèles conditionnellement fixés, les modèles marginalement fixés, les modèles où les classes sont prédéterminées à l'avance. Des extensions aux données ordinales et des relations avec les représentations graphiques sont aussi discutées.},
	number = {1},
	journal = {International Statistical Review / Revue Internationale de Statistique},
	author = {Pendergast, Jane F. and Gange, Stephen J. and Newton, Michael A. and Lindstrom, Mary J. and Palta, Mari and Fisher, Marian R.},
	month = apr,
	year = {1996},
	pages = {89--118},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/JRVHNQ94/Pendergast et al. - 1996 - A Survey of Methods for Analyzing Clustered Binary.pdf:application/pdf}
},

@article{goldstein_hierarchical_1995,
	author  = {Goldstein, Harvey},
	title   = {Hierarchical Data Modeling in the Social Sciences},
	journal = {Journal of Educational and Behavioral Statistics},
	year    = {1995},
	month   = jul,
	volume  = {20},
	number  = {2},
	pages   = {201--204},
	issn    = {1076-9986},
	doi     = {10.2307/1165357},
	url     = {http://www.jstor.org/stable/1165357},
	file    = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/N85RDHQS/Goldstein - 1995 - Hierarchical Data Modeling in the Social Sciences.pdf:application/pdf}
},

@article{goldstein_restricted_1989,
	author   = {Goldstein, Harvey},
	title    = {Restricted Unbiased Iterative Generalized Least-Squares Estimation},
	journal  = {Biometrika},
	year     = {1989},
	volume   = {76},
	number   = {3},
	pages    = {622--623},
	issn     = {0006-3444},
	doi      = {10.2307/2336130},
	url      = {http://www.jstor.org/stable/2336130},
	abstract = {It is shown that the iterative least-squares procedure for estimating the parameters in a general multilevel random coefficients linear model can be modified to produce unbiased estimates of the random parameters. In the multivariate normal case these are equivalent to restricted maximum likelihood estimates.},
	file     = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/D4U9CC82/Goldstein - 1989 - Restricted Unbiased Iterative Generalized Least-Sq.pdf:application/pdf}
},

@article{goldstein_multilevel_1991,
	title = {Multilevel Modelling of Survey Data},
	volume = {40},
	issn = {0039-0526},
	url = {http://www.jstor.org/stable/2348496},
	doi = {10.2307/2348496},
	number = {2},
	journal = {Journal of the Royal Statistical Society. Series D (The Statistician)},
	author = {Goldstein, Harvey},
	year = {1991},
	pages = {235--244},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/87NFSZFQ/Goldstein - 1991 - Multilevel Modelling of Survey Data.pdf:application/pdf}
},

@article{paterson_new_1991,
	title = {New Statistical Methods for Analysing Social Structures: An Introduction to Multilevel Models},
	volume = {17},
	issn = {0141-1926},
	shorttitle = {New Statistical Methods for Analysing Social Structures},
	url = {http://www.jstor.org/stable/1500648},
	abstract = {An introductory account is given of developments in multilevel modelling of educational and other social data. The technique is introduced with some simple examples and its importance is explained. Examples of applications in a number of areas are given, including repeated measures designs, school effectiveness studies, area-based studies and political opinion sample surveys. Almost all data collected in the social sciences have some form of inherent hierarchical structure, and this structure should be reflected in the statistical models that are used to analyse them. It is suggested that multilevel techniques and associated software packages have reached the stage when they can and should be applied routinely in the analysis of social data, and that failure to do so can result in potentially serious misinterpretations.},
	number = {4},
	journal = {British Educational Research Journal},
	author = {Paterson, Lindsay and Goldstein, Harvey},
	year = {1991},
	pages = {387--393},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/GG83DFCP/Paterson and Goldstein - 1991 - New Statistical Methods for Analysing Social Struc.pdf:application/pdf}
},

@article{jon_rasbash_efficient_1994,
	title = {Efficient Analysis of Mixed Hierarchical and Cross-Classified Random Structures Using a Multilevel Model},
	volume = {19},
	issn = {1076-9986},
	url = {http://www.jstor.org/stable/1165397},
	doi = {10.2307/1165397},
	abstract = {An efficient and straightforward procedure is described for specifying and estimating parameters of general mixed models which contain both hierarchical and crossed random factors. This is done using a model formulated for purely hierarchically structured data and generalizes the results of Raudenbush (1993). The exposition is for the continuous response linear model with natural extensions to generalized linear, nonlinear, and multivariate models.},
	number = {4},
	journal = {Journal of Educational and Behavioral Statistics},
	author = {Rasbash, Jon and Goldstein, Harvey},
	month = dec,
	year = {1994},
	pages = {337--350},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/5XF963DD/Jon Rasbash and Goldstein - 1994 - Efficient Analysis of Mixed Hierarchical and Cross.pdf:application/pdf}
},

@article{yang_multilevel_2000,
	title = {Multilevel Models for Repeated Binary Outcomes: Attitudes and Voting over the Electoral Cycle},
	volume = {163},
	issn = {0964-1998},
	shorttitle = {Multilevel Models for Repeated Binary Outcomes},
	url = {http://www.jstor.org/stable/2680508},
	abstract = {Models for fitting longitudinal binary responses are explored by using a panel study of voting intentions. A standard multilevel repeated measures logistic model is shown to be inadequate owing to a substantial proportion of respondents who maintain a constant response over time. A multivariate binary response model is shown to be a better fit to the data.},
	number = {1},
	journal = {Journal of the Royal Statistical Society. Series A (Statistics in Society)},
	author = {Yang, Min and Goldstein, Harvey and Heath, Anthony},
	month = jan,
	year = {2000},
	annote = {{ArticleType:} research-article / Full publication date: 2000 / Copyright © 2000 Royal Statistical Society},
	pages = {49--62},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/I56ISGMW/Yang et al. - 2000 - Multilevel Models for Repeated Binary Outcomes At.pdf:application/pdf}
},

@article{goldstein_multilevel_2007,
	author   = {Goldstein, Harvey and Bonnet, Gérard and Rocher, Thierry},
	title    = {Multilevel Structural Equation Models for the Analysis of Comparative Data on Educational Performance},
	journal  = {Journal of Educational and Behavioral Statistics},
	year     = {2007},
	volume   = {32},
	number   = {3},
	pages    = {252--286},
	issn     = {1076-9986},
	url      = {http://www.jstor.org/stable/20172084},
	abstract = {The Programme for International Student Assessment comparative study of reading performance among 15-year-olds is reanalyzed using statistical procedures that allow the full complexity of the data structures to be explored. The article extends existing multilevel factor analysis and structural equation models and shows how this can extract richer information from the data and provide better fits to the data. It shows how these models can be used fully to explore the dimensionality of the data and to provide efficient, single-stage models that avoid the need for multiple imputation procedures. Markov Chain Monte Carlo methodology for parameter estimation is described.},
	file     = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/QMKPMZ65/Goldstein et al. - 2007 - Multilevel Structural Equation Models for the Anal.pdf:application/pdf}
},

@article{yang_multivariate_2002,
	title = {Multivariate Multilevel Analyses of Examination Results},
	volume = {165},
	issn = {0964-1998},
	url = {http://www.jstor.org/stable/3559765},
	abstract = {In the study of examination results much interest centres on comparisons of curriculum subjects entered and the correlation between these at individual and institution level based on data where not every individual takes all subjects. Such 'missing' data are not missing at random because individuals deliberately select subjects that they wish to study according to criteria that will be associated with their performance. In this paper we propose multivariate multilevel models for the analysis of such data, adjusting for such subject selection effects as well as for prior achievement. This then enables more appropriate institutional comparisons and correlation estimates. We analyse A- and {AS-level} results in different mathematics papers of 52 587 students from 2592 institutions in England in 1997. Although this paper is concerned largely with methodology, substantive findings emerge on the effects of gender, age, intakes of General Certificate of Education pupils, examination board and establishment type for A- and {AS-level} mathematics.},
	number = {1},
	journal = {Journal of the Royal Statistical Society. Series A (Statistics in Society)},
	author = {Yang, Min and Goldstein, Harvey and Browne, William and Woodhouse, Geoffrey},
	month = jan,
	year = {2002},
	annote = {{ArticleType:} research-article / Full publication date: 2002 / Copyright © 2002 Royal Statistical Society},
	pages = {137--153},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/FFXUPZPQ/Yang et al. - 2002 - Multivariate Multilevel Analyses of Examination Re.pdf:application/pdf}
},

@article{goldstein_multilevel_2000,
	title = {Multilevel Models in the Study of Dynamic Household Structures},
	volume = {16},
	issn = {0168-6577},
	url = {http://www.jstor.org/stable/20164120},
	abstract = {A modelling procedure is proposed for complex, dynamic household data structures where households change composition over time. Multilevel multiple membership models are presented for such data and their application is discussed with an example. /// Une procédure de modélisation est proposée pour des données dynamiques sur la structure des ménages dont la composition change au cours du temps. Des modèles multiniveaux à adhésion multiple sont présentés pour de telles données et leur application est discutée sur un exemple précis.},
	number = {4},
	journal = {European Journal of Population / Revue Européenne de Démographie},
	author = {Goldstein, Harvey and Rasbash, Jon and Browne, William and Woodhouse, Geoffrey and Poulain, Michel},
	month = dec,
	year = {2000},
	pages = {373--387},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/XBHJ982G/Goldstein et al. - 2000 - Multilevel Models in the Study of Dynamic Househol.pdf:application/pdf}
},

@article{langford_multilevel_1999,
	title = {Multilevel Modelling of the Geographical Distributions of Diseases},
	volume = {48},
	issn = {0035-9254},
	url = {http://www.jstor.org/stable/2680801},
	abstract = {Multilevel modelling is used on problems arising from the analysis of spatially distributed health data. We use three applications to demonstrate the use of multilevel modelling in this area. The first concerns small area all-cause mortality rates from Glasgow where spatial autocorrelation between residuals is examined. The second analysis is of prostate cancer cases in Scottish counties where we use a range of models to examine whether the incidence is higher in more rural areas. The third develops a multiple-cause model in which deaths from cancer and cardiovascular disease in Glasgow are examined simultaneously in a spatial model. We discuss some of the issues surrounding the use of complex spatial models and the potential for future developments.},
	number = {2},
	journal = {Journal of the Royal Statistical Society. Series C (Applied Statistics)},
	author = {Langford, Ian H. and Leyland, Alastair H. and Rasbash, Jon and Goldstein, Harvey},
	year = {1999},
	pages = {253--268},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/XMJNBCHK/Langford et al. - 1999 - Multilevel Modelling of the Geographical Distribut.pdf:application/pdf}
},

@article{goldstein_modelling_2003,
	author   = {Goldstein, Harvey and Noden, Philip},
	title    = {Modelling Social Segregation},
	journal  = {Oxford Review of Education},
	year     = {2003},
	month    = jun,
	volume   = {29},
	number   = {2},
	pages    = {225--237},
	issn     = {0305-4985},
	url      = {http://www.jstor.org/stable/1050612},
	abstract = {This paper proposes a multilevel modelling approach to the analysis of social segregation in schools. Using data on free school meal eligibility it shows that the underlying variation between schools for the period 1994-1999 has increased. It also shows that the change is greater for selective than non-selective local education authorities {(LEAs).} It is suggested that the approach of this paper can be applied generally to the modelling of social segregation at institution level.},
	file     = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/BKAFGTTW/Goldstein and Noden - 2003 - Modelling Social Segregation.pdf:application/pdf}
},

@article{goldstein_multilevel_2009,
	title = {Multilevel Multivariate Modelling of Childhood Growth, Numbers of Growth Measurements and Adult Characteristics},
	volume = {172},
	issn = {0964-1998},
	url = {http://www.jstor.org/stable/20622526},
	abstract = {A general latent normal model for multilevel data with mixtures of response types is extended in the case of ordered responses to deal with variates having a large number of categories and including count data. An example is analysed by using repeated measures data on child growth and adult measures of body mass index and glucose. Applications are described that are concerned with the flexible prediction of adult measurements from collections of growth measurements and for studying the relationship between the number of measurement occasions and growth trajectories.},
	number = {3},
	journal = {Journal of the Royal Statistical Society. Series A (Statistics in Society)},
	author = {Goldstein, Harvey and Kounali, Daphne},
	month = jun,
	year = {2009},
	annote = {{ArticleType:} research-article / Issue Title: Recent Advances in Multilevel Modelling Methodology and Applications / Full publication date: Jun., 2009 / Copyright © 2009 Royal Statistical Society},
	pages = {599--613}
},

@article{gill_regression_1988,
	author   = {Gill, Richard D.},
	title    = {Regression Analysis for Incomplete Mixed Cross-Section and Time-Series Data by a Modified {EM} Algorithm},
	journal  = {Sankhyā: The Indian Journal of Statistics, Series B (1960-2002)},
	year     = {1988},
	month    = apr,
	volume   = {50},
	number   = {1},
	pages    = {95--102},
	issn     = {0581-5738},
	url      = {http://www.jstor.org/stable/25052526},
	abstract = {An iterative method is proposed for estimating a certain regression model with mixed cross-section and time-series data, where each observational unit is not necessarily available at each time point of the time series. We give theorems on consistency and asymptotic normality of estimators of the regression coefficients as the size of the cross-section increases while the length of the time series remains bounded. We discuss the connection between our method and the {EM} algorithm.},
	file     = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/M6HUFEX8/Gill - 1988 - Regression Analysis for Incomplete Mixed Cross-Sec.pdf:application/pdf}
},

@article{walker_em_1996,
	title = {An {EM} Algorithm for Nonlinear Random Effects Models},
	volume = {52},
	issn = {0006-341X},
	url = {http://www.jstor.org/stable/2533054},
	doi = {10.2307/2533054},
	abstract = {An {EM} algorithm for exact maximum likelihood estimation of a class of nonlinear random effects models is given. The M-steps are shown to be analytically tractable and the E-steps are evaluated using Monte Carlo integration. The algorithm depends on the specification of the missing data which is taken to be the random effects.},
	number = {3},
	journal = {Biometrics},
	author = {Walker, Stephen},
	year = {1996},
	pages = {934--944},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/WGIMW7SR/Walker - 1996 - An EM Algorithm for Nonlinear Random Effects Model.pdf:application/pdf}
},

@article{vonesh_generalized_2001,
	title = {Generalized Least Squares, {Taylor} Series Linearization, and {Fisher's} Scoring in Multivariate Nonlinear Regression},
	volume = {96},
	issn = {0162-1459},
	url = {http://www.jstor.org/stable/2670366},
	abstract = {In this article, we consider a general multivariate nonlinear regression setting in which the marginal mean and variance-covariance structure share a common set of regression parameters. Estimation is carried out via iteratively reweighted generalized least squares {(IRGLS)} that entails repeated application of Taylor series linearization and estimated generalized least squares {(EGLS).} Under normality, this {IRGLS} procedure is equivalent to Fisher's method of scoring and hence maximum likelihood estimation {(MLE).} However, estimates from this procedure are also shown to minimize a bias-corrected generalized least squares objective function that does not require the assumption of normality. Under fairly mild regularity conditions, the resulting estimates are consistent, asymptotically normal, and-under normality assumptions-asymptotically efficient. The estimates are compared against those obtained as solutions to the usual generalized estimating equations {(GEE)} using both simulation and numerical examples.},
	number = {453},
	journal = {Journal of the American Statistical Association},
	author = {Vonesh, Edward F. and Wang, Hao and Majumdar, Dibyen},
	month = mar,
	year = {2001},
	pages = {282--291},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/N2ZA4T35/Vonesh et al. - 2001 - Generalized Least Squares, Taylor Series Lineariza.pdf:application/pdf}
},

@article{zeger_generalized_1991,
	title = {Generalized Linear Models With Random Effects; A {Gibbs} Sampling Approach},
	volume = {86},
	issn = {0162-1459},
	url = {http://www.jstor.org/stable/2289717},
	doi = {10.2307/2289717},
	abstract = {Generalized linear models have unified the approach to regression for a wide variety of discrete, continuous, and censored response variables that can be assumed to be independent across experimental units. In applications such as longitudinal studies, genetic studies of families, and survey sampling, observations may be obtained in clusters. Responses from the same cluster cannot be assumed to be independent. With linear models, correlation has been effectively modeled by assuming there are cluster-specific random effects that derive from an underlying mixing distribution. Extensions of generalized linear models to include random effects has, thus far, been hampered by the need for numerical integration to evaluate likelihoods. In this article, we cast the generalized linear random effects model in a Bayesian framework and use a Monte Carlo method, the Gibbs sampler, to overcome the current computational limitations. The resulting algorithm is flexible to easily accommodate changes in the number of random effects and in their assumed distribution when warranted. The methodology is illustrated through a simulation study and an analysis of infectious disease data.},
	number = {413},
	journal = {Journal of the American Statistical Association},
	author = {Zeger, Scott L. and Karim, M. Rezaul},
	month = mar,
	year = {1991},
	pages = {79--86},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/PAFIJ22Z/Zeger and Karim - 1991 - Generalized Linear Models With Random Effects; A G.pdf:application/pdf}
},

@article{pinheiro_approximations_1995,
	author   = {Pinheiro, José C. and Bates, Douglas M.},
	title    = {Approximations to the Log-Likelihood Function in the Nonlinear Mixed-Effects Model},
	journal  = {Journal of Computational and Graphical Statistics},
	year     = {1995},
	month    = mar,
	volume   = {4},
	number   = {1},
	pages    = {12--35},
	issn     = {1061-8600},
	doi      = {10.2307/1390625},
	url      = {http://www.jstor.org/stable/1390625},
	abstract = {Nonlinear mixed-effects models have received a great deal of attention in the statistical literature in recent years because of the flexibility they offer in handling the unbalanced repeated-measures data that arise in different areas of investigation, such as pharmacokinetics and economics. Several different methods for estimating the parameters in nonlinear mixed-effects model have been proposed. We concentrate here on two of them--maximum likelihood and restricted maximum likelihood. A rather complex numerical issue for (restricted) maximum likelihood estimation in nonlinear mixed-effects models is the evaluation of the log-likelihood function of the data, because it involves the evaluation of a multiple integral that, in most cases, does not have a closed-form expression. We consider here four different approximations to the log-likelihood, comparing their computational and statistical properties. We conclude that the linear mixed-effects {(LME)} approximation suggested by Lindstrom and Bates, the Laplacian approximation, and Gaussian quadrature centered at the conditional modes of the random effects are quite accurate and computationally efficient. Gaussian quadrature centered at the expected value of the random effects is quite inaccurate for a smaller number of abscissas and computationally inefficient for a larger number of abscissas. Importance sampling is accurate, but quite inefficient computationally.},
	file     = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/PIFTQNAC/Pinheiro and Bates - 1995 - Approximations to the Log-Likelihood Function in t.pdf:application/pdf}
},

@article{dyk_art_2001,
	title = {The Art of Data Augmentation},
	volume = {10},
	issn = {1061-8600},
	url = {http://www.jstor.org/stable/1391021},
	abstract = {The term data augmentation refers to methods for constructing iterative optimization or sampling algorithms via the introduction of unobserved data or latent variables. For deterministic algorithms, the method was popularized in the general statistical community by the seminal article by Dempster, Laird, and Rubin on the {EM} algorithm for maximizing a likelihood function or, more generally, a posterior density. For stochastic algorithms, the method was popularized in the statistical literature by Tanner and Wong's Data Augmentation algorithm for posterior sampling and in the physics literature by Swendsen and Wang's algorithm for sampling from the Ising and Potts models and their generalizations; in the physics literature, the method of data augmentation is referred to as the method of auxiliary variables. Data augmentation schemes were used by Tanner and Wong to make simulation feasible and simple, while auxiliary variables were adopted by Swendsen and Wang to improve the speed of iterative simulation. In general, however, constructing data augmentation schemes that result in both simple and fast algorithms is a matter of art in that successful strategies vary greatly with the (observed-data) models being considered. After an overview of data augmentation/auxiliary variables and some recent developments in methods for constructing such efficient data augmentation schemes, we introduce an effective search strategy that combines the ideas of marginal augmentation and conditional augmentation, together with a deterministic approximation method for selecting good augmentation schemes. We then apply this strategy to three common classes of models (specifically, multivariate t, probit regression, and mixed-effects models) to obtain efficient Markov chain Monte Carlo algorithms for posterior sampling. 
We provide theoretical and empirical evidence that the resulting algorithms, while requiring similar programming effort, can show dramatic improvement over the Gibbs samplers commonly used for these models in practice. A key feature of all these new algorithms is that they are positive recurrent subchains of nonpositive recurrent Markov chains constructed in larger spaces.},
	number = {1},
	journal = {Journal of Computational and Graphical Statistics},
	author = {van Dyk, David A. and Meng, Xiao-Li},
	month = mar,
	year = {2001},
	note = {{ArticleType:} research-article / Full publication date: Mar., 2001 / Copyright © 2001 American Statistical Association, Institute of Mathematical Statistics and Interface Foundation of America},
	pages = {1--50},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/8V6S2U7V/Dyk and Meng - 2001 - The Art of Data Augmentation.pdf:application/pdf}
},

@article{lindstrom_correction_1994,
	title = {Correction to {Lindstrom} and {Bates} (1988): {Newton-Raphson} and {EM} Algorithms for Linear Mixed Effects Models for Repeated Measures Data},
	volume = {89},
	issn = {0162-1459},
	shorttitle = {Correction to {Lindstrom} and {Bates} (1988)},
	url = {http://www.jstor.org/stable/2291042},
	doi = {10.2307/2291042},
	number = {428},
	journal = {Journal of the American Statistical Association},
	author = {Lindstrom, Mary J. and Bates, Douglas M.},
	month = dec,
	year = {1994},
	pages = {1572},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/44CV6IMS/Lindstrom and Bates - 1994 - Correction to Lindstrom and Bates (1988) Newton-R.pdf:application/pdf}
},

@article{lindstrom_newton-raphson_1988,
	title = {{Newton-Raphson} and {EM} Algorithms for Linear Mixed-Effects Models for Repeated-Measures Data},
	volume = {83},
	issn = {0162-1459},
	url = {http://www.jstor.org/stable/2290128},
	doi = {10.2307/2290128},
	abstract = {We develop an efficient and effective implementation of the Newton-Raphson {(NR)} algorithm for estimating the parameters in mixed-effects models for repeated-measures data. We formulate the derivatives for both maximum likelihood and restricted maximum likelihood estimation and propose improvements to the algorithm discussed by Jennrich and Schluchter (1986) to speed convergence and ensure a positive-definite covariance matrix for the random effects at each iteration. We use matrix decompositions to develop efficient and computationally stable implementations of both the {NR} algorithm and an {EM} algorithm {(Laird} and Ware 1982) for this model. We compare the two methods {(EM} vs. {NR)} in terms of computational order and performance on two sample data sets and conclude that in most situations a well-implemented {NR} algorithm is preferable to the {EM} algorithm or {EM} algorithm with Aitken's acceleration. The term repeated measures refers to experimental designs where there are several individuals and several measurements taken on each individual. In the mixed-effects model each individual's vector of responses is modeled as a parametric function, where some of the parameters or "effects" are random variables with a multivariate normal distribution. This model has been successful because it can handle unbalanced data (different designs for different individuals), missing data (observations on all individuals are taken at the same design points, but some individuals have missing data), and jointly dependent random effects. The price for this flexibility is that the parameter estimates may be difficult to compute. We propose some new methods for implementing the {EM} and {NR} algorithms and draw conclusions about their performance. We also discuss extensions of the mixed-effects model to incorporate nonindependent conditional error structure and nested-type designs.},
	number = {404},
	journal = {Journal of the American Statistical Association},
	author = {Lindstrom, Mary J. and Bates, Douglas M.},
	month = dec,
	year = {1988},
	pages = {1014--1022},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/72B4G6M9/Lindstrom and Bates - 1988 - Newton-Raphson and EM Algorithms for Linear Mixed-.pdf:application/pdf}
},

@article{lindstrom_nonlinear_1990,
	title = {Nonlinear Mixed Effects Models for Repeated Measures Data},
	volume = {46},
	issn = {0006-341X},
	url = {http://www.jstor.org/stable/2532087},
	doi = {10.2307/2532087},
	abstract = {We propose a general, nonlinear mixed effects model for repeated measures data and define estimators for its parameters. The proposed estimators are a natural combination of least squares estimators for nonlinear fixed effects models and maximum likelihood (or restricted maximum likelihood) estimators for linear mixed effects models. We implement Newton-Raphson estimation using previously developed computational methods for nonlinear fixed effects models and for linear mixed effects models. Two examples are presented and the connections between this work and recent work on generalized linear mixed effects models are discussed.},
	number = {3},
	journal = {Biometrics},
	author = {Lindstrom, Mary J. and Bates, Douglas M.},
	year = {1990},
	pages = {673--687},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/G8CKNUIE/Lindstrom and Bates - 1990 - Nonlinear Mixed Effects Models for Repeated Measur.pdf:application/pdf}
},

@article{bates_[semiparametric_2001,
	author = {Bates, Douglas M. and Lindstrom, Mary J. and Wahba, Grace},
	title = {{[Semiparametric} Nonlinear Mixed-Effects Models and Their Applications]: Comment},
	shorttitle = {{[Semiparametric} Nonlinear Mixed-Effects Models and Their Applications]},
	journal = {Journal of the American Statistical Association},
	volume = {96},
	number = {456},
	pages = {1292--1293},
	month = dec,
	year = {2001},
	issn = {0162-1459},
	url = {http://www.jstor.org/stable/3085899},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/53X7AWF6/Bates et al. - 2001 - [Semiparametric Nonlinear Mixed-Effects Models and.pdf:application/pdf}
},

@article{li_new_2007,
	author = {Li, Runze and Nie, Lei},
	title = {A New Estimation Procedure for a Partially Nonlinear Model via a Mixed-Effects Approach},
	journal = {The Canadian Journal of Statistics / La Revue Canadienne de Statistique},
	volume = {35},
	number = {3},
	pages = {399--411},
	year = {2007},
	issn = {0319-5724},
	url = {http://www.jstor.org/stable/20445264},
	abstract = {The authors consider the estimation of the parametric component of a partially nonlinear semiparametric regression model whose nonparametric component is viewed as a nuisance parameter. They show how estimation can proceed through a nonlinear mixed-effects model approach. They prove that under certain regularity conditions, the proposed estimate is consistent and asymptotically Gaussian. They investigate its finite-sample properties through simulations and illustrate its use with data on the relation between the photosynthetically active radiation and the net ecosystem-atmosphere exchange of carbon dioxide. /// Les auteurs s'intéressent à l'estimation de la partie paramétrique d'un modèle de régression semiparamétrique partiellement non linéaire dont la composante non paramétrique est considérée comme nuisible. Ils montrent comment l'estimation est possible au moyen d'un modèle non linéaire à effets mixtes. Ils démontrent que sous certaines conditions de régularité, l'estimateur proposé est convergent et asymptotiquement gaussien. Ils en étudient le comportement à taille finie au moyen de simulations et en illustrent l'emploi à l'aide de données concernant le rayonnement absorbé par photosynthèse en relation avec le bilan des échanges en bioxyde de carbone entre l'atmosphère et l'écosystème.},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/4ZC3T6Q7/Li and Nie - 2007 - A New Estimation Procedure for a Partially Nonline.pdf:application/pdf}
},

@article{lombardia_semiparametric_2008,
	title = {Semiparametric Inference in Generalized Mixed Effects Models},
	volume = {70},
	issn = {1369-7412},
	url = {http://www.jstor.org/stable/20203863},
	abstract = {The paper presents a study of the generalized partially linear model including random effects in its linear part. We propose an estimator that combines likelihood approaches for mixed effects models, with kernel methods. Following the methodology of Härdle and co-workers, we introduce a test for the hypothesis of a parametric mixed effects model against the alternative of a semiparametric mixed effects model. The critical values are estimated by using a bootstrap procedure. The asymptotic theory for the methods is provided, as are the results of a simulation study. These verify the feasibility and the excellent behaviour of the methods for samples of even moderate size. The usefulness of the methodology is illustrated with an application in which the objective is to estimate forest coverage in Galicia, Spain.},
	number = {5},
	journal = {Journal of the Royal Statistical Society. Series B (Statistical Methodology)},
	author = {Lombardía, María José and Sperlich, Stefan},
	month = nov,
	year = {2008},
	pages = {913--930}
},

@article{liu_simultaneous_2007,
	title = {Simultaneous Inference for Semiparametric Nonlinear Mixed-Effects Models with Covariate Measurement Errors and Missing Responses},
	volume = {63},
	issn = {0006-341X},
	url = {http://www.jstor.org/stable/4541344},
	abstract = {Semiparametric nonlinear mixed-effects {(NLME)} models are flexible for modeling complex longitudinal data. Covariates are usually introduced in the models to partially explain interindividual variations. Some covariates, however, may be measured with substantial errors. Moreover, the responses may be missing and the missingness may be nonignorable. We propose two approximate likelihood methods for semiparametric {NLME} models with covariate measurement errors and nonignorable missing responses. The methods are illustrated in a real data example. Simulation results show that both methods perform well and are much better than the commonly used naive method. /// Les modèles semi-paramétriques mixtes non linéaires {(NLME}, {≪NonLinear} Mixed-Effects≫) offrent une grande souplesse dans l'étude de données longitudinales avec des structures complexes. Les covariables sont habituellement introduites dans ces modèles afin d'expliquer une partie des variations inter-individuelles, Toutefois certaines covariables sont mesurées avec des erreurs conséquentes. Par ailleurs les réponses peuvent être manquantes avec une censure informative. Nous proposons deux méthodes basées sur la vraisemblance pour des modèles semi-paramétriques {NLME} en présence d'erreurs de mesures sur des covariables et des données manquantes de type ≪non ignorables≫. Les méthodes sont illustrées à partir d'un jeu réel de données. Les résultats obtenus par simulation mettent en évidence de bonnes performances des deux méthodes, bien meilleures que la méthode ≪naïve≫ communément utilisée.},
	number = {2},
	journal = {Biometrics},
	author = {Liu, Wei and Wu, Lang},
	month = jun,
	year = {2007},
	pages = {342--350}
},

@article{zhao_general_2006,
	title = {General Design {Bayesian} Generalized Linear Mixed Models},
	volume = {21},
	issn = {0883-4237},
	url = {http://www.jstor.org/stable/27645735},
	abstract = {Linear mixed models are able to handle an extraordinary range of complications in regression-type analyses. Their most common use is to account for within-subject correlation in longitudinal data analysis. They are also the standard vehicle for smoothing spatial count data. However, when treated in full generality, mixed models can also handle spline-type smoothing and closely approximate kriging. This allows for nonparametric regression models (e.g., additive models and varying coefficient models) to be handled within the mixed model framework. The key is to allow the random effects design matrix to have general structure; hence our label general design. For continuous response data, particularly when Gaussianity of the response is reasonably assumed, computation is now quite mature and supported by the R, {SAS} and {S-PLUS} packages. Such is not the case for binary and count responses, where generalized linear mixed models {(GLMMs)} are required, but are hindered by the presence of intractable multivariate integrals. Software known to us supports special cases of the {GLMM} (e.g., {PROC} {NLMIXED} in {SAS} or {glmmML} in R) or relies on the sometimes crude Laplace-type approximation of integrals (e.g., the {SAS} macro glimmix or {glmmPQL} in R). This paper describes the fitting of general design generalized linear mixed models. A Bayesian approach is taken and Markov chain Monte Carlo {(MCMC)} is used for estimation and inference. In this generalized setting, {MCMC} requires sampling from nonstandard distributions. In this article, we demonstrate that the {MCMC} package {WinBUGS} facilitates sound fitting of general design Bayesian generalized linear mixed models in practice.},
	number = {1},
	journal = {Statistical Science},
	author = {Zhao, Y. and Staudenmayer, J. and Coull, B. A. and Wand, M. P.},
	month = feb,
	year = {2006},
	pages = {35--51},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/HT3ZRMXU/Zhao et al. - 2006 - General Design Bayesian Generalized Linear Mixed M.pdf:application/pdf}
},

@article{rigby_generalized_2005,
	title = {Generalized Additive Models for Location, Scale and Shape},
	volume = {54},
	issn = {0035-9254},
	url = {http://www.jstor.org/stable/3592732},
	abstract = {A general class of statistical models for a univariate response variable is presented which we call the generalized additive model for location, scale and shape {(GAMLSS).} The model assumes independent observations of the response variable y given the parameters, the explanatory variables and the values of the random effects. The distribution for the response variable in the {GAMLSS} can be selected from a very general family of distributions including highly skew or kurtotic continuous and discrete distributions. The systematic part of the model is expanded to allow modelling not only of the mean (or location) but also of the other parameters of the distribution of y, as parametric and/or additive nonparametric (smooth) functions of explanatory variables and/or random-effects terms. Maximum (penalized) likelihood estimation is used to fit the (non)parametric models. A Newton-Raphson or Fisher scoring algorithm is used to maximize the (penalized) likelihood. The additive terms in the model are fitted by using a backfitting algorithm. Censored data are easily incorporated into the framework. Five data sets from different fields of application are analysed to emphasize the generality of the {GAMLSS} class of models.},
	number = {3},
	journal = {Journal of the Royal Statistical Society. Series C (Applied Statistics)},
	author = {Rigby, R. A. and Stasinopoulos, D. M.},
	month = jan,
	year = {2005},
	pages = {507--554},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/RECEPTRR/Rigby and Stasinopoulos - 2005 - Generalized Additive Models for Location, Scale an.pdf:application/pdf}
},

@article{bolfarine_influence_2007,
	author = {Bolfarine, Heleno and Montenegro, Lourdes C. and Lachos, Victor H.},
	title = {Influence Diagnostics for Skew-Normal Linear Mixed Models},
	journal = {Sankhyā: The Indian Journal of Statistics (2003-2007)},
	volume = {69},
	number = {4},
	pages = {648--670},
	month = nov,
	year = {2007},
	issn = {0972-7671},
	url = {http://www.jstor.org/stable/25664584},
	abstract = {Normality (symmetry) of the random effects is a routine assumption in linear mixed models but it may, sometimes, be unrealistic, obscuring important features of among-subjects variation. We relax this assumption by assuming that the random effects density is skew-normal, considered as an extension of the univariate version proposed by Sahu, Dey and Branco {(CJS}, 2003). Following Zhu and Lee {(JRSSB}, 2001), we implement an {EM-type} algorithm to parameter estimation and then using the related conditional expectation of the complete-data log-likelihood function, develop diagnostic measures for implementing the local influence approach under four model perturbation schemes. Results obtained from simulated and real data sets are reported illustrating the usefulness of the approach.},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/UXETFGT7/Bolfarine et al. - 2007 - Influence Diagnostics for Skew-Normal Linear Mixed.pdf:application/pdf}
},

@article{mcculloch_generalized_2003,
	author = {{McCulloch}, Charles E.},
	title = {Generalized Linear Mixed Models},
	journal = {{NSF-CBMS} Regional Conference Series in Probability and Statistics},
	volume = {7},
	pages = {i--84},
	month = jan,
	year = {2003},
	issn = {1935-5920},
	url = {http://www.jstor.org/stable/4153190},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/TUQWN8TG/McCulloch - 2003 - Generalized Linear Mixed Models.pdf:application/pdf}
},

@article{coull_crossed_2001,
	author = {Coull, Brent A. and Hobert, James P. and Ryan, Louise M. and Holmes, Lewis B.},
	title = {Crossed Random Effect Models for Multiple Outcomes in a Study of Teratogenesis},
	journal = {Journal of the American Statistical Association},
	volume = {96},
	number = {456},
	pages = {1194--1204},
	month = dec,
	year = {2001},
	issn = {0162-1459},
	url = {http://www.jstor.org/stable/3085882},
	abstract = {Human teratogens often manifest themselves through a broad spectrum of adverse effects. Although often not serious when considered individually, such outcomes taken together may represent a syndrome that can lead to serious developmental problems. Accordingly, studies that investigate the effect of human teratogens on fetal development typically record the presence or absence of a multitude of abnormalities, resulting in the data of multivariate binary form for each infant. Such studies typically have three objectives: (1) estimate an overall effect of exposure across outcomes, (2) identify subjects having the syndrome, and (3) identify those outcomes that constitute the syndrome so that doctors know what to look for when diagnosing the syndrome in other exposed newborns. This article proposes the use of a logistic regression model with crossed random effect structure to address all three questions simultaneously. We use the proposed models to analyze data from a study investigating the effects of in utero antiepileptic drug exposure on fetal development.},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/QH6NU944/Coull et al. - 2001 - Crossed Random Effect Models for Multiple Outcomes.pdf:application/pdf}
},

@article{lee_hierarchical_1996,
	author = {Lee, Y. and Nelder, J. A.},
	title = {Hierarchical Generalized Linear Models},
	journal = {Journal of the Royal Statistical Society. Series B {(Methodological)}},
	volume = {58},
	number = {4},
	pages = {619--678},
	month = jan,
	year = {1996},
	issn = {0035-9246},
	url = {http://www.jstor.org/stable/2346105},
	abstract = {We consider hierarchical generalized linear models which allow extra error components in the linear predictors of generalized linear models. The distribution of these components is not restricted to be normal; this allows a broader class of models, which includes generalized linear mixed models. We use a generalization of Henderson's joint likelihood, called a hierarchical or h-likelihood, for inferences from hierarchical generalized linear models. This avoids the integration that is necessary when marginal likelihood is used. Under appropriate conditions maximizing the h-likelihood gives fixed effect estimators that are asymptotically equivalent to those obtained from the use of marginal likelihood; at the same time we obtain the random effect estimates that are asymptotically best unbiased predictors. An adjusted profile h-likelihood is shown to give the required generalization of restricted maximum likelihood for the estimation of dispersion components. A scaled deviance test for the goodness of fit, a model selection criterion for choosing between various dispersion models and a graphical method for checking the distributional assumption of random effects are proposed. The ideas of quasi-likelihood and extended quasi-likelihood are generalized to the new class. We give examples of the Poisson-gamma, binomial-beta and gamma-inverse gamma hierarchical generalized linear models. A resolution is proposed for the apparent difference between population-averaged and subject-specific models. A unified framework is provided for viewing and extending many existing methods.},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/VSUHIJ5K/Lee and Nelder - 1996 - Hierarchical Generalized Linear Models.pdf:application/pdf}
},

@article{lin_semiparametric_2006,
	title = {Semiparametric Estimation in General Repeated Measures Problems},
	volume = {68},
	issn = {1369-7412},
	url = {http://www.jstor.org/stable/3647557},
	abstract = {The paper considers a wide class of semiparametric problems with a parametric part for some covariate effects and repeated evaluations of a nonparametric function. Special cases in our approach include marginal models for longitudinal or clustered data, conditional logistic regression for matched case-control studies, multivariate measurement error models, generalized linear mixed models with a semiparametric component, and many others. We propose profile kernel and backfitting estimation methods for these problems, derive their asymptotic distributions and show that in likelihood problems the methods are semiparametric efficient. Although generally not true, it transpires that with our methods profiling and backfitting are asymptotically equivalent. We also consider pseudolikelihood methods where some nuisance parameters are estimated from a different algorithm. The methods proposed are evaluated by using simulation studies and applied to the Kenya haemoglobin data.},
	number = {1},
	journal = {Journal of the Royal Statistical Society. Series B (Statistical Methodology)},
	author = {Lin, Xihong and Carroll, Raymond J.},
	month = jan,
	year = {2006},
	pages = {69--88},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/VQ9DGFDX/Lin and Carroll - 2006 - Semiparametric Estimation in General Repeated Meas.pdf:application/pdf}
},

@article{chatterjee_parametric_2008,
	title = {Parametric Bootstrap Approximation to the Distribution of {EBLUP} and Related Prediction Intervals in Linear Mixed Models},
	volume = {36},
	issn = {0090-5364},
	url = {http://www.jstor.org/stable/25464665},
	abstract = {Empirical best linear unbiased prediction {(EBLUP)} method uses a linear mixed model in combining information from different sources of information. This method is particularly useful in small area problems. The variability of an {EBLUP} is traditionally measured by the mean squared prediction error {(MSPE)}, and interval estimates are generally constructed using estimates of the {MSPE.} Such methods have shortcomings like under-coverage or over-coverage, excessive length and lack of interpretability. We propose a parametric bootstrap approach to estimate the entire distribution of a suitably centered and scaled {EBLUP.} The bootstrap histogram is highly accurate, and differs from the true {EBLUP} distribution by only {$O(d^{3}n^{-3/2})$}, where d is the number of parameters and n the number of observations. This result is used to obtain highly accurate prediction intervals. Simulation results demonstrate the superiority of this method over existing techniques of constructing prediction intervals in linear mixed models.},
	number = {3},
	journal = {The Annals of Statistics},
	author = {Chatterjee, Snigdhansu and Lahiri, Partha and Li, Huilin},
	month = jun,
	year = {2008},
	note = {{ArticleType:} research-article / Full publication date: Jun., 2008 / Copyright © 2008 Institute of Mathematical Statistics},
	pages = {1221--1245},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/C2KA2PCX/Chatterjee et al. - 2008 - Parametric Bootstrap Approximation to the Distribu.pdf:application/pdf}
},

@article{qiu_simplex_2008,
	title = {Simplex Mixed-Effects Models for Longitudinal Proportional Data},
	volume = {35},
	issn = {0303-6898},
	url = {http://www.jstor.org/stable/41000290},
	abstract = {Continuous proportional outcomes are collected from many practical studies, where responses are confined within the unit interval (0,1). Utilizing Barndorff-Nielsen and Jorgensen's simplex distribution, we propose a new type of generalized linear mixed-effects model for longitudinal proportional data, where the expected value of proportion is directly modelled through a logit function of fixed and random effects. We establish statistical inference along the lines of Breslow and Clayton's penalized quasi-likelihood {(PQL)} and restricted maximum likelihood {(REML)} in the proposed model. We derive the {PQL/REML} using the high-order multivariate Laplace approximation, which gives satisfactory estimation of the model parameters. The proposed model and inference are illustrated by simulation studies and a data example. The simulation studies conclude that the fourth order approximate {PQL/REML} performs satisfactorily. The data example shows that Aitchison's technique of the normal linear mixed model for logit-transformed proportional outcomes is not robust against outliers.},
	number = {4},
	journal = {Scandinavian Journal of Statistics},
	author = {Qiu, Zhenguo and Song, Peter X.-K. and Tan, Ming},
	month = dec,
	year = {2008},
	pages = {577--596}
},

@article{muller_semiparametric_2007,
	title = {Semiparametric {Bayesian} Inference for Multilevel Repeated Measurement Data},
	volume = {63},
	issn = {0006-341X},
	url = {http://www.jstor.org/stable/4541324},
	abstract = {We discuss inference for data with repeated measurements at multiple levels. The motivating example is data with blood counts from cancer patients undergoing multiple cycles of chemotherapy, with days nested within cycles. Some inference questions relate to repeated measurements over days within cycle, while other questions are concerned with the dependence across cycles. When the desired inference relates to both levels of repetition, it becomes important to reflect the data structure in the model. We develop a semiparametric Bayesian modeling approach, restricting attention to two levels of repeated measurements. For the top-level longitudinal sampling model we use random effects to introduce the desired dependence across repeated measurements. We use a nonparametric prior for the random effects distribution. Inference about dependence across second-level repetition is implemented by the clustering implied in the nonparametric random effects model. Practical use of the model requires that the posterior distribution on the latent random effects be reasonably precise. /// Nous discutons de l'inférence pour des données de mesures répétées à plusieurs niveaux. L'exemple qui motive ce travail consiste en des données de comptage sanguin pour des patients subissant une chimiothérapie comportant plusieurs cycles, avec les jours emboîtés dans les cycles. Certaines questions d'inférence sont reliées aux mesures répétées à l'intérieur d'un cycle tandis que d'autres concernent la dépendance entre cycles. Si l'inférence désirée se rapporte à tous les niveaux de répétition, il devient important de refléter la structure des données dans le modèle. Nous développons une approche bayésienne semi-paramétrique en restreignant l'attention aux deux niveaux de mesures répétées. Pour le niveau le plus haut de l'échantillonnage longitudinal nous utilisons des effets aléatoires pour introduire la dépendance désirée entre les mesures répétées. 
Nous utilisons un a priori non paramétrique pour la distribution des effets aléatoires. L'inférence sur le second niveau de répétitions est rendue possible par l'effet de groupes impliqué dans le modèle à effets aléatoires non paramétriques. L'utilisation pratique du modèle requiert que la distribution a posteriori sur les effets aléatoires latents soit raisonnablement précise.},
	number = {1},
	journal = {Biometrics},
	author = {Müller, Peter and Quintana, Fernando A. and Rosner, Gary L.},
	month = mar,
	year = {2007},
	pages = {280--289}
},

@article{stubbendick_maximum_2003,
	title = {Maximum Likelihood Methods for Nonignorable Missing Responses and Covariates in Random Effects Models},
	volume = {59},
	issn = {0006-341X},
	url = {http://www.jstor.org/stable/3695356},
	abstract = {This article analyzes quality of life {(QOL)} data from an Eastern Cooperative Oncology Group {(ECOG)} melanoma trial that compared treatment with ganglioside vaccination to treatment with high-dose interferon. The analysis of this data set is challenging due to several difficulties, namely, nonignorable missing longitudinal responses and baseline covariates. Hence, we propose a selection model for estimating parameters in the normal random effects model with nonignorable missing responses and covariates. Parameters are estimated via maximum likelihood using the Gibbs sampler and a Monte Carlo expectation maximization {(EM)} algorithm. Standard errors are calculated using the bootstrap. The method allows for nonmonotone patterns of missing data in both the response variable and the covariates. We model the missing data mechanism and the missing covariate distribution via a sequence of one-dimensional conditional distributions, allowing the missing covariates to be either categorical or continuous, as well as time-varying. We apply the proposed approach to the {ECOG} quality-of-life data and conduct a small simulation study evaluating the performance of the maximum likelihood estimates. Our results indicate that a patient treated with the vaccine has a higher {QOL} score on average at a given time point than a patient treated with high-dose interferon.},
	number = {4},
	journal = {Biometrics},
	author = {Stubbendick, Amy L. and Ibrahim, Joseph G.},
	month = dec,
	year = {2003},
	pages = {1140--1150},
	file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/79VWA33P/Stubbendick and Ibrahim - 2003 - Maximum Likelihood Methods for Nonignorable Missin.pdf:application/pdf}
}